From e272ec0a434189c08683414f3e2e889b0caab24a Mon Sep 17 00:00:00 2001 From: kaleb-himes Date: Thu, 25 Jan 2024 14:07:46 -0700 Subject: [PATCH] Add ARMv8 PAA support to base v5.2.1 resulting in v5.2.3 --- wolfcrypt/src/port/arm/armv8-32-aes-asm.S | 5303 + wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c | 4784 + wolfcrypt/src/port/arm/armv8-32-sha256-asm.S | 2864 + .../src/port/arm/armv8-32-sha256-asm_c.c | 2800 + wolfcrypt/src/port/arm/armv8-32-sha512-asm.S | 11459 +- .../src/port/arm/armv8-32-sha512-asm_c.c | 9157 + wolfcrypt/src/port/arm/armv8-aes.c | 16603 +- wolfcrypt/src/port/arm/armv8-sha256.c | 215 +- wolfcrypt/src/port/arm/armv8-sha3-asm.S | 207 + wolfcrypt/src/port/arm/armv8-sha3-asm_c.c | 178 + wolfcrypt/src/port/arm/armv8-sha512-asm.S | 43 +- wolfcrypt/src/port/arm/armv8-sha512-asm_c.c | 1660 + wolfcrypt/src/port/arm/armv8-sha512.c | 52 +- wolfcrypt/src/sp_arm32.c | 157010 ++++++++++++--- wolfcrypt/src/sp_arm64.c | 73686 +++++-- wolfcrypt/src/sp_c32.c | 21004 +- wolfcrypt/src/sp_c64.c | 19951 +- wolfssl/wolfcrypt/aes.h | 35 +- wolfssl/wolfcrypt/sha512.h | 16 + 19 files changed, 263774 insertions(+), 63253 deletions(-) create mode 100644 wolfcrypt/src/port/arm/armv8-32-aes-asm.S create mode 100644 wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c create mode 100644 wolfcrypt/src/port/arm/armv8-32-sha256-asm.S create mode 100644 wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c create mode 100644 wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c create mode 100644 wolfcrypt/src/port/arm/armv8-sha3-asm.S create mode 100644 wolfcrypt/src/port/arm/armv8-sha3-asm_c.c create mode 100644 wolfcrypt/src/port/arm/armv8-sha512-asm_c.c diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S new file mode 100644 index 000000000..4fcf43a0f --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -0,0 +1,5303 @@ +/* armv8-32-aes-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. 
+ * + * Contact licensing@wolfssl.com with any questions or comments. + * + * https://www.wolfssl.com + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./aes/aes.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#ifndef WOLFSSL_ARMASM_INLINE +#ifndef NO_AES +#ifdef HAVE_AES_DECRYPT + .text + .type L_AES_ARM32_td_data, %object + .size L_AES_ARM32_td_data, 1024 + .align 4 +L_AES_ARM32_td_data: + .word 0x5051f4a7 + .word 0x537e4165 + .word 0xc31a17a4 + .word 0x963a275e + .word 0xcb3bab6b + .word 0xf11f9d45 + .word 0xabacfa58 + .word 0x934be303 + .word 0x552030fa + .word 0xf6ad766d + .word 0x9188cc76 + .word 0x25f5024c + .word 0xfc4fe5d7 + .word 0xd7c52acb + .word 0x80263544 + .word 0x8fb562a3 + .word 0x49deb15a + .word 0x6725ba1b + .word 0x9845ea0e + .word 0xe15dfec0 + .word 0x2c32f75 + .word 0x12814cf0 + .word 0xa38d4697 + .word 0xc66bd3f9 + .word 0xe7038f5f + .word 0x9515929c + .word 0xebbf6d7a + .word 0xda955259 + .word 0x2dd4be83 + .word 0xd3587421 + .word 0x2949e069 + .word 0x448ec9c8 + .word 0x6a75c289 + .word 0x78f48e79 + .word 0x6b99583e + .word 0xdd27b971 + .word 0xb6bee14f + .word 0x17f088ad + .word 0x66c920ac + .word 0xb47dce3a + .word 0x1863df4a + .word 0x82e51a31 + .word 0x60975133 + .word 0x4562537f + .word 0xe0b16477 + .word 0x84bb6bae + .word 0x1cfe81a0 + .word 0x94f9082b + .word 0x58704868 + .word 0x198f45fd + .word 0x8794de6c + .word 0xb7527bf8 + .word 0x23ab73d3 + .word 0xe2724b02 + .word 0x57e31f8f + .word 0x2a6655ab + .word 0x7b2eb28 + .word 0x32fb5c2 + .word 0x9a86c57b + .word 0xa5d33708 + .word 0xf2302887 + .word 0xb223bfa5 + .word 0xba02036a + .word 0x5ced1682 + .word 0x2b8acf1c + .word 0x92a779b4 + .word 0xf0f307f2 + .word 0xa14e69e2 + .word 0xcd65daf4 + .word 0xd50605be + .word 0x1fd13462 + .word 0x8ac4a6fe + .word 0x9d342e53 + .word 0xa0a2f355 + .word 0x32058ae1 
+ .word 0x75a4f6eb + .word 0x390b83ec + .word 0xaa4060ef + .word 0x65e719f + .word 0x51bd6e10 + .word 0xf93e218a + .word 0x3d96dd06 + .word 0xaedd3e05 + .word 0x464de6bd + .word 0xb591548d + .word 0x571c45d + .word 0x6f0406d4 + .word 0xff605015 + .word 0x241998fb + .word 0x97d6bde9 + .word 0xcc894043 + .word 0x7767d99e + .word 0xbdb0e842 + .word 0x8807898b + .word 0x38e7195b + .word 0xdb79c8ee + .word 0x47a17c0a + .word 0xe97c420f + .word 0xc9f8841e + .word 0x0 + .word 0x83098086 + .word 0x48322bed + .word 0xac1e1170 + .word 0x4e6c5a72 + .word 0xfbfd0eff + .word 0x560f8538 + .word 0x1e3daed5 + .word 0x27362d39 + .word 0x640a0fd9 + .word 0x21685ca6 + .word 0xd19b5b54 + .word 0x3a24362e + .word 0xb10c0a67 + .word 0xf9357e7 + .word 0xd2b4ee96 + .word 0x9e1b9b91 + .word 0x4f80c0c5 + .word 0xa261dc20 + .word 0x695a774b + .word 0x161c121a + .word 0xae293ba + .word 0xe5c0a02a + .word 0x433c22e0 + .word 0x1d121b17 + .word 0xb0e090d + .word 0xadf28bc7 + .word 0xb92db6a8 + .word 0xc8141ea9 + .word 0x8557f119 + .word 0x4caf7507 + .word 0xbbee99dd + .word 0xfda37f60 + .word 0x9ff70126 + .word 0xbc5c72f5 + .word 0xc544663b + .word 0x345bfb7e + .word 0x768b4329 + .word 0xdccb23c6 + .word 0x68b6edfc + .word 0x63b8e4f1 + .word 0xcad731dc + .word 0x10426385 + .word 0x40139722 + .word 0x2084c611 + .word 0x7d854a24 + .word 0xf8d2bb3d + .word 0x11aef932 + .word 0x6dc729a1 + .word 0x4b1d9e2f + .word 0xf3dcb230 + .word 0xec0d8652 + .word 0xd077c1e3 + .word 0x6c2bb316 + .word 0x99a970b9 + .word 0xfa119448 + .word 0x2247e964 + .word 0xc4a8fc8c + .word 0x1aa0f03f + .word 0xd8567d2c + .word 0xef223390 + .word 0xc787494e + .word 0xc1d938d1 + .word 0xfe8ccaa2 + .word 0x3698d40b + .word 0xcfa6f581 + .word 0x28a57ade + .word 0x26dab78e + .word 0xa43fadbf + .word 0xe42c3a9d + .word 0xd507892 + .word 0x9b6a5fcc + .word 0x62547e46 + .word 0xc2f68d13 + .word 0xe890d8b8 + .word 0x5e2e39f7 + .word 0xf582c3af + .word 0xbe9f5d80 + .word 0x7c69d093 + .word 0xa96fd52d + .word 0xb3cf2512 + .word 
0x3bc8ac99 + .word 0xa710187d + .word 0x6ee89c63 + .word 0x7bdb3bbb + .word 0x9cd2678 + .word 0xf46e5918 + .word 0x1ec9ab7 + .word 0xa8834f9a + .word 0x65e6956e + .word 0x7eaaffe6 + .word 0x821bccf + .word 0xe6ef15e8 + .word 0xd9bae79b + .word 0xce4a6f36 + .word 0xd4ea9f09 + .word 0xd629b07c + .word 0xaf31a4b2 + .word 0x312a3f23 + .word 0x30c6a594 + .word 0xc035a266 + .word 0x37744ebc + .word 0xa6fc82ca + .word 0xb0e090d0 + .word 0x1533a7d8 + .word 0x4af10498 + .word 0xf741ecda + .word 0xe7fcd50 + .word 0x2f1791f6 + .word 0x8d764dd6 + .word 0x4d43efb0 + .word 0x54ccaa4d + .word 0xdfe49604 + .word 0xe39ed1b5 + .word 0x1b4c6a88 + .word 0xb8c12c1f + .word 0x7f466551 + .word 0x49d5eea + .word 0x5d018c35 + .word 0x73fa8774 + .word 0x2efb0b41 + .word 0x5ab3671d + .word 0x5292dbd2 + .word 0x33e91056 + .word 0x136dd647 + .word 0x8c9ad761 + .word 0x7a37a10c + .word 0x8e59f814 + .word 0x89eb133c + .word 0xeecea927 + .word 0x35b761c9 + .word 0xede11ce5 + .word 0x3c7a47b1 + .word 0x599cd2df + .word 0x3f55f273 + .word 0x791814ce + .word 0xbf73c737 + .word 0xea53f7cd + .word 0x5b5ffdaa + .word 0x14df3d6f + .word 0x867844db + .word 0x81caaff3 + .word 0x3eb968c4 + .word 0x2c382434 + .word 0x5fc2a340 + .word 0x72161dc3 + .word 0xcbce225 + .word 0x8b283c49 + .word 0x41ff0d95 + .word 0x7139a801 + .word 0xde080cb3 + .word 0x9cd8b4e4 + .word 0x906456c1 + .word 0x617bcb84 + .word 0x70d532b6 + .word 0x74486c5c + .word 0x42d0b857 +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_ARM32_te_data, %object + .size L_AES_ARM32_te_data, 1024 + .align 4 +L_AES_ARM32_te_data: + .word 0xa5c66363 + .word 0x84f87c7c + .word 0x99ee7777 + .word 0x8df67b7b + .word 0xdfff2f2 + .word 0xbdd66b6b + .word 0xb1de6f6f + .word 0x5491c5c5 + .word 0x50603030 + .word 0x3020101 + .word 0xa9ce6767 + .word 0x7d562b2b + .word 0x19e7fefe + .word 
0x62b5d7d7 + .word 0xe64dabab + .word 0x9aec7676 + .word 0x458fcaca + .word 0x9d1f8282 + .word 0x4089c9c9 + .word 0x87fa7d7d + .word 0x15effafa + .word 0xebb25959 + .word 0xc98e4747 + .word 0xbfbf0f0 + .word 0xec41adad + .word 0x67b3d4d4 + .word 0xfd5fa2a2 + .word 0xea45afaf + .word 0xbf239c9c + .word 0xf753a4a4 + .word 0x96e47272 + .word 0x5b9bc0c0 + .word 0xc275b7b7 + .word 0x1ce1fdfd + .word 0xae3d9393 + .word 0x6a4c2626 + .word 0x5a6c3636 + .word 0x417e3f3f + .word 0x2f5f7f7 + .word 0x4f83cccc + .word 0x5c683434 + .word 0xf451a5a5 + .word 0x34d1e5e5 + .word 0x8f9f1f1 + .word 0x93e27171 + .word 0x73abd8d8 + .word 0x53623131 + .word 0x3f2a1515 + .word 0xc080404 + .word 0x5295c7c7 + .word 0x65462323 + .word 0x5e9dc3c3 + .word 0x28301818 + .word 0xa1379696 + .word 0xf0a0505 + .word 0xb52f9a9a + .word 0x90e0707 + .word 0x36241212 + .word 0x9b1b8080 + .word 0x3ddfe2e2 + .word 0x26cdebeb + .word 0x694e2727 + .word 0xcd7fb2b2 + .word 0x9fea7575 + .word 0x1b120909 + .word 0x9e1d8383 + .word 0x74582c2c + .word 0x2e341a1a + .word 0x2d361b1b + .word 0xb2dc6e6e + .word 0xeeb45a5a + .word 0xfb5ba0a0 + .word 0xf6a45252 + .word 0x4d763b3b + .word 0x61b7d6d6 + .word 0xce7db3b3 + .word 0x7b522929 + .word 0x3edde3e3 + .word 0x715e2f2f + .word 0x97138484 + .word 0xf5a65353 + .word 0x68b9d1d1 + .word 0x0 + .word 0x2cc1eded + .word 0x60402020 + .word 0x1fe3fcfc + .word 0xc879b1b1 + .word 0xedb65b5b + .word 0xbed46a6a + .word 0x468dcbcb + .word 0xd967bebe + .word 0x4b723939 + .word 0xde944a4a + .word 0xd4984c4c + .word 0xe8b05858 + .word 0x4a85cfcf + .word 0x6bbbd0d0 + .word 0x2ac5efef + .word 0xe54faaaa + .word 0x16edfbfb + .word 0xc5864343 + .word 0xd79a4d4d + .word 0x55663333 + .word 0x94118585 + .word 0xcf8a4545 + .word 0x10e9f9f9 + .word 0x6040202 + .word 0x81fe7f7f + .word 0xf0a05050 + .word 0x44783c3c + .word 0xba259f9f + .word 0xe34ba8a8 + .word 0xf3a25151 + .word 0xfe5da3a3 + .word 0xc0804040 + .word 0x8a058f8f + .word 0xad3f9292 + .word 0xbc219d9d + .word 0x48703838 + .word 
0x4f1f5f5 + .word 0xdf63bcbc + .word 0xc177b6b6 + .word 0x75afdada + .word 0x63422121 + .word 0x30201010 + .word 0x1ae5ffff + .word 0xefdf3f3 + .word 0x6dbfd2d2 + .word 0x4c81cdcd + .word 0x14180c0c + .word 0x35261313 + .word 0x2fc3ecec + .word 0xe1be5f5f + .word 0xa2359797 + .word 0xcc884444 + .word 0x392e1717 + .word 0x5793c4c4 + .word 0xf255a7a7 + .word 0x82fc7e7e + .word 0x477a3d3d + .word 0xacc86464 + .word 0xe7ba5d5d + .word 0x2b321919 + .word 0x95e67373 + .word 0xa0c06060 + .word 0x98198181 + .word 0xd19e4f4f + .word 0x7fa3dcdc + .word 0x66442222 + .word 0x7e542a2a + .word 0xab3b9090 + .word 0x830b8888 + .word 0xca8c4646 + .word 0x29c7eeee + .word 0xd36bb8b8 + .word 0x3c281414 + .word 0x79a7dede + .word 0xe2bc5e5e + .word 0x1d160b0b + .word 0x76addbdb + .word 0x3bdbe0e0 + .word 0x56643232 + .word 0x4e743a3a + .word 0x1e140a0a + .word 0xdb924949 + .word 0xa0c0606 + .word 0x6c482424 + .word 0xe4b85c5c + .word 0x5d9fc2c2 + .word 0x6ebdd3d3 + .word 0xef43acac + .word 0xa6c46262 + .word 0xa8399191 + .word 0xa4319595 + .word 0x37d3e4e4 + .word 0x8bf27979 + .word 0x32d5e7e7 + .word 0x438bc8c8 + .word 0x596e3737 + .word 0xb7da6d6d + .word 0x8c018d8d + .word 0x64b1d5d5 + .word 0xd29c4e4e + .word 0xe049a9a9 + .word 0xb4d86c6c + .word 0xfaac5656 + .word 0x7f3f4f4 + .word 0x25cfeaea + .word 0xafca6565 + .word 0x8ef47a7a + .word 0xe947aeae + .word 0x18100808 + .word 0xd56fbaba + .word 0x88f07878 + .word 0x6f4a2525 + .word 0x725c2e2e + .word 0x24381c1c + .word 0xf157a6a6 + .word 0xc773b4b4 + .word 0x5197c6c6 + .word 0x23cbe8e8 + .word 0x7ca1dddd + .word 0x9ce87474 + .word 0x213e1f1f + .word 0xdd964b4b + .word 0xdc61bdbd + .word 0x860d8b8b + .word 0x850f8a8a + .word 0x90e07070 + .word 0x427c3e3e + .word 0xc471b5b5 + .word 0xaacc6666 + .word 0xd8904848 + .word 0x5060303 + .word 0x1f7f6f6 + .word 0x121c0e0e + .word 0xa3c26161 + .word 0x5f6a3535 + .word 0xf9ae5757 + .word 0xd069b9b9 + .word 0x91178686 + .word 0x5899c1c1 + .word 0x273a1d1d + .word 0xb9279e9e + .word 0x38d9e1e1 
+ .word 0x13ebf8f8 + .word 0xb32b9898 + .word 0x33221111 + .word 0xbbd26969 + .word 0x70a9d9d9 + .word 0x89078e8e + .word 0xa7339494 + .word 0xb62d9b9b + .word 0x223c1e1e + .word 0x92158787 + .word 0x20c9e9e9 + .word 0x4987cece + .word 0xffaa5555 + .word 0x78502828 + .word 0x7aa5dfdf + .word 0x8f038c8c + .word 0xf859a1a1 + .word 0x80098989 + .word 0x171a0d0d + .word 0xda65bfbf + .word 0x31d7e6e6 + .word 0xc6844242 + .word 0xb8d06868 + .word 0xc3824141 + .word 0xb0299999 + .word 0x775a2d2d + .word 0x111e0f0f + .word 0xcb7bb0b0 + .word 0xfca85454 + .word 0xd66dbbbb + .word 0x3a2c1616 +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT + .text + .type L_AES_ARM32_td, %object + .size L_AES_ARM32_td, 12 + .align 4 +L_AES_ARM32_td: + .word L_AES_ARM32_td_data +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_ARM32_te, %object + .size L_AES_ARM32_te, 12 + .align 4 +L_AES_ARM32_te: + .word L_AES_ARM32_te_data +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT + .text + .align 4 + .globl AES_invert_key + .type AES_invert_key, %function +AES_invert_key: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + adr r12, L_AES_ARM32_te + ldr r12, [r12] + adr lr, L_AES_ARM32_td + ldr lr, [lr] + add r10, r0, r1, lsl #4 + mov r11, r1 +L_AES_invert_key_loop: + ldm r0, {r2, r3, r4, r5} + ldm r10, {r6, r7, r8, r9} + stm r10, {r2, r3, r4, r5} + stm r0!, {r6, r7, r8, r9} + subs r11, r11, #2 + sub r10, r10, #16 + bne L_AES_invert_key_loop + sub r0, r0, r1, lsl #3 + add r0, r0, #16 + sub r11, r1, #1 +L_AES_invert_key_mix_loop: + ldm r0, {r2, r3, r4, r5} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 6) + lsl r6, r2, #24 + lsr r6, r6, #24 +#else + uxtb r6, r2 +#endif +#else + ubfx r6, r2, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r2, #16 + lsr r7, r7, #24 +#else + uxtb r7, r2, ror #8 +#endif +#else + ubfx r7, r2, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r2, #8 + lsr r8, r8, #24 +#else + uxtb r8, r2, ror #16 +#endif +#else + ubfx r8, r2, #16, #8 +#endif + lsr r9, r2, #24 + ldrb r6, [r12, r6, lsl #2] + ldrb r7, [r12, r7, lsl #2] + ldrb r8, [r12, r8, lsl #2] + ldrb r9, [r12, r9, lsl #2] + ldr r6, [lr, r6, lsl #2] + ldr r7, [lr, r7, lsl #2] + ldr r8, [lr, r8, lsl #2] + ldr r9, [lr, r9, lsl #2] + eor r8, r8, r6, ror #16 + eor r8, r8, r7, ror #8 + eor r8, r8, r9, ror #24 + str r8, [r0], #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r3, #24 + lsr r6, r6, #24 +#else + uxtb r6, r3 +#endif +#else + ubfx r6, r3, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r3, #16 + lsr r7, r7, #24 +#else + uxtb r7, r3, ror #8 +#endif +#else + ubfx r7, r3, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r3, #8 + lsr r8, r8, #24 +#else + uxtb r8, r3, ror #16 +#endif +#else + ubfx r8, r3, #16, #8 +#endif + lsr r9, r3, #24 + ldrb r6, [r12, r6, lsl #2] + ldrb r7, [r12, r7, lsl #2] + ldrb r8, [r12, r8, lsl #2] + ldrb r9, [r12, r9, lsl #2] + ldr r6, [lr, r6, lsl #2] + ldr r7, [lr, r7, lsl #2] + ldr r8, [lr, r8, lsl #2] + ldr r9, [lr, r9, lsl #2] + eor r8, r8, r6, ror #16 + eor r8, r8, r7, ror #8 + eor r8, r8, r9, ror #24 + str r8, [r0], #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r4, #24 + lsr r6, r6, #24 +#else + uxtb r6, r4 +#endif +#else + ubfx r6, r4, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r4, #16 + lsr r7, r7, #24 +#else + uxtb r7, r4, ror #8 +#endif +#else + ubfx r7, r4, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r4, #8 + lsr r8, r8, #24 +#else + uxtb r8, r4, ror #16 +#endif +#else + ubfx r8, r4, #16, #8 +#endif + lsr r9, r4, #24 + ldrb r6, [r12, r6, lsl #2] + ldrb r7, [r12, r7, lsl #2] + ldrb r8, [r12, r8, lsl #2] + ldrb r9, [r12, r9, lsl #2] + ldr r6, [lr, r6, lsl #2] + ldr r7, [lr, r7, lsl #2] + ldr r8, [lr, r8, lsl #2] + ldr r9, [lr, r9, lsl #2] + eor r8, r8, r6, ror #16 + eor r8, r8, r7, ror #8 + eor r8, r8, r9, ror #24 + str r8, [r0], #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r5, #24 + lsr r6, r6, #24 +#else + uxtb r6, r5 +#endif +#else + ubfx r6, r5, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r5, #16 + lsr r7, r7, #24 +#else + uxtb r7, r5, ror #8 +#endif +#else + ubfx r7, r5, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r5, #8 + lsr r8, r8, #24 +#else + uxtb r8, r5, ror #16 +#endif +#else + ubfx r8, r5, #16, #8 +#endif + lsr r9, r5, #24 + ldrb r6, [r12, r6, lsl #2] + ldrb r7, [r12, r7, lsl #2] + ldrb r8, [r12, r8, lsl #2] + ldrb r9, [r12, r9, lsl #2] + ldr r6, [lr, r6, lsl #2] + ldr r7, [lr, r7, lsl #2] + ldr r8, [lr, r8, lsl #2] + ldr r9, [lr, r9, lsl #2] + eor r8, r8, r6, ror #16 + eor r8, r8, r7, ror #8 + eor r8, r8, r9, ror #24 + str r8, [r0], #4 + subs r11, r11, #1 + bne L_AES_invert_key_mix_loop + 
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_invert_key,.-AES_invert_key +#endif /* HAVE_AES_DECRYPT */ + .text + .type L_AES_ARM32_rcon, %object + .size L_AES_ARM32_rcon, 40 + .align 4 +L_AES_ARM32_rcon: + .word 0x1000000 + .word 0x2000000 + .word 0x4000000 + .word 0x8000000 + .word 0x10000000 + .word 0x20000000 + .word 0x40000000 + .word 0x80000000 + .word 0x1b000000 + .word 0x36000000 + .text + .align 4 + .globl AES_set_encrypt_key + .type AES_set_encrypt_key, %function +AES_set_encrypt_key: + push {r4, r5, r6, r7, r8, lr} + adr r8, L_AES_ARM32_te + ldr r8, [r8] + adr lr, L_AES_ARM32_rcon + cmp r1, #0x80 + beq L_AES_set_encrypt_key_start_128 + cmp r1, #0xc0 + beq L_AES_set_encrypt_key_start_192 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + # REV r4, r4 + eor r3, r4, r4, ror #16 + bic r3, r3, #0xff0000 + ror r4, r4, #8 + eor r4, r4, r3, lsr #8 + # REV r5, r5 + eor r3, r5, r5, ror #16 + bic r3, r3, #0xff0000 + ror r5, r5, #8 + eor r5, r5, r3, lsr #8 + # REV r6, r6 + eor r3, r6, r6, ror #16 + bic r3, r3, #0xff0000 + ror r6, r6, #8 + eor r6, r6, r3, lsr #8 + # REV r7, r7 + eor r3, r7, r7, ror #16 + bic r3, r3, #0xff0000 + ror r7, r7, #8 + eor r7, r7, r3, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r2!, {r4, r5, r6, r7} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + # REV r4, r4 + eor r3, r4, r4, ror #16 + bic r3, r3, 
#0xff0000 + ror r4, r4, #8 + eor r4, r4, r3, lsr #8 + # REV r5, r5 + eor r3, r5, r5, ror #16 + bic r3, r3, #0xff0000 + ror r5, r5, #8 + eor r5, r5, r3, lsr #8 + # REV r6, r6 + eor r3, r6, r6, ror #16 + bic r3, r3, #0xff0000 + ror r6, r6, #8 + eor r6, r6, r3, lsr #8 + # REV r7, r7 + eor r3, r7, r7, ror #16 + bic r3, r3, #0xff0000 + ror r7, r7, #8 + eor r7, r7, r3, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r2, {r4, r5, r6, r7} + sub r2, r2, #16 + mov r12, #6 +L_AES_set_encrypt_key_loop_256: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r7, #24 + lsr r4, r4, #24 +#else + uxtb r4, r7 +#endif +#else + ubfx r4, r7, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r7, #16 + lsr r5, r5, #24 +#else + uxtb r5, r7, ror #8 +#endif +#else + ubfx r5, r7, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r7, #8 + lsr r6, r6, #24 +#else + uxtb r6, r7, ror #16 +#endif +#else + ubfx r6, r7, #16, #8 +#endif + lsr r7, r7, #24 + ldrb r4, [r8, r4, lsl #2] + ldrb r5, [r8, r5, lsl #2] + ldrb r6, [r8, r6, lsl #2] + ldrb r7, [r8, r7, lsl #2] + eor r3, r7, r4, lsl #8 + eor r3, r3, r5, lsl #16 + eor r3, r3, r6, lsl #24 + ldm r2!, {r4, r5, r6, r7} + eor r4, r4, r3 + ldm lr!, {r3} + eor r4, r4, r3 + eor r5, r5, r4 + eor r6, r6, r5 + eor r7, r7, r6 + add r2, r2, #16 + stm r2, {r4, r5, r6, r7} + sub r2, r2, #16 + mov r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r3, #16 + lsr r4, r4, #24 +#else + uxtb r4, r3, ror #8 +#endif +#else + ubfx r4, r3, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl 
r5, r3, #8 + lsr r5, r5, #24 +#else + uxtb r5, r3, ror #16 +#endif +#else + ubfx r5, r3, #16, #8 +#endif + lsr r6, r3, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r3, r3, #24 + lsr r3, r3, #24 +#else + uxtb r3, r3 +#endif +#else + ubfx r3, r3, #0, #8 +#endif + ldrb r4, [r8, r4, lsl #2] + ldrb r6, [r8, r6, lsl #2] + ldrb r5, [r8, r5, lsl #2] + ldrb r3, [r8, r3, lsl #2] + eor r3, r3, r4, lsl #8 + eor r3, r3, r5, lsl #16 + eor r3, r3, r6, lsl #24 + ldm r2!, {r4, r5, r6, r7} + eor r4, r4, r3 + eor r5, r5, r4 + eor r6, r6, r5 + eor r7, r7, r6 + add r2, r2, #16 + stm r2, {r4, r5, r6, r7} + sub r2, r2, #16 + subs r12, r12, #1 + bne L_AES_set_encrypt_key_loop_256 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r7, #24 + lsr r4, r4, #24 +#else + uxtb r4, r7 +#endif +#else + ubfx r4, r7, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r7, #16 + lsr r5, r5, #24 +#else + uxtb r5, r7, ror #8 +#endif +#else + ubfx r5, r7, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r7, #8 + lsr r6, r6, #24 +#else + uxtb r6, r7, ror #16 +#endif +#else + ubfx r6, r7, #16, #8 +#endif + lsr r7, r7, #24 + ldrb r4, [r8, r4, lsl #2] + ldrb r5, [r8, r5, lsl #2] + ldrb r6, [r8, r6, lsl #2] + ldrb r7, [r8, r7, lsl #2] + eor r3, r7, r4, lsl #8 + eor r3, r3, r5, lsl #16 + eor r3, r3, r6, lsl #24 + ldm r2!, {r4, r5, r6, r7} + eor r4, r4, r3 + ldm lr!, {r3} + eor r4, r4, r3 + eor r5, r5, r4 + eor r6, r6, r5 + eor r7, r7, r6 + add r2, r2, #16 + stm r2, {r4, r5, r6, r7} + sub r2, r2, #16 + b L_AES_set_encrypt_key_end +L_AES_set_encrypt_key_start_192: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r1, [r0, #20] + ldr r0, [r0, #16] +#else + ldrd r0, r1, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + # REV r4, r4 + eor r3, r4, r4, ror #16 + bic r3, r3, #0xff0000 + ror r4, r4, #8 + eor r4, r4, r3, lsr #8 + # REV r5, r5 + eor r3, r5, r5, ror #16 + bic r3, r3, #0xff0000 + ror r5, r5, #8 + eor r5, r5, r3, lsr #8 + # REV r6, r6 + eor r3, r6, r6, ror #16 + bic r3, r3, #0xff0000 + ror r6, r6, #8 + eor r6, r6, r3, lsr #8 + # REV r7, r7 + eor r3, r7, r7, ror #16 + bic r3, r3, #0xff0000 + ror r7, r7, #8 + eor r7, r7, r3, lsr #8 + # REV r0, r0 + eor r3, r0, r0, ror #16 + bic r3, r3, #0xff0000 + ror r0, r0, #8 + eor r0, r0, r3, lsr #8 + # REV r1, r1 + eor r3, r1, r1, ror #16 + bic r3, r3, #0xff0000 + ror r1, r1, #8 + eor r1, r1, r3, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r0, r0 + rev r1, r1 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r2, {r4, r5, r6, r7} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r0, [r2, #16] + str r1, [r2, #20] +#else + strd r0, r1, [r2, #16] +#endif + mov r7, r1 + mov r12, #7 +L_AES_set_encrypt_key_loop_192: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r0, r7, #24 + lsr r0, r0, #24 +#else + uxtb r0, r7 +#endif +#else + ubfx r0, r7, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r1, r7, #16 + lsr r1, r1, #24 +#else + uxtb r1, r7, ror #8 +#endif +#else + ubfx r1, r7, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r7, #8 + lsr r4, r4, #24 +#else + uxtb r4, r7, ror #16 +#endif +#else + ubfx r4, r7, #16, #8 +#endif 
+ lsr r7, r7, #24 + ldrb r0, [r8, r0, lsl #2] + ldrb r1, [r8, r1, lsl #2] + ldrb r4, [r8, r4, lsl #2] + ldrb r7, [r8, r7, lsl #2] + eor r3, r7, r0, lsl #8 + eor r3, r3, r1, lsl #16 + eor r3, r3, r4, lsl #24 + ldm r2!, {r0, r1, r4, r5, r6, r7} + eor r0, r0, r3 + ldm lr!, {r3} + eor r0, r0, r3 + eor r1, r1, r0 + eor r4, r4, r1 + eor r5, r5, r4 + eor r6, r6, r5 + eor r7, r7, r6 + stm r2, {r0, r1, r4, r5, r6, r7} + subs r12, r12, #1 + bne L_AES_set_encrypt_key_loop_192 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r0, r7, #24 + lsr r0, r0, #24 +#else + uxtb r0, r7 +#endif +#else + ubfx r0, r7, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r1, r7, #16 + lsr r1, r1, #24 +#else + uxtb r1, r7, ror #8 +#endif +#else + ubfx r1, r7, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r7, #8 + lsr r4, r4, #24 +#else + uxtb r4, r7, ror #16 +#endif +#else + ubfx r4, r7, #16, #8 +#endif + lsr r7, r7, #24 + ldrb r0, [r8, r0, lsl #2] + ldrb r1, [r8, r1, lsl #2] + ldrb r4, [r8, r4, lsl #2] + ldrb r7, [r8, r7, lsl #2] + eor r3, r7, r0, lsl #8 + eor r3, r3, r1, lsl #16 + eor r3, r3, r4, lsl #24 + ldm r2!, {r0, r1, r4, r5, r6, r7} + eor r0, r0, r3 + ldm lr!, {r3} + eor r0, r0, r3 + eor r1, r1, r0 + eor r4, r4, r1 + eor r5, r5, r4 + stm r2, {r0, r1, r4, r5} + b L_AES_set_encrypt_key_end +L_AES_set_encrypt_key_start_128: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + # REV r4, r4 + eor r3, r4, r4, ror #16 + bic r3, r3, #0xff0000 + ror r4, r4, #8 + eor r4, r4, r3, lsr #8 + # 
REV r5, r5 + eor r3, r5, r5, ror #16 + bic r3, r3, #0xff0000 + ror r5, r5, #8 + eor r5, r5, r3, lsr #8 + # REV r6, r6 + eor r3, r6, r6, ror #16 + bic r3, r3, #0xff0000 + ror r6, r6, #8 + eor r6, r6, r3, lsr #8 + # REV r7, r7 + eor r3, r7, r7, ror #16 + bic r3, r3, #0xff0000 + ror r7, r7, #8 + eor r7, r7, r3, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r2, {r4, r5, r6, r7} + mov r12, #10 +L_AES_set_encrypt_key_loop_128: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r7, #24 + lsr r4, r4, #24 +#else + uxtb r4, r7 +#endif +#else + ubfx r4, r7, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r7, #16 + lsr r5, r5, #24 +#else + uxtb r5, r7, ror #8 +#endif +#else + ubfx r5, r7, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r7, #8 + lsr r6, r6, #24 +#else + uxtb r6, r7, ror #16 +#endif +#else + ubfx r6, r7, #16, #8 +#endif + lsr r7, r7, #24 + ldrb r4, [r8, r4, lsl #2] + ldrb r5, [r8, r5, lsl #2] + ldrb r6, [r8, r6, lsl #2] + ldrb r7, [r8, r7, lsl #2] + eor r3, r7, r4, lsl #8 + eor r3, r3, r5, lsl #16 + eor r3, r3, r6, lsl #24 + ldm r2!, {r4, r5, r6, r7} + eor r4, r4, r3 + ldm lr!, {r3} + eor r4, r4, r3 + eor r5, r5, r4 + eor r6, r6, r5 + eor r7, r7, r6 + stm r2, {r4, r5, r6, r7} + subs r12, r12, #1 + bne L_AES_set_encrypt_key_loop_128 +L_AES_set_encrypt_key_end: + pop {r4, r5, r6, r7, r8, pc} + .size AES_set_encrypt_key,.-AES_set_encrypt_key + .text + .align 4 + .globl AES_encrypt_block + .type AES_encrypt_block, %function +AES_encrypt_block: + push {lr} +L_AES_encrypt_block_nr: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r5, #8 + lsr r8, r8, #24 +#else + 
uxtb r8, r5, ror #16 +#endif +#else + ubfx r8, r5, #16, #8 +#endif + lsr r11, r4, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r6, #16 + lsr lr, lr, #24 +#else + uxtb lr, r6, ror #8 +#endif +#else + ubfx lr, r6, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r7, #24 + lsr r2, r2, #24 +#else + uxtb r2, r7 +#endif +#else + ubfx r2, r7, #0, #8 +#endif + ldr r8, [r0, r8, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r9, r6, #8 + lsr r9, r9, #24 +#else + uxtb r9, r6, ror #16 +#endif +#else + ubfx r9, r6, #16, #8 +#endif + eor r8, r8, r11, ror #24 + lsr r11, r5, #24 + eor r8, r8, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r7, #16 + lsr lr, lr, #24 +#else + uxtb lr, r7, ror #8 +#endif +#else + ubfx lr, r7, #8, #8 +#endif + eor r8, r8, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r4, #24 + lsr r2, r2, #24 +#else + uxtb r2, r4 +#endif +#else + ubfx r2, r4, #0, #8 +#endif + ldr r9, [r0, r9, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r10, r7, #8 + lsr r10, r10, #24 +#else + uxtb r10, r7, ror #16 +#endif +#else + ubfx r10, r7, #16, #8 +#endif + eor r9, r9, r11, ror #24 + lsr r11, r6, #24 + eor r9, r9, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r4, #16 + lsr lr, lr, #24 +#else + uxtb lr, r4, ror #8 +#endif 
+#else + ubfx lr, r4, #8, #8 +#endif + eor r9, r9, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r5, #24 + lsr r2, r2, #24 +#else + uxtb r2, r5 +#endif +#else + ubfx r2, r5, #0, #8 +#endif + ldr r10, [r0, r10, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r6, #24 + lsr r6, r6, #24 +#else + uxtb r6, r6 +#endif +#else + ubfx r6, r6, #0, #8 +#endif + eor r10, r10, r11, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r11, r4, #8 + lsr r11, r11, #24 +#else + uxtb r11, r4, ror #16 +#endif +#else + ubfx r11, r4, #16, #8 +#endif + eor r10, r10, lr, ror #8 + lsr lr, r7, #24 + eor r10, r10, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r5, #16 + lsr r2, r2, #24 +#else + uxtb r2, r5, ror #8 +#endif +#else + ubfx r2, r5, #8, #8 +#endif + ldr r6, [r0, r6, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r2, [r0, r2, lsl #2] + eor lr, lr, r6, ror #24 + ldm r3!, {r4, r5, r6, r7} + eor r11, r11, lr, ror #24 + eor r11, r11, r2, ror #8 + # XOR in Key Schedule + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r9, #8 + lsr r4, r4, #24 +#else + uxtb r4, r9, ror #16 +#endif +#else + ubfx r4, r9, #16, #8 +#endif + lsr r7, r8, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r10, #16 + lsr lr, lr, #24 +#else + uxtb lr, r10, ror #8 +#endif +#else + ubfx lr, r10, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH 
< 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r11, #24 + lsr r2, r2, #24 +#else + uxtb r2, r11 +#endif +#else + ubfx r2, r11, #0, #8 +#endif + ldr r4, [r0, r4, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r10, #8 + lsr r5, r5, #24 +#else + uxtb r5, r10, ror #16 +#endif +#else + ubfx r5, r10, #16, #8 +#endif + eor r4, r4, r7, ror #24 + lsr r7, r9, #24 + eor r4, r4, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r11, #16 + lsr lr, lr, #24 +#else + uxtb lr, r11, ror #8 +#endif +#else + ubfx lr, r11, #8, #8 +#endif + eor r4, r4, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r8, #24 + lsr r2, r2, #24 +#else + uxtb r2, r8 +#endif +#else + ubfx r2, r8, #0, #8 +#endif + ldr r5, [r0, r5, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r11, #8 + lsr r6, r6, #24 +#else + uxtb r6, r11, ror #16 +#endif +#else + ubfx r6, r11, #16, #8 +#endif + eor r5, r5, r7, ror #24 + lsr r7, r10, #24 + eor r5, r5, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r8, #16 + lsr lr, lr, #24 +#else + uxtb lr, r8, ror #8 +#endif +#else + ubfx lr, r8, #8, #8 +#endif + eor r5, r5, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r9, #24 + lsr r2, r2, #24 +#else + uxtb r2, r9 +#endif +#else + ubfx r2, r9, #0, #8 +#endif + ldr r6, [r0, r6, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr 
r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r10, r10, #24 + lsr r10, r10, #24 +#else + uxtb r10, r10 +#endif +#else + ubfx r10, r10, #0, #8 +#endif + eor r6, r6, r7, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r8, #8 + lsr r7, r7, #24 +#else + uxtb r7, r8, ror #16 +#endif +#else + ubfx r7, r8, #16, #8 +#endif + eor r6, r6, lr, ror #8 + lsr lr, r11, #24 + eor r6, r6, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r9, #16 + lsr r2, r2, #24 +#else + uxtb r2, r9, ror #8 +#endif +#else + ubfx r2, r9, #8, #8 +#endif + ldr r10, [r0, r10, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr r2, [r0, r2, lsl #2] + eor lr, lr, r10, ror #24 + ldm r3!, {r8, r9, r10, r11} + eor r7, r7, lr, ror #24 + eor r7, r7, r2, ror #8 + # XOR in Key Schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + subs r1, r1, #1 + bne L_AES_encrypt_block_nr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r5, #8 + lsr r8, r8, #24 +#else + uxtb r8, r5, ror #16 +#endif +#else + ubfx r8, r5, #16, #8 +#endif + lsr r11, r4, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r6, #16 + lsr lr, lr, #24 +#else + uxtb lr, r6, ror #8 +#endif +#else + ubfx lr, r6, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r7, #24 + lsr r2, r2, #24 +#else + uxtb r2, r7 +#endif +#else + ubfx r2, r7, #0, #8 +#endif + ldr r8, [r0, r8, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r9, r6, #8 + lsr r9, r9, #24 +#else + uxtb r9, r6, ror #16 +#endif +#else + ubfx r9, r6, #16, #8 +#endif + eor r8, r8, r11, ror #24 + lsr r11, r5, #24 + eor r8, r8, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r7, #16 + lsr lr, lr, #24 +#else + uxtb lr, r7, ror #8 +#endif +#else + ubfx lr, r7, #8, #8 +#endif + eor r8, r8, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r4, #24 + lsr r2, r2, #24 +#else + uxtb r2, r4 +#endif +#else + ubfx r2, r4, #0, #8 +#endif + ldr r9, [r0, r9, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r10, r7, #8 + lsr r10, r10, #24 +#else + uxtb r10, r7, ror #16 +#endif +#else + ubfx r10, r7, #16, #8 +#endif + eor r9, r9, r11, ror #24 + lsr r11, r6, #24 + eor r9, r9, lr, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r4, #16 + lsr lr, lr, #24 +#else + uxtb lr, r4, ror #8 +#endif +#else + ubfx lr, r4, #8, #8 +#endif + eor r9, r9, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r5, #24 + lsr r2, r2, #24 +#else + uxtb r2, r5 +#endif +#else + ubfx r2, r5, #0, #8 +#endif + ldr r10, [r0, r10, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r6, #24 + lsr r6, r6, #24 +#else + uxtb r6, r6 +#endif +#else + ubfx r6, r6, #0, #8 +#endif + eor r10, r10, r11, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r11, r4, #8 + lsr r11, r11, #24 +#else + uxtb r11, r4, ror #16 +#endif +#else + ubfx r11, r4, #16, #8 +#endif + eor r10, r10, lr, ror #8 + lsr lr, r7, #24 + eor r10, r10, r2, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r2, r5, #16 + lsr r2, r2, #24 +#else + uxtb r2, r5, ror #8 +#endif +#else + ubfx r2, r5, #8, #8 +#endif + ldr r6, [r0, r6, lsl #2] + ldr lr, [r0, lr, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r2, [r0, r2, lsl #2] + eor lr, lr, r6, ror #24 + ldm r3!, {r4, r5, r6, r7} + eor r11, r11, lr, ror #24 + eor r11, r11, r2, ror #8 + # XOR in Key Schedule + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r11, #24 + lsr r4, r4, #24 +#else + uxtb r4, r11 +#endif +#else + ubfx r4, r11, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r10, #16 + lsr r7, r7, #24 +#else + uxtb r7, r10, ror #8 +#endif +#else + ubfx r7, r10, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r9, #8 + lsr lr, lr, #24 +#else + uxtb lr, r9, ror #16 +#endif +#else + ubfx lr, r9, #16, #8 +#endif + lsr r2, r8, #24 + ldrb r4, [r0, r4, lsl #2] + ldrb r7, [r0, r7, lsl #2] + ldrb lr, [r0, lr, lsl #2] + ldrb r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r8, #24 + lsr r5, r5, #24 +#else + uxtb r5, r8 +#endif +#else + ubfx r5, r8, #0, #8 +#endif + eor r4, r4, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r11, #16 + lsr r7, r7, #24 +#else + uxtb r7, 
r11, ror #8 +#endif +#else + ubfx r7, r11, #8, #8 +#endif + eor r4, r4, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r10, #8 + lsr lr, lr, #24 +#else + uxtb lr, r10, ror #16 +#endif +#else + ubfx lr, r10, #16, #8 +#endif + eor r4, r4, r2, lsl #24 + lsr r2, r9, #24 + ldrb r5, [r0, r5, lsl #2] + ldrb r7, [r0, r7, lsl #2] + ldrb lr, [r0, lr, lsl #2] + ldrb r2, [r0, r2, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r9, #24 + lsr r6, r6, #24 +#else + uxtb r6, r9 +#endif +#else + ubfx r6, r9, #0, #8 +#endif + eor r5, r5, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r8, #16 + lsr r7, r7, #24 +#else + uxtb r7, r8, ror #8 +#endif +#else + ubfx r7, r8, #8, #8 +#endif + eor r5, r5, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r11, #8 + lsr lr, lr, #24 +#else + uxtb lr, r11, ror #16 +#endif +#else + ubfx lr, r11, #16, #8 +#endif + eor r5, r5, r2, lsl #24 + lsr r2, r10, #24 + ldrb r6, [r0, r6, lsl #2] + ldrb r7, [r0, r7, lsl #2] + ldrb lr, [r0, lr, lsl #2] + ldrb r2, [r0, r2, lsl #2] + lsr r11, r11, #24 + eor r6, r6, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r10, #24 + lsr r7, r7, #24 +#else + uxtb r7, r10 +#endif +#else + ubfx r7, r10, #0, #8 +#endif + eor r6, r6, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r9, #16 + lsr lr, lr, #24 +#else + uxtb lr, r9, ror #8 +#endif +#else + ubfx lr, r9, #8, #8 +#endif + eor r6, r6, r2, lsl #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + 
lsl r2, r8, #8 + lsr r2, r2, #24 +#else + uxtb r2, r8, ror #16 +#endif +#else + ubfx r2, r8, #16, #8 +#endif + ldrb r11, [r0, r11, lsl #2] + ldrb r7, [r0, r7, lsl #2] + ldrb lr, [r0, lr, lsl #2] + ldrb r2, [r0, r2, lsl #2] + eor lr, lr, r11, lsl #16 + ldm r3, {r8, r9, r10, r11} + eor r7, r7, lr, lsl #8 + eor r7, r7, r2, lsl #16 + # XOR in Key Schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + pop {pc} + .size AES_encrypt_block,.-AES_encrypt_block +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_ARM32_te_ecb, %object + .size L_AES_ARM32_te_ecb, 12 + .align 4 +L_AES_ARM32_te_ecb: + .word L_AES_ARM32_te_data + .text + .align 4 + .globl AES_ECB_encrypt + .type AES_ECB_encrypt, %function +AES_ECB_encrypt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + mov lr, r0 + adr r0, L_AES_ARM32_te_ecb + ldr r0, [r0] + ldr r12, [sp, #36] + push {r3} + cmp r12, #10 + beq L_AES_ECB_encrypt_start_block_128 + cmp r12, #12 + beq L_AES_ECB_encrypt_start_block_192 +L_AES_ECB_encrypt_loop_block_256: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r2, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_ECB_encrypt_loop_block_256 + b L_AES_ECB_encrypt_end +L_AES_ECB_encrypt_start_block_192: +L_AES_ECB_encrypt_loop_block_192: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r2, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr 
#8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_ECB_encrypt_loop_block_192 + b L_AES_ECB_encrypt_end +L_AES_ECB_encrypt_start_block_128: +L_AES_ECB_encrypt_loop_block_128: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r2, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_ECB_encrypt_loop_block_128 
+L_AES_ECB_encrypt_end: + pop {r3} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_ECB_encrypt,.-AES_ECB_encrypt +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC + .text + .type L_AES_ARM32_te_cbc, %object + .size L_AES_ARM32_te_cbc, 12 + .align 4 +L_AES_ARM32_te_cbc: + .word L_AES_ARM32_te_data + .text + .align 4 + .globl AES_CBC_encrypt + .type AES_CBC_encrypt, %function +AES_CBC_encrypt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ldr r8, [sp, #36] + ldr r9, [sp, #40] + mov lr, r0 + adr r0, L_AES_ARM32_te_cbc + ldr r0, [r0] + ldm r9, {r4, r5, r6, r7} + push {r3, r9} + cmp r8, #10 + beq L_AES_CBC_encrypt_start_block_128 + cmp r8, #12 + beq L_AES_CBC_encrypt_start_block_192 +L_AES_CBC_encrypt_loop_block_256: + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + push {r1, r2, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, 
r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_encrypt_loop_block_256 + b L_AES_CBC_encrypt_end +L_AES_CBC_encrypt_start_block_192: +L_AES_CBC_encrypt_loop_block_192: + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + push {r1, r2, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] 
+ str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_encrypt_loop_block_192 + b L_AES_CBC_encrypt_end +L_AES_CBC_encrypt_start_block_128: +L_AES_CBC_encrypt_loop_block_128: + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + push {r1, r2, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_encrypt_loop_block_128 +L_AES_CBC_encrypt_end: + pop {r3, r9} + stm r9, {r4, r5, r6, r7} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size 
AES_CBC_encrypt,.-AES_CBC_encrypt +#endif /* HAVE_AES_CBC */ +#ifdef WOLFSSL_AES_COUNTER + .text + .type L_AES_ARM32_te_ctr, %object + .size L_AES_ARM32_te_ctr, 12 + .align 4 +L_AES_ARM32_te_ctr: + .word L_AES_ARM32_te_data + .text + .align 4 + .globl AES_CTR_encrypt + .type AES_CTR_encrypt, %function +AES_CTR_encrypt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ldr r12, [sp, #36] + ldr r8, [sp, #40] + mov lr, r0 + adr r0, L_AES_ARM32_te_ctr + ldr r0, [r0] + ldm r8, {r4, r5, r6, r7} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r10, r4, r4, ror #16 + eor r11, r5, r5, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + eor r4, r4, r10, lsr #8 + eor r5, r5, r11, lsr #8 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r6, r6, #8 + ror r7, r7, #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r8, {r4, r5, r6, r7} + push {r3, r8} + cmp r12, #10 + beq L_AES_CTR_encrypt_start_block_128 + cmp r12, #12 + beq L_AES_CTR_encrypt_start_block_192 +L_AES_CTR_encrypt_loop_block_256: + push {r1, r2, lr} + ldr lr, [sp, #16] + adds r11, r7, #1 + adcs r10, r6, #0 + adcs r9, r5, #0 + adc r8, r4, #0 + stm lr, {r8, r9, r10, r11} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, 
r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CTR_encrypt_loop_block_256 + b L_AES_CTR_encrypt_end +L_AES_CTR_encrypt_start_block_192: +L_AES_CTR_encrypt_loop_block_192: + push {r1, r2, lr} + ldr lr, [sp, #16] + adds r11, r7, #1 + adcs r10, r6, #0 + adcs r9, r5, #0 + adc r8, r4, #0 + stm lr, {r8, r9, r10, r11} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CTR_encrypt_loop_block_192 + b L_AES_CTR_encrypt_end +L_AES_CTR_encrypt_start_block_128: +L_AES_CTR_encrypt_loop_block_128: + push {r1, r2, lr} + ldr lr, [sp, #16] + adds r11, r7, #1 + adcs 
r10, r6, #0 + adcs r9, r5, #0 + adc r8, r4, #0 + stm lr, {r8, r9, r10, r11} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CTR_encrypt_loop_block_128 +L_AES_CTR_encrypt_end: + pop {r3, r8} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r10, r4, r4, ror #16 + eor r11, r5, r5, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + eor r4, r4, r10, lsr #8 + eor r5, r5, r11, lsr #8 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r6, r6, #8 + ror r7, r7, #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r8, {r4, r5, r6, r7} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_CTR_encrypt,.-AES_CTR_encrypt +#endif /* WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +#if defined(WOLFSSL_AES_DIRECT) || 
defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) + .text + .align 4 + .globl AES_decrypt_block + .type AES_decrypt_block, %function +AES_decrypt_block: + push {lr} +L_AES_decrypt_block_nr: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r7, #8 + lsr r8, r8, #24 +#else + uxtb r8, r7, ror #16 +#endif +#else + ubfx r8, r7, #16, #8 +#endif + lsr r11, r4, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r6, #16 + lsr r12, r12, #24 +#else + uxtb r12, r6, ror #8 +#endif +#else + ubfx r12, r6, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r5, #24 + lsr lr, lr, #24 +#else + uxtb lr, r5 +#endif +#else + ubfx lr, r5, #0, #8 +#endif + ldr r8, [r0, r8, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r9, r4, #8 + lsr r9, r9, #24 +#else + uxtb r9, r4, ror #16 +#endif +#else + ubfx r9, r4, #16, #8 +#endif + eor r8, r8, r11, ror #24 + lsr r11, r5, #24 + eor r8, r8, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r7, #16 + lsr r12, r12, #24 +#else + uxtb r12, r7, ror #8 +#endif +#else + ubfx r12, r7, #8, #8 +#endif + eor r8, r8, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r6, #24 + lsr lr, lr, #24 +#else + uxtb lr, r6 +#endif +#else + ubfx lr, r6, #0, #8 +#endif + ldr r9, [r0, r9, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r10, 
r5, #8 + lsr r10, r10, #24 +#else + uxtb r10, r5, ror #16 +#endif +#else + ubfx r10, r5, #16, #8 +#endif + eor r9, r9, r11, ror #24 + lsr r11, r6, #24 + eor r9, r9, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r4, #16 + lsr r12, r12, #24 +#else + uxtb r12, r4, ror #8 +#endif +#else + ubfx r12, r4, #8, #8 +#endif + eor r9, r9, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r7, #24 + lsr lr, lr, #24 +#else + uxtb lr, r7 +#endif +#else + ubfx lr, r7, #0, #8 +#endif + ldr r10, [r0, r10, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r4, #24 + lsr r4, r4, #24 +#else + uxtb r4, r4 +#endif +#else + ubfx r4, r4, #0, #8 +#endif + eor r10, r10, r11, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r11, r6, #8 + lsr r11, r11, #24 +#else + uxtb r11, r6, ror #16 +#endif +#else + ubfx r11, r6, #16, #8 +#endif + eor r10, r10, r12, ror #8 + lsr r12, r7, #24 + eor r10, r10, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r5, #16 + lsr lr, lr, #24 +#else + uxtb lr, r5, ror #8 +#endif +#else + ubfx lr, r5, #8, #8 +#endif + ldr r4, [r0, r4, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r4, ror #24 + ldm r3!, {r4, r5, r6, r7} + eor r11, r11, lr, ror #8 + eor r11, r11, r12, ror #24 + # XOR in Key Schedule + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r11, #8 + lsr r4, 
r4, #24 +#else + uxtb r4, r11, ror #16 +#endif +#else + ubfx r4, r11, #16, #8 +#endif + lsr r7, r8, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r10, #16 + lsr r12, r12, #24 +#else + uxtb r12, r10, ror #8 +#endif +#else + ubfx r12, r10, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r9, #24 + lsr lr, lr, #24 +#else + uxtb lr, r9 +#endif +#else + ubfx lr, r9, #0, #8 +#endif + ldr r4, [r0, r4, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r8, #8 + lsr r5, r5, #24 +#else + uxtb r5, r8, ror #16 +#endif +#else + ubfx r5, r8, #16, #8 +#endif + eor r4, r4, r7, ror #24 + lsr r7, r9, #24 + eor r4, r4, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r11, #16 + lsr r12, r12, #24 +#else + uxtb r12, r11, ror #8 +#endif +#else + ubfx r12, r11, #8, #8 +#endif + eor r4, r4, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r10, #24 + lsr lr, lr, #24 +#else + uxtb lr, r10 +#endif +#else + ubfx lr, r10, #0, #8 +#endif + ldr r5, [r0, r5, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r9, #8 + lsr r6, r6, #24 +#else + uxtb r6, r9, ror #16 +#endif +#else + ubfx r6, r9, #16, #8 +#endif + eor r5, r5, r7, ror #24 + lsr r7, r10, #24 + eor r5, r5, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r8, #16 + lsr r12, r12, #24 +#else 
+ uxtb r12, r8, ror #8 +#endif +#else + ubfx r12, r8, #8, #8 +#endif + eor r5, r5, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r11, #24 + lsr lr, lr, #24 +#else + uxtb lr, r11 +#endif +#else + ubfx lr, r11, #0, #8 +#endif + ldr r6, [r0, r6, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r8, #24 + lsr r8, r8, #24 +#else + uxtb r8, r8 +#endif +#else + ubfx r8, r8, #0, #8 +#endif + eor r6, r6, r7, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r10, #8 + lsr r7, r7, #24 +#else + uxtb r7, r10, ror #16 +#endif +#else + ubfx r7, r10, #16, #8 +#endif + eor r6, r6, r12, ror #8 + lsr r12, r11, #24 + eor r6, r6, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r9, #16 + lsr lr, lr, #24 +#else + uxtb lr, r9, ror #8 +#endif +#else + ubfx lr, r9, #8, #8 +#endif + ldr r8, [r0, r8, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr r7, [r0, r7, lsl #2] + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r8, ror #24 + ldm r3!, {r8, r9, r10, r11} + eor r7, r7, lr, ror #8 + eor r7, r7, r12, ror #24 + # XOR in Key Schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + subs r1, r1, #1 + bne L_AES_decrypt_block_nr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r8, r7, #8 + lsr r8, r8, #24 +#else + uxtb r8, r7, ror #16 +#endif +#else + ubfx r8, r7, #16, #8 +#endif + lsr r11, r4, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r6, #16 + lsr r12, r12, #24 +#else + uxtb r12, r6, ror #8 +#endif +#else + ubfx 
r12, r6, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r5, #24 + lsr lr, lr, #24 +#else + uxtb lr, r5 +#endif +#else + ubfx lr, r5, #0, #8 +#endif + ldr r8, [r0, r8, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r9, r4, #8 + lsr r9, r9, #24 +#else + uxtb r9, r4, ror #16 +#endif +#else + ubfx r9, r4, #16, #8 +#endif + eor r8, r8, r11, ror #24 + lsr r11, r5, #24 + eor r8, r8, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r7, #16 + lsr r12, r12, #24 +#else + uxtb r12, r7, ror #8 +#endif +#else + ubfx r12, r7, #8, #8 +#endif + eor r8, r8, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r6, #24 + lsr lr, lr, #24 +#else + uxtb lr, r6 +#endif +#else + ubfx lr, r6, #0, #8 +#endif + ldr r9, [r0, r9, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r10, r5, #8 + lsr r10, r10, #24 +#else + uxtb r10, r5, ror #16 +#endif +#else + ubfx r10, r5, #16, #8 +#endif + eor r9, r9, r11, ror #24 + lsr r11, r6, #24 + eor r9, r9, r12, ror #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r4, #16 + lsr r12, r12, #24 +#else + uxtb r12, r4, ror #8 +#endif +#else + ubfx r12, r4, #8, #8 +#endif + eor r9, r9, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r7, #24 + lsr lr, lr, #24 +#else + uxtb lr, r7 +#endif +#else + ubfx lr, r7, #0, #8 
+#endif + ldr r10, [r0, r10, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr lr, [r0, lr, lsl #2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r4, #24 + lsr r4, r4, #24 +#else + uxtb r4, r4 +#endif +#else + ubfx r4, r4, #0, #8 +#endif + eor r10, r10, r11, ror #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r11, r6, #8 + lsr r11, r11, #24 +#else + uxtb r11, r6, ror #16 +#endif +#else + ubfx r11, r6, #16, #8 +#endif + eor r10, r10, r12, ror #8 + lsr r12, r7, #24 + eor r10, r10, lr, ror #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r5, #16 + lsr lr, lr, #24 +#else + uxtb lr, r5, ror #8 +#endif +#else + ubfx lr, r5, #8, #8 +#endif + ldr r4, [r0, r4, lsl #2] + ldr r12, [r0, r12, lsl #2] + ldr r11, [r0, r11, lsl #2] + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r4, ror #24 + ldm r3!, {r4, r5, r6, r7} + eor r11, r11, lr, ror #8 + eor r11, r11, r12, ror #24 + # XOR in Key Schedule + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r4, r9, #24 + lsr r4, r4, #24 +#else + uxtb r4, r9 +#endif +#else + ubfx r4, r9, #0, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r10, #16 + lsr r7, r7, #24 +#else + uxtb r7, r10, ror #8 +#endif +#else + ubfx r7, r10, #8, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r11, #8 + lsr r12, r12, #24 +#else + uxtb r12, r11, ror #16 +#endif +#else + ubfx r12, r11, #16, #8 +#endif + lsr lr, r8, #24 + ldrb r4, [r2, r4] + ldrb r7, [r2, r7] + ldrb r12, [r2, r12] + ldrb lr, [r2, lr] +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r5, r10, #24 + lsr r5, r5, #24 +#else + uxtb r5, r10 +#endif +#else + ubfx r5, r10, #0, #8 +#endif + eor r4, r4, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r11, #16 + lsr r7, r7, #24 +#else + uxtb r7, r11, ror #8 +#endif +#else + ubfx r7, r11, #8, #8 +#endif + eor r4, r4, r12, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r8, #8 + lsr r12, r12, #24 +#else + uxtb r12, r8, ror #16 +#endif +#else + ubfx r12, r8, #16, #8 +#endif + eor r4, r4, lr, lsl #24 + lsr lr, r9, #24 + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + ldrb r5, [r2, r5] + ldrb r12, [r2, r12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r6, r11, #24 + lsr r6, r6, #24 +#else + uxtb r6, r11 +#endif +#else + ubfx r6, r11, #0, #8 +#endif + eor r5, r5, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r8, #16 + lsr r7, r7, #24 +#else + uxtb r7, r8, ror #8 +#endif +#else + ubfx r7, r8, #8, #8 +#endif + eor r5, r5, r12, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r9, #8 + lsr r12, r12, #24 +#else + uxtb r12, r9, ror #16 +#endif +#else + ubfx r12, r9, #16, #8 +#endif + eor r5, r5, lr, lsl #24 + lsr lr, r10, #24 + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + ldrb r6, [r2, r6] + ldrb r12, [r2, r12] + lsr r11, r11, #24 + eor r6, r6, r7, lsl #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r7, r8, #24 + lsr r7, r7, #24 +#else + uxtb r7, r8 +#endif +#else + ubfx r7, r8, #0, #8 +#endif + eor r6, r6, r12, lsl #16 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl r12, r9, #16 + lsr r12, r12, #24 +#else + uxtb r12, r9, ror #8 +#endif +#else + ubfx r12, r9, #8, #8 +#endif + eor r6, r6, lr, lsl #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + lsl lr, r10, #8 + lsr lr, lr, #24 +#else + uxtb lr, r10, ror #16 +#endif +#else + ubfx lr, r10, #16, #8 +#endif + ldrb r11, [r2, r11] + ldrb r12, [r2, r12] + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + eor r12, r12, r11, lsl #16 + ldm r3, {r8, r9, r10, r11} + eor r7, r7, r12, lsl #8 + eor r7, r7, lr, lsl #16 + # XOR in Key Schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + pop {pc} + .size AES_decrypt_block,.-AES_decrypt_block + .text + .type L_AES_ARM32_td_ecb, %object + .size L_AES_ARM32_td_ecb, 12 + .align 4 +L_AES_ARM32_td_ecb: + .word L_AES_ARM32_td_data + .text + .type L_AES_ARM32_td4, %object + .size L_AES_ARM32_td4, 256 + .align 4 +L_AES_ARM32_td4: + .byte 0x52 + .byte 0x9 + .byte 0x6a + .byte 0xd5 + .byte 0x30 + .byte 0x36 + .byte 0xa5 + .byte 0x38 + .byte 0xbf + .byte 0x40 + .byte 0xa3 + .byte 0x9e + .byte 0x81 + .byte 0xf3 + .byte 0xd7 + .byte 0xfb + .byte 0x7c + .byte 0xe3 + .byte 0x39 + .byte 0x82 + .byte 0x9b + .byte 0x2f + .byte 0xff + .byte 0x87 + .byte 0x34 + .byte 0x8e + .byte 0x43 + .byte 0x44 + .byte 0xc4 + .byte 0xde + .byte 0xe9 + .byte 0xcb + .byte 0x54 + .byte 0x7b + .byte 0x94 + .byte 0x32 + .byte 0xa6 + .byte 0xc2 + .byte 0x23 + .byte 0x3d + .byte 0xee + .byte 0x4c + .byte 0x95 + .byte 0xb + .byte 0x42 + .byte 0xfa + .byte 0xc3 + .byte 0x4e + .byte 0x8 + .byte 0x2e + .byte 0xa1 + .byte 0x66 + .byte 0x28 + .byte 0xd9 + .byte 0x24 + .byte 0xb2 + .byte 0x76 + .byte 0x5b + .byte 0xa2 + .byte 0x49 + .byte 0x6d + .byte 0x8b + .byte 0xd1 + .byte 0x25 + .byte 0x72 + .byte 0xf8 + .byte 0xf6 + .byte 0x64 + .byte 0x86 + .byte 0x68 + .byte 0x98 + .byte 0x16 + .byte 
0xd4 + .byte 0xa4 + .byte 0x5c + .byte 0xcc + .byte 0x5d + .byte 0x65 + .byte 0xb6 + .byte 0x92 + .byte 0x6c + .byte 0x70 + .byte 0x48 + .byte 0x50 + .byte 0xfd + .byte 0xed + .byte 0xb9 + .byte 0xda + .byte 0x5e + .byte 0x15 + .byte 0x46 + .byte 0x57 + .byte 0xa7 + .byte 0x8d + .byte 0x9d + .byte 0x84 + .byte 0x90 + .byte 0xd8 + .byte 0xab + .byte 0x0 + .byte 0x8c + .byte 0xbc + .byte 0xd3 + .byte 0xa + .byte 0xf7 + .byte 0xe4 + .byte 0x58 + .byte 0x5 + .byte 0xb8 + .byte 0xb3 + .byte 0x45 + .byte 0x6 + .byte 0xd0 + .byte 0x2c + .byte 0x1e + .byte 0x8f + .byte 0xca + .byte 0x3f + .byte 0xf + .byte 0x2 + .byte 0xc1 + .byte 0xaf + .byte 0xbd + .byte 0x3 + .byte 0x1 + .byte 0x13 + .byte 0x8a + .byte 0x6b + .byte 0x3a + .byte 0x91 + .byte 0x11 + .byte 0x41 + .byte 0x4f + .byte 0x67 + .byte 0xdc + .byte 0xea + .byte 0x97 + .byte 0xf2 + .byte 0xcf + .byte 0xce + .byte 0xf0 + .byte 0xb4 + .byte 0xe6 + .byte 0x73 + .byte 0x96 + .byte 0xac + .byte 0x74 + .byte 0x22 + .byte 0xe7 + .byte 0xad + .byte 0x35 + .byte 0x85 + .byte 0xe2 + .byte 0xf9 + .byte 0x37 + .byte 0xe8 + .byte 0x1c + .byte 0x75 + .byte 0xdf + .byte 0x6e + .byte 0x47 + .byte 0xf1 + .byte 0x1a + .byte 0x71 + .byte 0x1d + .byte 0x29 + .byte 0xc5 + .byte 0x89 + .byte 0x6f + .byte 0xb7 + .byte 0x62 + .byte 0xe + .byte 0xaa + .byte 0x18 + .byte 0xbe + .byte 0x1b + .byte 0xfc + .byte 0x56 + .byte 0x3e + .byte 0x4b + .byte 0xc6 + .byte 0xd2 + .byte 0x79 + .byte 0x20 + .byte 0x9a + .byte 0xdb + .byte 0xc0 + .byte 0xfe + .byte 0x78 + .byte 0xcd + .byte 0x5a + .byte 0xf4 + .byte 0x1f + .byte 0xdd + .byte 0xa8 + .byte 0x33 + .byte 0x88 + .byte 0x7 + .byte 0xc7 + .byte 0x31 + .byte 0xb1 + .byte 0x12 + .byte 0x10 + .byte 0x59 + .byte 0x27 + .byte 0x80 + .byte 0xec + .byte 0x5f + .byte 0x60 + .byte 0x51 + .byte 0x7f + .byte 0xa9 + .byte 0x19 + .byte 0xb5 + .byte 0x4a + .byte 0xd + .byte 0x2d + .byte 0xe5 + .byte 0x7a + .byte 0x9f + .byte 0x93 + .byte 0xc9 + .byte 0x9c + .byte 0xef + .byte 0xa0 + .byte 0xe0 + .byte 0x3b + 
.byte 0x4d + .byte 0xae + .byte 0x2a + .byte 0xf5 + .byte 0xb0 + .byte 0xc8 + .byte 0xeb + .byte 0xbb + .byte 0x3c + .byte 0x83 + .byte 0x53 + .byte 0x99 + .byte 0x61 + .byte 0x17 + .byte 0x2b + .byte 0x4 + .byte 0x7e + .byte 0xba + .byte 0x77 + .byte 0xd6 + .byte 0x26 + .byte 0xe1 + .byte 0x69 + .byte 0x14 + .byte 0x63 + .byte 0x55 + .byte 0x21 + .byte 0xc + .byte 0x7d +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .align 4 + .globl AES_ECB_decrypt + .type AES_ECB_decrypt, %function +AES_ECB_decrypt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ldr r8, [sp, #36] + mov lr, r0 + adr r0, L_AES_ARM32_td_ecb + ldr r0, [r0] + mov r12, r2 + adr r2, L_AES_ARM32_td4 + cmp r8, #10 + beq L_AES_ECB_decrypt_start_block_128 + cmp r8, #12 + beq L_AES_ECB_decrypt_start_block_192 +L_AES_ECB_decrypt_loop_block_256: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r3, r12, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_decrypt_block + pop {r1, r3, r12, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + 
ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_ECB_decrypt_loop_block_256 + b L_AES_ECB_decrypt_end +L_AES_ECB_decrypt_start_block_192: +L_AES_ECB_decrypt_loop_block_192: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r3, r12, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_decrypt_block + pop {r1, r3, r12, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + 
add r1, r1, #16 + bne L_AES_ECB_decrypt_loop_block_192 + b L_AES_ECB_decrypt_end +L_AES_ECB_decrypt_start_block_128: +L_AES_ECB_decrypt_loop_block_128: + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + push {r1, r3, r12, lr} + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_decrypt_block + pop {r1, r3, r12, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_ECB_decrypt_loop_block_128 +L_AES_ECB_decrypt_end: + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_ECB_decrypt,.-AES_ECB_decrypt +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC + .text + .align 4 + .globl AES_CBC_decrypt + .type AES_CBC_decrypt, %function +AES_CBC_decrypt: + push 
{r4, r5, r6, r7, r8, r9, r10, r11, lr} + ldr r8, [sp, #36] + ldr r4, [sp, #40] + mov lr, r0 + adr r0, L_AES_ARM32_td_ecb + ldr r0, [r0] + mov r12, r2 + adr r2, L_AES_ARM32_td4 + push {r3, r4} + cmp r8, #10 + beq L_AES_CBC_decrypt_loop_block_128 + cmp r8, #12 + beq L_AES_CBC_decrypt_loop_block_192 +L_AES_CBC_decrypt_loop_block_256: + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr, #16] + str r5, [lr, #20] +#else + strd r4, r5, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #24] + str r7, [lr, #28] +#else + strd r6, r7, [lr, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH 
< 6 */ + ldm lr, {r8, r9, r10, r11} + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + beq L_AES_CBC_decrypt_end_odd + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr] + str r5, [lr, #4] +#else + strd r4, r5, [lr] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #8] + str r7, [lr, #12] +#else + strd r6, r7, [lr, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r8, [lr, #16] + ldr r9, [lr, #20] +#else + ldrd r8, r9, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [lr, #24] + ldr r11, [lr, #28] +#else + ldrd r10, r11, [lr, #24] +#endif + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_decrypt_loop_block_256 + b L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_loop_block_192: + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr, #16] + str r5, [lr, #20] +#else + strd r4, r5, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #24] + str r7, [lr, #28] +#else + strd r6, r7, [lr, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, 
#8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm lr, {r8, r9, r10, r11} + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + beq L_AES_CBC_decrypt_end_odd + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr] + str r5, [lr, #4] +#else + strd r4, r5, [lr] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #8] + str r7, [lr, #12] +#else + strd r6, r7, [lr, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor 
r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [lr, #16] + ldr r9, [lr, #20] +#else + ldrd r8, r9, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [lr, #24] + ldr r11, [lr, #28] +#else + ldrd r10, r11, [lr, #24] +#endif + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_decrypt_loop_block_192 + b L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_loop_block_128: + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr, #16] + str r5, [lr, #20] +#else + strd r4, r5, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #24] + str r7, [lr, #28] +#else + strd r6, r7, [lr, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm lr, {r8, r9, r10, r11} + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + beq L_AES_CBC_decrypt_end_odd + push {r1, r12, lr} + ldr r4, [lr] + ldr r5, [lr, #4] + ldr r6, [lr, #8] + ldr r7, [lr, #12] + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [lr] + str r5, [lr, #4] +#else + strd r4, r5, [lr] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [lr, #8] + str r7, [lr, #12] +#else + strd r6, r7, [lr, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldm r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_decrypt_block + ldr lr, [sp, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, 
ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [lr, #16] + ldr r9, [lr, #20] +#else + ldrd r8, r9, [lr, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [lr, #24] + ldr r11, [lr, #28] +#else + ldrd r10, r11, [lr, #24] +#endif + pop {r1, r12, lr} + ldr r3, [sp] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + subs r12, r12, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_CBC_decrypt_loop_block_128 + b L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_end_odd: + ldr r4, [sp, #4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r4, #16] + ldr r9, [r4, #20] +#else + ldrd r8, r9, [r4, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r4, #24] + ldr r11, [r4, #28] +#else + ldrd r10, r11, [r4, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r4] + str r9, [r4, #4] +#else + strd r8, r9, [r4] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r4, #8] + str r11, [r4, #12] +#else + strd r10, r11, [r4, #8] +#endif +L_AES_CBC_decrypt_end: + pop {r3, r4} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_CBC_decrypt,.-AES_CBC_decrypt +#endif /* HAVE_AES_CBC */ +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ +#endif /* HAVE_AES_DECRYPT */ +#ifdef HAVE_AESGCM + .text + .type L_GCM_gmult_len_r, %object + .size L_GCM_gmult_len_r, 64 + .align 4 
+L_GCM_gmult_len_r: + .word 0x0 + .word 0x1c200000 + .word 0x38400000 + .word 0x24600000 + .word 0x70800000 + .word 0x6ca00000 + .word 0x48c00000 + .word 0x54e00000 + .word 0xe1000000 + .word 0xfd200000 + .word 0xd9400000 + .word 0xc5600000 + .word 0x91800000 + .word 0x8da00000 + .word 0xa9c00000 + .word 0xb5e00000 + .text + .align 4 + .globl GCM_gmult_len + .type GCM_gmult_len, %function +GCM_gmult_len: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + adr lr, L_GCM_gmult_len_r +L_GCM_gmult_len_start_block: + push {r3} + ldr r12, [r0, #12] + ldr r3, [r2, #12] + eor r12, r12, r3 + lsr r3, r12, #24 + and r3, r3, #15 + add r3, r1, r3, lsl #4 + ldm r3, {r8, r9, r10, r11} + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #28 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #16 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #20 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #8 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, 
r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #12 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + and r4, r12, #15 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #4 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + ldr r12, [r0, #8] + ldr r3, [r2, #8] + eor r12, r12, r3 + lsr r3, r12, #24 + and r3, r3, #15 + add r3, r1, r3, lsl #4 + ldm r3, {r4, r5, r6, r7} + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #28 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr 
r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #16 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #20 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #8 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #12 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + and r4, r12, #15 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #4 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, 
r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + ldr r12, [r0, #4] + ldr r3, [r2, #4] + eor r12, r12, r3 + lsr r3, r12, #24 + and r3, r3, #15 + add r3, r1, r3, lsl #4 + ldm r3, {r4, r5, r6, r7} + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #28 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #16 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #20 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #8 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, 
r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #12 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + and r4, r12, #15 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #4 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + ldr r12, [r0] + ldr r3, [r2] + eor r12, r12, r3 + lsr r3, r12, #24 + and r3, r3, #15 + add r3, r1, r3, lsl #4 + ldm r3, {r4, r5, r6, r7} + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #28 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #16 + eor 
r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #20 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #8 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #12 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + and r4, r12, #15 + eor r11, r11, r10, lsl #28 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + lsr r6, r10, #4 + and r3, r11, #15 + lsr r11, r11, #4 + lsr r4, r12, #4 + eor r11, r11, r10, lsl #28 + and r4, r4, #15 + ldr r3, [lr, r3, lsl #2] + add r4, r1, r4, lsl #4 + eor r10, r6, r9, lsl #28 + lsr r9, r9, #4 + ldm r4, {r4, r5, r6, r7} + eor r9, r9, 
r8, lsl #28 + eor r8, r3, r8, lsr #4 + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + # REV r8, r8 + eor r3, r8, r8, ror #16 + bic r3, r3, #0xff0000 + ror r8, r8, #8 + eor r8, r8, r3, lsr #8 + # REV r9, r9 + eor r3, r9, r9, ror #16 + bic r3, r3, #0xff0000 + ror r9, r9, #8 + eor r9, r9, r3, lsr #8 + # REV r10, r10 + eor r3, r10, r10, ror #16 + bic r3, r3, #0xff0000 + ror r10, r10, #8 + eor r10, r10, r3, lsr #8 + # REV r11, r11 + eor r3, r11, r11, ror #16 + bic r3, r3, #0xff0000 + ror r11, r11, #8 + eor r11, r11, r3, lsr #8 +#else + rev r8, r8 + rev r9, r9 + rev r10, r10 + rev r11, r11 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r0, {r8, r9, r10, r11} + pop {r3} + subs r3, r3, #16 + add r2, r2, #16 + bne L_GCM_gmult_len_start_block + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size GCM_gmult_len,.-GCM_gmult_len + .text + .type L_AES_ARM32_te_gcm, %object + .size L_AES_ARM32_te_gcm, 12 + .align 4 +L_AES_ARM32_te_gcm: + .word L_AES_ARM32_te_data + .text + .align 4 + .globl AES_GCM_encrypt + .type AES_GCM_encrypt, %function +AES_GCM_encrypt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ldr r12, [sp, #36] + ldr r8, [sp, #40] + mov lr, r0 + adr r0, L_AES_ARM32_te_gcm + ldr r0, [r0] + ldm r8, {r4, r5, r6, r7} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r10, r4, r4, ror #16 + eor r11, r5, r5, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + eor r4, r4, r10, lsr #8 + eor r5, r5, r11, lsr #8 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r6, r6, #8 + ror r7, r7, #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r8, {r4, r5, r6, r7} + push {r3, r8} + cmp r12, #10 + beq L_AES_GCM_encrypt_start_block_128 + cmp r12, #12 + beq 
L_AES_GCM_encrypt_start_block_192 +L_AES_GCM_encrypt_loop_block_256: + push {r1, r2, lr} + ldr lr, [sp, #16] + add r7, r7, #1 + ldm r3!, {r8, r9, r10, r11} + str r7, [lr, #12] + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #6 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_GCM_encrypt_loop_block_256 + b L_AES_GCM_encrypt_end +L_AES_GCM_encrypt_start_block_192: +L_AES_GCM_encrypt_loop_block_192: + push {r1, r2, lr} + ldr lr, [sp, #16] + add r7, r7, #1 + ldm r3!, {r8, r9, r10, r11} + str r7, [lr, #12] + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #5 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor 
r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_GCM_encrypt_loop_block_192 + b L_AES_GCM_encrypt_end +L_AES_GCM_encrypt_start_block_128: +L_AES_GCM_encrypt_loop_block_128: + push {r1, r2, lr} + ldr lr, [sp, #16] + add r7, r7, #1 + ldm r3!, {r8, r9, r10, r11} + str r7, [lr, #12] + # Round: 0 - XOR in key schedule + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + mov r1, #4 + bl AES_encrypt_block + pop {r1, r2, lr} + ldr r3, [sp] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r8, [lr] + ldr r9, [lr, #4] + ldr r10, [lr, #8] + ldr r11, [lr, #12] + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldr r8, [sp, #4] + str r4, [r1] + str r5, [r1, #4] + str r6, [r1, #8] + str r7, [r1, #12] + ldm r8, {r4, r5, r6, r7} + subs r2, r2, #16 + add lr, lr, #16 + add r1, r1, #16 + bne L_AES_GCM_encrypt_loop_block_128 +L_AES_GCM_encrypt_end: + pop {r3, r8} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + eor r10, r4, r4, ror #16 + eor r11, r5, r5, ror #16 + bic r10, r10, 
#0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + eor r4, r4, r10, lsr #8 + eor r5, r5, r11, lsr #8 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r6, r6, #8 + ror r7, r7, #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#else + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + stm r8, {r4, r5, r6, r7} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size AES_GCM_encrypt,.-AES_GCM_encrypt +#endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ +#endif /* !__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c new file mode 100644 index 000000000..f6561c2a0 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -0,0 +1,4784 @@ +/* armv8-32-aes-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. 
+ * + * https://www.wolfssl.com + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./aes/aes.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#include +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +#ifndef NO_AES +#include + +#ifdef HAVE_AES_DECRYPT +static const uint32_t L_AES_ARM32_td_data[] = { + 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, + 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, + 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, + 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, + 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, + 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, + 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, + 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, + 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, + 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, + 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, + 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, + 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, + 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, + 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, + 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, + 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, + 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, + 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, + 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, + 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, + 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, + 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, + 
0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, + 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, + 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, + 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, + 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, + 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, + 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, + 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, + 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, + 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, + 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, + 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, + 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, + 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, + 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, + 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, + 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, + 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, + 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, + 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, + 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, + 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, + 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, + 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, + 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, + 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, + 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, + 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, + 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, + 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, + 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, + 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, + 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, + 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, + 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, + 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, + 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, + 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, + 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, + 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, + 
0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, +}; + +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t L_AES_ARM32_te_data[] = { + 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, + 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, + 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, + 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, + 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, + 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, + 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, + 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, + 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, + 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, + 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, + 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, + 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, + 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, + 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, + 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, + 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, + 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, + 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, + 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, + 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, + 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, + 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, + 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, + 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, + 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, + 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, + 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, + 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, + 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, + 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, + 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, + 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, + 0xe1be5f5f, 0xa2359797, 0xcc884444, 
0x392e1717, + 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, + 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, + 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, + 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, + 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, + 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, + 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, + 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, + 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, + 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, + 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, + 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, + 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, + 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, + 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, + 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, + 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, + 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, + 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, + 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, + 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, + 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, + 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, + 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, + 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, + 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, + 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, + 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, + 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, + 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, +}; + +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +static const uint32_t* L_AES_ARM32_td = L_AES_ARM32_td_data; +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t* L_AES_ARM32_te = L_AES_ARM32_te_data; +#endif /* 
HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +void AES_invert_key(unsigned char* ks, word32 rounds); +void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +{ + register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; + register word32 rounds asm ("r1") = (word32)rounds_p; + register uint32_t* L_AES_ARM32_te_c asm ("r2") = (uint32_t*)L_AES_ARM32_te; + register uint32_t* L_AES_ARM32_td_c asm ("r3") = (uint32_t*)L_AES_ARM32_td; + + __asm__ __volatile__ ( + "mov r12, %[L_AES_ARM32_te]\n\t" + "mov lr, %[L_AES_ARM32_td]\n\t" + "add r10, %[ks], %[rounds], lsl #4\n\t" + "mov r11, %[rounds]\n\t" + "\n" + "L_AES_invert_key_loop_%=: \n\t" + "ldm %[ks], {r2, r3, r4, r5}\n\t" + "ldm r10, {r6, r7, r8, r9}\n\t" + "stm r10, {r2, r3, r4, r5}\n\t" + "stm %[ks]!, {r6, r7, r8, r9}\n\t" + "subs r11, r11, #2\n\t" + "sub r10, r10, #16\n\t" + "bne L_AES_invert_key_loop_%=\n\t" + "sub %[ks], %[ks], %[rounds], lsl #3\n\t" + "add %[ks], %[ks], #16\n\t" + "sub r11, %[rounds], #1\n\t" + "\n" + "L_AES_invert_key_mix_loop_%=: \n\t" + "ldm %[ks], {r2, r3, r4, r5}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r2, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r2\n\t" +#endif +#else + "ubfx r6, r2, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r2, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r2, ror #8\n\t" +#endif +#else + "ubfx r7, r2, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r2, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r2, ror #16\n\t" +#endif +#else + "ubfx r8, r2, #16, #8\n\t" +#endif + "lsr r9, r2, #24\n\t" + "ldrb r6, [r12, r6, lsl #2]\n\t" + "ldrb r7, [r12, r7, lsl #2]\n\t" + "ldrb r8, 
[r12, r8, lsl #2]\n\t" + "ldrb r9, [r12, r9, lsl #2]\n\t" + "ldr r6, [lr, r6, lsl #2]\n\t" + "ldr r7, [lr, r7, lsl #2]\n\t" + "ldr r8, [lr, r8, lsl #2]\n\t" + "ldr r9, [lr, r9, lsl #2]\n\t" + "eor r8, r8, r6, ror #16\n\t" + "eor r8, r8, r7, ror #8\n\t" + "eor r8, r8, r9, ror #24\n\t" + "str r8, [%[ks]], #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r3, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r3\n\t" +#endif +#else + "ubfx r6, r3, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r3, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r3, ror #8\n\t" +#endif +#else + "ubfx r7, r3, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r3, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r3, ror #16\n\t" +#endif +#else + "ubfx r8, r3, #16, #8\n\t" +#endif + "lsr r9, r3, #24\n\t" + "ldrb r6, [r12, r6, lsl #2]\n\t" + "ldrb r7, [r12, r7, lsl #2]\n\t" + "ldrb r8, [r12, r8, lsl #2]\n\t" + "ldrb r9, [r12, r9, lsl #2]\n\t" + "ldr r6, [lr, r6, lsl #2]\n\t" + "ldr r7, [lr, r7, lsl #2]\n\t" + "ldr r8, [lr, r8, lsl #2]\n\t" + "ldr r9, [lr, r9, lsl #2]\n\t" + "eor r8, r8, r6, ror #16\n\t" + "eor r8, r8, r7, ror #8\n\t" + "eor r8, r8, r9, ror #24\n\t" + "str r8, [%[ks]], #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r4, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r4\n\t" +#endif +#else + "ubfx r6, r4, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r4, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r4, ror #8\n\t" +#endif +#else + "ubfx r7, r4, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r4, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r4, ror #16\n\t" +#endif +#else + "ubfx r8, r4, #16, #8\n\t" +#endif + "lsr r9, r4, #24\n\t" + "ldrb r6, [r12, r6, lsl #2]\n\t" + "ldrb r7, [r12, r7, lsl #2]\n\t" + "ldrb r8, [r12, r8, lsl #2]\n\t" + "ldrb r9, [r12, r9, lsl #2]\n\t" + "ldr r6, [lr, r6, lsl #2]\n\t" + "ldr r7, [lr, r7, lsl #2]\n\t" + "ldr r8, [lr, r8, lsl #2]\n\t" + "ldr r9, [lr, r9, lsl #2]\n\t" + "eor r8, r8, r6, ror #16\n\t" + "eor r8, r8, r7, ror #8\n\t" + "eor r8, r8, r9, ror #24\n\t" + "str r8, [%[ks]], #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r5, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r5\n\t" +#endif +#else + "ubfx r6, r5, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r5, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r5, ror #8\n\t" +#endif +#else + "ubfx r7, r5, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r5, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r5, ror #16\n\t" +#endif +#else + "ubfx r8, r5, #16, #8\n\t" +#endif + "lsr r9, r5, #24\n\t" + "ldrb r6, [r12, r6, lsl #2]\n\t" + "ldrb r7, [r12, r7, lsl #2]\n\t" + "ldrb r8, [r12, r8, lsl #2]\n\t" + "ldrb r9, [r12, r9, lsl #2]\n\t" + "ldr r6, [lr, r6, lsl #2]\n\t" + "ldr r7, [lr, r7, lsl #2]\n\t" + "ldr r8, [lr, r8, lsl #2]\n\t" + "ldr r9, [lr, r9, lsl #2]\n\t" + "eor r8, r8, r6, ror #16\n\t" + "eor r8, r8, r7, ror #8\n\t" + "eor r8, r8, r9, ror #24\n\t" + "str r8, [%[ks]], #4\n\t" + "subs r11, r11, #1\n\t" + "bne L_AES_invert_key_mix_loop_%=\n\t" + : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c) + : + : 
"memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* HAVE_AES_DECRYPT */ +static const uint32_t L_AES_ARM32_rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1b000000, 0x36000000, +}; + +void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); +void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +{ + register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; + register word32 len asm ("r1") = (word32)len_p; + register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; + register uint32_t* L_AES_ARM32_te_c asm ("r3") = (uint32_t*)L_AES_ARM32_te; + register uint32_t* L_AES_ARM32_rcon_c asm ("r4") = (uint32_t*)&L_AES_ARM32_rcon; + + __asm__ __volatile__ ( + "mov r8, %[L_AES_ARM32_te]\n\t" + "mov lr, %[L_AES_ARM32_rcon]\n\t" + "cmp %[len], #0x80\n\t" + "beq L_AES_set_encrypt_key_start_128_%=\n\t" + "cmp %[len], #0xc0\n\t" + "beq L_AES_set_encrypt_key_start_192_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[key]]\n\t" + "ldr r5, [%[key], #4]\n\t" +#else + "ldrd r4, r5, [%[key]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[key], #8]\n\t" + "ldr r7, [%[key], #12]\n\t" +#else + "ldrd r6, r7, [%[key], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + /* REV r4, r4 */ + "eor r3, r4, r4, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "eor r4, r4, r3, lsr #8\n\t" + /* REV r5, r5 */ + "eor r3, r5, r5, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r5, r5, #8\n\t" + "eor r5, r5, r3, lsr #8\n\t" + /* REV r6, r6 */ + "eor r3, r6, r6, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "eor r6, r6, r3, lsr #8\n\t" + /* REV r7, r7 */ + "eor r3, r7, r7, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r7, r7, #8\n\t" + "eor r7, r7, r3, lsr #8\n\t" +#else + 
"rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm %[ks]!, {r4, r5, r6, r7}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[key], #16]\n\t" + "ldr r5, [%[key], #20]\n\t" +#else + "ldrd r4, r5, [%[key], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[key], #24]\n\t" + "ldr r7, [%[key], #28]\n\t" +#else + "ldrd r6, r7, [%[key], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + /* REV r4, r4 */ + "eor r3, r4, r4, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "eor r4, r4, r3, lsr #8\n\t" + /* REV r5, r5 */ + "eor r3, r5, r5, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r5, r5, #8\n\t" + "eor r5, r5, r3, lsr #8\n\t" + /* REV r6, r6 */ + "eor r3, r6, r6, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "eor r6, r6, r3, lsr #8\n\t" + /* REV r7, r7 */ + "eor r3, r7, r7, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r7, r7, #8\n\t" + "eor r7, r7, r3, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm %[ks], {r4, r5, r6, r7}\n\t" + "sub %[ks], %[ks], #16\n\t" + "mov r12, #6\n\t" + "\n" + "L_AES_set_encrypt_key_loop_256_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r7, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r7\n\t" +#endif +#else + "ubfx r4, r7, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r7, #16\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r7, ror #8\n\t" +#endif +#else + "ubfx r5, r7, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r7, #8\n\t" + 
"lsr r6, r6, #24\n\t" +#else + "uxtb r6, r7, ror #16\n\t" +#endif +#else + "ubfx r6, r7, #16, #8\n\t" +#endif + "lsr r7, r7, #24\n\t" + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r5, [r8, r5, lsl #2]\n\t" + "ldrb r6, [r8, r6, lsl #2]\n\t" + "ldrb r7, [r8, r7, lsl #2]\n\t" + "eor r3, r7, r4, lsl #8\n\t" + "eor r3, r3, r5, lsl #16\n\t" + "eor r3, r3, r6, lsl #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r4, r4, r3\n\t" + "ldm lr!, {r3}\n\t" + "eor r4, r4, r3\n\t" + "eor r5, r5, r4\n\t" + "eor r6, r6, r5\n\t" + "eor r7, r7, r6\n\t" + "add %[ks], %[ks], #16\n\t" + "stm %[ks], {r4, r5, r6, r7}\n\t" + "sub %[ks], %[ks], #16\n\t" + "mov r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r3, #16\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r3, ror #8\n\t" +#endif +#else + "ubfx r4, r3, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r3, #8\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r3, ror #16\n\t" +#endif +#else + "ubfx r5, r3, #16, #8\n\t" +#endif + "lsr r6, r3, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r3, r3, #24\n\t" + "lsr r3, r3, #24\n\t" +#else + "uxtb r3, r3\n\t" +#endif +#else + "ubfx r3, r3, #0, #8\n\t" +#endif + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r6, [r8, r6, lsl #2]\n\t" + "ldrb r5, [r8, r5, lsl #2]\n\t" + "ldrb r3, [r8, r3, lsl #2]\n\t" + "eor r3, r3, r4, lsl #8\n\t" + "eor r3, r3, r5, lsl #16\n\t" + "eor r3, r3, r6, lsl #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r4, r4, r3\n\t" + "eor r5, r5, r4\n\t" + "eor r6, r6, r5\n\t" + "eor r7, r7, r6\n\t" + "add %[ks], %[ks], #16\n\t" + "stm %[ks], {r4, r5, r6, r7}\n\t" + "sub %[ks], %[ks], #16\n\t" + "subs r12, r12, #1\n\t" + "bne L_AES_set_encrypt_key_loop_256_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r7, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r7\n\t" +#endif +#else + "ubfx r4, r7, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r7, #16\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r7, ror #8\n\t" +#endif +#else + "ubfx r5, r7, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r7, #8\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r7, ror #16\n\t" +#endif +#else + "ubfx r6, r7, #16, #8\n\t" +#endif + "lsr r7, r7, #24\n\t" + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r5, [r8, r5, lsl #2]\n\t" + "ldrb r6, [r8, r6, lsl #2]\n\t" + "ldrb r7, [r8, r7, lsl #2]\n\t" + "eor r3, r7, r4, lsl #8\n\t" + "eor r3, r3, r5, lsl #16\n\t" + "eor r3, r3, r6, lsl #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r4, r4, r3\n\t" + "ldm lr!, {r3}\n\t" + "eor r4, r4, r3\n\t" + "eor r5, r5, r4\n\t" + "eor r6, r6, r5\n\t" + "eor r7, r7, r6\n\t" + "add %[ks], %[ks], #16\n\t" + "stm %[ks], {r4, r5, r6, r7}\n\t" + "sub %[ks], %[ks], #16\n\t" + "b L_AES_set_encrypt_key_end_%=\n\t" + "\n" + "L_AES_set_encrypt_key_start_192_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[key]]\n\t" + "ldr r5, [%[key], #4]\n\t" +#else + "ldrd r4, r5, [%[key]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[key], #8]\n\t" + "ldr r7, [%[key], #12]\n\t" +#else + "ldrd r6, r7, [%[key], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[len], [%[key], #20]\n\t" + "ldr %[key], [%[key], #16]\n\t" +#else + "ldrd %[key], %[len], [%[key], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + /* REV r4, r4 */ + "eor r3, r4, r4, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "eor r4, r4, r3, lsr #8\n\t" + /* 
REV r5, r5 */ + "eor r3, r5, r5, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r5, r5, #8\n\t" + "eor r5, r5, r3, lsr #8\n\t" + /* REV r6, r6 */ + "eor r3, r6, r6, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "eor r6, r6, r3, lsr #8\n\t" + /* REV r7, r7 */ + "eor r3, r7, r7, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r7, r7, #8\n\t" + "eor r7, r7, r3, lsr #8\n\t" + /* REV r0, r0 */ + "eor r3, %[key], %[key], ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror %[key], %[key], #8\n\t" + "eor %[key], %[key], r3, lsr #8\n\t" + /* REV r1, r1 */ + "eor r3, %[len], %[len], ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror %[len], %[len], #8\n\t" + "eor %[len], %[len], r3, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev %[key], %[key]\n\t" + "rev %[len], %[len]\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm %[ks], {r4, r5, r6, r7}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[key], [%[ks], #16]\n\t" + "str %[len], [%[ks], #20]\n\t" +#else + "strd %[key], %[len], [%[ks], #16]\n\t" +#endif + "mov r7, %[len]\n\t" + "mov r12, #7\n\t" + "\n" + "L_AES_set_encrypt_key_loop_192_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r0, r7, #24\n\t" + "lsr r0, r0, #24\n\t" +#else + "uxtb r0, r7\n\t" +#endif +#else + "ubfx r0, r7, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r1, r7, #16\n\t" + "lsr r1, r1, #24\n\t" +#else + "uxtb r1, r7, ror #8\n\t" +#endif +#else + "ubfx r1, r7, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r7, #8\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r7, ror #16\n\t" +#endif +#else + "ubfx r4, r7, #16, #8\n\t" +#endif + "lsr r7, r7, #24\n\t" + "ldrb r0, [r8, 
r0, lsl #2]\n\t" + "ldrb r1, [r8, r1, lsl #2]\n\t" + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r7, [r8, r7, lsl #2]\n\t" + "eor r3, r7, r0, lsl #8\n\t" + "eor r3, r3, r1, lsl #16\n\t" + "eor r3, r3, r4, lsl #24\n\t" + "ldm %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" + "eor r0, r0, r3\n\t" + "ldm lr!, {r3}\n\t" + "eor r0, r0, r3\n\t" + "eor r1, r1, r0\n\t" + "eor r4, r4, r1\n\t" + "eor r5, r5, r4\n\t" + "eor r6, r6, r5\n\t" + "eor r7, r7, r6\n\t" + "stm %[ks], {r0, r1, r4, r5, r6, r7}\n\t" + "subs r12, r12, #1\n\t" + "bne L_AES_set_encrypt_key_loop_192_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r0, r7, #24\n\t" + "lsr r0, r0, #24\n\t" +#else + "uxtb r0, r7\n\t" +#endif +#else + "ubfx r0, r7, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r1, r7, #16\n\t" + "lsr r1, r1, #24\n\t" +#else + "uxtb r1, r7, ror #8\n\t" +#endif +#else + "ubfx r1, r7, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r7, #8\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r7, ror #16\n\t" +#endif +#else + "ubfx r4, r7, #16, #8\n\t" +#endif + "lsr r7, r7, #24\n\t" + "ldrb r0, [r8, r0, lsl #2]\n\t" + "ldrb r1, [r8, r1, lsl #2]\n\t" + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r7, [r8, r7, lsl #2]\n\t" + "eor r3, r7, r0, lsl #8\n\t" + "eor r3, r3, r1, lsl #16\n\t" + "eor r3, r3, r4, lsl #24\n\t" + "ldm %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" + "eor r0, r0, r3\n\t" + "ldm lr!, {r3}\n\t" + "eor r0, r0, r3\n\t" + "eor r1, r1, r0\n\t" + "eor r4, r4, r1\n\t" + "eor r5, r5, r4\n\t" + "stm %[ks], {r0, r1, r4, r5}\n\t" + "b L_AES_set_encrypt_key_end_%=\n\t" + "\n" + "L_AES_set_encrypt_key_start_128_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[key]]\n\t" + "ldr r5, [%[key], #4]\n\t" +#else + "ldrd r4, r5, 
[%[key]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[key], #8]\n\t" + "ldr r7, [%[key], #12]\n\t" +#else + "ldrd r6, r7, [%[key], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + /* REV r4, r4 */ + "eor r3, r4, r4, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "eor r4, r4, r3, lsr #8\n\t" + /* REV r5, r5 */ + "eor r3, r5, r5, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r5, r5, #8\n\t" + "eor r5, r5, r3, lsr #8\n\t" + /* REV r6, r6 */ + "eor r3, r6, r6, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "eor r6, r6, r3, lsr #8\n\t" + /* REV r7, r7 */ + "eor r3, r7, r7, ror #16\n\t" + "bic r3, r3, #0xff0000\n\t" + "ror r7, r7, #8\n\t" + "eor r7, r7, r3, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm %[ks], {r4, r5, r6, r7}\n\t" + "mov r12, #10\n\t" + "\n" + "L_AES_set_encrypt_key_loop_128_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r7, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r7\n\t" +#endif +#else + "ubfx r4, r7, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r7, #16\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r7, ror #8\n\t" +#endif +#else + "ubfx r5, r7, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r7, #8\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r7, ror #16\n\t" +#endif +#else + "ubfx r6, r7, #16, #8\n\t" +#endif + "lsr r7, r7, #24\n\t" + "ldrb r4, [r8, r4, lsl #2]\n\t" + "ldrb r5, [r8, r5, lsl #2]\n\t" + "ldrb r6, [r8, r6, lsl #2]\n\t" + "ldrb r7, [r8, r7, lsl #2]\n\t" + "eor r3, r7, r4, lsl #8\n\t" + "eor r3, r3, r5, lsl #16\n\t" + "eor 
r3, r3, r6, lsl #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r4, r4, r3\n\t" + "ldm lr!, {r3}\n\t" + "eor r4, r4, r3\n\t" + "eor r5, r5, r4\n\t" + "eor r6, r6, r5\n\t" + "eor r7, r7, r6\n\t" + "stm %[ks], {r4, r5, r6, r7}\n\t" + "subs r12, r12, #1\n\t" + "bne L_AES_set_encrypt_key_loop_128_%=\n\t" + "\n" + "L_AES_set_encrypt_key_end_%=: \n\t" + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "cc" + ); +} + +void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); +void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p) +{ + register const uint32_t* te asm ("r0") = (const uint32_t*)te_p; + register int nr asm ("r1") = (int)nr_p; + register int len asm ("r2") = (int)len_p; + register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; + + __asm__ __volatile__ ( + "\n" + "L_AES_encrypt_block_nr_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r5, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r5, ror #16\n\t" +#endif +#else + "ubfx r8, r5, #16, #8\n\t" +#endif + "lsr r11, r4, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r6, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r6, ror #8\n\t" +#endif +#else + "ubfx lr, r6, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r7, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r7\n\t" +#endif +#else + "ubfx r2, r7, #0, #8\n\t" +#endif + "ldr r8, [%[te], r8, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r9, r6, #8\n\t" + "lsr r9, r9, #24\n\t" +#else + "uxtb r9, r6, ror #16\n\t" +#endif +#else + "ubfx r9, r6, #16, #8\n\t" +#endif + "eor r8, r8, r11, ror #24\n\t" + "lsr r11, r5, #24\n\t" + "eor r8, r8, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r7, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r7, ror #8\n\t" +#endif +#else + "ubfx lr, r7, #8, #8\n\t" +#endif + "eor r8, r8, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r4, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r4\n\t" +#endif +#else + "ubfx r2, r4, #0, #8\n\t" +#endif + "ldr r9, [%[te], r9, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r10, r7, #8\n\t" + "lsr r10, r10, #24\n\t" +#else + "uxtb r10, r7, ror #16\n\t" +#endif +#else + "ubfx r10, r7, #16, #8\n\t" +#endif + "eor r9, r9, r11, ror #24\n\t" + "lsr r11, r6, #24\n\t" + "eor r9, r9, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r4, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r4, ror #8\n\t" +#endif +#else + "ubfx lr, r4, #8, #8\n\t" +#endif + "eor r9, r9, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r5, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r5\n\t" +#endif +#else + "ubfx r2, r5, #0, #8\n\t" +#endif + "ldr r10, [%[te], r10, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r6, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r6\n\t" +#endif +#else + "ubfx r6, r6, #0, #8\n\t" +#endif + "eor r10, r10, r11, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r11, r4, #8\n\t" + "lsr r11, r11, #24\n\t" +#else + "uxtb r11, r4, ror #16\n\t" +#endif +#else + "ubfx r11, r4, #16, #8\n\t" +#endif + "eor r10, r10, lr, ror #8\n\t" + "lsr lr, r7, #24\n\t" + "eor r10, r10, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r5, #16\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r5, ror #8\n\t" +#endif +#else + "ubfx r2, r5, #8, #8\n\t" +#endif + "ldr r6, [%[te], r6, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" + "eor lr, lr, r6, ror #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r11, r11, lr, ror #24\n\t" + "eor r11, r11, r2, ror #8\n\t" + /* XOR in Key Schedule */ + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r9, #8\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r9, ror #16\n\t" +#endif +#else + "ubfx r4, r9, #16, #8\n\t" +#endif + "lsr r7, r8, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r10, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r10, ror #8\n\t" +#endif +#else + "ubfx lr, r10, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r11, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r11\n\t" +#endif +#else + "ubfx r2, r11, #0, #8\n\t" 
+#endif + "ldr r4, [%[te], r4, lsl #2]\n\t" + "ldr r7, [%[te], r7, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r10, #8\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r10, ror #16\n\t" +#endif +#else + "ubfx r5, r10, #16, #8\n\t" +#endif + "eor r4, r4, r7, ror #24\n\t" + "lsr r7, r9, #24\n\t" + "eor r4, r4, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r11, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r11, ror #8\n\t" +#endif +#else + "ubfx lr, r11, #8, #8\n\t" +#endif + "eor r4, r4, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r8, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r8\n\t" +#endif +#else + "ubfx r2, r8, #0, #8\n\t" +#endif + "ldr r5, [%[te], r5, lsl #2]\n\t" + "ldr r7, [%[te], r7, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r11, #8\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r11, ror #16\n\t" +#endif +#else + "ubfx r6, r11, #16, #8\n\t" +#endif + "eor r5, r5, r7, ror #24\n\t" + "lsr r7, r10, #24\n\t" + "eor r5, r5, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r8, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r8, ror #8\n\t" +#endif +#else + "ubfx lr, r8, #8, #8\n\t" +#endif + "eor r5, r5, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r9, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r9\n\t" +#endif +#else + "ubfx r2, r9, 
#0, #8\n\t" +#endif + "ldr r6, [%[te], r6, lsl #2]\n\t" + "ldr r7, [%[te], r7, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r10, r10, #24\n\t" + "lsr r10, r10, #24\n\t" +#else + "uxtb r10, r10\n\t" +#endif +#else + "ubfx r10, r10, #0, #8\n\t" +#endif + "eor r6, r6, r7, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r8, #8\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r8, ror #16\n\t" +#endif +#else + "ubfx r7, r8, #16, #8\n\t" +#endif + "eor r6, r6, lr, ror #8\n\t" + "lsr lr, r11, #24\n\t" + "eor r6, r6, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r9, #16\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r9, ror #8\n\t" +#endif +#else + "ubfx r2, r9, #8, #8\n\t" +#endif + "ldr r10, [%[te], r10, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r7, [%[te], r7, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" + "eor lr, lr, r10, ror #24\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + "eor r7, r7, lr, ror #24\n\t" + "eor r7, r7, r2, ror #8\n\t" + /* XOR in Key Schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "subs %[nr], %[nr], #1\n\t" + "bne L_AES_encrypt_block_nr_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r5, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r5, ror #16\n\t" +#endif +#else + "ubfx r8, r5, #16, #8\n\t" +#endif + "lsr r11, r4, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r6, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r6, ror #8\n\t" +#endif +#else + "ubfx lr, 
r6, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r7, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r7\n\t" +#endif +#else + "ubfx r2, r7, #0, #8\n\t" +#endif + "ldr r8, [%[te], r8, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r9, r6, #8\n\t" + "lsr r9, r9, #24\n\t" +#else + "uxtb r9, r6, ror #16\n\t" +#endif +#else + "ubfx r9, r6, #16, #8\n\t" +#endif + "eor r8, r8, r11, ror #24\n\t" + "lsr r11, r5, #24\n\t" + "eor r8, r8, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r7, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r7, ror #8\n\t" +#endif +#else + "ubfx lr, r7, #8, #8\n\t" +#endif + "eor r8, r8, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r4, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r4\n\t" +#endif +#else + "ubfx r2, r4, #0, #8\n\t" +#endif + "ldr r9, [%[te], r9, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r10, r7, #8\n\t" + "lsr r10, r10, #24\n\t" +#else + "uxtb r10, r7, ror #16\n\t" +#endif +#else + "ubfx r10, r7, #16, #8\n\t" +#endif + "eor r9, r9, r11, ror #24\n\t" + "lsr r11, r6, #24\n\t" + "eor r9, r9, lr, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r4, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r4, ror #8\n\t" +#endif +#else + "ubfx lr, r4, #8, #8\n\t" 
+#endif + "eor r9, r9, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r5, #24\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r5\n\t" +#endif +#else + "ubfx r2, r5, #0, #8\n\t" +#endif + "ldr r10, [%[te], r10, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r6, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r6\n\t" +#endif +#else + "ubfx r6, r6, #0, #8\n\t" +#endif + "eor r10, r10, r11, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r11, r4, #8\n\t" + "lsr r11, r11, #24\n\t" +#else + "uxtb r11, r4, ror #16\n\t" +#endif +#else + "ubfx r11, r4, #16, #8\n\t" +#endif + "eor r10, r10, lr, ror #8\n\t" + "lsr lr, r7, #24\n\t" + "eor r10, r10, r2, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r5, #16\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r5, ror #8\n\t" +#endif +#else + "ubfx r2, r5, #8, #8\n\t" +#endif + "ldr r6, [%[te], r6, lsl #2]\n\t" + "ldr lr, [%[te], lr, lsl #2]\n\t" + "ldr r11, [%[te], r11, lsl #2]\n\t" + "ldr r2, [%[te], r2, lsl #2]\n\t" + "eor lr, lr, r6, ror #24\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" + "eor r11, r11, lr, ror #24\n\t" + "eor r11, r11, r2, ror #8\n\t" + /* XOR in Key Schedule */ + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r11, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r11\n\t" +#endif +#else + "ubfx r4, r11, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r10, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r10, ror #8\n\t" +#endif +#else + "ubfx r7, r10, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r9, #8\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r9, ror #16\n\t" +#endif +#else + "ubfx lr, r9, #16, #8\n\t" +#endif + "lsr r2, r8, #24\n\t" + "ldrb r4, [%[te], r4, lsl #2]\n\t" + "ldrb r7, [%[te], r7, lsl #2]\n\t" + "ldrb lr, [%[te], lr, lsl #2]\n\t" + "ldrb r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r8, #24\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r8\n\t" +#endif +#else + "ubfx r5, r8, #0, #8\n\t" +#endif + "eor r4, r4, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r11, ror #8\n\t" +#endif +#else + "ubfx r7, r11, #8, #8\n\t" +#endif + "eor r4, r4, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r10, #8\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r10, ror #16\n\t" +#endif +#else + "ubfx lr, r10, #16, #8\n\t" +#endif + "eor r4, r4, r2, lsl #24\n\t" + "lsr r2, r9, #24\n\t" + "ldrb r5, [%[te], r5, lsl #2]\n\t" + "ldrb r7, [%[te], r7, lsl #2]\n\t" + "ldrb lr, [%[te], lr, lsl #2]\n\t" + "ldrb r2, [%[te], r2, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r9, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r9\n\t" +#endif +#else + "ubfx r6, r9, #0, #8\n\t" +#endif + "eor r5, r5, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 6) + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r8, ror #8\n\t" +#endif +#else + "ubfx r7, r8, #8, #8\n\t" +#endif + "eor r5, r5, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r11, #8\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r11, ror #16\n\t" +#endif +#else + "ubfx lr, r11, #16, #8\n\t" +#endif + "eor r5, r5, r2, lsl #24\n\t" + "lsr r2, r10, #24\n\t" + "ldrb r6, [%[te], r6, lsl #2]\n\t" + "ldrb r7, [%[te], r7, lsl #2]\n\t" + "ldrb lr, [%[te], lr, lsl #2]\n\t" + "ldrb r2, [%[te], r2, lsl #2]\n\t" + "lsr r11, r11, #24\n\t" + "eor r6, r6, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r10, #24\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r10\n\t" +#endif +#else + "ubfx r7, r10, #0, #8\n\t" +#endif + "eor r6, r6, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r9, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r9, ror #8\n\t" +#endif +#else + "ubfx lr, r9, #8, #8\n\t" +#endif + "eor r6, r6, r2, lsl #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r2, r8, #8\n\t" + "lsr r2, r2, #24\n\t" +#else + "uxtb r2, r8, ror #16\n\t" +#endif +#else + "ubfx r2, r8, #16, #8\n\t" +#endif + "ldrb r11, [%[te], r11, lsl #2]\n\t" + "ldrb r7, [%[te], r7, lsl #2]\n\t" + "ldrb lr, [%[te], lr, lsl #2]\n\t" + "ldrb r2, [%[te], r2, lsl #2]\n\t" + "eor lr, lr, r11, lsl #16\n\t" + "ldm %[ks], {r8, r9, r10, r11}\n\t" + "eor r7, r7, lr, lsl #8\n\t" + "eor r7, r7, r2, lsl #16\n\t" + /* XOR in Key Schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) + : + : "memory", 
"lr", "cc" + ); +} + +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data; +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_ARM32_te_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_te_ecb; + + __asm__ __volatile__ ( + "mov lr, %[in]\n\t" + "mov r0, %[L_AES_ARM32_te_ecb]\n\t" + "mov r12, r4\n\t" + "push {%[ks]}\n\t" + "cmp r12, #10\n\t" + "beq L_AES_ECB_encrypt_start_block_128_%=\n\t" + "cmp r12, #12\n\t" + "beq L_AES_ECB_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_256_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[len], lr}\n\t" + "ldm %[ks]!, {r8, 
r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_encrypt_loop_block_256_%=\n\t" + "b L_AES_ECB_encrypt_end_%=\n\t" + "\n" + "L_AES_ECB_encrypt_start_block_192_%=: \n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_192_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, 
r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[len], lr}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_encrypt_loop_block_192_%=\n\t" + "b L_AES_ECB_encrypt_end_%=\n\t" + "\n" + "L_AES_ECB_encrypt_start_block_128_%=: \n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_128_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr 
#8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[len], lr}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_ECB_encrypt_end_%=: \n\t" + "pop {%[ks]}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) + : + : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC +static const uint32_t* L_AES_ARM32_te_cbc = L_AES_ARM32_te_data; +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, 
int nr, unsigned char* iv); +void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_ARM32_te_cbc_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_cbc; + + __asm__ __volatile__ ( + "mov r8, r4\n\t" + "mov r9, r5\n\t" + "mov lr, %[in]\n\t" + "mov r0, %[L_AES_ARM32_te_cbc]\n\t" + "ldm r9, {r4, r5, r6, r7}\n\t" + "push {%[ks], r9}\n\t" + "cmp r8, #10\n\t" + "beq L_AES_CBC_encrypt_start_block_128_%=\n\t" + "cmp r8, #12\n\t" + "beq L_AES_CBC_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_256_%=: \n\t" + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "push {r1, %[len], lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + 
"eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CBC_encrypt_loop_block_256_%=\n\t" + "b L_AES_CBC_encrypt_end_%=\n\t" + "\n" + "L_AES_CBC_encrypt_start_block_192_%=: \n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_192_%=: \n\t" + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "push {r1, %[len], lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr 
#8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CBC_encrypt_loop_block_192_%=\n\t" + "b L_AES_CBC_encrypt_end_%=\n\t" + "\n" + "L_AES_CBC_encrypt_start_block_128_%=: \n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_128_%=: \n\t" + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "push {r1, %[len], lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, 
#0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CBC_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_CBC_encrypt_end_%=: \n\t" + "pop {%[ks], r9}\n\t" + "stm r9, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_te_cbc] "+r" (L_AES_ARM32_te_cbc_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* HAVE_AES_CBC */ +#ifdef WOLFSSL_AES_COUNTER +static const uint32_t* L_AES_ARM32_te_ctr = 
L_AES_ARM32_te_data; +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_ARM32_te_ctr_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ctr; + + __asm__ __volatile__ ( + "mov r12, r4\n\t" + "mov r8, r5\n\t" + "mov lr, %[in]\n\t" + "mov r0, %[L_AES_ARM32_te_ctr]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r10, r4, r4, ror #16\n\t" + "eor r11, r5, r5, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "eor r4, r4, r10, lsr #8\n\t" + "eor r5, r5, r11, lsr #8\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm r8, {r4, r5, r6, r7}\n\t" + "push {%[ks], r8}\n\t" + "cmp r12, #10\n\t" + "beq L_AES_CTR_encrypt_start_block_128_%=\n\t" + "cmp r12, #12\n\t" + "beq L_AES_CTR_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_256_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "adds r11, r7, #1\n\t" + "adcs r10, r6, #0\n\t" + "adcs r9, r5, 
#0\n\t" + "adc r8, r4, #0\n\t" + "stm lr, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CTR_encrypt_loop_block_256_%=\n\t" + "b L_AES_CTR_encrypt_end_%=\n\t" + "\n" + "L_AES_CTR_encrypt_start_block_192_%=: \n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_192_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "adds r11, r7, #1\n\t" + "adcs r10, r6, #0\n\t" + "adcs r9, r5, #0\n\t" + "adc r8, r4, #0\n\t" + "stm lr, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, 
r11\n\t" + "mov r1, #5\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CTR_encrypt_loop_block_192_%=\n\t" + "b L_AES_CTR_encrypt_end_%=\n\t" + "\n" + "L_AES_CTR_encrypt_start_block_128_%=: \n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_128_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "adds r11, r7, #1\n\t" + "adcs r10, r6, #0\n\t" + "adcs r9, r5, #0\n\t" + "adc r8, r4, #0\n\t" + "stm lr, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror 
#16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CTR_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_CTR_encrypt_end_%=: \n\t" + "pop {%[ks], r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r10, r4, r4, ror #16\n\t" + "eor r11, r5, r5, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "eor r4, r4, r10, lsr #8\n\t" + "eor r5, r5, r11, lsr #8\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm r8, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_ctr] "+r" (L_AES_ARM32_te_ctr_c) + : + : 
"memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) +{ + register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; + register int nr asm ("r1") = (int)nr_p; + register const uint8_t* td4 asm ("r2") = (const uint8_t*)td4_p; + + __asm__ __volatile__ ( + "\n" + "L_AES_decrypt_block_nr_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r7, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r7, ror #16\n\t" +#endif +#else + "ubfx r8, r7, #16, #8\n\t" +#endif + "lsr r11, r4, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r6, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r6, ror #8\n\t" +#endif +#else + "ubfx r12, r6, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r5, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r5\n\t" +#endif +#else + "ubfx lr, r5, #0, #8\n\t" +#endif + "ldr r8, [%[td], r8, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r9, r4, #8\n\t" + "lsr r9, r9, #24\n\t" +#else + "uxtb r9, r4, ror #16\n\t" +#endif +#else + "ubfx r9, r4, #16, #8\n\t" +#endif + "eor r8, r8, r11, ror #24\n\t" + "lsr r11, r5, #24\n\t" + "eor r8, r8, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, 
r7, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r7, ror #8\n\t" +#endif +#else + "ubfx r12, r7, #8, #8\n\t" +#endif + "eor r8, r8, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r6, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r6\n\t" +#endif +#else + "ubfx lr, r6, #0, #8\n\t" +#endif + "ldr r9, [%[td], r9, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r10, r5, #8\n\t" + "lsr r10, r10, #24\n\t" +#else + "uxtb r10, r5, ror #16\n\t" +#endif +#else + "ubfx r10, r5, #16, #8\n\t" +#endif + "eor r9, r9, r11, ror #24\n\t" + "lsr r11, r6, #24\n\t" + "eor r9, r9, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r4, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r4, ror #8\n\t" +#endif +#else + "ubfx r12, r4, #8, #8\n\t" +#endif + "eor r9, r9, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r7, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r7\n\t" +#endif +#else + "ubfx lr, r7, #0, #8\n\t" +#endif + "ldr r10, [%[td], r10, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r4, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r4\n\t" +#endif +#else + "ubfx r4, r4, #0, #8\n\t" +#endif + "eor r10, r10, r11, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r11, r6, #8\n\t" + "lsr r11, r11, 
#24\n\t" +#else + "uxtb r11, r6, ror #16\n\t" +#endif +#else + "ubfx r11, r6, #16, #8\n\t" +#endif + "eor r10, r10, r12, ror #8\n\t" + "lsr r12, r7, #24\n\t" + "eor r10, r10, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r5, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r5, ror #8\n\t" +#endif +#else + "ubfx lr, r5, #8, #8\n\t" +#endif + "ldr r4, [%[td], r4, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r4, ror #24\n\t" + "ldm r3!, {r4, r5, r6, r7}\n\t" + "eor r11, r11, lr, ror #8\n\t" + "eor r11, r11, r12, ror #24\n\t" + /* XOR in Key Schedule */ + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r11, #8\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r11, ror #16\n\t" +#endif +#else + "ubfx r4, r11, #16, #8\n\t" +#endif + "lsr r7, r8, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r10, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r10, ror #8\n\t" +#endif +#else + "ubfx r12, r10, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r9, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r9\n\t" +#endif +#else + "ubfx lr, r9, #0, #8\n\t" +#endif + "ldr r4, [%[td], r4, lsl #2]\n\t" + "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r8, #8\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r8, ror #16\n\t" +#endif +#else + "ubfx r5, r8, #16, 
#8\n\t" +#endif + "eor r4, r4, r7, ror #24\n\t" + "lsr r7, r9, #24\n\t" + "eor r4, r4, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r11, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r11, ror #8\n\t" +#endif +#else + "ubfx r12, r11, #8, #8\n\t" +#endif + "eor r4, r4, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r10, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r10\n\t" +#endif +#else + "ubfx lr, r10, #0, #8\n\t" +#endif + "ldr r5, [%[td], r5, lsl #2]\n\t" + "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r9, #8\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r9, ror #16\n\t" +#endif +#else + "ubfx r6, r9, #16, #8\n\t" +#endif + "eor r5, r5, r7, ror #24\n\t" + "lsr r7, r10, #24\n\t" + "eor r5, r5, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r8, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r8, ror #8\n\t" +#endif +#else + "ubfx r12, r8, #8, #8\n\t" +#endif + "eor r5, r5, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r11, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r11\n\t" +#endif +#else + "ubfx lr, r11, #0, #8\n\t" +#endif + "ldr r6, [%[td], r6, lsl #2]\n\t" + "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r8, #24\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r8\n\t" +#endif +#else + 
"ubfx r8, r8, #0, #8\n\t" +#endif + "eor r6, r6, r7, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r10, #8\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r10, ror #16\n\t" +#endif +#else + "ubfx r7, r10, #16, #8\n\t" +#endif + "eor r6, r6, r12, ror #8\n\t" + "lsr r12, r11, #24\n\t" + "eor r6, r6, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r9, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r9, ror #8\n\t" +#endif +#else + "ubfx lr, r9, #8, #8\n\t" +#endif + "ldr r8, [%[td], r8, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r8, ror #24\n\t" + "ldm r3!, {r8, r9, r10, r11}\n\t" + "eor r7, r7, lr, ror #8\n\t" + "eor r7, r7, r12, ror #24\n\t" + /* XOR in Key Schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "subs %[nr], %[nr], #1\n\t" + "bne L_AES_decrypt_block_nr_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r8, r7, #8\n\t" + "lsr r8, r8, #24\n\t" +#else + "uxtb r8, r7, ror #16\n\t" +#endif +#else + "ubfx r8, r7, #16, #8\n\t" +#endif + "lsr r11, r4, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r6, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r6, ror #8\n\t" +#endif +#else + "ubfx r12, r6, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r5, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r5\n\t" +#endif +#else + "ubfx lr, r5, #0, #8\n\t" +#endif + "ldr r8, [%[td], r8, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl 
#2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r9, r4, #8\n\t" + "lsr r9, r9, #24\n\t" +#else + "uxtb r9, r4, ror #16\n\t" +#endif +#else + "ubfx r9, r4, #16, #8\n\t" +#endif + "eor r8, r8, r11, ror #24\n\t" + "lsr r11, r5, #24\n\t" + "eor r8, r8, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r7, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r7, ror #8\n\t" +#endif +#else + "ubfx r12, r7, #8, #8\n\t" +#endif + "eor r8, r8, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r6, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r6\n\t" +#endif +#else + "ubfx lr, r6, #0, #8\n\t" +#endif + "ldr r9, [%[td], r9, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r10, r5, #8\n\t" + "lsr r10, r10, #24\n\t" +#else + "uxtb r10, r5, ror #16\n\t" +#endif +#else + "ubfx r10, r5, #16, #8\n\t" +#endif + "eor r9, r9, r11, ror #24\n\t" + "lsr r11, r6, #24\n\t" + "eor r9, r9, r12, ror #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r4, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r4, ror #8\n\t" +#endif +#else + "ubfx r12, r4, #8, #8\n\t" +#endif + "eor r9, r9, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r7, #24\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r7\n\t" +#endif +#else + "ubfx lr, r7, #0, #8\n\t" +#endif + "ldr r10, [%[td], r10, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl 
#2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r4, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r4\n\t" +#endif +#else + "ubfx r4, r4, #0, #8\n\t" +#endif + "eor r10, r10, r11, ror #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r11, r6, #8\n\t" + "lsr r11, r11, #24\n\t" +#else + "uxtb r11, r6, ror #16\n\t" +#endif +#else + "ubfx r11, r6, #16, #8\n\t" +#endif + "eor r10, r10, r12, ror #8\n\t" + "lsr r12, r7, #24\n\t" + "eor r10, r10, lr, ror #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r5, #16\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r5, ror #8\n\t" +#endif +#else + "ubfx lr, r5, #8, #8\n\t" +#endif + "ldr r4, [%[td], r4, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" + "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r4, ror #24\n\t" + "ldm r3!, {r4, r5, r6, r7}\n\t" + "eor r11, r11, lr, ror #8\n\t" + "eor r11, r11, r12, ror #24\n\t" + /* XOR in Key Schedule */ + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r4, r9, #24\n\t" + "lsr r4, r4, #24\n\t" +#else + "uxtb r4, r9\n\t" +#endif +#else + "ubfx r4, r9, #0, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r10, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r10, ror #8\n\t" +#endif +#else + "ubfx r7, r10, #8, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r11, #8\n\t" + "lsr 
r12, r12, #24\n\t" +#else + "uxtb r12, r11, ror #16\n\t" +#endif +#else + "ubfx r12, r11, #16, #8\n\t" +#endif + "lsr lr, r8, #24\n\t" + "ldrb r4, [%[td4], r4]\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb r12, [%[td4], r12]\n\t" + "ldrb lr, [%[td4], lr]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r5, r10, #24\n\t" + "lsr r5, r5, #24\n\t" +#else + "uxtb r5, r10\n\t" +#endif +#else + "ubfx r5, r10, #0, #8\n\t" +#endif + "eor r4, r4, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r11, ror #8\n\t" +#endif +#else + "ubfx r7, r11, #8, #8\n\t" +#endif + "eor r4, r4, r12, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r8, #8\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r8, ror #16\n\t" +#endif +#else + "ubfx r12, r8, #16, #8\n\t" +#endif + "eor r4, r4, lr, lsl #24\n\t" + "lsr lr, r9, #24\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "ldrb r5, [%[td4], r5]\n\t" + "ldrb r12, [%[td4], r12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r6, r11, #24\n\t" + "lsr r6, r6, #24\n\t" +#else + "uxtb r6, r11\n\t" +#endif +#else + "ubfx r6, r11, #0, #8\n\t" +#endif + "eor r5, r5, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r8, ror #8\n\t" +#endif +#else + "ubfx r7, r8, #8, #8\n\t" +#endif + "eor r5, r5, r12, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r9, #8\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r9, ror 
#16\n\t" +#endif +#else + "ubfx r12, r9, #16, #8\n\t" +#endif + "eor r5, r5, lr, lsl #24\n\t" + "lsr lr, r10, #24\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "ldrb r6, [%[td4], r6]\n\t" + "ldrb r12, [%[td4], r12]\n\t" + "lsr r11, r11, #24\n\t" + "eor r6, r6, r7, lsl #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r7, r8, #24\n\t" + "lsr r7, r7, #24\n\t" +#else + "uxtb r7, r8\n\t" +#endif +#else + "ubfx r7, r8, #0, #8\n\t" +#endif + "eor r6, r6, r12, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl r12, r9, #16\n\t" + "lsr r12, r12, #24\n\t" +#else + "uxtb r12, r9, ror #8\n\t" +#endif +#else + "ubfx r12, r9, #8, #8\n\t" +#endif + "eor r6, r6, lr, lsl #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "lsl lr, r10, #8\n\t" + "lsr lr, lr, #24\n\t" +#else + "uxtb lr, r10, ror #16\n\t" +#endif +#else + "ubfx lr, r10, #16, #8\n\t" +#endif + "ldrb r11, [%[td4], r11]\n\t" + "ldrb r12, [%[td4], r12]\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "eor r12, r12, r11, lsl #16\n\t" + "ldm r3, {r8, r9, r10, r11}\n\t" + "eor r7, r7, r12, lsl #8\n\t" + "eor r7, r7, lr, lsl #16\n\t" + /* XOR in Key Schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) + : + : "memory", "lr", "cc" + ); +} + +static const uint32_t* L_AES_ARM32_td_ecb = L_AES_ARM32_td_data; +static const unsigned char L_AES_ARM32_td4[] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, +}; + +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_ARM32_td_ecb_c asm ("r5") = 
(uint32_t*)L_AES_ARM32_td_ecb; + register unsigned char* L_AES_ARM32_td4_c asm ("r6") = (unsigned char*)&L_AES_ARM32_td4; + + __asm__ __volatile__ ( + "mov r8, r4\n\t" + "mov lr, %[in]\n\t" + "mov r0, %[L_AES_ARM32_td_ecb]\n\t" + "mov r12, %[len]\n\t" + "mov r2, %[L_AES_ARM32_td4]\n\t" + "cmp r8, #10\n\t" + "beq L_AES_ECB_decrypt_start_block_128_%=\n\t" + "cmp r8, #12\n\t" + "beq L_AES_ECB_decrypt_start_block_192_%=\n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_256_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[ks], r12, lr}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_decrypt_block\n\t" + "pop {r1, %[ks], r12, lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + 
"eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_decrypt_loop_block_256_%=\n\t" + "b L_AES_ECB_decrypt_end_%=\n\t" + "\n" + "L_AES_ECB_decrypt_start_block_192_%=: \n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_192_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[ks], r12, lr}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_decrypt_block\n\t" + "pop {r1, %[ks], r12, lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + 
"ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_decrypt_loop_block_192_%=\n\t" + "b L_AES_ECB_decrypt_end_%=\n\t" + "\n" + "L_AES_ECB_decrypt_start_block_128_%=: \n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_128_%=: \n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "push {r1, %[ks], r12, lr}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_decrypt_block\n\t" + "pop {r1, %[ks], r12, lr}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic 
r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_ECB_decrypt_end_%=: \n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_ARM32_td_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_td_ecb; + register unsigned char* L_AES_ARM32_td4_c asm ("r7") = (unsigned char*)&L_AES_ARM32_td4; + + __asm__ __volatile__ ( + "mov r8, r4\n\t" + "mov r4, r5\n\t" + "mov lr, %[in]\n\t" + "mov r0, 
%[L_AES_ARM32_td_ecb]\n\t" + "mov r12, %[len]\n\t" + "mov r2, %[L_AES_ARM32_td4]\n\t" + "push {%[ks]-r4}\n\t" + "cmp r8, #10\n\t" + "beq L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "cmp r8, #12\n\t" + "beq L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_256_%=: \n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr, #16]\n\t" + "str r5, [lr, #20]\n\t" +#else + "strd r4, r5, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #24]\n\t" + "str r7, [lr, #28]\n\t" +#else + "strd r6, r7, [lr, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, 
#8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm lr, {r8, r9, r10, r11}\n\t" + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "beq L_AES_CBC_decrypt_end_odd_%=\n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr]\n\t" + "str r5, [lr, #4]\n\t" +#else + "strd r4, r5, [lr]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #8]\n\t" + "str r7, [lr, #12]\n\t" +#else + "strd r6, r7, [lr, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + 
"eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [lr, #16]\n\t" + "ldr r9, [lr, #20]\n\t" +#else + "ldrd r8, r9, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [lr, #24]\n\t" + "ldr r11, [lr, #28]\n\t" +#else + "ldrd r10, r11, [lr, #24]\n\t" +#endif + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CBC_decrypt_loop_block_256_%=\n\t" + "b L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_192_%=: \n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr, #16]\n\t" + "str r5, [lr, #20]\n\t" +#else + "strd r4, r5, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #24]\n\t" + "str r7, [lr, #28]\n\t" +#else + 
"strd r6, r7, [lr, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm lr, {r8, r9, r10, r11}\n\t" + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "beq 
L_AES_CBC_decrypt_end_odd_%=\n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr]\n\t" + "str r5, [lr, #4]\n\t" +#else + "strd r4, r5, [lr]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #8]\n\t" + "str r7, [lr, #12]\n\t" +#else + "strd r6, r7, [lr, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, 
r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [lr, #16]\n\t" + "ldr r9, [lr, #20]\n\t" +#else + "ldrd r8, r9, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [lr, #24]\n\t" + "ldr r11, [lr, #28]\n\t" +#else + "ldrd r10, r11, [lr, #24]\n\t" +#endif + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "b L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_128_%=: \n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr, #16]\n\t" + "str r5, [lr, #20]\n\t" +#else + "strd r4, r5, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #24]\n\t" + "str r7, [lr, #28]\n\t" +#else + "strd r6, r7, [lr, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, 
r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm lr, {r8, r9, r10, r11}\n\t" + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "beq L_AES_CBC_decrypt_end_odd_%=\n\t" + "push {r1, r12, lr}\n\t" + "ldr r4, [lr]\n\t" + "ldr r5, [lr, #4]\n\t" + "ldr r6, [lr, #8]\n\t" + "ldr r7, [lr, #12]\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [lr]\n\t" + "str r5, [lr, #4]\n\t" +#else + "strd r4, r5, [lr]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [lr, #8]\n\t" + "str r7, [lr, #12]\n\t" +#else + "strd r6, r7, [lr, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, 
#0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_decrypt_block\n\t" + "ldr lr, [sp, #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [lr, #16]\n\t" + "ldr r9, [lr, #20]\n\t" +#else + "ldrd r8, r9, [lr, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [lr, #24]\n\t" + "ldr r11, [lr, #28]\n\t" +#else + "ldrd r10, r11, [lr, #24]\n\t" +#endif + "pop {r1, r12, lr}\n\t" + "ldr %[ks], [sp]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "subs r12, r12, #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], 
#16\n\t" + "bne L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "b L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_end_odd_%=: \n\t" + "ldr r4, [sp, #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [r4, #16]\n\t" + "ldr r9, [r4, #20]\n\t" +#else + "ldrd r8, r9, [r4, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [r4, #24]\n\t" + "ldr r11, [r4, #28]\n\t" +#else + "ldrd r10, r11, [r4, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [r4]\n\t" + "str r9, [r4, #4]\n\t" +#else + "strd r8, r9, [r4]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [r4, #8]\n\t" + "str r11, [r4, #12]\n\t" +#else + "strd r10, r11, [r4, #8]\n\t" +#endif + "\n" + "L_AES_CBC_decrypt_end_%=: \n\t" + "pop {%[ks]-r4}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : + : "memory", "r12", "lr", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* HAVE_AES_CBC */ +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ +#endif /* HAVE_AES_DECRYPT */ +#ifdef HAVE_AESGCM +static const uint32_t L_GCM_gmult_len_r[] = { + 0x00000000, 0x1c200000, 0x38400000, 0x24600000, + 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, + 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, + 0x91800000, 0x8da00000, 0xa9c00000, 0xb5e00000, +}; + +void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); +void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +{ + register unsigned char* x asm ("r0") = (unsigned char*)x_p; + register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; + register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; + register unsigned long len asm 
("r3") = (unsigned long)len_p; + register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; + + __asm__ __volatile__ ( + "mov lr, %[L_GCM_gmult_len_r]\n\t" + "\n" + "L_GCM_gmult_len_start_block_%=: \n\t" + "push {r3}\n\t" + "ldr r12, [r0, #12]\n\t" + "ldr %[len], [r2, #12]\n\t" + "eor r12, r12, %[len]\n\t" + "lsr %[len], r12, #24\n\t" + "and %[len], %[len], #15\n\t" + "add %[len], %[m], %[len], lsl #4\n\t" + "ldm %[len], {r8, r9, r10, r11}\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #28\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #16\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #20\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, 
#8\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #12\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "and r4, r12, #15\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "eor r10, r6, r9, lsl #28\n\t" 
+ "lsr r9, r9, #4\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "ldr r12, [r0, #8]\n\t" + "ldr %[len], [r2, #8]\n\t" + "eor r12, r12, %[len]\n\t" + "lsr %[len], r12, #24\n\t" + "and %[len], %[len], #15\n\t" + "add %[len], %[m], %[len], lsl #4\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #28\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #16\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #20\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #8\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + 
"ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #12\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "and r4, r12, #15\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, 
%[len], r8, lsr #4\n\t" + "ldr r12, [r0, #4]\n\t" + "ldr %[len], [r2, #4]\n\t" + "eor r12, r12, %[len]\n\t" + "lsr %[len], r12, #24\n\t" + "and %[len], %[len], #15\n\t" + "add %[len], %[m], %[len], lsl #4\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #28\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #16\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #20\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #8\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl 
#4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #12\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "and r4, r12, #15\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "ldr r12, [r0]\n\t" + "ldr %[len], 
[r2]\n\t" + "eor r12, r12, %[len]\n\t" + "lsr %[len], r12, #24\n\t" + "and %[len], %[len], #15\n\t" + "add %[len], %[m], %[len], lsl #4\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #28\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #16\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #20\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #8\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, 
{r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #12\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "and r4, r12, #15\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" + "lsr r6, r10, #4\n\t" + "and %[len], r11, #15\n\t" + "lsr r11, r11, #4\n\t" + "lsr r4, r12, #4\n\t" + "eor r11, r11, r10, lsl #28\n\t" + "and r4, r4, #15\n\t" + "ldr %[len], [lr, r3, lsl #2]\n\t" + "add r4, %[m], r4, lsl #4\n\t" + "eor r10, r6, r9, lsl #28\n\t" + "lsr r9, r9, #4\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" + "eor r9, r9, r8, lsl #28\n\t" + "eor r8, %[len], r8, lsr #4\n\t" + "eor r8, r8, r4\n\t" + "eor r9, r9, r5\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + /* REV r8, r8 */ + "eor %[len], r8, r8, ror #16\n\t" + "bic %[len], %[len], #0xff0000\n\t" + "ror r8, r8, #8\n\t" + "eor r8, r8, %[len], lsr #8\n\t" + /* REV r9, r9 */ + "eor %[len], r9, r9, ror #16\n\t" + "bic %[len], %[len], #0xff0000\n\t" + "ror r9, r9, #8\n\t" + "eor r9, r9, %[len], lsr #8\n\t" + /* REV r10, r10 */ + "eor 
%[len], r10, r10, ror #16\n\t" + "bic %[len], %[len], #0xff0000\n\t" + "ror r10, r10, #8\n\t" + "eor r10, r10, %[len], lsr #8\n\t" + /* REV r11, r11 */ + "eor %[len], r11, r11, ror #16\n\t" + "bic %[len], %[len], #0xff0000\n\t" + "ror r11, r11, #8\n\t" + "eor r11, r11, %[len], lsr #8\n\t" +#else + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm %[x], {r8, r9, r10, r11}\n\t" + "pop {r3}\n\t" + "subs %[len], %[len], #16\n\t" + "add %[data], %[data], #16\n\t" + "bne L_GCM_gmult_len_start_block_%=\n\t" + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +static const uint32_t* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data; +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_ARM32_te_gcm_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_gcm; + + __asm__ __volatile__ ( + "mov r12, r4\n\t" + "mov r8, r5\n\t" + "mov lr, %[in]\n\t" + "mov r0, %[L_AES_ARM32_te_gcm]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r10, r4, r4, ror #16\n\t" + "eor r11, r5, r5, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, 
#8\n\t" + "ror r5, r5, #8\n\t" + "eor r4, r4, r10, lsr #8\n\t" + "eor r5, r5, r11, lsr #8\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm r8, {r4, r5, r6, r7}\n\t" + "push {%[ks], r8}\n\t" + "cmp r12, #10\n\t" + "beq L_AES_GCM_encrypt_start_block_128_%=\n\t" + "cmp r12, #12\n\t" + "beq L_AES_GCM_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_256_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "add r7, r7, #1\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + "str r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #6\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, 
[%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_GCM_encrypt_loop_block_256_%=\n\t" + "b L_AES_GCM_encrypt_end_%=\n\t" + "\n" + "L_AES_GCM_encrypt_start_block_192_%=: \n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_192_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "add r7, r7, #1\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + "str r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #5\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_GCM_encrypt_loop_block_192_%=\n\t" + "b L_AES_GCM_encrypt_end_%=\n\t" + 
"\n" + "L_AES_GCM_encrypt_start_block_128_%=: \n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_128_%=: \n\t" + "push {r1, %[len], lr}\n\t" + "ldr lr, [sp, #16]\n\t" + "add r7, r7, #1\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" + "str r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "mov r1, #4\n\t" + "bl AES_encrypt_block\n\t" + "pop {r1, %[len], lr}\n\t" + "ldr %[ks], [sp]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r8, [lr]\n\t" + "ldr r9, [lr, #4]\n\t" + "ldr r10, [lr, #8]\n\t" + "ldr r11, [lr, #12]\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r6, r6, r10\n\t" + "eor r7, r7, r11\n\t" + "ldr r8, [sp, #4]\n\t" + "str r4, [%[out]]\n\t" + "str r5, [%[out], #4]\n\t" + "str r6, [%[out], #8]\n\t" + "str r7, [%[out], #12]\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" + "subs %[len], %[len], #16\n\t" + "add lr, lr, #16\n\t" + "add %[out], %[out], #16\n\t" + "bne L_AES_GCM_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_GCM_encrypt_end_%=: \n\t" + "pop {%[ks], r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "eor r10, r4, r4, ror #16\n\t" + "eor r11, r5, r5, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "eor r4, r4, r10, lsr #8\n\t" + "eor r5, r5, 
r11, lsr #8\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#else + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "stm r8, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ +#endif /* !__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S new file mode 100644 index 000000000..cd16c855c --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -0,0 +1,2864 @@ +/* armv8-32-sha256-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. 
+ * + * https://www.wolfssl.com + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha256.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#ifndef WOLFSSL_ARMASM_INLINE +#ifndef NO_SHA256 +#ifdef WOLFSSL_ARMASM_NO_NEON + .text + .type L_SHA256_transform_len_k, %object + .size L_SHA256_transform_len_k, 256 + .align 4 +L_SHA256_transform_len_k: + .word 0x428a2f98 + .word 0x71374491 + .word 0xb5c0fbcf + .word 0xe9b5dba5 + .word 0x3956c25b + .word 0x59f111f1 + .word 0x923f82a4 + .word 0xab1c5ed5 + .word 0xd807aa98 + .word 0x12835b01 + .word 0x243185be + .word 0x550c7dc3 + .word 0x72be5d74 + .word 0x80deb1fe + .word 0x9bdc06a7 + .word 0xc19bf174 + .word 0xe49b69c1 + .word 0xefbe4786 + .word 0xfc19dc6 + .word 0x240ca1cc + .word 0x2de92c6f + .word 0x4a7484aa + .word 0x5cb0a9dc + .word 0x76f988da + .word 0x983e5152 + .word 0xa831c66d + .word 0xb00327c8 + .word 0xbf597fc7 + .word 0xc6e00bf3 + .word 0xd5a79147 + .word 0x6ca6351 + .word 0x14292967 + .word 0x27b70a85 + .word 0x2e1b2138 + .word 0x4d2c6dfc + .word 0x53380d13 + .word 0x650a7354 + .word 0x766a0abb + .word 0x81c2c92e + .word 0x92722c85 + .word 0xa2bfe8a1 + .word 0xa81a664b + .word 0xc24b8b70 + .word 0xc76c51a3 + .word 0xd192e819 + .word 0xd6990624 + .word 0xf40e3585 + .word 0x106aa070 + .word 0x19a4c116 + .word 0x1e376c08 + .word 0x2748774c + .word 0x34b0bcb5 + .word 0x391c0cb3 + .word 0x4ed8aa4a + .word 0x5b9cca4f + .word 0x682e6ff3 + .word 0x748f82ee + .word 0x78a5636f + .word 0x84c87814 + .word 0x8cc70208 + .word 0x90befffa + .word 0xa4506ceb + .word 0xbef9a3f7 + .word 0xc67178f2 + .text + .align 4 + .globl Transform_Sha256_Len + .type Transform_Sha256_Len, %function +Transform_Sha256_Len: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0xc0 + adr r3, L_SHA256_transform_len_k + # Copy digest to add in at end 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #24] + ldr r11, [r0, #28] +#else + ldrd r10, r11, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #72] + str r7, [sp, #76] +#else + strd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #80] + str r9, [sp, #84] +#else + strd r8, r9, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #88] + str r11, [sp, #92] +#else + strd r10, r11, [sp, #88] +#endif + # Start of loop processing a block +L_SHA256_transform_len_begin: + # Load, Reverse and Store W - 64 bytes +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + ldr r4, [r1] + ldr r5, [r1, #4] + ldr r6, [r1, #8] + ldr r7, [r1, #12] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #8] + str r7, [sp, #12] +#else + strd r6, r7, [sp, #8] +#endif + ldr r4, [r1, #16] + ldr r5, 
[r1, #20] + ldr r6, [r1, #24] + ldr r7, [r1, #28] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] +#else + strd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #24] + str r7, [sp, #28] +#else + strd r6, r7, [sp, #24] +#endif + ldr r4, [r1, #32] + ldr r5, [r1, #36] + ldr r6, [r1, #40] + ldr r7, [r1, #44] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #36] +#else + strd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #40] + str r7, [sp, #44] +#else + strd r6, r7, [sp, #40] +#endif + ldr r4, [r1, #48] + ldr r5, [r1, #52] + ldr r6, [r1, #56] + ldr r7, [r1, #60] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #48] + str r5, [sp, #52] +#else + strd 
r4, r5, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #56] + str r7, [sp, #60] +#else + strd r6, r7, [sp, #56] +#endif +#else + ldr r4, [r1] + ldr r5, [r1, #4] + ldr r6, [r1, #8] + ldr r7, [r1, #12] + ldr r8, [r1, #16] + ldr r9, [r1, #20] + ldr r10, [r1, #24] + ldr r11, [r1, #28] + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r8, r8 + rev r9, r9 + rev r10, r10 + rev r11, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #8] + str r7, [sp, #12] +#else + strd r6, r7, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #16] + str r9, [sp, #20] +#else + strd r8, r9, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #24] + str r11, [sp, #28] +#else + strd r10, r11, [sp, #24] +#endif + ldr r4, [r1, #32] + ldr r5, [r1, #36] + ldr r6, [r1, #40] + ldr r7, [r1, #44] + ldr r8, [r1, #48] + ldr r9, [r1, #52] + ldr r10, [r1, #56] + ldr r11, [r1, #60] + rev r4, r4 + rev r5, r5 + rev r6, r6 + rev r7, r7 + rev r8, r8 + rev r9, r9 + rev r10, r10 + rev r11, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #36] +#else + strd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #40] + str r7, [sp, #44] +#else + strd r6, r7, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #48] + str r9, [sp, #52] +#else + strd r8, r9, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #56] + str r11, [sp, #60] +#else + strd r10, r11, [sp, #56] +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + ldr r11, [r0, #4] + ldr r4, [r0, #8] + eor r11, r11, r4 + mov r12, #3 + # Start of 16 rounds +L_SHA256_transform_len_start: + # Round 0 + ldr r5, 
[r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r9, [r0, #28] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp] + ldr r6, [r3] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r8, [r0, #12] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #12] + str r9, [r0, #28] + # Calc new W[0] + ldr r6, [sp, #56] + ldr r7, [sp, #36] + ldr r8, [sp, #4] + ldr r9, [sp] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp] + # Round 1 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r9, [r0, #24] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #4] + ldr r6, [r3, #4] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r8, [r0, #8] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #8] + str r9, [r0, #24] + # Calc new W[1] + ldr r6, [sp, #60] + ldr r7, [sp, #40] + ldr r8, [sp, #8] + ldr r9, [sp, #4] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #4] + # Round 2 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r9, [r0, #20] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + 
add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #8] + ldr r6, [r3, #8] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r8, [r0, #4] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #4] + str r9, [r0, #20] + # Calc new W[2] + ldr r6, [sp] + ldr r7, [sp, #44] + ldr r8, [sp, #12] + ldr r9, [sp, #8] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #8] + # Round 3 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r9, [r0, #16] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #12] + ldr r6, [r3, #12] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r8, [r0] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0] + str r9, [r0, #16] + # Calc new W[3] + ldr r6, [sp, #4] + ldr r7, [sp, #48] + ldr r8, [sp, #16] + ldr r9, [sp, #12] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #12] + # Round 4 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r9, [r0, #12] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #16] + ldr r6, [r3, #16] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r8, [r0, #28] 
+ ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #28] + str r9, [r0, #12] + # Calc new W[4] + ldr r6, [sp, #8] + ldr r7, [sp, #52] + ldr r8, [sp, #20] + ldr r9, [sp, #16] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #16] + # Round 5 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r9, [r0, #8] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #20] + ldr r6, [r3, #20] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #24] + str r9, [r0, #8] + # Calc new W[5] + ldr r6, [sp, #12] + ldr r7, [sp, #56] + ldr r8, [sp, #24] + ldr r9, [sp, #20] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #20] + # Round 6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r9, [r0, #4] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #24] + ldr r6, [r3, #24] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r8, [r0, #20] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add 
r9, r9, r11 + str r8, [r0, #20] + str r9, [r0, #4] + # Calc new W[6] + ldr r6, [sp, #16] + ldr r7, [sp, #60] + ldr r8, [sp, #28] + ldr r9, [sp, #24] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #24] + # Round 7 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r9, [r0] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #28] + ldr r6, [r3, #28] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r8, [r0, #16] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #16] + str r9, [r0] + # Calc new W[7] + ldr r6, [sp, #20] + ldr r7, [sp] + ldr r8, [sp, #32] + ldr r9, [sp, #28] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #28] + # Round 8 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r9, [r0, #28] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #32] + ldr r6, [r3, #32] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r8, [r0, #12] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #12] + str r9, [r0, #28] + # Calc new W[8] + ldr r6, [sp, #24] + ldr r7, [sp, #4] + ldr r8, [sp, #36] + ldr r9, [sp, #32] + ror r4, r6, #17 + ror 
r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #32] + # Round 9 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r9, [r0, #24] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #36] + ldr r6, [r3, #36] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r8, [r0, #8] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #8] + str r9, [r0, #24] + # Calc new W[9] + ldr r6, [sp, #28] + ldr r7, [sp, #8] + ldr r8, [sp, #40] + ldr r9, [sp, #36] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #36] + # Round 10 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r9, [r0, #20] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #40] + ldr r6, [r3, #40] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r8, [r0, #4] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #4] + str r9, [r0, #20] + # Calc new W[10] + ldr r6, [sp, #32] + ldr r7, [sp, #12] + ldr r8, [sp, #44] + ldr r9, [sp, #40] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 
+ str r9, [sp, #40] + # Round 11 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r9, [r0, #16] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #44] + ldr r6, [r3, #44] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r8, [r0] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0] + str r9, [r0, #16] + # Calc new W[11] + ldr r6, [sp, #36] + ldr r7, [sp, #16] + ldr r8, [sp, #48] + ldr r9, [sp, #44] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #44] + # Round 12 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r9, [r0, #12] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #48] + ldr r6, [r3, #48] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r8, [r0, #28] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #28] + str r9, [r0, #12] + # Calc new W[12] + ldr r6, [sp, #40] + ldr r7, [sp, #20] + ldr r8, [sp, #52] + ldr r9, [sp, #48] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #48] + # Round 13 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r9, [r0, #8] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror 
#11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #52] + ldr r6, [r3, #52] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #24] + str r9, [r0, #8] + # Calc new W[13] + ldr r6, [sp, #44] + ldr r7, [sp, #24] + ldr r8, [sp, #56] + ldr r9, [sp, #52] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #52] + # Round 14 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r9, [r0, #4] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #56] + ldr r6, [r3, #56] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r8, [r0, #20] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #20] + str r9, [r0, #4] + # Calc new W[14] + ldr r6, [sp, #48] + ldr r7, [sp, #28] + ldr r8, [sp, #60] + ldr r9, [sp, #56] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #56] + # Round 15 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r9, [r0] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #60] + ldr r6, [r3, #60] + add r9, r9, r5 + 
add r9, r9, r6 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r8, [r0, #16] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #16] + str r9, [r0] + # Calc new W[15] + ldr r6, [sp, #52] + ldr r7, [sp, #32] + ldr r8, [sp] + ldr r9, [sp, #60] + ror r4, r6, #17 + ror r5, r8, #7 + eor r4, r4, r6, ror #19 + eor r5, r5, r8, ror #18 + eor r4, r4, r6, lsr #10 + eor r5, r5, r8, lsr #3 + add r9, r9, r7 + add r4, r4, r5 + add r9, r9, r4 + str r9, [sp, #60] + add r3, r3, #0x40 + subs r12, r12, #1 + bne L_SHA256_transform_len_start + # Round 0 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r9, [r0, #28] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp] + ldr r6, [r3] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r8, [r0, #12] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #12] + str r9, [r0, #28] + # Round 1 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r9, [r0, #24] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #4] + ldr r6, [r3, #4] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r8, [r0, #8] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #8] + str r9, [r0, #24] + # Round 2 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r9, [r0, #20] + ror r4, r5, 
#6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #8] + ldr r6, [r3, #8] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r8, [r0, #4] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #4] + str r9, [r0, #20] + # Round 3 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r9, [r0, #16] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #12] + ldr r6, [r3, #12] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r8, [r0] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0] + str r9, [r0, #16] + # Round 4 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r9, [r0, #12] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #16] + ldr r6, [r3, #16] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r8, [r0, #28] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #28] + str r9, [r0, #12] + # Round 5 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r9, [r0, #8] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #20] + ldr r6, [r3, #20] 
+ add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #24] + str r9, [r0, #8] + # Round 6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r9, [r0, #4] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #24] + ldr r6, [r3, #24] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r8, [r0, #20] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #20] + str r9, [r0, #4] + # Round 7 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r9, [r0] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #28] + ldr r6, [r3, #28] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r8, [r0, #16] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #16] + str r9, [r0] + # Round 8 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r9, [r0, #28] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #32] + ldr r6, [r3, #32] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r8, [r0, #12] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror 
#13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #12] + str r9, [r0, #28] + # Round 9 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r9, [r0, #24] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #36] + ldr r6, [r3, #36] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r8, [r0, #8] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #8] + str r9, [r0, #24] + # Round 10 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r9, [r0, #20] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #40] + ldr r6, [r3, #40] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r8, [r0, #4] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #4] + str r9, [r0, #20] + # Round 11 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r9, [r0, #16] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #44] + ldr r6, [r3, #44] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r8, [r0] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0] + str r9, [r0, #16] + # Round 
12 + ldr r5, [r0] + ldr r6, [r0, #4] + ldr r7, [r0, #8] + ldr r9, [r0, #12] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #48] + ldr r6, [r3, #48] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #16] + ldr r6, [r0, #20] + ldr r7, [r0, #24] + ldr r8, [r0, #28] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #28] + str r9, [r0, #12] + # Round 13 + ldr r5, [r0, #28] + ldr r6, [r0] + ldr r7, [r0, #4] + ldr r9, [r0, #8] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #52] + ldr r6, [r3, #52] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #24] + str r9, [r0, #8] + # Round 14 + ldr r5, [r0, #24] + ldr r6, [r0, #28] + ldr r7, [r0] + ldr r9, [r0, #4] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #56] + ldr r6, [r3, #56] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #8] + ldr r6, [r0, #12] + ldr r7, [r0, #16] + ldr r8, [r0, #20] + ror r4, r5, #2 + eor r10, r5, r6 + eor r4, r4, r5, ror #13 + and r11, r11, r10 + eor r4, r4, r5, ror #22 + eor r11, r11, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r11 + str r8, [r0, #20] + str r9, [r0, #4] + # Round 15 + ldr r5, [r0, #20] + ldr r6, [r0, #24] + ldr r7, [r0, #28] + ldr r9, [r0] + ror r4, r5, #6 + eor r6, r6, r7 + eor r4, r4, r5, ror #11 + and r6, r6, r5 + eor r4, 
r4, r5, ror #25 + eor r6, r6, r7 + add r9, r9, r4 + add r9, r9, r6 + ldr r5, [sp, #60] + ldr r6, [r3, #60] + add r9, r9, r5 + add r9, r9, r6 + ldr r5, [r0, #4] + ldr r6, [r0, #8] + ldr r7, [r0, #12] + ldr r8, [r0, #16] + ror r4, r5, #2 + eor r11, r5, r6 + eor r4, r4, r5, ror #13 + and r10, r10, r11 + eor r4, r4, r5, ror #22 + eor r10, r10, r6 + add r8, r8, r9 + add r9, r9, r4 + add r9, r9, r10 + str r8, [r0, #16] + str r9, [r0] + # Add in digest from start +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #64] + ldr r9, [sp, #68] +#else + ldrd r8, r9, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #72] + ldr r11, [sp, #76] +#else + ldrd r10, r11, [sp, #72] +#endif + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #72] + str r7, [sp, #76] +#else + strd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #80] + ldr r9, [sp, #84] +#else + ldrd r8, r9, 
[sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #88] + ldr r11, [sp, #92] +#else + ldrd r10, r11, [sp, #88] +#endif + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #80] + str r5, [sp, #84] +#else + strd r4, r5, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #88] + str r7, [sp, #92] +#else + strd r6, r7, [sp, #88] +#endif + subs r2, r2, #0x40 + sub r3, r3, #0xc0 + add r1, r1, #0x40 + bne L_SHA256_transform_len_begin + add sp, sp, #0xc0 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size Transform_Sha256_Len,.-Transform_Sha256_Len +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#ifndef WOLFSSL_ARMASM_NO_NEON + .text + .type L_SHA256_transform_neon_len_k, %object + .size L_SHA256_transform_neon_len_k, 256 + .align 4 +L_SHA256_transform_neon_len_k: + .word 0x428a2f98 + .word 0x71374491 + .word 0xb5c0fbcf + .word 0xe9b5dba5 + .word 0x3956c25b + .word 0x59f111f1 + .word 0x923f82a4 + .word 0xab1c5ed5 + .word 0xd807aa98 + .word 0x12835b01 + .word 0x243185be + .word 0x550c7dc3 + .word 0x72be5d74 + .word 0x80deb1fe + .word 0x9bdc06a7 + .word 0xc19bf174 + .word 0xe49b69c1 + .word 0xefbe4786 + .word 0xfc19dc6 + .word 0x240ca1cc + .word 0x2de92c6f + .word 0x4a7484aa + .word 0x5cb0a9dc + .word 0x76f988da + .word 0x983e5152 + .word 0xa831c66d + .word 0xb00327c8 + .word 0xbf597fc7 + .word 0xc6e00bf3 + .word 0xd5a79147 + .word 0x6ca6351 + .word 0x14292967 + .word 0x27b70a85 + .word 0x2e1b2138 + .word 0x4d2c6dfc + .word 0x53380d13 + .word 0x650a7354 + .word 0x766a0abb + .word 0x81c2c92e + .word 0x92722c85 + .word 0xa2bfe8a1 + .word 0xa81a664b + 
.word 0xc24b8b70 + .word 0xc76c51a3 + .word 0xd192e819 + .word 0xd6990624 + .word 0xf40e3585 + .word 0x106aa070 + .word 0x19a4c116 + .word 0x1e376c08 + .word 0x2748774c + .word 0x34b0bcb5 + .word 0x391c0cb3 + .word 0x4ed8aa4a + .word 0x5b9cca4f + .word 0x682e6ff3 + .word 0x748f82ee + .word 0x78a5636f + .word 0x84c87814 + .word 0x8cc70208 + .word 0x90befffa + .word 0xa4506ceb + .word 0xbef9a3f7 + .word 0xc67178f2 + .text + .align 4 + .fpu neon + .globl Transform_Sha256_Len + .type Transform_Sha256_Len, %function +Transform_Sha256_Len: + push {r4, r5, r6, r7, r8, r9, r10, lr} + vpush {d8-d11} + sub sp, sp, #24 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r0, [sp] + str r1, [sp, #4] +#else + strd r0, r1, [sp] +#endif + str r2, [sp, #8] + adr r12, L_SHA256_transform_neon_len_k + # Load digest into registers +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #4] +#else + ldrd r2, r3, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + # Start of loop processing a block +L_SHA256_transform_neon_len_begin: + # Load W + vld1.8 {d0-d3}, [r1]! + vld1.8 {d4-d7}, [r1]! 
+#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + vrev32.8 q0, q0 + vrev32.8 q1, q1 + vrev32.8 q2, q2 + vrev32.8 q3, q3 +#else + vrev32.8 d0, d0 + vrev32.8 d1, d1 + vrev32.8 d2, d2 + vrev32.8 d3, d3 + vrev32.8 d4, d4 + vrev32.8 d5, d5 + vrev32.8 d6, d6 + vrev32.8 d7, d7 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + str r1, [sp, #4] + mov lr, #3 + # Start of 16 rounds +L_SHA256_transform_neon_len_start: + # Round 0 + vmov.32 r10, d0[0] + ror r0, r6, #6 + eor r1, r7, r8 + eor r0, r0, r6, ror #11 + and r1, r1, r6 + eor r0, r0, r6, ror #25 + eor r1, r1, r8 + add r9, r9, r0 + add r9, r9, r1 + ldr r0, [r12] + add r9, r9, r10 + add r9, r9, r0 + add r5, r5, r9 + ror r0, r2, #2 + eor r1, r2, r3 + eor r0, r0, r2, ror #13 + eor r10, r3, r4 + and r1, r1, r10 + eor r0, r0, r2, ror #22 + eor r1, r1, r3 + add r9, r9, r0 + add r9, r9, r1 + # Round 1 + vmov.32 r10, d0[1] + # Calc new W[0]-W[1] + vext.8 d10, d0, d1, #4 + ror r0, r5, #6 + vshl.u32 d8, d7, #15 + eor r1, r6, r7 + vsri.u32 d8, d7, #17 + eor r0, r0, r5, ror #11 + vshl.u32 d9, d7, #13 + and r1, r1, r5 + vsri.u32 d9, d7, #19 + eor r0, r0, r5, ror #25 + veor d9, d8 + eor r1, r1, r7 + vshr.u32 d8, d7, #10 + add r8, r8, r0 + veor d9, d8 + add r8, r8, r1 + vadd.i32 d0, d9 + ldr r0, [r12, #4] + vext.8 d11, d4, d5, #4 + add r8, r8, r10 + vadd.i32 d0, d11 + add r8, r8, r0 + vshl.u32 d8, d10, #25 + add r4, r4, r8 + vsri.u32 d8, d10, #7 + ror r0, r9, #2 + vshl.u32 d9, d10, #14 + eor r1, r9, r2 + vsri.u32 d9, d10, #18 + eor r0, r0, r9, ror #13 + veor d9, d8 + eor r10, r2, r3 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r9, ror #22 + vadd.i32 d0, d9 + eor r1, r1, r2 + add r8, r8, r0 + add r8, r8, r1 + # Round 2 + vmov.32 r10, d1[0] + ror r0, r4, #6 + eor r1, r5, r6 + eor r0, r0, r4, ror #11 + and r1, r1, r4 + eor r0, r0, r4, ror #25 + eor r1, r1, r6 + add r7, r7, r0 + add r7, r7, r1 + ldr r0, [r12, #8] + add r7, r7, r10 + add r7, r7, r0 + add r3, r3, r7 + ror r0, r8, #2 + eor r1, r8, r9 + eor r0, r0, r8, ror #13 + eor r10, 
r9, r2 + and r1, r1, r10 + eor r0, r0, r8, ror #22 + eor r1, r1, r9 + add r7, r7, r0 + add r7, r7, r1 + # Round 3 + vmov.32 r10, d1[1] + # Calc new W[2]-W[3] + vext.8 d10, d1, d2, #4 + ror r0, r3, #6 + vshl.u32 d8, d0, #15 + eor r1, r4, r5 + vsri.u32 d8, d0, #17 + eor r0, r0, r3, ror #11 + vshl.u32 d9, d0, #13 + and r1, r1, r3 + vsri.u32 d9, d0, #19 + eor r0, r0, r3, ror #25 + veor d9, d8 + eor r1, r1, r5 + vshr.u32 d8, d0, #10 + add r6, r6, r0 + veor d9, d8 + add r6, r6, r1 + vadd.i32 d1, d9 + ldr r0, [r12, #12] + vext.8 d11, d5, d6, #4 + add r6, r6, r10 + vadd.i32 d1, d11 + add r6, r6, r0 + vshl.u32 d8, d10, #25 + add r2, r2, r6 + vsri.u32 d8, d10, #7 + ror r0, r7, #2 + vshl.u32 d9, d10, #14 + eor r1, r7, r8 + vsri.u32 d9, d10, #18 + eor r0, r0, r7, ror #13 + veor d9, d8 + eor r10, r8, r9 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r7, ror #22 + vadd.i32 d1, d9 + eor r1, r1, r8 + add r6, r6, r0 + add r6, r6, r1 + # Round 4 + vmov.32 r10, d2[0] + ror r0, r2, #6 + eor r1, r3, r4 + eor r0, r0, r2, ror #11 + and r1, r1, r2 + eor r0, r0, r2, ror #25 + eor r1, r1, r4 + add r5, r5, r0 + add r5, r5, r1 + ldr r0, [r12, #16] + add r5, r5, r10 + add r5, r5, r0 + add r9, r9, r5 + ror r0, r6, #2 + eor r1, r6, r7 + eor r0, r0, r6, ror #13 + eor r10, r7, r8 + and r1, r1, r10 + eor r0, r0, r6, ror #22 + eor r1, r1, r7 + add r5, r5, r0 + add r5, r5, r1 + # Round 5 + vmov.32 r10, d2[1] + # Calc new W[4]-W[5] + vext.8 d10, d2, d3, #4 + ror r0, r9, #6 + vshl.u32 d8, d1, #15 + eor r1, r2, r3 + vsri.u32 d8, d1, #17 + eor r0, r0, r9, ror #11 + vshl.u32 d9, d1, #13 + and r1, r1, r9 + vsri.u32 d9, d1, #19 + eor r0, r0, r9, ror #25 + veor d9, d8 + eor r1, r1, r3 + vshr.u32 d8, d1, #10 + add r4, r4, r0 + veor d9, d8 + add r4, r4, r1 + vadd.i32 d2, d9 + ldr r0, [r12, #20] + vext.8 d11, d6, d7, #4 + add r4, r4, r10 + vadd.i32 d2, d11 + add r4, r4, r0 + vshl.u32 d8, d10, #25 + add r8, r8, r4 + vsri.u32 d8, d10, #7 + ror r0, r5, #2 + vshl.u32 d9, d10, #14 + eor r1, r5, r6 
+ vsri.u32 d9, d10, #18 + eor r0, r0, r5, ror #13 + veor d9, d8 + eor r10, r6, r7 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r5, ror #22 + vadd.i32 d2, d9 + eor r1, r1, r6 + add r4, r4, r0 + add r4, r4, r1 + # Round 6 + vmov.32 r10, d3[0] + ror r0, r8, #6 + eor r1, r9, r2 + eor r0, r0, r8, ror #11 + and r1, r1, r8 + eor r0, r0, r8, ror #25 + eor r1, r1, r2 + add r3, r3, r0 + add r3, r3, r1 + ldr r0, [r12, #24] + add r3, r3, r10 + add r3, r3, r0 + add r7, r7, r3 + ror r0, r4, #2 + eor r1, r4, r5 + eor r0, r0, r4, ror #13 + eor r10, r5, r6 + and r1, r1, r10 + eor r0, r0, r4, ror #22 + eor r1, r1, r5 + add r3, r3, r0 + add r3, r3, r1 + # Round 7 + vmov.32 r10, d3[1] + # Calc new W[6]-W[7] + vext.8 d10, d3, d4, #4 + ror r0, r7, #6 + vshl.u32 d8, d2, #15 + eor r1, r8, r9 + vsri.u32 d8, d2, #17 + eor r0, r0, r7, ror #11 + vshl.u32 d9, d2, #13 + and r1, r1, r7 + vsri.u32 d9, d2, #19 + eor r0, r0, r7, ror #25 + veor d9, d8 + eor r1, r1, r9 + vshr.u32 d8, d2, #10 + add r2, r2, r0 + veor d9, d8 + add r2, r2, r1 + vadd.i32 d3, d9 + ldr r0, [r12, #28] + vext.8 d11, d7, d0, #4 + add r2, r2, r10 + vadd.i32 d3, d11 + add r2, r2, r0 + vshl.u32 d8, d10, #25 + add r6, r6, r2 + vsri.u32 d8, d10, #7 + ror r0, r3, #2 + vshl.u32 d9, d10, #14 + eor r1, r3, r4 + vsri.u32 d9, d10, #18 + eor r0, r0, r3, ror #13 + veor d9, d8 + eor r10, r4, r5 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r3, ror #22 + vadd.i32 d3, d9 + eor r1, r1, r4 + add r2, r2, r0 + add r2, r2, r1 + # Round 8 + vmov.32 r10, d4[0] + ror r0, r6, #6 + eor r1, r7, r8 + eor r0, r0, r6, ror #11 + and r1, r1, r6 + eor r0, r0, r6, ror #25 + eor r1, r1, r8 + add r9, r9, r0 + add r9, r9, r1 + ldr r0, [r12, #32] + add r9, r9, r10 + add r9, r9, r0 + add r5, r5, r9 + ror r0, r2, #2 + eor r1, r2, r3 + eor r0, r0, r2, ror #13 + eor r10, r3, r4 + and r1, r1, r10 + eor r0, r0, r2, ror #22 + eor r1, r1, r3 + add r9, r9, r0 + add r9, r9, r1 + # Round 9 + vmov.32 r10, d4[1] + # Calc new W[8]-W[9] + 
vext.8 d10, d4, d5, #4 + ror r0, r5, #6 + vshl.u32 d8, d3, #15 + eor r1, r6, r7 + vsri.u32 d8, d3, #17 + eor r0, r0, r5, ror #11 + vshl.u32 d9, d3, #13 + and r1, r1, r5 + vsri.u32 d9, d3, #19 + eor r0, r0, r5, ror #25 + veor d9, d8 + eor r1, r1, r7 + vshr.u32 d8, d3, #10 + add r8, r8, r0 + veor d9, d8 + add r8, r8, r1 + vadd.i32 d4, d9 + ldr r0, [r12, #36] + vext.8 d11, d0, d1, #4 + add r8, r8, r10 + vadd.i32 d4, d11 + add r8, r8, r0 + vshl.u32 d8, d10, #25 + add r4, r4, r8 + vsri.u32 d8, d10, #7 + ror r0, r9, #2 + vshl.u32 d9, d10, #14 + eor r1, r9, r2 + vsri.u32 d9, d10, #18 + eor r0, r0, r9, ror #13 + veor d9, d8 + eor r10, r2, r3 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r9, ror #22 + vadd.i32 d4, d9 + eor r1, r1, r2 + add r8, r8, r0 + add r8, r8, r1 + # Round 10 + vmov.32 r10, d5[0] + ror r0, r4, #6 + eor r1, r5, r6 + eor r0, r0, r4, ror #11 + and r1, r1, r4 + eor r0, r0, r4, ror #25 + eor r1, r1, r6 + add r7, r7, r0 + add r7, r7, r1 + ldr r0, [r12, #40] + add r7, r7, r10 + add r7, r7, r0 + add r3, r3, r7 + ror r0, r8, #2 + eor r1, r8, r9 + eor r0, r0, r8, ror #13 + eor r10, r9, r2 + and r1, r1, r10 + eor r0, r0, r8, ror #22 + eor r1, r1, r9 + add r7, r7, r0 + add r7, r7, r1 + # Round 11 + vmov.32 r10, d5[1] + # Calc new W[10]-W[11] + vext.8 d10, d5, d6, #4 + ror r0, r3, #6 + vshl.u32 d8, d4, #15 + eor r1, r4, r5 + vsri.u32 d8, d4, #17 + eor r0, r0, r3, ror #11 + vshl.u32 d9, d4, #13 + and r1, r1, r3 + vsri.u32 d9, d4, #19 + eor r0, r0, r3, ror #25 + veor d9, d8 + eor r1, r1, r5 + vshr.u32 d8, d4, #10 + add r6, r6, r0 + veor d9, d8 + add r6, r6, r1 + vadd.i32 d5, d9 + ldr r0, [r12, #44] + vext.8 d11, d1, d2, #4 + add r6, r6, r10 + vadd.i32 d5, d11 + add r6, r6, r0 + vshl.u32 d8, d10, #25 + add r2, r2, r6 + vsri.u32 d8, d10, #7 + ror r0, r7, #2 + vshl.u32 d9, d10, #14 + eor r1, r7, r8 + vsri.u32 d9, d10, #18 + eor r0, r0, r7, ror #13 + veor d9, d8 + eor r10, r8, r9 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r7, ror 
#22 + vadd.i32 d5, d9 + eor r1, r1, r8 + add r6, r6, r0 + add r6, r6, r1 + # Round 12 + vmov.32 r10, d6[0] + ror r0, r2, #6 + eor r1, r3, r4 + eor r0, r0, r2, ror #11 + and r1, r1, r2 + eor r0, r0, r2, ror #25 + eor r1, r1, r4 + add r5, r5, r0 + add r5, r5, r1 + ldr r0, [r12, #48] + add r5, r5, r10 + add r5, r5, r0 + add r9, r9, r5 + ror r0, r6, #2 + eor r1, r6, r7 + eor r0, r0, r6, ror #13 + eor r10, r7, r8 + and r1, r1, r10 + eor r0, r0, r6, ror #22 + eor r1, r1, r7 + add r5, r5, r0 + add r5, r5, r1 + # Round 13 + vmov.32 r10, d6[1] + # Calc new W[12]-W[13] + vext.8 d10, d6, d7, #4 + ror r0, r9, #6 + vshl.u32 d8, d5, #15 + eor r1, r2, r3 + vsri.u32 d8, d5, #17 + eor r0, r0, r9, ror #11 + vshl.u32 d9, d5, #13 + and r1, r1, r9 + vsri.u32 d9, d5, #19 + eor r0, r0, r9, ror #25 + veor d9, d8 + eor r1, r1, r3 + vshr.u32 d8, d5, #10 + add r4, r4, r0 + veor d9, d8 + add r4, r4, r1 + vadd.i32 d6, d9 + ldr r0, [r12, #52] + vext.8 d11, d2, d3, #4 + add r4, r4, r10 + vadd.i32 d6, d11 + add r4, r4, r0 + vshl.u32 d8, d10, #25 + add r8, r8, r4 + vsri.u32 d8, d10, #7 + ror r0, r5, #2 + vshl.u32 d9, d10, #14 + eor r1, r5, r6 + vsri.u32 d9, d10, #18 + eor r0, r0, r5, ror #13 + veor d9, d8 + eor r10, r6, r7 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r5, ror #22 + vadd.i32 d6, d9 + eor r1, r1, r6 + add r4, r4, r0 + add r4, r4, r1 + # Round 14 + vmov.32 r10, d7[0] + ror r0, r8, #6 + eor r1, r9, r2 + eor r0, r0, r8, ror #11 + and r1, r1, r8 + eor r0, r0, r8, ror #25 + eor r1, r1, r2 + add r3, r3, r0 + add r3, r3, r1 + ldr r0, [r12, #56] + add r3, r3, r10 + add r3, r3, r0 + add r7, r7, r3 + ror r0, r4, #2 + eor r1, r4, r5 + eor r0, r0, r4, ror #13 + eor r10, r5, r6 + and r1, r1, r10 + eor r0, r0, r4, ror #22 + eor r1, r1, r5 + add r3, r3, r0 + add r3, r3, r1 + # Round 15 + vmov.32 r10, d7[1] + # Calc new W[14]-W[15] + vext.8 d10, d7, d0, #4 + ror r0, r7, #6 + vshl.u32 d8, d6, #15 + eor r1, r8, r9 + vsri.u32 d8, d6, #17 + eor r0, r0, r7, ror #11 + vshl.u32 d9, d6, 
#13 + and r1, r1, r7 + vsri.u32 d9, d6, #19 + eor r0, r0, r7, ror #25 + veor d9, d8 + eor r1, r1, r9 + vshr.u32 d8, d6, #10 + add r2, r2, r0 + veor d9, d8 + add r2, r2, r1 + vadd.i32 d7, d9 + ldr r0, [r12, #60] + vext.8 d11, d3, d4, #4 + add r2, r2, r10 + vadd.i32 d7, d11 + add r2, r2, r0 + vshl.u32 d8, d10, #25 + add r6, r6, r2 + vsri.u32 d8, d10, #7 + ror r0, r3, #2 + vshl.u32 d9, d10, #14 + eor r1, r3, r4 + vsri.u32 d9, d10, #18 + eor r0, r0, r3, ror #13 + veor d9, d8 + eor r10, r4, r5 + vshr.u32 d10, #3 + and r1, r1, r10 + veor d9, d10 + eor r0, r0, r3, ror #22 + vadd.i32 d7, d9 + eor r1, r1, r4 + add r2, r2, r0 + add r2, r2, r1 + add r12, r12, #0x40 + subs lr, lr, #1 + bne L_SHA256_transform_neon_len_start + # Round 0 + vmov.32 r10, d0[0] + ror r0, r6, #6 + eor r1, r7, r8 + eor r0, r0, r6, ror #11 + and r1, r1, r6 + eor r0, r0, r6, ror #25 + eor r1, r1, r8 + add r9, r9, r0 + add r9, r9, r1 + ldr r0, [r12] + add r9, r9, r10 + add r9, r9, r0 + add r5, r5, r9 + ror r0, r2, #2 + eor r1, r2, r3 + eor r0, r0, r2, ror #13 + eor r10, r3, r4 + and r1, r1, r10 + eor r0, r0, r2, ror #22 + eor r1, r1, r3 + add r9, r9, r0 + add r9, r9, r1 + # Round 1 + vmov.32 r10, d0[1] + ror r0, r5, #6 + eor r1, r6, r7 + eor r0, r0, r5, ror #11 + and r1, r1, r5 + eor r0, r0, r5, ror #25 + eor r1, r1, r7 + add r8, r8, r0 + add r8, r8, r1 + ldr r0, [r12, #4] + add r8, r8, r10 + add r8, r8, r0 + add r4, r4, r8 + ror r0, r9, #2 + eor r1, r9, r2 + eor r0, r0, r9, ror #13 + eor r10, r2, r3 + and r1, r1, r10 + eor r0, r0, r9, ror #22 + eor r1, r1, r2 + add r8, r8, r0 + add r8, r8, r1 + # Round 2 + vmov.32 r10, d1[0] + ror r0, r4, #6 + eor r1, r5, r6 + eor r0, r0, r4, ror #11 + and r1, r1, r4 + eor r0, r0, r4, ror #25 + eor r1, r1, r6 + add r7, r7, r0 + add r7, r7, r1 + ldr r0, [r12, #8] + add r7, r7, r10 + add r7, r7, r0 + add r3, r3, r7 + ror r0, r8, #2 + eor r1, r8, r9 + eor r0, r0, r8, ror #13 + eor r10, r9, r2 + and r1, r1, r10 + eor r0, r0, r8, ror #22 + eor r1, r1, r9 + add r7, r7, r0 + 
add r7, r7, r1 + # Round 3 + vmov.32 r10, d1[1] + ror r0, r3, #6 + eor r1, r4, r5 + eor r0, r0, r3, ror #11 + and r1, r1, r3 + eor r0, r0, r3, ror #25 + eor r1, r1, r5 + add r6, r6, r0 + add r6, r6, r1 + ldr r0, [r12, #12] + add r6, r6, r10 + add r6, r6, r0 + add r2, r2, r6 + ror r0, r7, #2 + eor r1, r7, r8 + eor r0, r0, r7, ror #13 + eor r10, r8, r9 + and r1, r1, r10 + eor r0, r0, r7, ror #22 + eor r1, r1, r8 + add r6, r6, r0 + add r6, r6, r1 + # Round 4 + vmov.32 r10, d2[0] + ror r0, r2, #6 + eor r1, r3, r4 + eor r0, r0, r2, ror #11 + and r1, r1, r2 + eor r0, r0, r2, ror #25 + eor r1, r1, r4 + add r5, r5, r0 + add r5, r5, r1 + ldr r0, [r12, #16] + add r5, r5, r10 + add r5, r5, r0 + add r9, r9, r5 + ror r0, r6, #2 + eor r1, r6, r7 + eor r0, r0, r6, ror #13 + eor r10, r7, r8 + and r1, r1, r10 + eor r0, r0, r6, ror #22 + eor r1, r1, r7 + add r5, r5, r0 + add r5, r5, r1 + # Round 5 + vmov.32 r10, d2[1] + ror r0, r9, #6 + eor r1, r2, r3 + eor r0, r0, r9, ror #11 + and r1, r1, r9 + eor r0, r0, r9, ror #25 + eor r1, r1, r3 + add r4, r4, r0 + add r4, r4, r1 + ldr r0, [r12, #20] + add r4, r4, r10 + add r4, r4, r0 + add r8, r8, r4 + ror r0, r5, #2 + eor r1, r5, r6 + eor r0, r0, r5, ror #13 + eor r10, r6, r7 + and r1, r1, r10 + eor r0, r0, r5, ror #22 + eor r1, r1, r6 + add r4, r4, r0 + add r4, r4, r1 + # Round 6 + vmov.32 r10, d3[0] + ror r0, r8, #6 + eor r1, r9, r2 + eor r0, r0, r8, ror #11 + and r1, r1, r8 + eor r0, r0, r8, ror #25 + eor r1, r1, r2 + add r3, r3, r0 + add r3, r3, r1 + ldr r0, [r12, #24] + add r3, r3, r10 + add r3, r3, r0 + add r7, r7, r3 + ror r0, r4, #2 + eor r1, r4, r5 + eor r0, r0, r4, ror #13 + eor r10, r5, r6 + and r1, r1, r10 + eor r0, r0, r4, ror #22 + eor r1, r1, r5 + add r3, r3, r0 + add r3, r3, r1 + # Round 7 + vmov.32 r10, d3[1] + ror r0, r7, #6 + eor r1, r8, r9 + eor r0, r0, r7, ror #11 + and r1, r1, r7 + eor r0, r0, r7, ror #25 + eor r1, r1, r9 + add r2, r2, r0 + add r2, r2, r1 + ldr r0, [r12, #28] + add r2, r2, r10 + add r2, r2, r0 + add r6, 
r6, r2 + ror r0, r3, #2 + eor r1, r3, r4 + eor r0, r0, r3, ror #13 + eor r10, r4, r5 + and r1, r1, r10 + eor r0, r0, r3, ror #22 + eor r1, r1, r4 + add r2, r2, r0 + add r2, r2, r1 + # Round 8 + vmov.32 r10, d4[0] + ror r0, r6, #6 + eor r1, r7, r8 + eor r0, r0, r6, ror #11 + and r1, r1, r6 + eor r0, r0, r6, ror #25 + eor r1, r1, r8 + add r9, r9, r0 + add r9, r9, r1 + ldr r0, [r12, #32] + add r9, r9, r10 + add r9, r9, r0 + add r5, r5, r9 + ror r0, r2, #2 + eor r1, r2, r3 + eor r0, r0, r2, ror #13 + eor r10, r3, r4 + and r1, r1, r10 + eor r0, r0, r2, ror #22 + eor r1, r1, r3 + add r9, r9, r0 + add r9, r9, r1 + # Round 9 + vmov.32 r10, d4[1] + ror r0, r5, #6 + eor r1, r6, r7 + eor r0, r0, r5, ror #11 + and r1, r1, r5 + eor r0, r0, r5, ror #25 + eor r1, r1, r7 + add r8, r8, r0 + add r8, r8, r1 + ldr r0, [r12, #36] + add r8, r8, r10 + add r8, r8, r0 + add r4, r4, r8 + ror r0, r9, #2 + eor r1, r9, r2 + eor r0, r0, r9, ror #13 + eor r10, r2, r3 + and r1, r1, r10 + eor r0, r0, r9, ror #22 + eor r1, r1, r2 + add r8, r8, r0 + add r8, r8, r1 + # Round 10 + vmov.32 r10, d5[0] + ror r0, r4, #6 + eor r1, r5, r6 + eor r0, r0, r4, ror #11 + and r1, r1, r4 + eor r0, r0, r4, ror #25 + eor r1, r1, r6 + add r7, r7, r0 + add r7, r7, r1 + ldr r0, [r12, #40] + add r7, r7, r10 + add r7, r7, r0 + add r3, r3, r7 + ror r0, r8, #2 + eor r1, r8, r9 + eor r0, r0, r8, ror #13 + eor r10, r9, r2 + and r1, r1, r10 + eor r0, r0, r8, ror #22 + eor r1, r1, r9 + add r7, r7, r0 + add r7, r7, r1 + # Round 11 + vmov.32 r10, d5[1] + ror r0, r3, #6 + eor r1, r4, r5 + eor r0, r0, r3, ror #11 + and r1, r1, r3 + eor r0, r0, r3, ror #25 + eor r1, r1, r5 + add r6, r6, r0 + add r6, r6, r1 + ldr r0, [r12, #44] + add r6, r6, r10 + add r6, r6, r0 + add r2, r2, r6 + ror r0, r7, #2 + eor r1, r7, r8 + eor r0, r0, r7, ror #13 + eor r10, r8, r9 + and r1, r1, r10 + eor r0, r0, r7, ror #22 + eor r1, r1, r8 + add r6, r6, r0 + add r6, r6, r1 + # Round 12 + vmov.32 r10, d6[0] + ror r0, r2, #6 + eor r1, r3, r4 + eor r0, r0, r2, 
ror #11 + and r1, r1, r2 + eor r0, r0, r2, ror #25 + eor r1, r1, r4 + add r5, r5, r0 + add r5, r5, r1 + ldr r0, [r12, #48] + add r5, r5, r10 + add r5, r5, r0 + add r9, r9, r5 + ror r0, r6, #2 + eor r1, r6, r7 + eor r0, r0, r6, ror #13 + eor r10, r7, r8 + and r1, r1, r10 + eor r0, r0, r6, ror #22 + eor r1, r1, r7 + add r5, r5, r0 + add r5, r5, r1 + # Round 13 + vmov.32 r10, d6[1] + ror r0, r9, #6 + eor r1, r2, r3 + eor r0, r0, r9, ror #11 + and r1, r1, r9 + eor r0, r0, r9, ror #25 + eor r1, r1, r3 + add r4, r4, r0 + add r4, r4, r1 + ldr r0, [r12, #52] + add r4, r4, r10 + add r4, r4, r0 + add r8, r8, r4 + ror r0, r5, #2 + eor r1, r5, r6 + eor r0, r0, r5, ror #13 + eor r10, r6, r7 + and r1, r1, r10 + eor r0, r0, r5, ror #22 + eor r1, r1, r6 + add r4, r4, r0 + add r4, r4, r1 + # Round 14 + vmov.32 r10, d7[0] + ror r0, r8, #6 + eor r1, r9, r2 + eor r0, r0, r8, ror #11 + and r1, r1, r8 + eor r0, r0, r8, ror #25 + eor r1, r1, r2 + add r3, r3, r0 + add r3, r3, r1 + ldr r0, [r12, #56] + add r3, r3, r10 + add r3, r3, r0 + add r7, r7, r3 + ror r0, r4, #2 + eor r1, r4, r5 + eor r0, r0, r4, ror #13 + eor r10, r5, r6 + and r1, r1, r10 + eor r0, r0, r4, ror #22 + eor r1, r1, r5 + add r3, r3, r0 + add r3, r3, r1 + # Round 15 + vmov.32 r10, d7[1] + ror r0, r7, #6 + eor r1, r8, r9 + eor r0, r0, r7, ror #11 + and r1, r1, r7 + eor r0, r0, r7, ror #25 + eor r1, r1, r9 + add r2, r2, r0 + add r2, r2, r1 + ldr r0, [r12, #60] + add r2, r2, r10 + add r2, r2, r0 + add r6, r6, r2 + ror r0, r3, #2 + eor r1, r3, r4 + eor r0, r0, r3, ror #13 + eor r10, r4, r5 + and r1, r1, r10 + eor r0, r0, r3, ror #22 + eor r1, r1, r4 + add r2, r2, r0 + add r2, r2, r1 + ldr r10, [sp] + # Add in digest from start +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r0, [r10] + ldr r1, [r10, #4] +#else + ldrd r0, r1, [r10] +#endif + add r2, r2, r0 + add r3, r3, r1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r2, [r10] + str r3, [r10, #4] +#else + strd r2, r3, [r10] +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r0, [r10, #8] + ldr r1, [r10, #12] +#else + ldrd r0, r1, [r10, #8] +#endif + add r4, r4, r0 + add r5, r5, r1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r10, #8] + str r5, [r10, #12] +#else + strd r4, r5, [r10, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r0, [r10, #16] + ldr r1, [r10, #20] +#else + ldrd r0, r1, [r10, #16] +#endif + add r6, r6, r0 + add r7, r7, r1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r10, #16] + str r7, [r10, #20] +#else + strd r6, r7, [r10, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r0, [r10, #24] + ldr r1, [r10, #28] +#else + ldrd r0, r1, [r10, #24] +#endif + add r8, r8, r0 + add r9, r9, r1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r10, #24] + str r9, [r10, #28] +#else + strd r8, r9, [r10, #24] +#endif + ldr r10, [sp, #8] + ldr r1, [sp, #4] + subs r10, r10, #0x40 + sub r12, r12, #0xc0 + str r10, [sp, #8] + bne L_SHA256_transform_neon_len_begin + add sp, sp, #24 + vpop {d8-d11} + pop {r4, r5, r6, r7, r8, r9, r10, pc} + .size Transform_Sha256_Len,.-Transform_Sha256_Len +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#endif /* !NO_SHA256 */ +#endif /* !__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c new file mode 100644 index 000000000..d13b20da6 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -0,0 +1,2800 @@ +/* armv8-32-sha256-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. 
+ * + * https://www.wolfssl.com + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha256.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#include +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +#ifndef NO_SHA256 +#include + +#ifdef WOLFSSL_ARMASM_NO_NEON +static const uint32_t L_SHA256_transform_len_k[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); +void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +{ + register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = 
(word32)len_p; + register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k; + + __asm__ __volatile__ ( + "sub sp, sp, #0xc0\n\t" + /* Copy digest to add in at end */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha256]]\n\t" + "ldr r5, [%[sha256], #4]\n\t" +#else + "ldrd r4, r5, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" +#else + "ldrd r6, r7, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha256], #16]\n\t" + "ldr r9, [%[sha256], #20]\n\t" +#else + "ldrd r8, r9, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[sha256], #24]\n\t" + "ldr r11, [%[sha256], #28]\n\t" +#else + "ldrd r10, r11, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else + "strd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #72]\n\t" + "str r7, [sp, #76]\n\t" +#else + "strd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #80]\n\t" + "str r9, [sp, #84]\n\t" +#else + "strd r8, r9, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #88]\n\t" + "str r11, [sp, #92]\n\t" +#else + "strd r10, r11, [sp, #88]\n\t" +#endif + /* Start of loop processing a block */ + "\n" + "L_SHA256_transform_len_begin_%=: \n\t" + /* Load, Reverse and Store W - 64 bytes */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "ldr r4, [%[data]]\n\t" + "ldr r5, [%[data], #4]\n\t" + "ldr r6, [%[data], #8]\n\t" + "ldr r7, [%[data], #12]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic 
r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else + "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #8]\n\t" + "str r7, [sp, #12]\n\t" +#else + "strd r6, r7, [sp, #8]\n\t" +#endif + "ldr r4, [%[data], #16]\n\t" + "ldr r5, [%[data], #20]\n\t" + "ldr r6, [%[data], #24]\n\t" + "ldr r7, [%[data], #28]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else + "strd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #24]\n\t" + "str r7, [sp, #28]\n\t" +#else + "strd r6, r7, [sp, #24]\n\t" +#endif + "ldr r4, [%[data], #32]\n\t" + "ldr r5, [%[data], #36]\n\t" + "ldr r6, [%[data], #40]\n\t" + "ldr r7, [%[data], #44]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, 
lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" +#else + "strd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #40]\n\t" + "str r7, [sp, #44]\n\t" +#else + "strd r6, r7, [sp, #40]\n\t" +#endif + "ldr r4, [%[data], #48]\n\t" + "ldr r5, [%[data], #52]\n\t" + "ldr r6, [%[data], #56]\n\t" + "ldr r7, [%[data], #60]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" +#else + "strd r4, r5, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #56]\n\t" + "str r7, [sp, #60]\n\t" +#else + "strd r6, r7, [sp, #56]\n\t" +#endif +#else + "ldr r4, [%[data]]\n\t" + "ldr r5, [%[data], #4]\n\t" + "ldr r6, [%[data], #8]\n\t" + "ldr r7, [%[data], #12]\n\t" + "ldr r8, [%[data], #16]\n\t" + "ldr r9, [%[data], #20]\n\t" + "ldr r10, [%[data], #24]\n\t" + "ldr r11, [%[data], #28]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else + "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #8]\n\t" + "str r7, [sp, #12]\n\t" +#else + "strd r6, 
r7, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" +#else + "strd r8, r9, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #24]\n\t" + "str r11, [sp, #28]\n\t" +#else + "strd r10, r11, [sp, #24]\n\t" +#endif + "ldr r4, [%[data], #32]\n\t" + "ldr r5, [%[data], #36]\n\t" + "ldr r6, [%[data], #40]\n\t" + "ldr r7, [%[data], #44]\n\t" + "ldr r8, [%[data], #48]\n\t" + "ldr r9, [%[data], #52]\n\t" + "ldr r10, [%[data], #56]\n\t" + "ldr r11, [%[data], #60]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" +#else + "strd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #40]\n\t" + "str r7, [sp, #44]\n\t" +#else + "strd r6, r7, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #48]\n\t" + "str r9, [sp, #52]\n\t" +#else + "strd r8, r9, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #56]\n\t" + "str r11, [sp, #60]\n\t" +#else + "strd r10, r11, [sp, #56]\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + "ldr r11, [%[sha256], #4]\n\t" + "ldr r4, [%[sha256], #8]\n\t" + "eor r11, r11, r4\n\t" + "mov r12, #3\n\t" + /* Start of 16 rounds */ + "\n" + "L_SHA256_transform_len_start_%=: \n\t" + /* Round 0 */ + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp]\n\t" + "ldr r6, 
[r3]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r8, [%[sha256], #12]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #12]\n\t" + "str r9, [%[sha256], #28]\n\t" + /* Calc new W[0] */ + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #36]\n\t" + "ldr r8, [sp, #4]\n\t" + "ldr r9, [sp]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp]\n\t" + /* Round 1 */ + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r9, [%[sha256], #24]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #4]\n\t" + "ldr r6, [r3, #4]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r8, [%[sha256], #8]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #8]\n\t" + "str r9, [%[sha256], #24]\n\t" + /* Calc new W[1] */ + "ldr r6, [sp, #60]\n\t" + "ldr r7, [sp, #40]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #4]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + 
"eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #4]\n\t" + /* Round 2 */ + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r9, [%[sha256], #20]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [r3, #8]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r8, [%[sha256], #4]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #4]\n\t" + "str r9, [%[sha256], #20]\n\t" + /* Calc new W[2] */ + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #44]\n\t" + "ldr r8, [sp, #12]\n\t" + "ldr r9, [sp, #8]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #8]\n\t" + /* Round 3 */ + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r9, [%[sha256], #16]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #12]\n\t" + "ldr r6, [r3, #12]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r8, [%[sha256]]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" 
+ "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256]]\n\t" + "str r9, [%[sha256], #16]\n\t" + /* Calc new W[3] */ + "ldr r6, [sp, #4]\n\t" + "ldr r7, [sp, #48]\n\t" + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #12]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #12]\n\t" + /* Round 4 */ + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r9, [%[sha256], #12]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #16]\n\t" + "ldr r6, [r3, #16]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r8, [%[sha256], #28]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #28]\n\t" + "str r9, [%[sha256], #12]\n\t" + /* Calc new W[4] */ + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #52]\n\t" + "ldr r8, [sp, #20]\n\t" + "ldr r9, [sp, #16]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #16]\n\t" + /* Round 5 */ + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, 
[%[sha256], #4]\n\t" + "ldr r9, [%[sha256], #8]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #20]\n\t" + "ldr r6, [r3, #20]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r8, [%[sha256], #24]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #24]\n\t" + "str r9, [%[sha256], #8]\n\t" + /* Calc new W[5] */ + "ldr r6, [sp, #12]\n\t" + "ldr r7, [sp, #56]\n\t" + "ldr r8, [sp, #24]\n\t" + "ldr r9, [sp, #20]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #20]\n\t" + /* Round 6 */ + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r9, [%[sha256], #4]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [r3, #24]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r8, [%[sha256], #20]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, 
[%[sha256], #20]\n\t" + "str r9, [%[sha256], #4]\n\t" + /* Calc new W[6] */ + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #60]\n\t" + "ldr r8, [sp, #28]\n\t" + "ldr r9, [sp, #24]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #24]\n\t" + /* Round 7 */ + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r9, [%[sha256]]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #28]\n\t" + "ldr r6, [r3, #28]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r8, [%[sha256], #16]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #16]\n\t" + "str r9, [%[sha256]]\n\t" + /* Calc new W[7] */ + "ldr r6, [sp, #20]\n\t" + "ldr r7, [sp]\n\t" + "ldr r8, [sp, #32]\n\t" + "ldr r9, [sp, #28]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #28]\n\t" + /* Round 8 */ + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, 
r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #32]\n\t" + "ldr r6, [r3, #32]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r8, [%[sha256], #12]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #12]\n\t" + "str r9, [%[sha256], #28]\n\t" + /* Calc new W[8] */ + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #36]\n\t" + "ldr r9, [sp, #32]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #32]\n\t" + /* Round 9 */ + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r9, [%[sha256], #24]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #36]\n\t" + "ldr r6, [r3, #36]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r8, [%[sha256], #8]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #8]\n\t" + "str r9, [%[sha256], #24]\n\t" + /* Calc new W[9] */ + "ldr r6, [sp, #28]\n\t" + "ldr r7, [sp, #8]\n\t" + "ldr r8, [sp, #40]\n\t" + "ldr r9, [sp, #36]\n\t" + "ror r4, r6, #17\n\t" + "ror 
r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #36]\n\t" + /* Round 10 */ + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r9, [%[sha256], #20]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [r3, #40]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r8, [%[sha256], #4]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #4]\n\t" + "str r9, [%[sha256], #20]\n\t" + /* Calc new W[10] */ + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #12]\n\t" + "ldr r8, [sp, #44]\n\t" + "ldr r9, [sp, #40]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #40]\n\t" + /* Round 11 */ + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r9, [%[sha256], #16]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #44]\n\t" + "ldr r6, [r3, #44]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, 
[%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r8, [%[sha256]]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256]]\n\t" + "str r9, [%[sha256], #16]\n\t" + /* Calc new W[11] */ + "ldr r6, [sp, #36]\n\t" + "ldr r7, [sp, #16]\n\t" + "ldr r8, [sp, #48]\n\t" + "ldr r9, [sp, #44]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #44]\n\t" + /* Round 12 */ + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r9, [%[sha256], #12]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #48]\n\t" + "ldr r6, [r3, #48]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r8, [%[sha256], #28]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #28]\n\t" + "str r9, [%[sha256], #12]\n\t" + /* Calc new W[12] */ + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #20]\n\t" + "ldr r8, [sp, #52]\n\t" + "ldr r9, [sp, #48]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add 
r9, r9, r4\n\t" + "str r9, [sp, #48]\n\t" + /* Round 13 */ + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r9, [%[sha256], #8]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [r3, #52]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r8, [%[sha256], #24]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #24]\n\t" + "str r9, [%[sha256], #8]\n\t" + /* Calc new W[13] */ + "ldr r6, [sp, #44]\n\t" + "ldr r7, [sp, #24]\n\t" + "ldr r8, [sp, #56]\n\t" + "ldr r9, [sp, #52]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #52]\n\t" + /* Round 14 */ + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r9, [%[sha256], #4]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #56]\n\t" + "ldr r6, [r3, #56]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r8, [%[sha256], #20]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, 
r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #20]\n\t" + "str r9, [%[sha256], #4]\n\t" + /* Calc new W[14] */ + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #28]\n\t" + "ldr r8, [sp, #60]\n\t" + "ldr r9, [sp, #56]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #56]\n\t" + /* Round 15 */ + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r9, [%[sha256]]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #60]\n\t" + "ldr r6, [r3, #60]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r8, [%[sha256], #16]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #16]\n\t" + "str r9, [%[sha256]]\n\t" + /* Calc new W[15] */ + "ldr r6, [sp, #52]\n\t" + "ldr r7, [sp, #32]\n\t" + "ldr r8, [sp]\n\t" + "ldr r9, [sp, #60]\n\t" + "ror r4, r6, #17\n\t" + "ror r5, r8, #7\n\t" + "eor r4, r4, r6, ror #19\n\t" + "eor r5, r5, r8, ror #18\n\t" + "eor r4, r4, r6, lsr #10\n\t" + "eor r5, r5, r8, lsr #3\n\t" + "add r9, r9, r7\n\t" + "add r4, r4, r5\n\t" + "add r9, r9, r4\n\t" + "str r9, [sp, #60]\n\t" + "add r3, r3, #0x40\n\t" + "subs r12, r12, #1\n\t" + "bne L_SHA256_transform_len_start_%=\n\t" + /* Round 0 */ + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, 
[%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp]\n\t" + "ldr r6, [r3]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r8, [%[sha256], #12]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #12]\n\t" + "str r9, [%[sha256], #28]\n\t" + /* Round 1 */ + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r9, [%[sha256], #24]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #4]\n\t" + "ldr r6, [r3, #4]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r8, [%[sha256], #8]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #8]\n\t" + "str r9, [%[sha256], #24]\n\t" + /* Round 2 */ + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r9, [%[sha256], #20]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + 
"add r9, r9, r6\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [r3, #8]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r8, [%[sha256], #4]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #4]\n\t" + "str r9, [%[sha256], #20]\n\t" + /* Round 3 */ + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r9, [%[sha256], #16]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #12]\n\t" + "ldr r6, [r3, #12]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r8, [%[sha256]]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256]]\n\t" + "str r9, [%[sha256], #16]\n\t" + /* Round 4 */ + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r9, [%[sha256], #12]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #16]\n\t" + "ldr r6, [r3, #16]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r8, [%[sha256], #28]\n\t" + "ror r4, r5, 
#2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #28]\n\t" + "str r9, [%[sha256], #12]\n\t" + /* Round 5 */ + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r9, [%[sha256], #8]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #20]\n\t" + "ldr r6, [r3, #20]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r8, [%[sha256], #24]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #24]\n\t" + "str r9, [%[sha256], #8]\n\t" + /* Round 6 */ + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r9, [%[sha256], #4]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [r3, #24]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r8, [%[sha256], #20]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #20]\n\t" + "str 
r9, [%[sha256], #4]\n\t" + /* Round 7 */ + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r9, [%[sha256]]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #28]\n\t" + "ldr r6, [r3, #28]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r8, [%[sha256], #16]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #16]\n\t" + "str r9, [%[sha256]]\n\t" + /* Round 8 */ + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #32]\n\t" + "ldr r6, [r3, #32]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r8, [%[sha256], #12]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #12]\n\t" + "str r9, [%[sha256], #28]\n\t" + /* Round 9 */ + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r9, [%[sha256], #24]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, 
r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #36]\n\t" + "ldr r6, [r3, #36]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r8, [%[sha256], #8]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #8]\n\t" + "str r9, [%[sha256], #24]\n\t" + /* Round 10 */ + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r9, [%[sha256], #20]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #40]\n\t" + "ldr r6, [r3, #40]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r8, [%[sha256], #4]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #4]\n\t" + "str r9, [%[sha256], #20]\n\t" + /* Round 11 */ + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r9, [%[sha256], #16]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #44]\n\t" + "ldr r6, [r3, #44]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, 
[%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r8, [%[sha256]]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256]]\n\t" + "str r9, [%[sha256], #16]\n\t" + /* Round 12 */ + "ldr r5, [%[sha256]]\n\t" + "ldr r6, [%[sha256], #4]\n\t" + "ldr r7, [%[sha256], #8]\n\t" + "ldr r9, [%[sha256], #12]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #48]\n\t" + "ldr r6, [r3, #48]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #16]\n\t" + "ldr r6, [%[sha256], #20]\n\t" + "ldr r7, [%[sha256], #24]\n\t" + "ldr r8, [%[sha256], #28]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #28]\n\t" + "str r9, [%[sha256], #12]\n\t" + /* Round 13 */ + "ldr r5, [%[sha256], #28]\n\t" + "ldr r6, [%[sha256]]\n\t" + "ldr r7, [%[sha256], #4]\n\t" + "ldr r9, [%[sha256], #8]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #52]\n\t" + "ldr r6, [r3, #52]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #12]\n\t" + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" + "ldr r8, [%[sha256], #24]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, 
r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #24]\n\t" + "str r9, [%[sha256], #8]\n\t" + /* Round 14 */ + "ldr r5, [%[sha256], #24]\n\t" + "ldr r6, [%[sha256], #28]\n\t" + "ldr r7, [%[sha256]]\n\t" + "ldr r9, [%[sha256], #4]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #56]\n\t" + "ldr r6, [r3, #56]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #8]\n\t" + "ldr r6, [%[sha256], #12]\n\t" + "ldr r7, [%[sha256], #16]\n\t" + "ldr r8, [%[sha256], #20]\n\t" + "ror r4, r5, #2\n\t" + "eor r10, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r11, r11, r10\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r11, r11, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r11\n\t" + "str r8, [%[sha256], #20]\n\t" + "str r9, [%[sha256], #4]\n\t" + /* Round 15 */ + "ldr r5, [%[sha256], #20]\n\t" + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" + "ldr r9, [%[sha256]]\n\t" + "ror r4, r5, #6\n\t" + "eor r6, r6, r7\n\t" + "eor r4, r4, r5, ror #11\n\t" + "and r6, r6, r5\n\t" + "eor r4, r4, r5, ror #25\n\t" + "eor r6, r6, r7\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [sp, #60]\n\t" + "ldr r6, [r3, #60]\n\t" + "add r9, r9, r5\n\t" + "add r9, r9, r6\n\t" + "ldr r5, [%[sha256], #4]\n\t" + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" + "ldr r8, [%[sha256], #16]\n\t" + "ror r4, r5, #2\n\t" + "eor r11, r5, r6\n\t" + "eor r4, r4, r5, ror #13\n\t" + "and r10, r10, r11\n\t" + "eor r4, r4, r5, ror #22\n\t" + "eor r10, r10, r6\n\t" + "add r8, r8, r9\n\t" + "add r9, r9, r4\n\t" + "add r9, r9, r10\n\t" + "str r8, [%[sha256], #16]\n\t" + "str r9, [%[sha256]]\n\t" + /* Add in digest from start */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha256]]\n\t" + "ldr r5, [%[sha256], 
#4]\n\t" +#else + "ldrd r4, r5, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha256], #8]\n\t" + "ldr r7, [%[sha256], #12]\n\t" +#else + "ldrd r6, r7, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #64]\n\t" + "ldr r9, [sp, #68]\n\t" +#else + "ldrd r8, r9, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #72]\n\t" + "ldr r11, [sp, #76]\n\t" +#else + "ldrd r10, r11, [sp, #72]\n\t" +#endif + "add r4, r4, r8\n\t" + "add r5, r5, r9\n\t" + "add r6, r6, r10\n\t" + "add r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha256]]\n\t" + "str r5, [%[sha256], #4]\n\t" +#else + "strd r4, r5, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha256], #8]\n\t" + "str r7, [%[sha256], #12]\n\t" +#else + "strd r6, r7, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else + "strd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #72]\n\t" + "str r7, [sp, #76]\n\t" +#else + "strd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha256], #16]\n\t" + "ldr r5, [%[sha256], #20]\n\t" +#else + "ldrd r4, r5, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha256], #24]\n\t" + "ldr r7, [%[sha256], #28]\n\t" +#else + "ldrd r6, r7, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #80]\n\t" + "ldr r9, [sp, #84]\n\t" +#else + "ldrd r8, r9, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #88]\n\t" + "ldr r11, [sp, #92]\n\t" +#else + "ldrd r10, r11, [sp, #88]\n\t" +#endif + "add r4, r4, r8\n\t" + "add r5, r5, r9\n\t" 
+ "add r6, r6, r10\n\t" + "add r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha256], #16]\n\t" + "str r5, [%[sha256], #20]\n\t" +#else + "strd r4, r5, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha256], #24]\n\t" + "str r7, [%[sha256], #28]\n\t" +#else + "strd r6, r7, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #80]\n\t" + "str r5, [sp, #84]\n\t" +#else + "strd r4, r5, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #88]\n\t" + "str r7, [sp, #92]\n\t" +#else + "strd r6, r7, [sp, #88]\n\t" +#endif + "subs %[len], %[len], #0x40\n\t" + "sub r3, r3, #0xc0\n\t" + "add %[data], %[data], #0x40\n\t" + "bne L_SHA256_transform_len_begin_%=\n\t" + "add sp, sp, #0xc0\n\t" + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#include + +#ifndef WOLFSSL_ARMASM_NO_NEON +static const uint32_t L_SHA256_transform_neon_len_k[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 
0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); +void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +{ + register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k; + + __asm__ __volatile__ ( + "sub sp, sp, #24\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[sha256], [sp]\n\t" + "str %[data], [sp, #4]\n\t" +#else + "strd %[sha256], %[data], [sp]\n\t" +#endif + "str %[len], [sp, #8]\n\t" + "mov r12, %[L_SHA256_transform_neon_len_k]\n\t" + /* Load digest into registers */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[len], [%[sha256]]\n\t" + "ldr r3, [%[sha256], #4]\n\t" +#else + "ldrd %[len], r3, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha256], #8]\n\t" + "ldr r5, [%[sha256], #12]\n\t" +#else + "ldrd r4, r5, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" +#else + "ldrd r6, r7, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" +#else + "ldrd r8, r9, [%[sha256], #24]\n\t" +#endif + /* Start of loop processing a block */ + "\n" + "L_SHA256_transform_neon_len_begin_%=: \n\t" + /* Load W */ + "vld1.8 {d0-d3}, [%[data]]!\n\t" + "vld1.8 {d4-d7}, [%[data]]!\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + "vrev32.8 q0, q0\n\t" + "vrev32.8 q1, q1\n\t" + "vrev32.8 q2, q2\n\t" + "vrev32.8 q3, q3\n\t" +#else + "vrev32.8 d0, d0\n\t" + "vrev32.8 d1, d1\n\t" + "vrev32.8 d2, d2\n\t" + "vrev32.8 d3, d3\n\t" + "vrev32.8 d4, d4\n\t" + "vrev32.8 d5, d5\n\t" + "vrev32.8 
d6, d6\n\t" + "vrev32.8 d7, d7\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + "str %[data], [sp, #4]\n\t" + "mov lr, #3\n\t" + /* Start of 16 rounds */ + "\n" + "L_SHA256_transform_neon_len_start_%=: \n\t" + /* Round 0 */ + "vmov.32 r10, d0[0]\n\t" + "ror %[sha256], r6, #6\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r6, ror #11\n\t" + "and %[data], %[data], r6\n\t" + "eor %[sha256], %[sha256], r6, ror #25\n\t" + "eor %[data], %[data], r8\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + "ldr %[sha256], [r12]\n\t" + "add r9, r9, r10\n\t" + "add r9, r9, %[sha256]\n\t" + "add r5, r5, r9\n\t" + "ror %[sha256], %[len], #2\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], %[len], ror #13\n\t" + "eor r10, r3, r4\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], %[len], ror #22\n\t" + "eor %[data], %[data], r3\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + /* Round 1 */ + "vmov.32 r10, d0[1]\n\t" + /* Calc new W[0]-W[1] */ + "vext.8 d10, d0, d1, #4\n\t" + "ror %[sha256], r5, #6\n\t" + "vshl.u32 d8, d7, #15\n\t" + "eor %[data], r6, r7\n\t" + "vsri.u32 d8, d7, #17\n\t" + "eor %[sha256], %[sha256], r5, ror #11\n\t" + "vshl.u32 d9, d7, #13\n\t" + "and %[data], %[data], r5\n\t" + "vsri.u32 d9, d7, #19\n\t" + "eor %[sha256], %[sha256], r5, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r7\n\t" + "vshr.u32 d8, d7, #10\n\t" + "add r8, r8, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r8, r8, %[data]\n\t" + "vadd.i32 d0, d9\n\t" + "ldr %[sha256], [r12, #4]\n\t" + "vext.8 d11, d4, d5, #4\n\t" + "add r8, r8, r10\n\t" + "vadd.i32 d0, d11\n\t" + "add r8, r8, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r4, r4, r8\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r9, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r9, %[len]\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r9, ror #13\n\t" + "veor d9, d8\n\t" + "eor r10, %[len], r3\n\t" + "vshr.u32 d10, #3\n\t" + 
"and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r9, ror #22\n\t" + "vadd.i32 d0, d9\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + /* Round 2 */ + "vmov.32 r10, d1[0]\n\t" + "ror %[sha256], r4, #6\n\t" + "eor %[data], r5, r6\n\t" + "eor %[sha256], %[sha256], r4, ror #11\n\t" + "and %[data], %[data], r4\n\t" + "eor %[sha256], %[sha256], r4, ror #25\n\t" + "eor %[data], %[data], r6\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + "ldr %[sha256], [r12, #8]\n\t" + "add r7, r7, r10\n\t" + "add r7, r7, %[sha256]\n\t" + "add r3, r3, r7\n\t" + "ror %[sha256], r8, #2\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r8, ror #13\n\t" + "eor r10, r9, %[len]\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r8, ror #22\n\t" + "eor %[data], %[data], r9\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + /* Round 3 */ + "vmov.32 r10, d1[1]\n\t" + /* Calc new W[2]-W[3] */ + "vext.8 d10, d1, d2, #4\n\t" + "ror %[sha256], r3, #6\n\t" + "vshl.u32 d8, d0, #15\n\t" + "eor %[data], r4, r5\n\t" + "vsri.u32 d8, d0, #17\n\t" + "eor %[sha256], %[sha256], r3, ror #11\n\t" + "vshl.u32 d9, d0, #13\n\t" + "and %[data], %[data], r3\n\t" + "vsri.u32 d9, d0, #19\n\t" + "eor %[sha256], %[sha256], r3, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r5\n\t" + "vshr.u32 d8, d0, #10\n\t" + "add r6, r6, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r6, r6, %[data]\n\t" + "vadd.i32 d1, d9\n\t" + "ldr %[sha256], [r12, #12]\n\t" + "vext.8 d11, d5, d6, #4\n\t" + "add r6, r6, r10\n\t" + "vadd.i32 d1, d11\n\t" + "add r6, r6, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add %[len], %[len], r6\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r7, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r7, r8\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r7, ror #13\n\t" + "veor d9, d8\n\t" + "eor r10, r8, r9\n\t" + "vshr.u32 d10, #3\n\t" 
+ "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r7, ror #22\n\t" + "vadd.i32 d1, d9\n\t" + "eor %[data], %[data], r8\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + /* Round 4 */ + "vmov.32 r10, d2[0]\n\t" + "ror %[sha256], %[len], #6\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], %[len], ror #11\n\t" + "and %[data], %[data], %[len]\n\t" + "eor %[sha256], %[sha256], %[len], ror #25\n\t" + "eor %[data], %[data], r4\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + "ldr %[sha256], [r12, #16]\n\t" + "add r5, r5, r10\n\t" + "add r5, r5, %[sha256]\n\t" + "add r9, r9, r5\n\t" + "ror %[sha256], r6, #2\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r6, ror #13\n\t" + "eor r10, r7, r8\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r6, ror #22\n\t" + "eor %[data], %[data], r7\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + /* Round 5 */ + "vmov.32 r10, d2[1]\n\t" + /* Calc new W[4]-W[5] */ + "vext.8 d10, d2, d3, #4\n\t" + "ror %[sha256], r9, #6\n\t" + "vshl.u32 d8, d1, #15\n\t" + "eor %[data], %[len], r3\n\t" + "vsri.u32 d8, d1, #17\n\t" + "eor %[sha256], %[sha256], r9, ror #11\n\t" + "vshl.u32 d9, d1, #13\n\t" + "and %[data], %[data], r9\n\t" + "vsri.u32 d9, d1, #19\n\t" + "eor %[sha256], %[sha256], r9, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r3\n\t" + "vshr.u32 d8, d1, #10\n\t" + "add r4, r4, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r4, r4, %[data]\n\t" + "vadd.i32 d2, d9\n\t" + "ldr %[sha256], [r12, #20]\n\t" + "vext.8 d11, d6, d7, #4\n\t" + "add r4, r4, r10\n\t" + "vadd.i32 d2, d11\n\t" + "add r4, r4, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r8, r8, r4\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r5, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r5, r6\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r5, ror #13\n\t" + "veor d9, d8\n\t" + "eor r10, r6, r7\n\t" + "vshr.u32 d10, 
#3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r5, ror #22\n\t" + "vadd.i32 d2, d9\n\t" + "eor %[data], %[data], r6\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + /* Round 6 */ + "vmov.32 r10, d3[0]\n\t" + "ror %[sha256], r8, #6\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r8, ror #11\n\t" + "and %[data], %[data], r8\n\t" + "eor %[sha256], %[sha256], r8, ror #25\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + "ldr %[sha256], [r12, #24]\n\t" + "add r3, r3, r10\n\t" + "add r3, r3, %[sha256]\n\t" + "add r7, r7, r3\n\t" + "ror %[sha256], r4, #2\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r4, ror #13\n\t" + "eor r10, r5, r6\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r4, ror #22\n\t" + "eor %[data], %[data], r5\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + /* Round 7 */ + "vmov.32 r10, d3[1]\n\t" + /* Calc new W[6]-W[7] */ + "vext.8 d10, d3, d4, #4\n\t" + "ror %[sha256], r7, #6\n\t" + "vshl.u32 d8, d2, #15\n\t" + "eor %[data], r8, r9\n\t" + "vsri.u32 d8, d2, #17\n\t" + "eor %[sha256], %[sha256], r7, ror #11\n\t" + "vshl.u32 d9, d2, #13\n\t" + "and %[data], %[data], r7\n\t" + "vsri.u32 d9, d2, #19\n\t" + "eor %[sha256], %[sha256], r7, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r9\n\t" + "vshr.u32 d8, d2, #10\n\t" + "add %[len], %[len], %[sha256]\n\t" + "veor d9, d8\n\t" + "add %[len], %[len], %[data]\n\t" + "vadd.i32 d3, d9\n\t" + "ldr %[sha256], [r12, #28]\n\t" + "vext.8 d11, d7, d0, #4\n\t" + "add %[len], %[len], r10\n\t" + "vadd.i32 d3, d11\n\t" + "add %[len], %[len], %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r6, r6, %[len]\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r3, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r3, r4\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r3, ror #13\n\t" + "veor d9, d8\n\t" + "eor 
r10, r4, r5\n\t" + "vshr.u32 d10, #3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r3, ror #22\n\t" + "vadd.i32 d3, d9\n\t" + "eor %[data], %[data], r4\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + /* Round 8 */ + "vmov.32 r10, d4[0]\n\t" + "ror %[sha256], r6, #6\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r6, ror #11\n\t" + "and %[data], %[data], r6\n\t" + "eor %[sha256], %[sha256], r6, ror #25\n\t" + "eor %[data], %[data], r8\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + "ldr %[sha256], [r12, #32]\n\t" + "add r9, r9, r10\n\t" + "add r9, r9, %[sha256]\n\t" + "add r5, r5, r9\n\t" + "ror %[sha256], %[len], #2\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], %[len], ror #13\n\t" + "eor r10, r3, r4\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], %[len], ror #22\n\t" + "eor %[data], %[data], r3\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + /* Round 9 */ + "vmov.32 r10, d4[1]\n\t" + /* Calc new W[8]-W[9] */ + "vext.8 d10, d4, d5, #4\n\t" + "ror %[sha256], r5, #6\n\t" + "vshl.u32 d8, d3, #15\n\t" + "eor %[data], r6, r7\n\t" + "vsri.u32 d8, d3, #17\n\t" + "eor %[sha256], %[sha256], r5, ror #11\n\t" + "vshl.u32 d9, d3, #13\n\t" + "and %[data], %[data], r5\n\t" + "vsri.u32 d9, d3, #19\n\t" + "eor %[sha256], %[sha256], r5, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r7\n\t" + "vshr.u32 d8, d3, #10\n\t" + "add r8, r8, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r8, r8, %[data]\n\t" + "vadd.i32 d4, d9\n\t" + "ldr %[sha256], [r12, #36]\n\t" + "vext.8 d11, d0, d1, #4\n\t" + "add r8, r8, r10\n\t" + "vadd.i32 d4, d11\n\t" + "add r8, r8, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r4, r4, r8\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r9, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r9, %[len]\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r9, ror #13\n\t" + 
"veor d9, d8\n\t" + "eor r10, %[len], r3\n\t" + "vshr.u32 d10, #3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r9, ror #22\n\t" + "vadd.i32 d4, d9\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + /* Round 10 */ + "vmov.32 r10, d5[0]\n\t" + "ror %[sha256], r4, #6\n\t" + "eor %[data], r5, r6\n\t" + "eor %[sha256], %[sha256], r4, ror #11\n\t" + "and %[data], %[data], r4\n\t" + "eor %[sha256], %[sha256], r4, ror #25\n\t" + "eor %[data], %[data], r6\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + "ldr %[sha256], [r12, #40]\n\t" + "add r7, r7, r10\n\t" + "add r7, r7, %[sha256]\n\t" + "add r3, r3, r7\n\t" + "ror %[sha256], r8, #2\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r8, ror #13\n\t" + "eor r10, r9, %[len]\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r8, ror #22\n\t" + "eor %[data], %[data], r9\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + /* Round 11 */ + "vmov.32 r10, d5[1]\n\t" + /* Calc new W[10]-W[11] */ + "vext.8 d10, d5, d6, #4\n\t" + "ror %[sha256], r3, #6\n\t" + "vshl.u32 d8, d4, #15\n\t" + "eor %[data], r4, r5\n\t" + "vsri.u32 d8, d4, #17\n\t" + "eor %[sha256], %[sha256], r3, ror #11\n\t" + "vshl.u32 d9, d4, #13\n\t" + "and %[data], %[data], r3\n\t" + "vsri.u32 d9, d4, #19\n\t" + "eor %[sha256], %[sha256], r3, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r5\n\t" + "vshr.u32 d8, d4, #10\n\t" + "add r6, r6, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r6, r6, %[data]\n\t" + "vadd.i32 d5, d9\n\t" + "ldr %[sha256], [r12, #44]\n\t" + "vext.8 d11, d1, d2, #4\n\t" + "add r6, r6, r10\n\t" + "vadd.i32 d5, d11\n\t" + "add r6, r6, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add %[len], %[len], r6\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r7, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r7, r8\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r7, ror 
#13\n\t" + "veor d9, d8\n\t" + "eor r10, r8, r9\n\t" + "vshr.u32 d10, #3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r7, ror #22\n\t" + "vadd.i32 d5, d9\n\t" + "eor %[data], %[data], r8\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + /* Round 12 */ + "vmov.32 r10, d6[0]\n\t" + "ror %[sha256], %[len], #6\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], %[len], ror #11\n\t" + "and %[data], %[data], %[len]\n\t" + "eor %[sha256], %[sha256], %[len], ror #25\n\t" + "eor %[data], %[data], r4\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + "ldr %[sha256], [r12, #48]\n\t" + "add r5, r5, r10\n\t" + "add r5, r5, %[sha256]\n\t" + "add r9, r9, r5\n\t" + "ror %[sha256], r6, #2\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r6, ror #13\n\t" + "eor r10, r7, r8\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r6, ror #22\n\t" + "eor %[data], %[data], r7\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + /* Round 13 */ + "vmov.32 r10, d6[1]\n\t" + /* Calc new W[12]-W[13] */ + "vext.8 d10, d6, d7, #4\n\t" + "ror %[sha256], r9, #6\n\t" + "vshl.u32 d8, d5, #15\n\t" + "eor %[data], %[len], r3\n\t" + "vsri.u32 d8, d5, #17\n\t" + "eor %[sha256], %[sha256], r9, ror #11\n\t" + "vshl.u32 d9, d5, #13\n\t" + "and %[data], %[data], r9\n\t" + "vsri.u32 d9, d5, #19\n\t" + "eor %[sha256], %[sha256], r9, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r3\n\t" + "vshr.u32 d8, d5, #10\n\t" + "add r4, r4, %[sha256]\n\t" + "veor d9, d8\n\t" + "add r4, r4, %[data]\n\t" + "vadd.i32 d6, d9\n\t" + "ldr %[sha256], [r12, #52]\n\t" + "vext.8 d11, d2, d3, #4\n\t" + "add r4, r4, r10\n\t" + "vadd.i32 d6, d11\n\t" + "add r4, r4, %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r8, r8, r4\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r5, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r5, r6\n\t" + "vsri.u32 d9, d10, #18\n\t" + "eor %[sha256], 
%[sha256], r5, ror #13\n\t" + "veor d9, d8\n\t" + "eor r10, r6, r7\n\t" + "vshr.u32 d10, #3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r5, ror #22\n\t" + "vadd.i32 d6, d9\n\t" + "eor %[data], %[data], r6\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + /* Round 14 */ + "vmov.32 r10, d7[0]\n\t" + "ror %[sha256], r8, #6\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r8, ror #11\n\t" + "and %[data], %[data], r8\n\t" + "eor %[sha256], %[sha256], r8, ror #25\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + "ldr %[sha256], [r12, #56]\n\t" + "add r3, r3, r10\n\t" + "add r3, r3, %[sha256]\n\t" + "add r7, r7, r3\n\t" + "ror %[sha256], r4, #2\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r4, ror #13\n\t" + "eor r10, r5, r6\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r4, ror #22\n\t" + "eor %[data], %[data], r5\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + /* Round 15 */ + "vmov.32 r10, d7[1]\n\t" + /* Calc new W[14]-W[15] */ + "vext.8 d10, d7, d0, #4\n\t" + "ror %[sha256], r7, #6\n\t" + "vshl.u32 d8, d6, #15\n\t" + "eor %[data], r8, r9\n\t" + "vsri.u32 d8, d6, #17\n\t" + "eor %[sha256], %[sha256], r7, ror #11\n\t" + "vshl.u32 d9, d6, #13\n\t" + "and %[data], %[data], r7\n\t" + "vsri.u32 d9, d6, #19\n\t" + "eor %[sha256], %[sha256], r7, ror #25\n\t" + "veor d9, d8\n\t" + "eor %[data], %[data], r9\n\t" + "vshr.u32 d8, d6, #10\n\t" + "add %[len], %[len], %[sha256]\n\t" + "veor d9, d8\n\t" + "add %[len], %[len], %[data]\n\t" + "vadd.i32 d7, d9\n\t" + "ldr %[sha256], [r12, #60]\n\t" + "vext.8 d11, d3, d4, #4\n\t" + "add %[len], %[len], r10\n\t" + "vadd.i32 d7, d11\n\t" + "add %[len], %[len], %[sha256]\n\t" + "vshl.u32 d8, d10, #25\n\t" + "add r6, r6, %[len]\n\t" + "vsri.u32 d8, d10, #7\n\t" + "ror %[sha256], r3, #2\n\t" + "vshl.u32 d9, d10, #14\n\t" + "eor %[data], r3, r4\n\t" + "vsri.u32 
d9, d10, #18\n\t" + "eor %[sha256], %[sha256], r3, ror #13\n\t" + "veor d9, d8\n\t" + "eor r10, r4, r5\n\t" + "vshr.u32 d10, #3\n\t" + "and %[data], %[data], r10\n\t" + "veor d9, d10\n\t" + "eor %[sha256], %[sha256], r3, ror #22\n\t" + "vadd.i32 d7, d9\n\t" + "eor %[data], %[data], r4\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + "add r12, r12, #0x40\n\t" + "subs lr, lr, #1\n\t" + "bne L_SHA256_transform_neon_len_start_%=\n\t" + /* Round 0 */ + "vmov.32 r10, d0[0]\n\t" + "ror %[sha256], r6, #6\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r6, ror #11\n\t" + "and %[data], %[data], r6\n\t" + "eor %[sha256], %[sha256], r6, ror #25\n\t" + "eor %[data], %[data], r8\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + "ldr %[sha256], [r12]\n\t" + "add r9, r9, r10\n\t" + "add r9, r9, %[sha256]\n\t" + "add r5, r5, r9\n\t" + "ror %[sha256], %[len], #2\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], %[len], ror #13\n\t" + "eor r10, r3, r4\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], %[len], ror #22\n\t" + "eor %[data], %[data], r3\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + /* Round 1 */ + "vmov.32 r10, d0[1]\n\t" + "ror %[sha256], r5, #6\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r5, ror #11\n\t" + "and %[data], %[data], r5\n\t" + "eor %[sha256], %[sha256], r5, ror #25\n\t" + "eor %[data], %[data], r7\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + "ldr %[sha256], [r12, #4]\n\t" + "add r8, r8, r10\n\t" + "add r8, r8, %[sha256]\n\t" + "add r4, r4, r8\n\t" + "ror %[sha256], r9, #2\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r9, ror #13\n\t" + "eor r10, %[len], r3\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r9, ror #22\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + /* Round 2 */ + "vmov.32 r10, d1[0]\n\t" + "ror 
%[sha256], r4, #6\n\t" + "eor %[data], r5, r6\n\t" + "eor %[sha256], %[sha256], r4, ror #11\n\t" + "and %[data], %[data], r4\n\t" + "eor %[sha256], %[sha256], r4, ror #25\n\t" + "eor %[data], %[data], r6\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + "ldr %[sha256], [r12, #8]\n\t" + "add r7, r7, r10\n\t" + "add r7, r7, %[sha256]\n\t" + "add r3, r3, r7\n\t" + "ror %[sha256], r8, #2\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r8, ror #13\n\t" + "eor r10, r9, %[len]\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r8, ror #22\n\t" + "eor %[data], %[data], r9\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + /* Round 3 */ + "vmov.32 r10, d1[1]\n\t" + "ror %[sha256], r3, #6\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r3, ror #11\n\t" + "and %[data], %[data], r3\n\t" + "eor %[sha256], %[sha256], r3, ror #25\n\t" + "eor %[data], %[data], r5\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + "ldr %[sha256], [r12, #12]\n\t" + "add r6, r6, r10\n\t" + "add r6, r6, %[sha256]\n\t" + "add %[len], %[len], r6\n\t" + "ror %[sha256], r7, #2\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r7, ror #13\n\t" + "eor r10, r8, r9\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r7, ror #22\n\t" + "eor %[data], %[data], r8\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + /* Round 4 */ + "vmov.32 r10, d2[0]\n\t" + "ror %[sha256], %[len], #6\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], %[len], ror #11\n\t" + "and %[data], %[data], %[len]\n\t" + "eor %[sha256], %[sha256], %[len], ror #25\n\t" + "eor %[data], %[data], r4\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + "ldr %[sha256], [r12, #16]\n\t" + "add r5, r5, r10\n\t" + "add r5, r5, %[sha256]\n\t" + "add r9, r9, r5\n\t" + "ror %[sha256], r6, #2\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r6, ror #13\n\t" + "eor r10, r7, r8\n\t" + "and 
%[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r6, ror #22\n\t" + "eor %[data], %[data], r7\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + /* Round 5 */ + "vmov.32 r10, d2[1]\n\t" + "ror %[sha256], r9, #6\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], r9, ror #11\n\t" + "and %[data], %[data], r9\n\t" + "eor %[sha256], %[sha256], r9, ror #25\n\t" + "eor %[data], %[data], r3\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + "ldr %[sha256], [r12, #20]\n\t" + "add r4, r4, r10\n\t" + "add r4, r4, %[sha256]\n\t" + "add r8, r8, r4\n\t" + "ror %[sha256], r5, #2\n\t" + "eor %[data], r5, r6\n\t" + "eor %[sha256], %[sha256], r5, ror #13\n\t" + "eor r10, r6, r7\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r5, ror #22\n\t" + "eor %[data], %[data], r6\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + /* Round 6 */ + "vmov.32 r10, d3[0]\n\t" + "ror %[sha256], r8, #6\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r8, ror #11\n\t" + "and %[data], %[data], r8\n\t" + "eor %[sha256], %[sha256], r8, ror #25\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + "ldr %[sha256], [r12, #24]\n\t" + "add r3, r3, r10\n\t" + "add r3, r3, %[sha256]\n\t" + "add r7, r7, r3\n\t" + "ror %[sha256], r4, #2\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r4, ror #13\n\t" + "eor r10, r5, r6\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r4, ror #22\n\t" + "eor %[data], %[data], r5\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + /* Round 7 */ + "vmov.32 r10, d3[1]\n\t" + "ror %[sha256], r7, #6\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r7, ror #11\n\t" + "and %[data], %[data], r7\n\t" + "eor %[sha256], %[sha256], r7, ror #25\n\t" + "eor %[data], %[data], r9\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + "ldr %[sha256], [r12, #28]\n\t" + 
"add %[len], %[len], r10\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add r6, r6, %[len]\n\t" + "ror %[sha256], r3, #2\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], r3, ror #13\n\t" + "eor r10, r4, r5\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r3, ror #22\n\t" + "eor %[data], %[data], r4\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + /* Round 8 */ + "vmov.32 r10, d4[0]\n\t" + "ror %[sha256], r6, #6\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r6, ror #11\n\t" + "and %[data], %[data], r6\n\t" + "eor %[sha256], %[sha256], r6, ror #25\n\t" + "eor %[data], %[data], r8\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + "ldr %[sha256], [r12, #32]\n\t" + "add r9, r9, r10\n\t" + "add r9, r9, %[sha256]\n\t" + "add r5, r5, r9\n\t" + "ror %[sha256], %[len], #2\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], %[len], ror #13\n\t" + "eor r10, r3, r4\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], %[len], ror #22\n\t" + "eor %[data], %[data], r3\n\t" + "add r9, r9, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" + /* Round 9 */ + "vmov.32 r10, d4[1]\n\t" + "ror %[sha256], r5, #6\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r5, ror #11\n\t" + "and %[data], %[data], r5\n\t" + "eor %[sha256], %[sha256], r5, ror #25\n\t" + "eor %[data], %[data], r7\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + "ldr %[sha256], [r12, #36]\n\t" + "add r8, r8, r10\n\t" + "add r8, r8, %[sha256]\n\t" + "add r4, r4, r8\n\t" + "ror %[sha256], r9, #2\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r9, ror #13\n\t" + "eor r10, %[len], r3\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r9, ror #22\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r8, r8, %[sha256]\n\t" + "add r8, r8, %[data]\n\t" + /* Round 10 */ + "vmov.32 r10, d5[0]\n\t" + "ror %[sha256], r4, #6\n\t" + "eor %[data], r5, r6\n\t" + 
"eor %[sha256], %[sha256], r4, ror #11\n\t" + "and %[data], %[data], r4\n\t" + "eor %[sha256], %[sha256], r4, ror #25\n\t" + "eor %[data], %[data], r6\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + "ldr %[sha256], [r12, #40]\n\t" + "add r7, r7, r10\n\t" + "add r7, r7, %[sha256]\n\t" + "add r3, r3, r7\n\t" + "ror %[sha256], r8, #2\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r8, ror #13\n\t" + "eor r10, r9, %[len]\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r8, ror #22\n\t" + "eor %[data], %[data], r9\n\t" + "add r7, r7, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" + /* Round 11 */ + "vmov.32 r10, d5[1]\n\t" + "ror %[sha256], r3, #6\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r3, ror #11\n\t" + "and %[data], %[data], r3\n\t" + "eor %[sha256], %[sha256], r3, ror #25\n\t" + "eor %[data], %[data], r5\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + "ldr %[sha256], [r12, #44]\n\t" + "add r6, r6, r10\n\t" + "add r6, r6, %[sha256]\n\t" + "add %[len], %[len], r6\n\t" + "ror %[sha256], r7, #2\n\t" + "eor %[data], r7, r8\n\t" + "eor %[sha256], %[sha256], r7, ror #13\n\t" + "eor r10, r8, r9\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r7, ror #22\n\t" + "eor %[data], %[data], r8\n\t" + "add r6, r6, %[sha256]\n\t" + "add r6, r6, %[data]\n\t" + /* Round 12 */ + "vmov.32 r10, d6[0]\n\t" + "ror %[sha256], %[len], #6\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], %[len], ror #11\n\t" + "and %[data], %[data], %[len]\n\t" + "eor %[sha256], %[sha256], %[len], ror #25\n\t" + "eor %[data], %[data], r4\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + "ldr %[sha256], [r12, #48]\n\t" + "add r5, r5, r10\n\t" + "add r5, r5, %[sha256]\n\t" + "add r9, r9, r5\n\t" + "ror %[sha256], r6, #2\n\t" + "eor %[data], r6, r7\n\t" + "eor %[sha256], %[sha256], r6, ror #13\n\t" + "eor r10, r7, r8\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], 
r6, ror #22\n\t" + "eor %[data], %[data], r7\n\t" + "add r5, r5, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" + /* Round 13 */ + "vmov.32 r10, d6[1]\n\t" + "ror %[sha256], r9, #6\n\t" + "eor %[data], %[len], r3\n\t" + "eor %[sha256], %[sha256], r9, ror #11\n\t" + "and %[data], %[data], r9\n\t" + "eor %[sha256], %[sha256], r9, ror #25\n\t" + "eor %[data], %[data], r3\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + "ldr %[sha256], [r12, #52]\n\t" + "add r4, r4, r10\n\t" + "add r4, r4, %[sha256]\n\t" + "add r8, r8, r4\n\t" + "ror %[sha256], r5, #2\n\t" + "eor %[data], r5, r6\n\t" + "eor %[sha256], %[sha256], r5, ror #13\n\t" + "eor r10, r6, r7\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r5, ror #22\n\t" + "eor %[data], %[data], r6\n\t" + "add r4, r4, %[sha256]\n\t" + "add r4, r4, %[data]\n\t" + /* Round 14 */ + "vmov.32 r10, d7[0]\n\t" + "ror %[sha256], r8, #6\n\t" + "eor %[data], r9, %[len]\n\t" + "eor %[sha256], %[sha256], r8, ror #11\n\t" + "and %[data], %[data], r8\n\t" + "eor %[sha256], %[sha256], r8, ror #25\n\t" + "eor %[data], %[data], %[len]\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + "ldr %[sha256], [r12, #56]\n\t" + "add r3, r3, r10\n\t" + "add r3, r3, %[sha256]\n\t" + "add r7, r7, r3\n\t" + "ror %[sha256], r4, #2\n\t" + "eor %[data], r4, r5\n\t" + "eor %[sha256], %[sha256], r4, ror #13\n\t" + "eor r10, r5, r6\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r4, ror #22\n\t" + "eor %[data], %[data], r5\n\t" + "add r3, r3, %[sha256]\n\t" + "add r3, r3, %[data]\n\t" + /* Round 15 */ + "vmov.32 r10, d7[1]\n\t" + "ror %[sha256], r7, #6\n\t" + "eor %[data], r8, r9\n\t" + "eor %[sha256], %[sha256], r7, ror #11\n\t" + "and %[data], %[data], r7\n\t" + "eor %[sha256], %[sha256], r7, ror #25\n\t" + "eor %[data], %[data], r9\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + "ldr %[sha256], [r12, #60]\n\t" + "add %[len], %[len], r10\n\t" + "add %[len], %[len], 
%[sha256]\n\t" + "add r6, r6, %[len]\n\t" + "ror %[sha256], r3, #2\n\t" + "eor %[data], r3, r4\n\t" + "eor %[sha256], %[sha256], r3, ror #13\n\t" + "eor r10, r4, r5\n\t" + "and %[data], %[data], r10\n\t" + "eor %[sha256], %[sha256], r3, ror #22\n\t" + "eor %[data], %[data], r4\n\t" + "add %[len], %[len], %[sha256]\n\t" + "add %[len], %[len], %[data]\n\t" + "ldr r10, [sp]\n\t" + /* Add in digest from start */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[sha256], [r10]\n\t" + "ldr %[data], [r10, #4]\n\t" +#else + "ldrd %[sha256], %[data], [r10]\n\t" +#endif + "add %[len], %[len], %[sha256]\n\t" + "add r3, r3, %[data]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[len], [r10]\n\t" + "str r3, [r10, #4]\n\t" +#else + "strd %[len], r3, [r10]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[sha256], [r10, #8]\n\t" + "ldr %[data], [r10, #12]\n\t" +#else + "ldrd %[sha256], %[data], [r10, #8]\n\t" +#endif + "add r4, r4, %[sha256]\n\t" + "add r5, r5, %[data]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [r10, #8]\n\t" + "str r5, [r10, #12]\n\t" +#else + "strd r4, r5, [r10, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[sha256], [r10, #16]\n\t" + "ldr %[data], [r10, #20]\n\t" +#else + "ldrd %[sha256], %[data], [r10, #16]\n\t" +#endif + "add r6, r6, %[sha256]\n\t" + "add r7, r7, %[data]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [r10, #16]\n\t" + "str r7, [r10, #20]\n\t" +#else + "strd r6, r7, [r10, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr %[sha256], [r10, #24]\n\t" + "ldr %[data], [r10, #28]\n\t" +#else + "ldrd %[sha256], %[data], [r10, #24]\n\t" +#endif + "add r8, r8, %[sha256]\n\t" + "add r9, r9, %[data]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [r10, #24]\n\t" + "str r9, [r10, #28]\n\t" +#else + "strd r8, r9, [r10, #24]\n\t" +#endif + 
"ldr r10, [sp, #8]\n\t" + "ldr %[data], [sp, #4]\n\t" + "subs r10, r10, #0x40\n\t" + "sub r12, r12, #0xc0\n\t" + "str r10, [sp, #8]\n\t" + "bne L_SHA256_transform_neon_len_begin_%=\n\t" + "add sp, sp, #24\n\t" + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#endif /* !NO_SHA256 */ +#endif /* !__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index c42cf63b1..b64a07241 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -1,35 +1,33 @@ /* armv8-32-sha512-asm * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* Generated using (from wolfssl): * cd ../scripts * ruby ./sha2/sha512.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S */ -#if defined(WOLFSSL_ARMASM) && defined(WOLFSSL_SHA512) -#ifndef __aarch64__ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#ifndef WOLFSSL_ARMASM_INLINE +#ifdef WOLFSSL_SHA512 #ifdef WOLFSSL_ARMASM_NO_NEON .text .type L_SHA512_transform_len_k, %object .size L_SHA512_transform_len_k, 640 - .align 3 + .align 4 L_SHA512_transform_len_k: .word 0xd728ae22 .word 0x428a2f98 @@ -192,3856 +190,7512 @@ L_SHA512_transform_len_k: .word 0x4a475817 .word 0x6c44198c .text - .align 2 + .align 4 .globl Transform_Sha512_Len .type Transform_Sha512_Len, %function Transform_Sha512_Len: - push {r4, r5, r6, r7, r8, r9, r10, lr} + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #0xc0 adr r3, L_SHA512_transform_len_k # Copy digest to add in at end - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - ldrd r8, r9, [r0, #24] - str r12, [sp, #128] - str lr, [sp, #132] - strd r4, r5, [sp, #136] - strd r6, r7, [sp, #144] - strd r8, r9, [sp, #152] - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - ldrd r8, r9, [r0, #56] - str r12, [sp, #160] - str lr, [sp, #164] - strd r4, r5, [sp, #168] - strd r6, r7, [sp, #176] - strd r8, r9, [sp, #184] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #24] + ldr r11, [r0, #28] +#else + ldrd r10, r11, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #128] + str r5, [sp, #132] +#else + strd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #136] + str r7, [sp, #140] +#else + strd r6, r7, [sp, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #144] + str r9, [sp, #148] +#else + strd r8, r9, [sp, #144] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #152] + str r11, [sp, #156] +#else + strd r10, r11, [sp, #152] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #56] + ldr r11, [r0, #60] +#else + ldrd r10, r11, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #160] + str r5, [sp, #164] +#else + strd r4, r5, [sp, #160] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #168] + str r7, [sp, #172] +#else + strd r6, r7, [sp, #168] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #176] + str r9, [sp, #180] +#else + strd r8, r9, [sp, #176] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #184] + str r11, [sp, #188] +#else + strd r10, r11, [sp, #184] +#endif # Start of loop processing a block -L_sha512_len_neon_begin: - # Load, 
Reverse and Store W - ldr r12, [r1] - ldr lr, [r1, #4] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r1, #16] - ldrd r8, r9, [r1, #24] - rev r12, r12 - rev lr, lr +L_SHA512_transform_len_begin: + # Load, Reverse and Store W - 64 bytes +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + ldr r4, [r1] + ldr r5, [r1, #4] + ldr r6, [r1, #8] + ldr r7, [r1, #12] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp] + str r4, [sp, #4] + str r7, [sp, #8] + str r6, [sp, #12] + ldr r4, [r1, #16] + ldr r5, [r1, #20] + ldr r6, [r1, #24] + ldr r7, [r1, #28] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #16] + str r4, [sp, #20] + str r7, [sp, #24] + str r6, [sp, #28] + ldr r4, [r1, #32] + ldr r5, [r1, #36] + ldr r6, [r1, #40] + ldr r7, [r1, #44] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #32] + str r4, [sp, #36] + str r7, [sp, #40] + str r6, [sp, #44] + ldr r4, [r1, #48] + ldr r5, [r1, #52] + ldr r6, [r1, #56] + ldr r7, [r1, #60] + eor r8, r4, r4, ror #16 + eor r9, r5, 
r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #48] + str r4, [sp, #52] + str r7, [sp, #56] + str r6, [sp, #60] + ldr r4, [r1, #64] + ldr r5, [r1, #68] + ldr r6, [r1, #72] + ldr r7, [r1, #76] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #64] + str r4, [sp, #68] + str r7, [sp, #72] + str r6, [sp, #76] + ldr r4, [r1, #80] + ldr r5, [r1, #84] + ldr r6, [r1, #88] + ldr r7, [r1, #92] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #80] + str r4, [sp, #84] + str r7, [sp, #88] + str r6, [sp, #92] + ldr r4, [r1, #96] + ldr r5, [r1, #100] + ldr r6, [r1, #104] + ldr r7, [r1, #108] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #96] + str r4, [sp, #100] + str r7, 
[sp, #104] + str r6, [sp, #108] + ldr r4, [r1, #112] + ldr r5, [r1, #116] + ldr r6, [r1, #120] + ldr r7, [r1, #124] + eor r8, r4, r4, ror #16 + eor r9, r5, r5, ror #16 + eor r10, r6, r6, ror #16 + eor r11, r7, r7, ror #16 + bic r8, r8, #0xff0000 + bic r9, r9, #0xff0000 + bic r10, r10, #0xff0000 + bic r11, r11, #0xff0000 + ror r4, r4, #8 + ror r5, r5, #8 + ror r6, r6, #8 + ror r7, r7, #8 + eor r4, r4, r8, lsr #8 + eor r5, r5, r9, lsr #8 + eor r6, r6, r10, lsr #8 + eor r7, r7, r11, lsr #8 + str r5, [sp, #112] + str r4, [sp, #116] + str r7, [sp, #120] + str r6, [sp, #124] +#else + ldr r4, [r1] + ldr r5, [r1, #4] + ldr r6, [r1, #8] + ldr r7, [r1, #12] + ldr r8, [r1, #16] + ldr r9, [r1, #20] + ldr r10, [r1, #24] + ldr r11, [r1, #28] rev r4, r4 rev r5, r5 rev r6, r6 rev r7, r7 rev r8, r8 rev r9, r9 - str lr, [sp] - str r12, [sp, #4] - str r5, [sp, #8] - str r4, [sp, #12] - str r7, [sp, #16] - str r6, [sp, #20] - str r9, [sp, #24] - str r8, [sp, #28] - ldr r12, [r1, #32] - ldr lr, [r1, #36] - ldrd r4, r5, [r1, #40] - ldrd r6, r7, [r1, #48] - ldrd r8, r9, [r1, #56] - rev r12, r12 - rev lr, lr + rev r10, r10 + rev r11, r11 + str r5, [sp] + str r4, [sp, #4] + str r7, [sp, #8] + str r6, [sp, #12] + str r9, [sp, #16] + str r8, [sp, #20] + str r11, [sp, #24] + str r10, [sp, #28] + ldr r4, [r1, #32] + ldr r5, [r1, #36] + ldr r6, [r1, #40] + ldr r7, [r1, #44] + ldr r8, [r1, #48] + ldr r9, [r1, #52] + ldr r10, [r1, #56] + ldr r11, [r1, #60] rev r4, r4 rev r5, r5 rev r6, r6 rev r7, r7 rev r8, r8 rev r9, r9 - str lr, [sp, #32] - str r12, [sp, #36] - str r5, [sp, #40] - str r4, [sp, #44] - str r7, [sp, #48] - str r6, [sp, #52] - str r9, [sp, #56] - str r8, [sp, #60] - ldr r12, [r1, #64] - ldr lr, [r1, #68] - ldrd r4, r5, [r1, #72] - ldrd r6, r7, [r1, #80] - ldrd r8, r9, [r1, #88] - rev r12, r12 - rev lr, lr + rev r10, r10 + rev r11, r11 + str r5, [sp, #32] + str r4, [sp, #36] + str r7, [sp, #40] + str r6, [sp, #44] + str r9, [sp, #48] + str r8, [sp, #52] + str r11, [sp, #56] + str 
r10, [sp, #60] + ldr r4, [r1, #64] + ldr r5, [r1, #68] + ldr r6, [r1, #72] + ldr r7, [r1, #76] + ldr r8, [r1, #80] + ldr r9, [r1, #84] + ldr r10, [r1, #88] + ldr r11, [r1, #92] rev r4, r4 rev r5, r5 rev r6, r6 rev r7, r7 rev r8, r8 rev r9, r9 - str lr, [sp, #64] - str r12, [sp, #68] - str r5, [sp, #72] - str r4, [sp, #76] - str r7, [sp, #80] - str r6, [sp, #84] - str r9, [sp, #88] - str r8, [sp, #92] - ldr r12, [r1, #96] - ldr lr, [r1, #100] - ldrd r4, r5, [r1, #104] - ldrd r6, r7, [r1, #112] - ldrd r8, r9, [r1, #120] - rev r12, r12 - rev lr, lr + rev r10, r10 + rev r11, r11 + str r5, [sp, #64] + str r4, [sp, #68] + str r7, [sp, #72] + str r6, [sp, #76] + str r9, [sp, #80] + str r8, [sp, #84] + str r11, [sp, #88] + str r10, [sp, #92] + ldr r4, [r1, #96] + ldr r5, [r1, #100] + ldr r6, [r1, #104] + ldr r7, [r1, #108] + ldr r8, [r1, #112] + ldr r9, [r1, #116] + ldr r10, [r1, #120] + ldr r11, [r1, #124] rev r4, r4 rev r5, r5 rev r6, r6 rev r7, r7 rev r8, r8 rev r9, r9 - str lr, [sp, #96] - str r12, [sp, #100] - str r5, [sp, #104] - str r4, [sp, #108] - str r7, [sp, #112] - str r6, [sp, #116] - str r9, [sp, #120] - str r8, [sp, #124] + rev r10, r10 + rev r11, r11 + str r5, [sp, #96] + str r4, [sp, #100] + str r7, [sp, #104] + str r6, [sp, #108] + str r9, [sp, #112] + str r8, [sp, #116] + str r11, [sp, #120] + str r10, [sp, #124] +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ # Pre-calc: b ^ c - ldrd r8, r9, [r0, #8] - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r8, r8, r12 - eor r9, r9, lr - mov r10, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #8] + ldr r11, [r0, #12] +#else + ldrd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r10, r10, r4 + eor r11, r11, r5 + mov r12, #4 # Start of 16 rounds -L_sha512_len_neon_start: +L_SHA512_transform_len_start: # Round 0 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - lsrs 
r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r6, r7, [sp] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #24] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0] - ldr lr, [r0, #4] - strd r6, r7, [r0, #24] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] - str r12, [r0, #56] - str lr, [r0, #60] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr 
r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp] + ldr r9, [sp, #4] +#else + ldrd r8, r9, [sp] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3] + ldr r7, [r3, #4] +#else + ldrd r6, r7, [r3] +#endif adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else strd r4, r5, [r0, #56] - mov r8, r6 - mov r9, r7 +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif + lsrs r6, r4, 
#28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[0] - ldr r12, [sp, #112] - ldr lr, [sp, #116] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp] - ldr lr, [sp, #4] - ldrd r6, r7, [sp, #72] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp] - str lr, [sp, #4] - ldr r12, [sp, #8] - ldr lr, [sp, #12] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, 
lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp] - ldr lr, [sp, #4] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp] - str lr, [sp, #4] - # Round 1 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r6, r7, [sp, #8] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #8] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #16] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #56] - ldr lr, [r0, #60] - strd r6, r7, [r0, #16] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] - str r12, [r0, #48] - str lr, [r0, #52] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 
- eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #72] + ldr r9, [sp, #76] +#else + ldrd r8, r9, [sp, #72] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif + # Round 1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl 
#14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else strd r4, r5, [r0, #48] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #8] + ldr r9, [sp, #12] +#else + ldrd r8, r9, [sp, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #8] + ldr r7, [r3, #12] +#else + ldrd r6, r7, [r3, #8] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[1] - ldr r12, [sp, #120] - ldr lr, [sp, #124] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #8] - ldr lr, [sp, #12] - ldrd r6, r7, [sp, #80] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #8] - str lr, 
[sp, #12] - ldr r12, [sp, #16] - ldr lr, [sp, #20] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #8] - ldr lr, [sp, #12] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #8] - str lr, [sp, #12] - # Round 2 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r6, r7, [sp, #16] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #16] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #8] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #48] - ldr lr, [r0, #52] - strd r6, r7, [r0, #8] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - 
ldrd r4, r5, [r0, #56] - str r12, [r0, #40] - str lr, [r0, #44] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #120] + ldr r5, [sp, #124] +#else + ldrd r4, r5, [sp, #120] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #80] + ldr r9, [sp, #84] +#else + ldrd r8, r9, [sp, #80] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif + # Round 2 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, 
#16] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else strd r4, r5, [r0, #40] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #16] + ldr r7, [r3, #20] +#else + ldrd r6, r7, [r3, #16] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + adds r8, r8, 
r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[2] - ldr r12, [sp] - ldr lr, [sp, #4] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, 
r7 - eor r4, r4, r6 - ldr r12, [sp, #16] - ldr lr, [sp, #20] - ldrd r6, r7, [sp, #88] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #16] - str lr, [sp, #20] - ldr r12, [sp, #24] - ldr lr, [sp, #28] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #16] - ldr lr, [sp, #20] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #16] - str lr, [sp, #20] - # Round 3 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r6, r7, [sp, #24] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #24] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #40] - ldr lr, [r0, #44] - strd r6, r7, [r0] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr 
r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] - str r12, [r0, #32] - str lr, [r0, #36] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #92] +#else + ldrd r8, r9, [sp, #88] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] +#else + strd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #28] +#else + ldrd r4, r5, [sp, #24] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] 
+#else + strd r4, r5, [sp, #16] +#endif + # Round 3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else strd r4, r5, [r0, #32] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #24] + ldr r9, [sp, #28] +#else + ldrd r8, r9, [sp, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #24] + ldr r7, [r3, #28] +#else + ldrd r6, r7, [r3, #24] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + adds r4, r4, r6 + adc 
r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[3] - ldr r12, [sp, #8] - ldr lr, [sp, #12] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - 
orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #24] - ldr lr, [sp, #28] - ldrd r6, r7, [sp, #96] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #24] - str lr, [sp, #28] - ldr r12, [sp, #32] - ldr lr, [sp, #36] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #24] - ldr lr, [sp, #28] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #24] - str lr, [sp, #28] - # Round 4 - ldr r12, [r0] - ldr lr, [r0, #4] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r6, r7, [sp, #32] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #32] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #56] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #32] - ldr lr, [r0, #36] - strd r6, r7, [r0, #56] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, 
r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] - str r12, [r0, #24] - str lr, [r0, #28] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #28] +#else + ldrd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #96] + ldr r9, [sp, #100] +#else + ldrd r8, r9, [sp, #96] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #24] + str r5, [sp, #28] +#else + strd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #36] +#else + ldrd r4, r5, [sp, #32] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #28] 
+#else + ldrd r4, r5, [sp, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #24] + str r5, [sp, #28] +#else + strd r4, r5, [sp, #24] +#endif + # Round 4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else strd r4, r5, [r0, #24] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #32] + ldr r9, [sp, #36] +#else + ldrd r8, r9, [sp, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #32] + ldr r7, [r3, #36] +#else + ldrd r6, r7, [r3, #32] +#endif + adds r4, r4, r8 + adc r5, r5, r9 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #56] + str r9, [r0, #60] +#else + strd r8, r9, [r0, #56] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[4] - 
ldr r12, [sp, #16] - ldr lr, [sp, #20] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #32] - ldr lr, [sp, #36] - ldrd r6, r7, [sp, #104] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #32] - str lr, [sp, #36] - ldr r12, [sp, #40] - ldr lr, [sp, #44] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #32] - ldr lr, [sp, #36] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #32] - str lr, [sp, #36] - # Round 5 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r6, r7, [sp, #40] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #40] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #48] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - adds r6, r6, r12 - adc 
r7, r7, lr - ldr r12, [r0, #24] - ldr lr, [r0, #28] - strd r6, r7, [r0, #48] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] - str r12, [r0, #16] - str lr, [r0, #20] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #36] +#else + ldrd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #104] + ldr r9, [sp, #108] +#else + ldrd r8, r9, [sp, #104] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #36] +#else + strd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #44] +#else + ldrd r4, r5, [sp, #40] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, 
#7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #36] +#else + ldrd r4, r5, [sp, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #36] +#else + strd r4, r5, [sp, #32] +#endif + # Round 5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else strd r4, r5, [r0, #16] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #40] + ldr r9, [sp, #44] +#else + ldrd r8, r9, [sp, #40] +#endif + adds r4, r4, r6 
+ adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #40] + ldr r7, [r3, #44] +#else + ldrd r6, r7, [r3, #40] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #48] + str r9, [r0, #52] +#else + strd r8, r9, [r0, #48] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif + adds r6, r6, r10 + adc r7, 
r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[5] - ldr r12, [sp, #24] - ldr lr, [sp, #28] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #40] - ldr lr, [sp, #44] - ldrd r6, r7, [sp, #112] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #40] - str lr, [sp, #44] - ldr r12, [sp, #48] - ldr lr, [sp, #52] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #40] - ldr lr, [sp, #44] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #40] - str lr, [sp, #44] - # Round 6 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r6, r7, [sp, #48] - adds r12, r12, r4 - adc lr, lr, r5 
- ldrd r4, r5, [r3, #48] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #40] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #16] - ldr lr, [r0, #20] - strd r6, r7, [r0, #40] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] - str r12, [r0, #8] - str lr, [r0, #12] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #28] +#else + ldrd r4, r5, [sp, #24] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #44] +#else + ldrd r4, r5, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #116] +#else + ldrd r8, r9, [sp, #112] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #40] + str r5, [sp, #44] +#else + strd r4, r5, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #52] +#else + ldrd r4, r5, [sp, #48] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + 
orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #44] +#else + ldrd r4, r5, [sp, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #40] + str r5, [sp, #44] +#else + strd r4, r5, [sp, #40] +#endif + # Round 6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else strd r4, r5, [r0, #8] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + 
ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #48] + ldr r9, [sp, #52] +#else + ldrd r8, r9, [sp, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #48] + ldr r7, [r3, #52] +#else + ldrd r6, r7, [r3, #48] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #40] + str r9, [r0, #44] +#else + strd r8, r9, [r0, #40] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor 
r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[6] - ldr r12, [sp, #32] - ldr lr, [sp, #36] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #48] - ldr lr, [sp, #52] - ldrd r6, r7, [sp, #120] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #48] - str lr, [sp, #52] - ldr r12, [sp, #56] - ldr lr, [sp, #60] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #48] - ldr lr, [sp, #52] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #48] - str lr, [sp, #52] - # Round 7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] - eor r4, r4, r6 - eor r5, r5, r7 - 
and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r6, r7, [sp, #56] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #56] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #32] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #8] - ldr lr, [r0, #12] - strd r6, r7, [r0, #32] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] - str r12, [r0] - str lr, [r0, #4] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #36] +#else + ldrd r4, r5, [sp, #32] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #52] +#else + ldrd r4, r5, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #120] + ldr r9, [sp, #124] +#else + ldrd r8, r9, [sp, #120] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #48] + str r5, [sp, #52] +#else + strd r4, r5, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #60] +#else + ldrd r4, r5, [sp, #56] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #52] +#else + ldrd r4, r5, [sp, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #48] + str r5, [sp, #52] +#else + strd r4, r5, [sp, #48] +#endif + # Round 7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else strd r4, r5, [r0] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor 
r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #56] + ldr r9, [sp, #60] +#else + ldrd r8, r9, [sp, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #56] + ldr r7, [r3, #60] +#else + ldrd r6, r7, [r3, #56] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #32] + str r9, [r0, #36] +#else + strd r8, r9, [r0, #32] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + eor 
r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[7] - ldr r12, [sp, #40] - ldr lr, [sp, #44] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #56] - ldr lr, [sp, #60] - ldrd r6, r7, [sp] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #56] - str lr, [sp, #60] - ldr r12, [sp, #64] - ldr lr, [sp, #68] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #56] - ldr lr, [sp, #60] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #56] - str lr, [sp, #60] - # Round 8 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd 
r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r6, r7, [sp, #64] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #64] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #24] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0] - ldr lr, [r0, #4] - strd r6, r7, [r0, #24] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] - str r12, [r0, #56] - str lr, [r0, #60] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #44] +#else + ldrd r4, r5, [sp, #40] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #60] +#else + ldrd r4, r5, [sp, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp] + ldr r9, [sp, #4] +#else + ldrd r8, r9, [sp] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #56] + str r5, 
[sp, #60] +#else + strd r4, r5, [sp, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #60] +#else + ldrd r4, r5, [sp, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #56] + str r5, [sp, #60] +#else + strd r4, r5, [sp, #56] +#endif + # Round 8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else strd r4, r5, [r0, #56] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + 
ldrd r8, r9, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #64] + ldr r9, [sp, #68] +#else + ldrd r8, r9, [sp, #64] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #64] + ldr r7, [r3, #68] +#else + ldrd r6, r7, [r3, #64] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[8] - ldr r12, [sp, #48] - ldr lr, [sp, #52] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #64] - ldr lr, [sp, #68] - ldrd r6, r7, [sp, #8] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #64] - str lr, [sp, #68] - ldr r12, [sp, #72] - ldr lr, [sp, #76] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #64] - ldr lr, [sp, #68] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #64] - str lr, [sp, #68] - # Round 9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #48] - ldr lr, [r0, 
#52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r6, r7, [sp, #72] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #72] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #16] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #56] - ldr lr, [r0, #60] - strd r6, r7, [r0, #16] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] - str r12, [r0, #48] - str lr, [r0, #52] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #52] +#else + ldrd r4, r5, [sp, #48] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #8] + ldr r9, [sp, #12] +#else + 
ldrd r8, r9, [sp, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, #72] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif + # Round 9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else strd r4, r5, [r0, #48] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #72] + ldr r9, [sp, #76] +#else + ldrd r8, r9, [sp, #72] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #72] + ldr r7, [r3, #76] +#else + ldrd r6, r7, [r3, #72] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + 
ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[9] - ldr r12, [sp, #56] - ldr lr, [sp, #60] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #72] - ldr lr, [sp, #76] - ldrd r6, r7, [sp, #16] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #72] - str lr, [sp, #76] - ldr r12, [sp, #80] - ldr lr, [sp, #84] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #72] - ldr lr, [sp, #76] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #72] - str lr, [sp, #76] - # Round 10 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl 
#14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r6, r7, [sp, #80] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #80] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #8] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #48] - ldr lr, [r0, #52] - strd r6, r7, [r0, #8] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] - str r12, [r0, #40] - str lr, [r0, #44] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #60] +#else + ldrd r4, r5, [sp, #56] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH 
< 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #72] + str r5, [sp, #76] +#else + strd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, #72] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #72] + str r5, [sp, #76] +#else + strd r4, r5, [sp, #72] +#endif + # Round 10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else strd r4, r5, [r0, #40] - mov r8, r6 - mov r9, r7 +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #80] + ldr r9, [sp, #84] +#else + ldrd r8, r9, [sp, #80] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #80] + ldr r7, [r3, #84] +#else + ldrd r6, r7, [r3, #80] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, 
#44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[10] - ldr r12, [sp, #64] - ldr lr, [sp, #68] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #80] - ldr lr, [sp, #84] - ldrd r6, r7, [sp, #24] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #80] - str lr, [sp, #84] - ldr r12, [sp, #88] - ldr lr, [sp, #92] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #80] - ldr lr, [sp, #84] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #80] - str lr, [sp, #84] - 
# Round 11 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r6, r7, [sp, #88] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #88] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #40] - ldr lr, [r0, #44] - strd r6, r7, [r0] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] - str r12, [r0, #32] - str lr, [r0, #36] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, 
r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #24] + ldr r9, [sp, #28] +#else + ldrd r8, r9, [sp, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #80] + str r5, [sp, #84] +#else + strd r4, r5, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #88] + ldr r5, [sp, #92] +#else + ldrd r4, r5, [sp, #88] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #80] + str r5, [sp, #84] +#else + strd r4, r5, [sp, #80] +#endif + # Round 11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + 
adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else strd r4, r5, [r0, #32] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #92] +#else + ldrd r8, r9, [sp, #88] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #88] + ldr r7, [r3, #92] +#else + ldrd r6, r7, [r3, #88] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls 
r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[11] - ldr r12, [sp, #72] - ldr lr, [sp, #76] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #88] - ldr lr, [sp, #92] - ldrd r6, r7, [sp, #32] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #88] - str lr, [sp, #92] - ldr r12, [sp, #96] - ldr lr, [sp, #100] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr 
r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #88] - ldr lr, [sp, #92] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #88] - str lr, [sp, #92] - # Round 12 - ldr r12, [r0] - ldr lr, [r0, #4] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r6, r7, [sp, #96] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #96] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #56] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #32] - ldr lr, [r0, #36] - strd r6, r7, [r0, #56] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] - str r12, [r0, #24] - str lr, [r0, #28] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, 
#72] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #88] + ldr r5, [sp, #92] +#else + ldrd r4, r5, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #32] + ldr r9, [sp, #36] +#else + ldrd r8, r9, [sp, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #88] + str r5, [sp, #92] +#else + strd r4, r5, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #88] + ldr r5, [sp, #92] +#else + ldrd r4, r5, [sp, #88] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #88] + str r5, [sp, #92] +#else + strd r4, r5, [sp, #88] +#endif + # Round 12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else strd r4, r5, [r0, #24] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #96] + ldr r9, [sp, #100] +#else + ldrd r8, r9, [sp, #96] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #96] + ldr r7, [r3, #100] +#else + ldrd r6, r7, [r3, #96] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #56] + str r9, [r0, #60] +#else + strd r8, r9, [r0, #56] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, 
#28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[12] - ldr r12, [sp, #80] - ldr lr, [sp, #84] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #96] - ldr lr, [sp, #100] - ldrd r6, r7, [sp, #40] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #96] - str lr, [sp, #100] - ldr r12, [sp, #104] - ldr lr, [sp, #108] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr 
r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #96] - ldr lr, [sp, #100] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #96] - str lr, [sp, #100] - # Round 13 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r6, r7, [sp, #104] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #104] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #48] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #24] - ldr lr, [r0, #28] - strd r6, r7, [r0, #48] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] - str r12, [r0, #16] - str lr, [r0, #20] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - 
and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #40] + ldr r9, [sp, #44] +#else + ldrd r8, r9, [sp, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #96] + str r5, [sp, #100] +#else + strd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #104] + ldr r5, [sp, #108] +#else + ldrd r4, r5, [sp, #104] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #96] + str r5, [sp, #100] +#else + strd r4, r5, [sp, #96] +#endif + # Round 13 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + 
lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else strd r4, r5, [r0, #16] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #104] + ldr r9, [sp, #108] +#else + ldrd r8, r9, [sp, #104] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #104] + ldr r7, [r3, #108] +#else + ldrd r6, r7, [r3, #104] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, 
#28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #48] + str r9, [r0, #52] +#else + strd r8, r9, [r0, #48] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[13] - ldr r12, [sp, #88] - ldr lr, [sp, #92] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #104] - ldr lr, [sp, #108] - ldrd r6, r7, [sp, #48] - adds r12, 
r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #104] - str lr, [sp, #108] - ldr r12, [sp, #112] - ldr lr, [sp, #116] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #104] - ldr lr, [sp, #108] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #104] - str lr, [sp, #108] - # Round 14 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r6, r7, [sp, #112] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #112] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #40] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #16] - ldr lr, [r0, #20] - strd r6, r7, [r0, #40] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - 
eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] - str r12, [r0, #8] - str lr, [r0, #12] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #88] + ldr r5, [sp, #92] +#else + ldrd r4, r5, [sp, #88] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #104] + ldr r5, [sp, #108] +#else + ldrd r4, r5, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #48] + ldr r9, [sp, #52] +#else + ldrd r8, r9, [sp, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #104] + str r5, [sp, #108] +#else + strd r4, r5, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #104] + ldr r5, [sp, #108] +#else + ldrd r4, r5, [sp, #104] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #104] + str r5, [sp, #108] +#else + strd r4, r5, [sp, #104] +#endif + # Round 14 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else strd r4, r5, [r0, #8] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #116] +#else + ldrd r8, r9, [sp, #112] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #112] + ldr r7, [r3, #116] +#else + ldrd r6, r7, [r3, #112] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #40] + str r9, [r0, #44] +#else + strd r8, r9, [r0, #40] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[14] - ldr r12, [sp, #96] - ldr lr, [sp, #100] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, 
lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #112] - ldr lr, [sp, #116] - ldrd r6, r7, [sp, #56] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #112] - str lr, [sp, #116] - ldr r12, [sp, #120] - ldr lr, [sp, #124] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #112] - ldr lr, [sp, #116] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #112] - str lr, [sp, #116] - # Round 15 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r6, r7, [sp, #120] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #120] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #32] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #8] - ldr lr, [r0, #12] - strd r6, r7, [r0, #32] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, 
r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] - str r12, [r0] - str lr, [r0, #4] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #56] + ldr r9, [sp, #60] +#else + ldrd r8, r9, [sp, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #112] + str r5, [sp, #116] +#else + strd r4, r5, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #120] + ldr r5, [sp, #124] +#else + ldrd r4, r5, [sp, #120] +#endif + lsrs r6, r4, #1 + lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif + adds r4, 
r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #112] + str r5, [sp, #116] +#else + strd r4, r5, [sp, #112] +#endif + # Round 15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else strd r4, r5, [r0] - mov r8, r6 - mov r9, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #120] + ldr r9, [sp, #124] +#else + ldrd r8, r9, [sp, #120] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #120] + ldr r7, [r3, #124] +#else + ldrd r6, r7, [r3, #120] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #32] + str r9, [r0, #36] +#else + strd r8, r9, [r0, #32] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif + mov r10, r8 + mov r11, r9 # Calc new W[15] - ldr r12, [sp, #104] - ldr lr, [sp, #108] - lsrs r4, r12, #19 - lsrs r5, lr, #19 - orr r5, 
r5, r12, lsl #13 - orr r4, r4, lr, lsl #13 - lsls r6, r12, #3 - lsls r7, lr, #3 - orr r7, r7, r12, lsr #29 - orr r6, r6, lr, lsr #29 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #6 - lsrs r7, lr, #6 - orr r6, r6, lr, lsl #26 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #120] - ldr lr, [sp, #124] - ldrd r6, r7, [sp, #64] - adds r12, r12, r4 - adc lr, lr, r5 - adds r12, r12, r6 - adc lr, lr, r7 - str r12, [sp, #120] - str lr, [sp, #124] - ldr r12, [sp] - ldr lr, [sp, #4] - lsrs r4, r12, #1 - lsrs r5, lr, #1 - orr r5, r5, r12, lsl #31 - orr r4, r4, lr, lsl #31 - lsrs r6, r12, #8 - lsrs r7, lr, #8 - orr r7, r7, r12, lsl #24 - orr r6, r6, lr, lsl #24 - eor r5, r5, r7 - eor r4, r4, r6 - lsrs r6, r12, #7 - lsrs r7, lr, #7 - orr r6, r6, lr, lsl #25 - eor r5, r5, r7 - eor r4, r4, r6 - ldr r12, [sp, #120] - ldr lr, [sp, #124] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [sp, #120] - str lr, [sp, #124] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #104] + ldr r5, [sp, #108] +#else + ldrd r4, r5, [sp, #104] +#endif + lsrs r6, r4, #19 + lsrs r7, r5, #19 + orr r7, r7, r4, lsl #13 + orr r6, r6, r5, lsl #13 + lsls r8, r4, #3 + lsls r9, r5, #3 + orr r9, r9, r4, lsr #29 + orr r8, r8, r5, lsr #29 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #6 + lsrs r9, r5, #6 + orr r8, r8, r5, lsl #26 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #120] + ldr r5, [sp, #124] +#else + ldrd r4, r5, [sp, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #64] + ldr r9, [sp, #68] +#else + ldrd r8, r9, [sp, #64] +#endif + adds r4, r4, r6 + adc r5, r5, r7 + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #120] + str r5, [sp, #124] +#else + strd r4, r5, [sp, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif + lsrs r6, r4, #1 + 
lsrs r7, r5, #1 + orr r7, r7, r4, lsl #31 + orr r6, r6, r5, lsl #31 + lsrs r8, r4, #8 + lsrs r9, r5, #8 + orr r9, r9, r4, lsl #24 + orr r8, r8, r5, lsl #24 + eor r7, r7, r9 + eor r6, r6, r8 + lsrs r8, r4, #7 + lsrs r9, r5, #7 + orr r8, r8, r5, lsl #25 + eor r7, r7, r9 + eor r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #120] + ldr r5, [sp, #124] +#else + ldrd r4, r5, [sp, #120] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #120] + str r5, [sp, #124] +#else + strd r4, r5, [sp, #120] +#endif add r3, r3, #0x80 - subs r10, r10, #1 - bne L_sha512_len_neon_start + subs r12, r12, #1 + bne L_SHA512_transform_len_start # Round 0 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r6, r7, [sp] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #24] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0] - ldr lr, [r0, #4] - strd r6, r7, [r0, #24] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - 
lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] - str r12, [r0, #56] - str lr, [r0, #60] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp] + ldr r9, [sp, #4] +#else + ldrd r8, r9, [sp] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3] + ldr r7, [r3, #4] +#else + ldrd r6, r7, [r3] +#endif adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else strd r4, r5, [r0, #56] - mov r8, r6 - mov r9, r7 +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif + mov r10, r8 + mov r11, r9 # Round 1 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r6, r7, [sp, #8] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #8] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #16] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #56] - ldr lr, [r0, #60] - strd r6, r7, [r0, #16] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] - str r12, [r0, #48] - str lr, [r0, #52] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, 
r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #8] + ldr r9, [sp, #12] +#else + ldrd r8, r9, [sp, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #8] + ldr r7, [r3, #12] +#else + ldrd r6, r7, [r3, #8] +#endif adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else strd r4, r5, [r0, #48] - mov r8, r6 - mov r9, r7 +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif + mov r10, r8 + mov r11, r9 # Round 2 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor 
r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r6, r7, [sp, #16] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #16] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #8] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #48] - ldr lr, [r0, #52] - strd r6, r7, [r0, #8] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] - str r12, [r0, #40] - str lr, [r0, #44] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + 
str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #16] + ldr r7, [r3, #20] +#else + ldrd r6, r7, [r3, #16] +#endif adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else strd r4, r5, [r0, #40] - mov r8, r6 - mov r9, r7 +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif + mov r10, r8 + mov r11, r9 # Round 3 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r6, r7, [sp, #24] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #24] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, 
[r0, #32] - str lr, [r0, #36] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #40] - ldr lr, [r0, #44] - strd r6, r7, [r0] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] - str r12, [r0, #32] - str lr, [r0, #36] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor 
r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #24] + ldr r9, [sp, #28] +#else + ldrd r8, r9, [sp, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #24] + ldr r7, [r3, #28] +#else + ldrd r6, r7, [r3, #24] +#endif adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else strd r4, r5, [r0, #32] - mov r8, r6 - mov r9, r7 +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] 
+#else + strd r4, r5, [r0, #32] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif + mov r10, r8 + mov r11, r9 # Round 4 - ldr r12, [r0] - ldr lr, [r0, #4] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r6, r7, [sp, #32] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #32] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #56] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #32] - ldr lr, [r0, #36] - strd r6, r7, [r0, #56] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd 
r4, r5, [r0, #40] - str r12, [r0, #24] - str lr, [r0, #28] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else ldrd r4, r5, [r0, #24] - adds r4, r4, r8 - adc r5, r5, r9 +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else strd r4, r5, [r0, #24] - mov r8, r6 - mov r9, r7 - # Round 5 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - ldr r12, [r0, #56] - ldr lr, [r0, #60] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else ldrd r6, r7, [r0, #8] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r6, r7, [sp, #40] - adds r12, r12, r4 - adc lr, lr, r5 
- ldrd r4, r5, [r3, #40] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #48] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #24] - ldr lr, [r0, #28] - strd r6, r7, [r0, #48] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] - str r12, [r0, #16] - str lr, [r0, #20] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #16] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #16] - mov r8, r6 - mov r9, r7 - # Round 6 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r6, r7, [sp, #48] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #48] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #40] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - adds r6, r6, r12 - adc r7, r7, lr - ldr 
r12, [r0, #16] - ldr lr, [r0, #20] - strd r6, r7, [r0, #40] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else ldrd r4, r5, [r0, #24] - str r12, [r0, #8] - str lr, [r0, #12] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #32] + ldr r9, [sp, #36] +#else + ldrd r8, r9, [sp, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #32] + ldr r7, [r3, #36] +#else + ldrd r6, r7, [r3, #32] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #8] - mov r8, r6 - mov r9, r7 - # Round 7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r4, r5, 
[r0, #48] - ldrd r6, r7, [r0, #56] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r6, r7, [sp, #56] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #56] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #32] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #8] - ldr lr, [r0, #12] - strd r6, r7, [r0, #32] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] - str r12, [r0] - str lr, [r0, #4] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0] - mov r8, r6 - mov r9, r7 - # Round 8 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r6, r7, [sp, #64] - adds r12, r12, r4 - adc lr, lr, 
r5 - ldrd r4, r5, [r3, #64] - adds r12, r12, r6 - adc lr, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #56] + str r9, [r0, #60] +#else + strd r8, r9, [r0, #56] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else ldrd r6, r7, [r0, #24] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #56] - str lr, [r0, #60] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0] - ldr lr, [r0, #4] +#endif + adds 
r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else strd r6, r7, [r0, #24] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] - str r12, [r0, #56] - str lr, [r0, #60] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 +#endif + mov r10, r8 + mov r11, r9 + # Round 5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else ldrd r4, r5, [r0, #56] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #56] - mov r8, r6 - mov r9, r7 - # Round 9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - ldrd r6, r7, [sp, #72] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #72] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #16] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #48] - str lr, [r0, #52] - 
adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #56] - ldr lr, [r0, #60] - strd r6, r7, [r0, #16] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] - str r12, [r0, #48] - str lr, [r0, #52] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #48] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #48] - mov r8, r6 - mov r9, r7 - # Round 10 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r6, r7, [sp, #80] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #80] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #8] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #40] - str lr, [r0, #44] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #48] - ldr lr, [r0, #52] - strd r6, r7, [r0, #8] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, 
#30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] - str r12, [r0, #40] - str lr, [r0, #44] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #40] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #40] - mov r8, r6 - mov r9, r7 - # Round 11 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - ldr r12, [r0, #8] - ldr lr, [r0, #12] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - ldrd r6, r7, [sp, #88] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #88] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #32] - str lr, [r0, #36] - 
adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #40] - ldr lr, [r0, #44] - strd r6, r7, [r0] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #32] - ldr lr, [r0, #36] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] - str r12, [r0, #32] - str lr, [r0, #36] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #32] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #32] - mov r8, r6 - mov r9, r7 - # Round 12 - ldr r12, [r0] - ldr lr, [r0, #4] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - ldrd r6, r7, [sp, #96] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #96] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #56] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #24] - str lr, [r0, #28] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #32] - ldr lr, [r0, #36] - strd r6, r7, [r0, #56] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls 
r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #24] - ldr lr, [r0, #28] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] - str r12, [r0, #24] - str lr, [r0, #28] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #24] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #24] - mov r8, r6 - mov r9, r7 - # Round 13 - ldr r12, [r0, #56] - ldr lr, [r0, #60] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - ldr r12, [r0, #56] - ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #16] - ldr lr, [r0, #20] - ldrd r6, r7, [sp, #104] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #104] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #48] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #16] - str lr, [r0, #20] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #24] - ldr lr, [r0, #28] - strd r6, r7, [r0, #48] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, 
#16] - ldr lr, [r0, #20] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] - str r12, [r0, #16] - str lr, [r0, #20] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #16] - adds r4, r4, r8 - adc r5, r5, r9 +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else strd r4, r5, [r0, #16] - mov r8, r6 - mov r9, r7 - # Round 14 - ldr r12, [r0, #48] - ldr lr, [r0, #52] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - ldr r12, [r0, #48] - ldr lr, [r0, #52] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else ldrd r6, r7, [r0] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - ldrd r6, r7, [sp, #112] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #112] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #40] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0, #8] - str lr, [r0, #12] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #16] - ldr lr, [r0, #20] - strd r6, r7, [r0, #40] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr 
r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0, #8] - ldr lr, [r0, #12] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] - str r12, [r0, #8] - str lr, [r0, #12] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0, #8] - adds r4, r4, r8 - adc r5, r5, r9 - strd r4, r5, [r0, #8] - mov r8, r6 - mov r9, r7 - # Round 15 - ldr r12, [r0, #40] - ldr lr, [r0, #44] - lsrs r4, r12, #14 - lsrs r5, lr, #14 - orr r5, r5, r12, lsl #18 - orr r4, r4, lr, lsl #18 - lsrs r6, r12, #18 - lsrs r7, lr, #18 - orr r7, r7, r12, lsl #14 - orr r6, r6, lr, lsl #14 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #23 - lsls r7, lr, #23 - orr r7, r7, r12, lsr #9 - orr r6, r6, lr, lsr #9 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - ldr r12, [r0, #40] - ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] - eor r4, r4, r6 - eor r5, r5, r7 - and r4, r4, r12 - and r5, r5, lr - eor r4, r4, r6 - eor r5, r5, r7 - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r6, r7, [sp, #120] - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r4, r5, [r3, #120] - adds r12, r12, r6 - adc lr, lr, r7 - ldrd r6, r7, [r0, #32] - adds r12, r12, r4 - adc lr, lr, r5 - str r12, [r0] - str lr, [r0, #4] - adds r6, r6, r12 - adc r7, r7, lr - ldr r12, [r0, #8] - ldr lr, [r0, #12] - strd r6, r7, [r0, #32] - lsrs r4, r12, #28 - lsrs r5, lr, #28 - orr r5, r5, r12, lsl #4 - orr r4, r4, lr, lsl #4 - lsls r6, r12, #30 - lsls r7, lr, #30 - orr r7, r7, r12, lsr #2 - orr r6, r6, lr, lsr #2 - eor r4, r4, r6 - eor r5, r5, r7 - lsls r6, r12, #25 - lsls r7, lr, #25 - orr r7, r7, r12, lsr #7 - orr r6, r6, lr, lsr #7 - ldr r12, [r0] - ldr lr, [r0, #4] - eor r4, r4, r6 - eor 
r5, r5, r7 - adds r12, r12, r4 - adc lr, lr, r5 - ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else ldrd r4, r5, [r0, #16] - str r12, [r0] - str lr, [r0, #4] - eor r6, r6, r4 - eor r7, r7, r5 - and r8, r8, r6 - and r9, r9, r7 - eor r8, r8, r4 - eor r9, r9, r5 - ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #40] + ldr r9, [sp, #44] +#else + ldrd r8, r9, [sp, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #40] + ldr r7, [r3, #44] +#else + ldrd r6, r7, [r3, #40] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0] - mov r8, r6 - mov r9, r7 - # Add in digest from start - ldr r12, [r0] - ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [sp, #128] - ldrd r8, r9, [sp, #136] - adds r12, r12, r6 - adc lr, lr, r7 - adds r4, r4, r8 - adc r5, r5, r9 - str r12, [r0] - str lr, [r0, #4] - strd r4, r5, [r0, #8] - str r12, [sp, #128] - str lr, [sp, #132] - strd r4, r5, [sp, #136] - ldr r12, [r0, #16] - ldr lr, [r0, #20] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else ldrd r4, r5, [r0, #24] - ldrd r6, r7, [sp, #144] - ldrd r8, r9, [sp, #152] - adds r12, r12, r6 - adc lr, lr, r7 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + str r8, [r0, #48] + str r9, [r0, #52] +#else + strd r8, r9, [r0, #48] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif + mov r10, r8 + mov r11, r9 + # Round 6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] 
+#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #48] + ldr r9, [sp, #52] +#else + ldrd r8, r9, [sp, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #48] + ldr r7, [r3, #52] +#else + ldrd r6, r7, [r3, #48] +#endif adds r4, r4, r8 adc r5, r5, r9 - str r12, [r0, #16] - str lr, [r0, #20] - strd r4, r5, [r0, #24] - str r12, [sp, #144] - str lr, [sp, #148] - strd r4, r5, [sp, #152] - ldr r12, [r0, #32] - ldr lr, [r0, #36] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #40] + str r9, [r0, #44] +#else + strd r8, r9, [r0, 
#40] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif + mov r10, r8 + mov r11, r9 + # Round 7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else ldrd r4, r5, [r0, #40] - ldrd r6, r7, [sp, #160] - ldrd r8, r9, [sp, #168] - adds r12, r12, r6 - adc lr, lr, r7 +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] 
+#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #56] + ldr r9, [sp, #60] +#else + ldrd r8, r9, [sp, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #56] + ldr r7, [r3, #60] +#else + ldrd r6, r7, [r3, #56] +#endif adds r4, r4, r8 adc r5, r5, r9 - str r12, [r0, #32] - str lr, [r0, #36] - strd r4, r5, [r0, #40] - str r12, [sp, #160] - str lr, [sp, #164] - strd r4, r5, [sp, #168] - ldr r12, [r0, #48] - ldr lr, [r0, #52] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #32] + str r9, [r0, #36] +#else + strd r8, r9, [r0, #32] +#endif + lsrs 
r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif + mov r10, r8 + mov r11, r9 + # Round 8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else ldrd r4, r5, [r0, #56] - ldrd r6, r7, [sp, #176] - ldrd r8, r9, [sp, #184] - adds r12, r12, r6 - adc lr, lr, r7 +#endif + eor r6, r6, 
r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #64] + ldr r9, [sp, #68] +#else + ldrd r8, r9, [sp, #64] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #64] + ldr r7, [r3, #68] +#else + ldrd r6, r7, [r3, #64] +#endif adds r4, r4, r8 adc r5, r5, r9 - str r12, [r0, #48] - str lr, [r0, #52] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else strd r4, r5, [r0, #56] - str r12, [sp, #176] - str lr, [sp, #180] - strd r4, r5, [sp, #184] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, 
#30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif + mov r10, r8 + mov r11, r9 + # Round 9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, 
#52] +#else + strd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #72] + ldr r9, [sp, #76] +#else + ldrd r8, r9, [sp, #72] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #72] + ldr r7, [r3, #76] +#else + ldrd r6, r7, [r3, #72] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif + mov r10, r8 + mov r11, r9 + # Round 10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #80] + ldr r9, [sp, #84] +#else + ldrd r8, r9, [sp, #80] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #80] + ldr r7, [r3, #84] +#else + ldrd r6, r7, [r3, #80] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif + mov r10, r8 + mov r11, r9 + # Round 11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] 
+#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #92] +#else + ldrd r8, r9, [sp, #88] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #88] + ldr r7, [r3, #92] +#else + ldrd r6, r7, [r3, #88] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif + mov r10, r8 + mov r11, r9 + # Round 12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #96] + ldr r9, [sp, #100] +#else + ldrd r8, r9, [sp, #96] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #96] + ldr r7, [r3, #100] +#else + ldrd r6, r7, [r3, #96] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #56] + str r9, [r0, #60] +#else + strd r8, r9, [r0, #56] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif + mov r10, r8 + mov r11, r9 + # Round 13 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #104] + ldr r9, [sp, #108] +#else + ldrd r8, r9, [sp, #104] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
ldr r6, [r3, #104] + ldr r7, [r3, #108] +#else + ldrd r6, r7, [r3, #104] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #48] + ldr r9, [r0, #52] +#else + ldrd r8, r9, [r0, #48] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #48] + str r9, [r0, #52] +#else + strd r8, r9, [r0, #48] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, 
[r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif + mov r10, r8 + mov r11, r9 + # Round 14 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif + lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0] + ldr r9, [r0, #4] +#else + ldrd r8, r9, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #116] +#else + ldrd r8, r9, [sp, #112] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #112] + ldr r7, [r3, #116] +#else + ldrd r6, r7, [r3, #112] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #40] + ldr r9, [r0, #44] +#else + ldrd r8, r9, [r0, 
#40] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #40] + str r9, [r0, #44] +#else + strd r8, r9, [r0, #40] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #16] + ldr r9, [r0, #20] +#else + ldrd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif + mov r10, r8 + mov r11, r9 + # Round 15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif + 
lsrs r6, r4, #14 + lsrs r7, r5, #14 + orr r7, r7, r4, lsl #18 + orr r6, r6, r5, lsl #18 + lsrs r8, r4, #18 + lsrs r9, r5, #18 + orr r9, r9, r4, lsl #14 + orr r8, r8, r5, lsl #14 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #23 + lsls r9, r5, #23 + orr r9, r9, r4, lsr #9 + orr r8, r8, r5, lsr #9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + and r6, r6, r4 + and r7, r7, r5 + eor r6, r6, r8 + eor r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #120] + ldr r9, [sp, #124] +#else + ldrd r8, r9, [sp, #120] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r3, #120] + ldr r7, [r3, #124] +#else + ldrd r6, r7, [r3, #120] +#endif + adds r4, r4, r8 + adc r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #32] + ldr r9, [r0, #36] +#else + ldrd r8, r9, [r0, #32] +#endif + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + adds r8, r8, r4 + adc r9, r9, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) 
+ ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #32] + str r9, [r0, #36] +#else + strd r8, r9, [r0, #32] +#endif + lsrs r6, r4, #28 + lsrs r7, r5, #28 + orr r7, r7, r4, lsl #4 + orr r6, r6, r5, lsl #4 + lsls r8, r4, #30 + lsls r9, r5, #30 + orr r9, r9, r4, lsr #2 + orr r8, r8, r5, lsr #2 + eor r6, r6, r8 + eor r7, r7, r9 + lsls r8, r4, #25 + lsls r9, r5, #25 + orr r9, r9, r4, lsr #7 + orr r8, r8, r5, lsr #7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif + eor r6, r6, r8 + eor r7, r7, r9 + adds r4, r4, r6 + adc r5, r5, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif + eor r8, r8, r6 + eor r9, r9, r7 + and r10, r10, r8 + and r11, r11, r9 + eor r10, r10, r6 + eor r11, r11, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif + mov r10, r8 + mov r11, r9 + # Add in digest from start +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #128] + ldr r9, [sp, #132] +#else + ldrd r8, r9, [sp, #128] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #136] + ldr r11, [sp, #140] +#else + ldrd r10, r11, [sp, #136] +#endif + adds r4, r4, r8 + adc r5, r5, r9 + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #128] + str r5, [sp, #132] +#else + strd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #136] + str r7, [sp, #140] +#else + strd r6, r7, [sp, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #144] + ldr r9, [sp, #148] +#else + ldrd r8, r9, [sp, #144] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #152] + ldr r11, [sp, #156] +#else + ldrd r10, r11, [sp, #152] +#endif + adds r4, r4, r8 + adc r5, r5, r9 + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #144] + str r5, [sp, #148] +#else + strd r4, r5, [sp, #144] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #152] + str r7, [sp, #156] +#else + strd r6, r7, [sp, #152] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, 
#36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #160] + ldr r9, [sp, #164] +#else + ldrd r8, r9, [sp, #160] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #168] + ldr r11, [sp, #172] +#else + ldrd r10, r11, [sp, #168] +#endif + adds r4, r4, r8 + adc r5, r5, r9 + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #160] + str r5, [sp, #164] +#else + strd r4, r5, [sp, #160] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #168] + str r7, [sp, #172] +#else + strd r6, r7, [sp, #168] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #176] + ldr r9, [sp, #180] +#else + ldrd r8, r9, [sp, #176] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #184] + ldr r11, [sp, #188] +#else + ldrd r10, r11, [sp, #184] +#endif + adds r4, r4, r8 + adc r5, r5, r9 + adds r6, r6, r10 + adc r7, r7, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #176] + str r5, [sp, #180] +#else + strd r4, r5, [sp, #176] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #184] + str r7, [sp, #188] +#else + strd r6, r7, [sp, #184] +#endif subs r2, r2, #0x80 sub r3, r3, #0x200 add r1, r1, #0x80 - bne L_sha512_len_neon_begin + bne L_SHA512_transform_len_begin eor r0, r0, r0 add sp, sp, #0xc0 - pop {r4, r5, r6, r7, r8, r9, r10, pc} + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size Transform_Sha512_Len,.-Transform_Sha512_Len #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifndef WOLFSSL_ARMASM_NO_NEON .text .type L_SHA512_transform_neon_len_k, %object .size L_SHA512_transform_neon_len_k, 640 - .align 3 + .align 4 L_SHA512_transform_neon_len_k: .word 0xd728ae22 .word 0x428a2f98 @@ -4204,17 +7858,23 @@ L_SHA512_transform_neon_len_k: .word 0x4a475817 .word 0x6c44198c .text - .align 2 + .align 4 + .fpu neon .globl Transform_Sha512_Len .type Transform_Sha512_Len, %function Transform_Sha512_Len: vpush {d8-d15} + adr r3, L_SHA512_transform_neon_len_k # Load digest into working vars vldm.64 r0, {d0-d7} # Start of loop processing a block -L_sha512_len_neon_begin: +L_SHA512_transform_neon_len_begin: # Load W - vldm.64 r1!, {d16-d31} + vld1.8 {q8, q9}, [r1]! + vld1.8 {q10, q11}, [r1]! + vld1.8 {q12, q13}, [r1]! + vld1.8 {q14, q15}, [r1]! 
+#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT vrev64.8 q8, q8 vrev64.8 q9, q9 vrev64.8 q10, q10 @@ -4223,10 +7883,27 @@ L_sha512_len_neon_begin: vrev64.8 q13, q13 vrev64.8 q14, q14 vrev64.8 q15, q15 - adr r3, L_SHA512_transform_neon_len_k +#else + vrev64.8 d16, d16 + vrev64.8 d17, d17 + vrev64.8 d18, d18 + vrev64.8 d19, d19 + vrev64.8 d20, d20 + vrev64.8 d21, d21 + vrev64.8 d22, d22 + vrev64.8 d23, d23 + vrev64.8 d24, d24 + vrev64.8 d25, d25 + vrev64.8 d26, d26 + vrev64.8 d27, d27 + vrev64.8 d28, d28 + vrev64.8 d29, d29 + vrev64.8 d30, d30 + vrev64.8 d31, d31 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ mov r12, #4 # Start of 16 rounds -L_sha512_len_neon_start: +L_SHA512_transform_neon_len_start: # Round 0 vld1.64 {d12}, [r3:64]! vshl.u64 d8, d4, #50 @@ -4285,6 +7962,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d2, d6 vadd.i64 d6, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[0]-W[1] vext.8 q6, q8, q9, #8 vshl.u64 q4, q15, #45 @@ -4305,6 +7983,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q8, q5 +#else + # Calc new W[0]-W[1] + vmov d12, d17 + vmov d13, d18 + vshl.u64 d8, d30, #45 + vshl.u64 d9, d31, #45 + vsri.u64 d8, d30, #19 + vsri.u64 d9, d31, #19 + vshl.u64 d10, d30, #3 + vshl.u64 d11, d31, #3 + vsri.u64 d10, d30, #61 + vsri.u64 d11, d31, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d30, #6 + vshr.u64 d9, d31, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d16, d10 + vadd.i64 d17, d11 + vmov d14, d25 + vmov d15, d26 + vadd.i64 d16, d14 + vadd.i64 d17, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d16, d10 + vadd.i64 d17, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 2 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d2, #50 @@ -4363,6 +8082,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d0, d4 vadd.i64 d4, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[2]-W[3] vext.8 q6, q9, q10, #8 vshl.u64 q4, q8, #45 @@ -4383,6 +8103,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q9, q5 +#else + # Calc new W[2]-W[3] + vmov d12, d19 + vmov d13, d20 + vshl.u64 d8, d16, #45 + vshl.u64 d9, d17, #45 + vsri.u64 d8, d16, #19 + vsri.u64 d9, d17, #19 + vshl.u64 d10, d16, #3 + vshl.u64 d11, d17, #3 + vsri.u64 d10, d16, #61 + vsri.u64 d11, d17, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d16, #6 + vshr.u64 d9, d17, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d18, d10 + vadd.i64 d19, d11 + vmov d14, d27 + vmov d15, d28 + vadd.i64 d18, d14 + vadd.i64 d19, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d18, d10 + vadd.i64 d19, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 4 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d0, #50 @@ -4441,6 +8202,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d6, d2 vadd.i64 d2, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[4]-W[5] vext.8 q6, q10, q11, #8 vshl.u64 q4, q9, #45 @@ -4461,6 +8223,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q10, q5 +#else + # Calc new W[4]-W[5] + vmov d12, d21 + vmov d13, d22 + vshl.u64 d8, d18, #45 + vshl.u64 d9, d19, #45 + vsri.u64 d8, d18, #19 + vsri.u64 d9, d19, #19 + vshl.u64 d10, d18, #3 + vshl.u64 d11, d19, #3 + vsri.u64 d10, d18, #61 + vsri.u64 d11, d19, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d18, #6 + vshr.u64 d9, d19, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d20, d10 + vadd.i64 d21, d11 + vmov d14, d29 + vmov d15, d30 + vadd.i64 d20, d14 + vadd.i64 d21, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d20, d10 + vadd.i64 d21, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 6 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d6, #50 @@ -4519,6 +8322,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d4, d0 vadd.i64 d0, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[6]-W[7] vext.8 q6, q11, q12, #8 vshl.u64 q4, q10, #45 @@ -4539,6 +8343,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q11, q5 +#else + # Calc new W[6]-W[7] + vmov d12, d23 + vmov d13, d24 + vshl.u64 d8, d20, #45 + vshl.u64 d9, d21, #45 + vsri.u64 d8, d20, #19 + vsri.u64 d9, d21, #19 + vshl.u64 d10, d20, #3 + vshl.u64 d11, d21, #3 + vsri.u64 d10, d20, #61 + vsri.u64 d11, d21, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d20, #6 + vshr.u64 d9, d21, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d22, d10 + vadd.i64 d23, d11 + vmov d14, d31 + vmov d15, d16 + vadd.i64 d22, d14 + vadd.i64 d23, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d22, d10 + vadd.i64 d23, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 8 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d4, #50 @@ -4597,6 +8442,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d2, d6 vadd.i64 d6, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[8]-W[9] vext.8 q6, q12, q13, #8 vshl.u64 q4, q11, #45 @@ -4617,6 +8463,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q12, q5 +#else + # Calc new W[8]-W[9] + vmov d12, d25 + vmov d13, d26 + vshl.u64 d8, d22, #45 + vshl.u64 d9, d23, #45 + vsri.u64 d8, d22, #19 + vsri.u64 d9, d23, #19 + vshl.u64 d10, d22, #3 + vshl.u64 d11, d23, #3 + vsri.u64 d10, d22, #61 + vsri.u64 d11, d23, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d22, #6 + vshr.u64 d9, d23, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d24, d10 + vadd.i64 d25, d11 + vmov d14, d17 + vmov d15, d18 + vadd.i64 d24, d14 + vadd.i64 d25, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d24, d10 + vadd.i64 d25, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 10 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d2, #50 @@ -4675,6 +8562,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d0, d4 vadd.i64 d4, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[10]-W[11] vext.8 q6, q13, q14, #8 vshl.u64 q4, q12, #45 @@ -4695,6 +8583,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q13, q5 +#else + # Calc new W[10]-W[11] + vmov d12, d27 + vmov d13, d28 + vshl.u64 d8, d24, #45 + vshl.u64 d9, d25, #45 + vsri.u64 d8, d24, #19 + vsri.u64 d9, d25, #19 + vshl.u64 d10, d24, #3 + vshl.u64 d11, d25, #3 + vsri.u64 d10, d24, #61 + vsri.u64 d11, d25, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d24, #6 + vshr.u64 d9, d25, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d26, d10 + vadd.i64 d27, d11 + vmov d14, d19 + vmov d15, d20 + vadd.i64 d26, d14 + vadd.i64 d27, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d26, d10 + vadd.i64 d27, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 12 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d0, #50 @@ -4753,6 +8682,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d6, d2 vadd.i64 d2, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[12]-W[13] vext.8 q6, q14, q15, #8 vshl.u64 q4, q13, #45 @@ -4773,6 +8703,47 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q14, q5 +#else + # Calc new W[12]-W[13] + vmov d12, d29 + vmov d13, d30 + vshl.u64 d8, d26, #45 + vshl.u64 d9, d27, #45 + vsri.u64 d8, d26, #19 + vsri.u64 d9, d27, #19 + vshl.u64 d10, d26, #3 + vshl.u64 d11, d27, #3 + vsri.u64 d10, d26, #61 + vsri.u64 d11, d27, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d26, #6 + vshr.u64 d9, d27, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d28, d10 + vadd.i64 d29, d11 + vmov d14, d21 + vmov d15, d22 + vadd.i64 d28, d14 + vadd.i64 d29, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d28, d10 + vadd.i64 d29, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ # Round 14 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d6, #50 @@ -4831,6 +8802,7 @@ L_sha512_len_neon_start: vadd.i64 d10, d9 vadd.i64 d4, d0 vadd.i64 d0, d10 +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT # Calc new W[14]-W[15] vext.8 q6, q15, q8, #8 vshl.u64 q4, q14, #45 @@ -4851,8 +8823,49 @@ L_sha512_len_neon_start: vshr.u64 q6, #7 veor q5, q6 vadd.i64 q15, q5 +#else + # Calc new W[14]-W[15] + vmov d12, d31 + vmov d13, d16 + vshl.u64 d8, d28, #45 + vshl.u64 d9, d29, #45 + vsri.u64 d8, d28, #19 + vsri.u64 d9, d29, #19 + vshl.u64 d10, d28, #3 + vshl.u64 d11, d29, #3 + vsri.u64 d10, d28, #61 + vsri.u64 d11, d29, #61 + veor d10, d8 + veor d11, d9 + vshr.u64 d8, d28, #6 + vshr.u64 d9, d29, #6 + veor d10, d8 + veor d11, d9 + vadd.i64 d30, d10 + vadd.i64 d31, d11 + vmov d14, d23 + vmov d15, d24 + vadd.i64 d30, d14 + vadd.i64 d31, d15 + vshl.u64 d8, d12, #63 + vshl.u64 d9, d13, #63 + vsri.u64 d8, d12, #1 + vsri.u64 d9, d13, #1 + vshl.u64 d10, d12, #56 + vshl.u64 d11, d13, #56 + vsri.u64 d10, d12, #8 + vsri.u64 d11, d13, #8 + veor d10, d8 + veor d11, d9 + vshr.u64 d12, #7 + vshr.u64 d13, #7 + veor d10, d12 + veor d11, d13 + vadd.i64 d30, d10 + vadd.i64 d31, d11 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ subs r12, r12, #1 - bne L_sha512_len_neon_start + bne L_SHA512_transform_neon_len_start # Round 0 vld1.64 {d12}, [r3:64]! 
vshl.u64 d8, d4, #50 @@ -5319,20 +9332,34 @@ L_sha512_len_neon_start: vadd.i64 d0, d10 # Add in digest from start vldm.64 r0, {d8-d15} +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT vadd.i64 q0, q0, q4 vadd.i64 q1, q1, q5 vadd.i64 q2, q2, q6 vadd.i64 q3, q3, q7 +#else + vadd.i64 d0, d0, d8 + vadd.i64 d1, d1, d9 + vadd.i64 d2, d2, d10 + vadd.i64 d3, d3, d11 + vadd.i64 d4, d4, d12 + vadd.i64 d5, d5, d13 + vadd.i64 d6, d6, d14 + vadd.i64 d7, d7, d15 +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ vstm.64 r0, {d0-d7} subs r2, r2, #0x80 - bne L_sha512_len_neon_begin + sub r3, r3, #0x280 + bne L_SHA512_transform_neon_len_begin vpop {d8-d15} bx lr .size Transform_Sha512_Len,.-Transform_Sha512_Len #endif /* !WOLFSSL_ARMASM_NO_NEON */ -#endif /* !__aarch64__ */ -#endif /* WOLFSSL_ARMASM && WOLFSSL_SHA512 */ +#endif /* WOLFSSL_SHA512 */ +#endif /* !__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c new file mode 100644 index 000000000..8656d6832 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -0,0 +1,9157 @@ +/* armv8-32-sha512-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. 
+ * + * https://www.wolfssl.com + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha512.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) +#include +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +#ifdef WOLFSSL_SHA512 +#include + +#ifdef WOLFSSL_ARMASM_NO_NEON +static const uint64_t L_SHA512_transform_len_k[] = { + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 
0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, +}; + +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); +void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) +{ + register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k; + + __asm__ __volatile__ ( + "sub sp, sp, #0xc0\n\t" + /* Copy digest to add in at end */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[sha512], #24]\n\t" + "ldr r11, [%[sha512], #28]\n\t" +#else + "ldrd r10, r11, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"str r4, [sp, #128]\n\t" + "str r5, [sp, #132]\n\t" +#else + "strd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #136]\n\t" + "str r7, [sp, #140]\n\t" +#else + "strd r6, r7, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #144]\n\t" + "str r9, [sp, #148]\n\t" +#else + "strd r8, r9, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #152]\n\t" + "str r11, [sp, #156]\n\t" +#else + "strd r10, r11, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[sha512], #56]\n\t" + "ldr r11, [%[sha512], #60]\n\t" +#else + "ldrd r10, r11, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #160]\n\t" + "str r5, [sp, #164]\n\t" +#else + "strd r4, r5, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #168]\n\t" + "str r7, [sp, #172]\n\t" +#else + "strd r6, r7, [sp, #168]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #176]\n\t" + "str r9, [sp, #180]\n\t" +#else + "strd r8, r9, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #184]\n\t" + "str r11, [sp, #188]\n\t" +#else + "strd r10, r11, [sp, #184]\n\t" +#endif + /* Start of loop processing a block */ + "\n" + 
"L_SHA512_transform_len_begin_%=: \n\t" + /* Load, Reverse and Store W - 64 bytes */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) + "ldr r4, [%[data]]\n\t" + "ldr r5, [%[data], #4]\n\t" + "ldr r6, [%[data], #8]\n\t" + "ldr r7, [%[data], #12]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp]\n\t" + "str r4, [sp, #4]\n\t" + "str r7, [sp, #8]\n\t" + "str r6, [sp, #12]\n\t" + "ldr r4, [%[data], #16]\n\t" + "ldr r5, [%[data], #20]\n\t" + "ldr r6, [%[data], #24]\n\t" + "ldr r7, [%[data], #28]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #16]\n\t" + "str r4, [sp, #20]\n\t" + "str r7, [sp, #24]\n\t" + "str r6, [sp, #28]\n\t" + "ldr r4, [%[data], #32]\n\t" + "ldr r5, [%[data], #36]\n\t" + "ldr r6, [%[data], #40]\n\t" + "ldr r7, [%[data], #44]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + 
"eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #32]\n\t" + "str r4, [sp, #36]\n\t" + "str r7, [sp, #40]\n\t" + "str r6, [sp, #44]\n\t" + "ldr r4, [%[data], #48]\n\t" + "ldr r5, [%[data], #52]\n\t" + "ldr r6, [%[data], #56]\n\t" + "ldr r7, [%[data], #60]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #48]\n\t" + "str r4, [sp, #52]\n\t" + "str r7, [sp, #56]\n\t" + "str r6, [sp, #60]\n\t" + "ldr r4, [%[data], #64]\n\t" + "ldr r5, [%[data], #68]\n\t" + "ldr r6, [%[data], #72]\n\t" + "ldr r7, [%[data], #76]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #64]\n\t" + "str r4, [sp, #68]\n\t" + "str r7, [sp, #72]\n\t" + "str r6, [sp, #76]\n\t" + "ldr r4, [%[data], #80]\n\t" + "ldr r5, [%[data], #84]\n\t" + "ldr r6, [%[data], #88]\n\t" + "ldr r7, [%[data], #92]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, 
#0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #80]\n\t" + "str r4, [sp, #84]\n\t" + "str r7, [sp, #88]\n\t" + "str r6, [sp, #92]\n\t" + "ldr r4, [%[data], #96]\n\t" + "ldr r5, [%[data], #100]\n\t" + "ldr r6, [%[data], #104]\n\t" + "ldr r7, [%[data], #108]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #96]\n\t" + "str r4, [sp, #100]\n\t" + "str r7, [sp, #104]\n\t" + "str r6, [sp, #108]\n\t" + "ldr r4, [%[data], #112]\n\t" + "ldr r5, [%[data], #116]\n\t" + "ldr r6, [%[data], #120]\n\t" + "ldr r7, [%[data], #124]\n\t" + "eor r8, r4, r4, ror #16\n\t" + "eor r9, r5, r5, ror #16\n\t" + "eor r10, r6, r6, ror #16\n\t" + "eor r11, r7, r7, ror #16\n\t" + "bic r8, r8, #0xff0000\n\t" + "bic r9, r9, #0xff0000\n\t" + "bic r10, r10, #0xff0000\n\t" + "bic r11, r11, #0xff0000\n\t" + "ror r4, r4, #8\n\t" + "ror r5, r5, #8\n\t" + "ror r6, r6, #8\n\t" + "ror r7, r7, #8\n\t" + "eor r4, r4, r8, lsr #8\n\t" + "eor r5, r5, r9, lsr #8\n\t" + "eor r6, r6, r10, lsr #8\n\t" + "eor r7, r7, r11, lsr #8\n\t" + "str r5, [sp, #112]\n\t" + "str r4, [sp, #116]\n\t" + "str r7, [sp, #120]\n\t" + "str r6, [sp, #124]\n\t" +#else + "ldr r4, [%[data]]\n\t" + "ldr r5, [%[data], #4]\n\t" + "ldr r6, [%[data], #8]\n\t" + "ldr r7, [%[data], #12]\n\t" + "ldr r8, [%[data], #16]\n\t" + "ldr r9, [%[data], #20]\n\t" + "ldr r10, [%[data], #24]\n\t" + "ldr r11, [%[data], 
#28]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" + "str r5, [sp]\n\t" + "str r4, [sp, #4]\n\t" + "str r7, [sp, #8]\n\t" + "str r6, [sp, #12]\n\t" + "str r9, [sp, #16]\n\t" + "str r8, [sp, #20]\n\t" + "str r11, [sp, #24]\n\t" + "str r10, [sp, #28]\n\t" + "ldr r4, [%[data], #32]\n\t" + "ldr r5, [%[data], #36]\n\t" + "ldr r6, [%[data], #40]\n\t" + "ldr r7, [%[data], #44]\n\t" + "ldr r8, [%[data], #48]\n\t" + "ldr r9, [%[data], #52]\n\t" + "ldr r10, [%[data], #56]\n\t" + "ldr r11, [%[data], #60]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" + "str r5, [sp, #32]\n\t" + "str r4, [sp, #36]\n\t" + "str r7, [sp, #40]\n\t" + "str r6, [sp, #44]\n\t" + "str r9, [sp, #48]\n\t" + "str r8, [sp, #52]\n\t" + "str r11, [sp, #56]\n\t" + "str r10, [sp, #60]\n\t" + "ldr r4, [%[data], #64]\n\t" + "ldr r5, [%[data], #68]\n\t" + "ldr r6, [%[data], #72]\n\t" + "ldr r7, [%[data], #76]\n\t" + "ldr r8, [%[data], #80]\n\t" + "ldr r9, [%[data], #84]\n\t" + "ldr r10, [%[data], #88]\n\t" + "ldr r11, [%[data], #92]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev r11, r11\n\t" + "str r5, [sp, #64]\n\t" + "str r4, [sp, #68]\n\t" + "str r7, [sp, #72]\n\t" + "str r6, [sp, #76]\n\t" + "str r9, [sp, #80]\n\t" + "str r8, [sp, #84]\n\t" + "str r11, [sp, #88]\n\t" + "str r10, [sp, #92]\n\t" + "ldr r4, [%[data], #96]\n\t" + "ldr r5, [%[data], #100]\n\t" + "ldr r6, [%[data], #104]\n\t" + "ldr r7, [%[data], #108]\n\t" + "ldr r8, [%[data], #112]\n\t" + "ldr r9, [%[data], #116]\n\t" + "ldr r10, [%[data], #120]\n\t" + "ldr r11, [%[data], #124]\n\t" + "rev r4, r4\n\t" + "rev r5, r5\n\t" + "rev r6, r6\n\t" + "rev r7, r7\n\t" + "rev r8, r8\n\t" + "rev r9, r9\n\t" + "rev r10, r10\n\t" + "rev 
r11, r11\n\t" + "str r5, [sp, #96]\n\t" + "str r4, [sp, #100]\n\t" + "str r7, [sp, #104]\n\t" + "str r6, [sp, #108]\n\t" + "str r9, [sp, #112]\n\t" + "str r8, [sp, #116]\n\t" + "str r11, [sp, #120]\n\t" + "str r10, [sp, #124]\n\t" +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ + /* Pre-calc: b ^ c */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[sha512], #8]\n\t" + "ldr r11, [%[sha512], #12]\n\t" +#else + "ldrd r10, r11, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r10, r10, r4\n\t" + "eor r11, r11, r5\n\t" + "mov r12, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_SHA512_transform_len_start_%=: \n\t" + /* Round 0 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], 
#32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp]\n\t" + "ldr r9, [sp, #4]\n\t" +#else + "ldrd r8, r9, [sp]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3]\n\t" + "ldr r7, [r3, #4]\n\t" +#else + "ldrd r6, r7, [r3]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #24]\n\t" + "str r9, [%[sha512], #28]\n\t" +#else + "strd r8, r9, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls 
r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else + "strd r6, r7, [%[sha512], #56]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[0] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else + "ldrd r4, r5, [sp, #112]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, 
r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #72]\n\t" + "ldr r9, [sp, #76]\n\t" +#else + "ldrd r8, r9, [sp, #72]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else + "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else + "strd r4, r5, [sp]\n\t" +#endif + /* Round 1 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + 
"orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" +#else + "ldrd r8, r9, [sp, #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #8]\n\t" + "ldr r7, [r3, #12]\n\t" +#else + "ldrd r6, r7, [r3, #8]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" 
+#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #16]\n\t" + "str r9, [%[sha512], #20]\n\t" +#else + "strd r8, r9, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else + "strd r6, r7, [%[sha512], #48]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #120]\n\t" + "ldr r5, [sp, #124]\n\t" +#else + "ldrd r4, r5, [sp, #120]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #80]\n\t" + "ldr r9, [sp, #84]\n\t" +#else + "ldrd r8, r9, [sp, #80]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else + "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else + "ldrd r4, r5, [sp, #16]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + 
"lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else + "strd r4, r5, [sp, #8]\n\t" +#endif + /* Round 2 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, 
[%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else + "ldrd r8, r9, [sp, #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #16]\n\t" + "ldr r7, [r3, #20]\n\t" +#else + "ldrd r6, r7, [r3, #16]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #8]\n\t" + "str r9, [%[sha512], #12]\n\t" +#else + "strd r8, r9, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else + "strd r6, r7, [%[sha512], #40]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[2] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else + 
"ldrd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #92]\n\t" +#else + "ldrd r8, r9, [sp, #88]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else + "strd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #28]\n\t" +#else + "ldrd r4, r5, [sp, #24]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else + "ldrd r4, r5, [sp, #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else + "strd r4, r5, [sp, #16]\n\t" +#endif + /* Round 3 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #24]\n\t" + "ldr r9, [sp, #28]\n\t" +#else + "ldrd r8, r9, [sp, #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #24]\n\t" + "ldr r7, [r3, #28]\n\t" +#else + "ldrd r6, r7, [r3, #24]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], 
#32]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512]]\n\t" + "str r9, [%[sha512], #4]\n\t" +#else + "strd r8, r9, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else + "strd r6, r7, [%[sha512], #32]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #28]\n\t" +#else + "ldrd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #96]\n\t" + "ldr r9, [sp, #100]\n\t" +#else + "ldrd r8, r9, [sp, #96]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #24]\n\t" + "str r5, [sp, #28]\n\t" +#else + "strd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #28]\n\t" +#else + "ldrd r4, r5, 
[sp, #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #24]\n\t" + "str r5, [sp, #28]\n\t" +#else + "strd r4, r5, [sp, #24]\n\t" +#endif + /* Round 4 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, 
[%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #32]\n\t" + "ldr r9, [sp, #36]\n\t" +#else + "ldrd r8, r9, [sp, #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #32]\n\t" + "ldr r7, [r3, #36]\n\t" +#else + "ldrd r6, r7, [r3, #32]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #56]\n\t" + "str r9, [%[sha512], #60]\n\t" +#else + "strd r8, r9, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, 
[%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else + "strd r6, r7, [%[sha512], #24]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else + "ldrd r4, r5, [sp, #16]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #104]\n\t" + "ldr r9, [sp, #108]\n\t" +#else + "ldrd r8, r9, [sp, #104]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, 
r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" +#else + "strd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else + "ldrd r4, r5, [sp, #40]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" +#else + "strd r4, r5, [sp, #32]\n\t" +#endif + /* Round 5 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #40]\n\t" + "ldr r9, [sp, #44]\n\t" +#else + "ldrd r8, r9, [sp, #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #40]\n\t" + "ldr r7, [r3, #44]\n\t" +#else + "ldrd r6, r7, [r3, #40]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #48]\n\t" + "str r9, [%[sha512], #52]\n\t" +#else + "strd r8, r9, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else + "strd r6, r7, [%[sha512], #16]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[5] */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #28]\n\t" +#else + "ldrd r4, r5, [sp, #24]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else + "ldrd r4, r5, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #116]\n\t" +#else + "ldrd r8, r9, [sp, #112]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #40]\n\t" + "str r5, [sp, #44]\n\t" +#else + "strd r4, r5, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" +#else + "ldrd r4, r5, [sp, #48]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else + "ldrd r4, r5, [sp, #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #40]\n\t" + "str r5, [sp, #44]\n\t" +#else + "strd r4, r5, [sp, #40]\n\t" +#endif + /* Round 
6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #48]\n\t" + "ldr r9, [sp, #52]\n\t" +#else + "ldrd r8, r9, [sp, #48]\n\t" +#endif + "adds 
r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #48]\n\t" + "ldr r7, [r3, #52]\n\t" +#else + "ldrd r6, r7, [r3, #48]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #40]\n\t" + "str r9, [%[sha512], #44]\n\t" +#else + "strd r8, r9, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd 
r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else + "strd r6, r7, [%[sha512], #8]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[6] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" +#else + "ldrd r4, r5, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #120]\n\t" + "ldr r9, [sp, #124]\n\t" +#else + "ldrd r8, r9, [sp, #120]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" +#else + "strd r4, r5, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" +#else + "ldrd r4, r5, [sp, #56]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" +#else + "ldrd r4, r5, [sp, #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #48]\n\t" + "str r5, [sp, #52]\n\t" +#else + "strd r4, r5, [sp, #48]\n\t" +#endif + /* Round 7 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], 
#40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #56]\n\t" + "ldr r9, [sp, #60]\n\t" +#else + "ldrd r8, r9, [sp, #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #56]\n\t" + "ldr r7, [r3, #60]\n\t" +#else + "ldrd r6, r7, [r3, #56]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #32]\n\t" + "str r9, [%[sha512], #36]\n\t" +#else + "strd r8, r9, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" 
+ "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else + "strd r6, r7, [%[sha512]]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[7] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else + "ldrd r4, r5, [sp, #40]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, 
r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" +#else + "ldrd r4, r5, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp]\n\t" + "ldr r9, [sp, #4]\n\t" +#else + "ldrd r8, r9, [sp]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #56]\n\t" + "str r5, [sp, #60]\n\t" +#else + "strd r4, r5, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else + "ldrd r4, r5, [sp, #64]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" +#else + "ldrd r4, r5, [sp, #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #56]\n\t" + "str r5, [sp, #60]\n\t" +#else + "strd r4, r5, [sp, #56]\n\t" +#endif + /* Round 8 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl 
#14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #64]\n\t" + "ldr r9, [sp, #68]\n\t" +#else + "ldrd r8, r9, [sp, #64]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #64]\n\t" + "ldr r7, [r3, #68]\n\t" +#else + "ldrd r6, r7, [r3, #64]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, 
[%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #24]\n\t" + "str r9, [%[sha512], #28]\n\t" +#else + "strd r8, r9, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else + "strd r6, r7, [%[sha512], #56]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[8] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" +#else + "ldrd r4, r5, [sp, #48]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else + "ldrd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" +#else + "ldrd r8, r9, [sp, #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else + "strd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else + "ldrd r4, r5, [sp, #72]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + 
"lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else + "ldrd r4, r5, [sp, #64]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else + "strd r4, r5, [sp, #64]\n\t" +#endif + /* Round 9 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, 
r9, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #72]\n\t" + "ldr r9, [sp, #76]\n\t" +#else + "ldrd r8, r9, [sp, #72]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #72]\n\t" + "ldr r7, [r3, #76]\n\t" +#else + "ldrd r6, r7, [r3, #72]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #16]\n\t" + "str r9, [%[sha512], #20]\n\t" +#else + "strd r8, r9, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else + "strd r6, r7, [%[sha512], #48]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[9] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" +#else + "ldrd r4, r5, [sp, #56]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else + 
"ldrd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else + "ldrd r8, r9, [sp, #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #72]\n\t" + "str r5, [sp, #76]\n\t" +#else + "strd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, #84]\n\t" +#else + "ldrd r4, r5, [sp, #80]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else + "ldrd r4, r5, [sp, #72]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #72]\n\t" + "str r5, [sp, #76]\n\t" +#else + "strd r4, r5, [sp, #72]\n\t" +#endif + /* Round 10 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #80]\n\t" + "ldr r9, [sp, #84]\n\t" +#else + "ldrd r8, r9, [sp, #80]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #80]\n\t" + "ldr r7, [r3, #84]\n\t" +#else + "ldrd r6, r7, [r3, #80]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, 
[%[sha512], #40]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #8]\n\t" + "str r9, [%[sha512], #12]\n\t" +#else + "strd r8, r9, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else + "strd r6, r7, [%[sha512], #40]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[10] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else + "ldrd r4, r5, [sp, #64]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, #84]\n\t" +#else + "ldrd r4, r5, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #24]\n\t" + "ldr r9, [sp, #28]\n\t" +#else + "ldrd r8, r9, [sp, #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #80]\n\t" + "str r5, [sp, #84]\n\t" +#else + "strd r4, r5, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #88]\n\t" + "ldr r5, [sp, #92]\n\t" +#else + "ldrd r4, r5, [sp, #88]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, 
#84]\n\t" +#else + "ldrd r4, r5, [sp, #80]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #80]\n\t" + "str r5, [sp, #84]\n\t" +#else + "strd r4, r5, [sp, #80]\n\t" +#endif + /* Round 11 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #92]\n\t" +#else + "ldrd r8, r9, [sp, #88]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #88]\n\t" + "ldr r7, [r3, #92]\n\t" +#else + "ldrd r6, r7, [r3, #88]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512]]\n\t" + "str r9, [%[sha512], #4]\n\t" +#else + "strd r8, r9, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else + "strd r6, r7, [%[sha512], #32]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[11] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else + "ldrd r4, r5, [sp, #72]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #88]\n\t" + "ldr r5, [sp, #92]\n\t" +#else + "ldrd r4, r5, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #32]\n\t" + "ldr r9, [sp, #36]\n\t" +#else + "ldrd r8, r9, [sp, #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc 
r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #88]\n\t" + "str r5, [sp, #92]\n\t" +#else + "strd r4, r5, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else + "ldrd r4, r5, [sp, #96]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #88]\n\t" + "ldr r5, [sp, #92]\n\t" +#else + "ldrd r4, r5, [sp, #88]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #88]\n\t" + "str r5, [sp, #92]\n\t" +#else + "strd r4, r5, [sp, #88]\n\t" +#endif + /* Round 12 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #96]\n\t" + "ldr r9, [sp, #100]\n\t" +#else + "ldrd r8, r9, [sp, #96]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #96]\n\t" + "ldr r7, [r3, #100]\n\t" +#else + "ldrd r6, r7, [r3, #96]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #56]\n\t" + "str r9, [%[sha512], #60]\n\t" +#else + "strd r8, r9, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else + "strd r6, r7, [%[sha512], #24]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[12] */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, #84]\n\t" +#else + "ldrd r4, r5, [sp, #80]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else + "ldrd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #40]\n\t" + "ldr r9, [sp, #44]\n\t" +#else + "ldrd r8, r9, [sp, #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #96]\n\t" + "str r5, [sp, #100]\n\t" +#else + "strd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #104]\n\t" + "ldr r5, [sp, #108]\n\t" +#else + "ldrd r4, r5, [sp, #104]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else + "ldrd r4, r5, [sp, #96]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #96]\n\t" + "str r5, [sp, #100]\n\t" +#else + "strd r4, r5, [sp, #96]\n\t" +#endif + /* 
Round 13 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #104]\n\t" + "ldr r9, [sp, #108]\n\t" +#else + "ldrd r8, r9, [sp, #104]\n\t" 
+#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #104]\n\t" + "ldr r7, [r3, #108]\n\t" +#else + "ldrd r6, r7, [r3, #104]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #48]\n\t" + "str r9, [%[sha512], #52]\n\t" +#else + "strd r8, r9, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], 
#36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else + "strd r6, r7, [%[sha512], #16]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[13] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #88]\n\t" + "ldr r5, [sp, #92]\n\t" +#else + "ldrd r4, r5, [sp, #88]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #104]\n\t" + "ldr r5, [sp, #108]\n\t" +#else + "ldrd r4, r5, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #48]\n\t" + "ldr r9, [sp, #52]\n\t" +#else + "ldrd r8, r9, [sp, #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #104]\n\t" + "str r5, [sp, #108]\n\t" +#else + "strd r4, r5, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else + "ldrd r4, r5, [sp, #112]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #104]\n\t" + "ldr r5, [sp, #108]\n\t" +#else + "ldrd r4, r5, [sp, #104]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #104]\n\t" + "str r5, [sp, #108]\n\t" +#else + "strd r4, r5, [sp, #104]\n\t" +#endif + /* Round 14 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + 
"ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #116]\n\t" +#else + "ldrd r8, r9, [sp, #112]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #112]\n\t" + "ldr r7, [r3, #116]\n\t" +#else + "ldrd r6, r7, [r3, #112]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #40]\n\t" + "str r9, [%[sha512], #44]\n\t" +#else + "strd r8, r9, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr 
r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else + "strd r6, r7, [%[sha512], #8]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[14] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else + "ldrd r4, r5, [sp, #96]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + 
"lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else + "ldrd r4, r5, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #56]\n\t" + "ldr r9, [sp, #60]\n\t" +#else + "ldrd r8, r9, [sp, #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #112]\n\t" + "str r5, [sp, #116]\n\t" +#else + "strd r4, r5, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #120]\n\t" + "ldr r5, [sp, #124]\n\t" +#else + "ldrd r4, r5, [sp, #120]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + "orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else + "ldrd r4, r5, [sp, #112]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #112]\n\t" + "str r5, [sp, #116]\n\t" +#else + "strd r4, r5, [sp, #112]\n\t" +#endif + /* Round 15 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, 
#14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #120]\n\t" + "ldr r9, [sp, #124]\n\t" +#else + "ldrd r8, r9, [sp, #120]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #120]\n\t" + "ldr r7, [r3, #124]\n\t" +#else + "ldrd r6, r7, [r3, #120]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, 
r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #32]\n\t" + "str r9, [%[sha512], #36]\n\t" +#else + "strd r8, r9, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + 
"and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else + "strd r6, r7, [%[sha512]]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Calc new W[15] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #104]\n\t" + "ldr r5, [sp, #108]\n\t" +#else + "ldrd r4, r5, [sp, #104]\n\t" +#endif + "lsrs r6, r4, #19\n\t" + "lsrs r7, r5, #19\n\t" + "orr r7, r7, r4, lsl #13\n\t" + "orr r6, r6, r5, lsl #13\n\t" + "lsls r8, r4, #3\n\t" + "lsls r9, r5, #3\n\t" + "orr r9, r9, r4, lsr #29\n\t" + "orr r8, r8, r5, lsr #29\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #6\n\t" + "lsrs r9, r5, #6\n\t" + "orr r8, r8, r5, lsl #26\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #120]\n\t" + "ldr r5, [sp, #124]\n\t" +#else + "ldrd r4, r5, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #64]\n\t" + "ldr r9, [sp, #68]\n\t" +#else + "ldrd r8, r9, [sp, #64]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #120]\n\t" + "str r5, [sp, #124]\n\t" +#else + "strd r4, r5, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif + "lsrs r6, r4, #1\n\t" + "lsrs r7, r5, #1\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r6, r6, r5, lsl #31\n\t" + "lsrs r8, r4, #8\n\t" + "lsrs r9, r5, #8\n\t" + "orr r9, r9, r4, lsl #24\n\t" + 
"orr r8, r8, r5, lsl #24\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" + "lsrs r8, r4, #7\n\t" + "lsrs r9, r5, #7\n\t" + "orr r8, r8, r5, lsl #25\n\t" + "eor r7, r7, r9\n\t" + "eor r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #120]\n\t" + "ldr r5, [sp, #124]\n\t" +#else + "ldrd r4, r5, [sp, #120]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #120]\n\t" + "str r5, [sp, #124]\n\t" +#else + "strd r4, r5, [sp, #120]\n\t" +#endif + "add r3, r3, #0x80\n\t" + "subs r12, r12, #1\n\t" + "bne L_SHA512_transform_len_start_%=\n\t" + /* Round 0 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd 
r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp]\n\t" + "ldr r9, [sp, #4]\n\t" +#else + "ldrd r8, r9, [sp]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3]\n\t" + "ldr r7, [r3, #4]\n\t" +#else + "ldrd r6, r7, [r3]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #24]\n\t" + "str r9, [%[sha512], #28]\n\t" +#else + "strd r8, r9, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls 
r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else + "strd r6, r7, [%[sha512], #56]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 1 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + 
"orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" +#else + "ldrd r8, r9, [sp, #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #8]\n\t" + "ldr r7, [r3, #12]\n\t" +#else + "ldrd r6, r7, [r3, #8]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #16]\n\t" + "str r9, [%[sha512], #20]\n\t" +#else + "strd r8, r9, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" 
+#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else + "strd r6, r7, [%[sha512], #48]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 2 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else + "ldrd r8, r9, [sp, #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #16]\n\t" + "ldr r7, [r3, #20]\n\t" +#else + "ldrd r6, r7, [r3, #16]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #8]\n\t" + "str r9, [%[sha512], #12]\n\t" +#else + "strd r8, r9, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else + "strd r6, r7, [%[sha512], #40]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 3 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #24]\n\t" + "ldr r9, [sp, #28]\n\t" +#else + "ldrd r8, r9, [sp, #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #24]\n\t" + "ldr r7, [r3, #28]\n\t" +#else + "ldrd r6, r7, [r3, #24]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512]]\n\t" + "str r9, [%[sha512], #4]\n\t" +#else + "strd r8, r9, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else + "strd r6, r7, [%[sha512], #32]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 4 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH 
< 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #32]\n\t" + "ldr r9, [sp, #36]\n\t" +#else + "ldrd r8, r9, [sp, #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #32]\n\t" + "ldr r7, [r3, #36]\n\t" +#else + "ldrd r6, r7, [r3, #32]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #56]\n\t" + "str r9, [%[sha512], #60]\n\t" +#else + "strd r8, r9, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else + "strd r6, r7, [%[sha512], #24]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 5 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #40]\n\t" + "ldr r9, [sp, #44]\n\t" +#else + "ldrd r8, r9, [sp, #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #40]\n\t" + "ldr r7, [r3, #44]\n\t" +#else + "ldrd r6, r7, [r3, #40]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #48]\n\t" + "str r9, [%[sha512], #52]\n\t" +#else + "strd r8, r9, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, 
r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else + "strd r6, r7, [%[sha512], #16]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl 
#14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #48]\n\t" + "ldr r9, [sp, #52]\n\t" +#else + "ldrd r8, r9, [sp, #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #48]\n\t" + "ldr r7, [r3, #52]\n\t" +#else + "ldrd r6, r7, [r3, #48]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], 
#40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #40]\n\t" + "str r9, [%[sha512], #44]\n\t" +#else + "strd r8, r9, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else + "strd r6, r7, [%[sha512], #8]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 7 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, 
r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #56]\n\t" + "ldr r9, [sp, #60]\n\t" +#else + "ldrd r8, r9, [sp, #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #56]\n\t" + "ldr r7, [r3, #60]\n\t" +#else + "ldrd r6, r7, [r3, #56]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #32]\n\t" + "str r9, [%[sha512], #36]\n\t" +#else + "strd r8, r9, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, 
r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else + "strd r6, r7, [%[sha512]]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 8 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, 
r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #64]\n\t" + "ldr r9, [sp, #68]\n\t" +#else + "ldrd r8, r9, [sp, #64]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #64]\n\t" + "ldr r7, [r3, #68]\n\t" +#else + "ldrd r6, r7, [r3, #64]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else 
+ "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #24]\n\t" + "str r9, [%[sha512], #28]\n\t" +#else + "strd r8, r9, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else + "strd r4, r5, [%[sha512], #56]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else + "strd r6, r7, [%[sha512], #56]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 9 */ +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #72]\n\t" + "ldr r9, [sp, #76]\n\t" +#else + "ldrd r8, r9, [sp, #72]\n\t" +#endif 
+ "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #72]\n\t" + "ldr r7, [r3, #76]\n\t" +#else + "ldrd r6, r7, [r3, #72]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #16]\n\t" + "str r9, [%[sha512], #20]\n\t" +#else + "strd r8, r9, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + 
"ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else + "strd r6, r7, [%[sha512], #48]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 10 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" 
+#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #80]\n\t" + "ldr r9, [sp, #84]\n\t" +#else + "ldrd r8, r9, [sp, #80]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #80]\n\t" + "ldr r7, [r3, #84]\n\t" +#else + "ldrd r6, r7, [r3, #80]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #8]\n\t" + "str r9, [%[sha512], #12]\n\t" +#else + "strd r8, r9, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, 
#30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else + "strd r4, r5, [%[sha512], #40]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else + "strd r6, r7, [%[sha512], #40]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 11 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr 
r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #92]\n\t" +#else + "ldrd r8, r9, [sp, #88]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #88]\n\t" + "ldr r7, [r3, #92]\n\t" +#else + "ldrd r6, r7, [r3, #88]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, 
[%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512]]\n\t" + "str r9, [%[sha512], #4]\n\t" +#else + "strd r8, r9, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, 
r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else + "strd r6, r7, [%[sha512], #32]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 12 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" 
+ "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #96]\n\t" + "ldr r9, [sp, #100]\n\t" +#else + "ldrd r8, r9, [sp, #96]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #96]\n\t" + "ldr r7, [r3, #100]\n\t" +#else + "ldrd r6, r7, [r3, #96]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #56]\n\t" + "str r9, [%[sha512], #60]\n\t" +#else + "strd r8, r9, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" 
+#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else + "strd r4, r5, [%[sha512], #24]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else + "strd r6, r7, [%[sha512], #24]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 13 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], 
#16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else + "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #104]\n\t" + "ldr r9, [sp, #108]\n\t" +#else + "ldrd r8, r9, [sp, #104]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #104]\n\t" + "ldr r7, [r3, #108]\n\t" +#else + "ldrd r6, r7, [r3, #104]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #48]\n\t" + "ldr r9, [%[sha512], #52]\n\t" +#else + "ldrd r8, r9, [%[sha512], #48]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else + "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #48]\n\t" + "str r9, [%[sha512], #52]\n\t" +#else + "strd r8, r9, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else + "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else + "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, 
[%[sha512], #20]\n\t" +#else + "strd r6, r7, [%[sha512], #16]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 14 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512]]\n\t" + "ldr r9, [%[sha512], #4]\n\t" +#else + "ldrd r8, r9, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #116]\n\t" +#else + "ldrd r8, r9, [sp, #112]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #112]\n\t" + "ldr r7, [r3, #116]\n\t" +#else + "ldrd r6, r7, [r3, #112]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #40]\n\t" + "ldr r9, [%[sha512], #44]\n\t" +#else + "ldrd r8, r9, [%[sha512], #40]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #40]\n\t" + "str r9, [%[sha512], #44]\n\t" +#else + "strd r8, r9, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #16]\n\t" + "ldr r9, [%[sha512], #20]\n\t" +#else + "ldrd r8, r9, [%[sha512], #16]\n\t" +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else + "strd r4, r5, [%[sha512], #8]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else + "strd r6, r7, [%[sha512], #8]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Round 15 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif + "lsrs r6, r4, #14\n\t" + "lsrs r7, r5, #14\n\t" + "orr r7, r7, r4, lsl #18\n\t" + "orr r6, r6, r5, lsl #18\n\t" + "lsrs r8, r4, #18\n\t" + "lsrs r9, r5, #18\n\t" + "orr r9, r9, r4, lsl #14\n\t" + "orr r8, r8, r5, lsl #14\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #23\n\t" + "lsls r9, r5, #23\n\t" + "orr r9, r9, r4, lsr #9\n\t" + "orr r8, r8, r5, lsr #9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr 
r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else + "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else + "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else + "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "and r6, r6, r4\n\t" + "and r7, r7, r5\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #120]\n\t" + "ldr r9, [sp, #124]\n\t" +#else + "ldrd r8, r9, [sp, #120]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [r3, #120]\n\t" + "ldr r7, [r3, #124]\n\t" +#else + "ldrd r6, r7, [r3, #120]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #32]\n\t" + "ldr r9, [%[sha512], #36]\n\t" +#else + "ldrd r8, r9, [%[sha512], #32]\n\t" +#endif + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "adds r8, r8, r4\n\t" + "adc r9, r9, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else + "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[sha512], #32]\n\t" + "str r9, [%[sha512], #36]\n\t" +#else + "strd r8, r9, [%[sha512], #32]\n\t" +#endif + "lsrs r6, r4, #28\n\t" + "lsrs r7, 
r5, #28\n\t" + "orr r7, r7, r4, lsl #4\n\t" + "orr r6, r6, r5, lsl #4\n\t" + "lsls r8, r4, #30\n\t" + "lsls r9, r5, #30\n\t" + "orr r9, r9, r4, lsr #2\n\t" + "orr r8, r8, r5, lsr #2\n\t" + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "lsls r8, r4, #25\n\t" + "lsls r9, r5, #25\n\t" + "orr r9, r9, r4, lsr #7\n\t" + "orr r8, r8, r5, lsr #7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif + "eor r6, r6, r8\n\t" + "eor r7, r7, r9\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else + "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else + "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif + "eor r8, r8, r6\n\t" + "eor r9, r9, r7\n\t" + "and r10, r10, r8\n\t" + "and r11, r11, r9\n\t" + "eor r10, r10, r6\n\t" + "eor r11, r11, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else + "ldrd r6, r7, [%[sha512]]\n\t" +#endif + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else + "strd r6, r7, [%[sha512]]\n\t" +#endif + "mov r10, r8\n\t" + "mov r11, r9\n\t" + /* Add in digest from start */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else + "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], 
#12]\n\t" +#else + "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #128]\n\t" + "ldr r9, [sp, #132]\n\t" +#else + "ldrd r8, r9, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #136]\n\t" + "ldr r11, [sp, #140]\n\t" +#else + "ldrd r10, r11, [sp, #136]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else + "strd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else + "strd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #128]\n\t" + "str r5, [sp, #132]\n\t" +#else + "strd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #136]\n\t" + "str r7, [sp, #140]\n\t" +#else + "strd r6, r7, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else + "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else + "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #144]\n\t" + "ldr r9, [sp, #148]\n\t" +#else + "ldrd r8, r9, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #152]\n\t" + "ldr r11, [sp, #156]\n\t" +#else + "ldrd r10, r11, [sp, #152]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str 
r5, [%[sha512], #20]\n\t" +#else + "strd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else + "strd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #144]\n\t" + "str r5, [sp, #148]\n\t" +#else + "strd r4, r5, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #152]\n\t" + "str r7, [sp, #156]\n\t" +#else + "strd r6, r7, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else + "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else + "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #160]\n\t" + "ldr r9, [sp, #164]\n\t" +#else + "ldrd r8, r9, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #168]\n\t" + "ldr r11, [sp, #172]\n\t" +#else + "ldrd r10, r11, [sp, #168]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else + "strd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else + "strd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #160]\n\t" + "str r5, [sp, #164]\n\t" +#else + "strd r4, r5, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #168]\n\t" + "str r7, [sp, #172]\n\t" +#else + "strd r6, r7, [sp, #168]\n\t" 
+#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else + "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else + "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #176]\n\t" + "ldr r9, [sp, #180]\n\t" +#else + "ldrd r8, r9, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #184]\n\t" + "ldr r11, [sp, #188]\n\t" +#else + "ldrd r10, r11, [sp, #184]\n\t" +#endif + "adds r4, r4, r8\n\t" + "adc r5, r5, r9\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else + "strd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else + "strd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #176]\n\t" + "str r5, [sp, #180]\n\t" +#else + "strd r4, r5, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #184]\n\t" + "str r7, [sp, #188]\n\t" +#else + "strd r6, r7, [sp, #184]\n\t" +#endif + "subs %[len], %[len], #0x80\n\t" + "sub r3, r3, #0x200\n\t" + "add %[data], %[data], #0x80\n\t" + "bne L_SHA512_transform_len_begin_%=\n\t" + "eor r0, r0, r0\n\t" + "add sp, sp, #0xc0\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#include + +#ifndef WOLFSSL_ARMASM_NO_NEON +static const uint64_t L_SHA512_transform_neon_len_k[] = { + 
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, +}; + +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); +void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 
len_p) +{ + register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint64_t* L_SHA512_transform_neon_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_neon_len_k; + + __asm__ __volatile__ ( + /* Load digest into working vars */ + "vldm.64 %[sha512], {d0-d7}\n\t" + /* Start of loop processing a block */ + "\n" + "L_SHA512_transform_neon_len_begin_%=: \n\t" + /* Load W */ + "vld1.8 {q8-q9}, [%[data]]!\n\t" + "vld1.8 {q10-q11}, [%[data]]!\n\t" + "vld1.8 {q12-q13}, [%[data]]!\n\t" + "vld1.8 {q14-q15}, [%[data]]!\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + "vrev64.8 q8, q8\n\t" + "vrev64.8 q9, q9\n\t" + "vrev64.8 q10, q10\n\t" + "vrev64.8 q11, q11\n\t" + "vrev64.8 q12, q12\n\t" + "vrev64.8 q13, q13\n\t" + "vrev64.8 q14, q14\n\t" + "vrev64.8 q15, q15\n\t" +#else + "vrev64.8 d16, d16\n\t" + "vrev64.8 d17, d17\n\t" + "vrev64.8 d18, d18\n\t" + "vrev64.8 d19, d19\n\t" + "vrev64.8 d20, d20\n\t" + "vrev64.8 d21, d21\n\t" + "vrev64.8 d22, d22\n\t" + "vrev64.8 d23, d23\n\t" + "vrev64.8 d24, d24\n\t" + "vrev64.8 d25, d25\n\t" + "vrev64.8 d26, d26\n\t" + "vrev64.8 d27, d27\n\t" + "vrev64.8 d28, d28\n\t" + "vrev64.8 d29, d29\n\t" + "vrev64.8 d30, d30\n\t" + "vrev64.8 d31, d31\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + "mov r12, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_SHA512_transform_neon_len_start_%=: \n\t" + /* Round 0 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d16\n\t" + 
"vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 1 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d17\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[0]-W[1] */ + "vext.8 q6, q8, q9, #8\n\t" + "vshl.u64 q4, q15, #45\n\t" + "vsri.u64 q4, q15, #19\n\t" + "vshl.u64 q5, q15, #3\n\t" + "vsri.u64 q5, q15, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q15, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q8, q5\n\t" + "vext.8 q7, q12, q13, #8\n\t" + "vadd.i64 q8, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q8, q5\n\t" +#else + /* Calc new W[0]-W[1] */ + "vmov d12, d17\n\t" + "vmov d13, d18\n\t" + "vshl.u64 d8, d30, #45\n\t" + "vshl.u64 d9, d31, #45\n\t" + "vsri.u64 d8, d30, #19\n\t" + "vsri.u64 d9, d31, #19\n\t" + "vshl.u64 d10, d30, #3\n\t" + "vshl.u64 d11, d31, #3\n\t" + "vsri.u64 d10, d30, #61\n\t" + "vsri.u64 d11, d31, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d30, #6\n\t" + "vshr.u64 d9, d31, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + 
"vadd.i64 d16, d10\n\t" + "vadd.i64 d17, d11\n\t" + "vmov d14, d25\n\t" + "vmov d15, d26\n\t" + "vadd.i64 d16, d14\n\t" + "vadd.i64 d17, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d16, d10\n\t" + "vadd.i64 d17, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 2 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d18\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 3 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d19\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, 
d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[2]-W[3] */ + "vext.8 q6, q9, q10, #8\n\t" + "vshl.u64 q4, q8, #45\n\t" + "vsri.u64 q4, q8, #19\n\t" + "vshl.u64 q5, q8, #3\n\t" + "vsri.u64 q5, q8, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q8, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q9, q5\n\t" + "vext.8 q7, q13, q14, #8\n\t" + "vadd.i64 q9, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q9, q5\n\t" +#else + /* Calc new W[2]-W[3] */ + "vmov d12, d19\n\t" + "vmov d13, d20\n\t" + "vshl.u64 d8, d16, #45\n\t" + "vshl.u64 d9, d17, #45\n\t" + "vsri.u64 d8, d16, #19\n\t" + "vsri.u64 d9, d17, #19\n\t" + "vshl.u64 d10, d16, #3\n\t" + "vshl.u64 d11, d17, #3\n\t" + "vsri.u64 d10, d16, #61\n\t" + "vsri.u64 d11, d17, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d16, #6\n\t" + "vshr.u64 d9, d17, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d18, d10\n\t" + "vadd.i64 d19, d11\n\t" + "vmov d14, d27\n\t" + "vmov d15, d28\n\t" + "vadd.i64 d18, d14\n\t" + "vadd.i64 d19, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d18, d10\n\t" + "vadd.i64 d19, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 4 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + 
"vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d20\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 5 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d21\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[4]-W[5] */ + "vext.8 q6, q10, q11, #8\n\t" + "vshl.u64 q4, q9, #45\n\t" + "vsri.u64 q4, q9, #19\n\t" + "vshl.u64 q5, q9, #3\n\t" + "vsri.u64 q5, q9, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q9, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q10, q5\n\t" + "vext.8 q7, q14, q15, #8\n\t" + "vadd.i64 q10, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q10, q5\n\t" +#else + /* Calc new W[4]-W[5] */ + "vmov d12, d21\n\t" + "vmov d13, d22\n\t" + "vshl.u64 d8, d18, #45\n\t" + "vshl.u64 d9, d19, #45\n\t" + "vsri.u64 
d8, d18, #19\n\t" + "vsri.u64 d9, d19, #19\n\t" + "vshl.u64 d10, d18, #3\n\t" + "vshl.u64 d11, d19, #3\n\t" + "vsri.u64 d10, d18, #61\n\t" + "vsri.u64 d11, d19, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d18, #6\n\t" + "vshr.u64 d9, d19, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d20, d10\n\t" + "vadd.i64 d21, d11\n\t" + "vmov d14, d29\n\t" + "vmov d15, d30\n\t" + "vadd.i64 d20, d14\n\t" + "vadd.i64 d21, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d20, d10\n\t" + "vadd.i64 d21, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 6 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d22\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 7 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor 
d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d23\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[6]-W[7] */ + "vext.8 q6, q11, q12, #8\n\t" + "vshl.u64 q4, q10, #45\n\t" + "vsri.u64 q4, q10, #19\n\t" + "vshl.u64 q5, q10, #3\n\t" + "vsri.u64 q5, q10, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q10, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q11, q5\n\t" + "vext.8 q7, q15, q8, #8\n\t" + "vadd.i64 q11, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q11, q5\n\t" +#else + /* Calc new W[6]-W[7] */ + "vmov d12, d23\n\t" + "vmov d13, d24\n\t" + "vshl.u64 d8, d20, #45\n\t" + "vshl.u64 d9, d21, #45\n\t" + "vsri.u64 d8, d20, #19\n\t" + "vsri.u64 d9, d21, #19\n\t" + "vshl.u64 d10, d20, #3\n\t" + "vshl.u64 d11, d21, #3\n\t" + "vsri.u64 d10, d20, #61\n\t" + "vsri.u64 d11, d21, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d20, #6\n\t" + "vshr.u64 d9, d21, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d22, d10\n\t" + "vadd.i64 d23, d11\n\t" + "vmov d14, d31\n\t" + "vmov d15, d16\n\t" + "vadd.i64 d22, d14\n\t" + "vadd.i64 d23, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + 
"vadd.i64 d22, d10\n\t" + "vadd.i64 d23, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 8 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d24\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 9 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d25\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[8]-W[9] */ + "vext.8 q6, q12, q13, #8\n\t" + "vshl.u64 q4, q11, #45\n\t" + "vsri.u64 q4, q11, #19\n\t" + "vshl.u64 q5, q11, #3\n\t" + "vsri.u64 q5, q11, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q11, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q12, q5\n\t" + "vext.8 q7, q8, q9, #8\n\t" + "vadd.i64 q12, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + 
"vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q12, q5\n\t" +#else + /* Calc new W[8]-W[9] */ + "vmov d12, d25\n\t" + "vmov d13, d26\n\t" + "vshl.u64 d8, d22, #45\n\t" + "vshl.u64 d9, d23, #45\n\t" + "vsri.u64 d8, d22, #19\n\t" + "vsri.u64 d9, d23, #19\n\t" + "vshl.u64 d10, d22, #3\n\t" + "vshl.u64 d11, d23, #3\n\t" + "vsri.u64 d10, d22, #61\n\t" + "vsri.u64 d11, d23, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d22, #6\n\t" + "vshr.u64 d9, d23, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d24, d10\n\t" + "vadd.i64 d25, d11\n\t" + "vmov d14, d17\n\t" + "vmov d15, d18\n\t" + "vadd.i64 d24, d14\n\t" + "vadd.i64 d25, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d24, d10\n\t" + "vadd.i64 d25, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 10 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d26\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, 
d10\n\t" + /* Round 11 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d27\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[10]-W[11] */ + "vext.8 q6, q13, q14, #8\n\t" + "vshl.u64 q4, q12, #45\n\t" + "vsri.u64 q4, q12, #19\n\t" + "vshl.u64 q5, q12, #3\n\t" + "vsri.u64 q5, q12, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q12, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q13, q5\n\t" + "vext.8 q7, q9, q10, #8\n\t" + "vadd.i64 q13, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q13, q5\n\t" +#else + /* Calc new W[10]-W[11] */ + "vmov d12, d27\n\t" + "vmov d13, d28\n\t" + "vshl.u64 d8, d24, #45\n\t" + "vshl.u64 d9, d25, #45\n\t" + "vsri.u64 d8, d24, #19\n\t" + "vsri.u64 d9, d25, #19\n\t" + "vshl.u64 d10, d24, #3\n\t" + "vshl.u64 d11, d25, #3\n\t" + "vsri.u64 d10, d24, #61\n\t" + "vsri.u64 d11, d25, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d24, #6\n\t" + "vshr.u64 d9, d25, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d26, d10\n\t" + "vadd.i64 d27, d11\n\t" + "vmov d14, d19\n\t" + "vmov d15, d20\n\t" + "vadd.i64 d26, d14\n\t" + "vadd.i64 d27, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, 
#63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d26, d10\n\t" + "vadd.i64 d27, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 12 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d28\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 13 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d29\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[12]-W[13] */ + 
"vext.8 q6, q14, q15, #8\n\t" + "vshl.u64 q4, q13, #45\n\t" + "vsri.u64 q4, q13, #19\n\t" + "vshl.u64 q5, q13, #3\n\t" + "vsri.u64 q5, q13, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q13, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q14, q5\n\t" + "vext.8 q7, q10, q11, #8\n\t" + "vadd.i64 q14, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q14, q5\n\t" +#else + /* Calc new W[12]-W[13] */ + "vmov d12, d29\n\t" + "vmov d13, d30\n\t" + "vshl.u64 d8, d26, #45\n\t" + "vshl.u64 d9, d27, #45\n\t" + "vsri.u64 d8, d26, #19\n\t" + "vsri.u64 d9, d27, #19\n\t" + "vshl.u64 d10, d26, #3\n\t" + "vshl.u64 d11, d27, #3\n\t" + "vsri.u64 d10, d26, #61\n\t" + "vsri.u64 d11, d27, #61\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d26, #6\n\t" + "vshr.u64 d9, d27, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d28, d10\n\t" + "vadd.i64 d29, d11\n\t" + "vmov d14, d21\n\t" + "vmov d15, d22\n\t" + "vadd.i64 d28, d14\n\t" + "vadd.i64 d29, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d28, d10\n\t" + "vadd.i64 d29, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + /* Round 14 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, 
#25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d30\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 15 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d31\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + /* Calc new W[14]-W[15] */ + "vext.8 q6, q15, q8, #8\n\t" + "vshl.u64 q4, q14, #45\n\t" + "vsri.u64 q4, q14, #19\n\t" + "vshl.u64 q5, q14, #3\n\t" + "vsri.u64 q5, q14, #61\n\t" + "veor q5, q4\n\t" + "vshr.u64 q4, q14, #6\n\t" + "veor q5, q4\n\t" + "vadd.i64 q15, q5\n\t" + "vext.8 q7, q11, q12, #8\n\t" + "vadd.i64 q15, q7\n\t" + "vshl.u64 q4, q6, #63\n\t" + "vsri.u64 q4, q6, #1\n\t" + "vshl.u64 q5, q6, #56\n\t" + "vsri.u64 q5, q6, #8\n\t" + "veor q5, q4\n\t" + "vshr.u64 q6, #7\n\t" + "veor q5, q6\n\t" + "vadd.i64 q15, q5\n\t" +#else + /* Calc new W[14]-W[15] */ + "vmov d12, d31\n\t" + "vmov d13, d16\n\t" + "vshl.u64 d8, d28, #45\n\t" + "vshl.u64 d9, d29, #45\n\t" + "vsri.u64 d8, d28, #19\n\t" + "vsri.u64 d9, d29, #19\n\t" + "vshl.u64 d10, d28, #3\n\t" + "vshl.u64 d11, d29, #3\n\t" + "vsri.u64 d10, d28, #61\n\t" + "vsri.u64 d11, d29, #61\n\t" + 
"veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d8, d28, #6\n\t" + "vshr.u64 d9, d29, #6\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vadd.i64 d30, d10\n\t" + "vadd.i64 d31, d11\n\t" + "vmov d14, d23\n\t" + "vmov d15, d24\n\t" + "vadd.i64 d30, d14\n\t" + "vadd.i64 d31, d15\n\t" + "vshl.u64 d8, d12, #63\n\t" + "vshl.u64 d9, d13, #63\n\t" + "vsri.u64 d8, d12, #1\n\t" + "vsri.u64 d9, d13, #1\n\t" + "vshl.u64 d10, d12, #56\n\t" + "vshl.u64 d11, d13, #56\n\t" + "vsri.u64 d10, d12, #8\n\t" + "vsri.u64 d11, d13, #8\n\t" + "veor d10, d8\n\t" + "veor d11, d9\n\t" + "vshr.u64 d12, #7\n\t" + "vshr.u64 d13, #7\n\t" + "veor d10, d12\n\t" + "veor d11, d13\n\t" + "vadd.i64 d30, d10\n\t" + "vadd.i64 d31, d11\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + "subs r12, r12, #1\n\t" + "bne L_SHA512_transform_neon_len_start_%=\n\t" + /* Round 0 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d16\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 1 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + "vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, 
#25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d17\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Round 2 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d18\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 3 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d19\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Round 4 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + 
"vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d20\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 5 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d21\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Round 6 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d22\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, 
d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 7 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d23\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Round 8 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d4, #50\n\t" + "vsri.u64 d8, d4, #14\n\t" + "vshl.u64 d9, d0, #36\n\t" + "vsri.u64 d9, d0, #28\n\t" + "vshl.u64 d10, d4, #46\n\t" + "vsri.u64 d10, d4, #18\n\t" + "vshl.u64 d11, d0, #30\n\t" + "vsri.u64 d11, d0, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d4, #23\n\t" + "vsri.u64 d10, d4, #41\n\t" + "vshl.u64 d11, d0, #25\n\t" + "vsri.u64 d11, d0, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d12, d24\n\t" + "vmov d8, d4\n\t" + "veor d10, d1, d2\n\t" + "vadd.i64 d7, d12\n\t" + "vbsl d8, d5, d6\n\t" + "vbsl d10, d0, d2\n\t" + "vadd.i64 d7, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d3, d7\n\t" + "vadd.i64 d7, d10\n\t" + /* Round 9 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d3, #50\n\t" + "vsri.u64 d8, d3, #14\n\t" + "vshl.u64 d9, d7, #36\n\t" + "vsri.u64 d9, d7, #28\n\t" + "vshl.u64 d10, d3, #46\n\t" + "vsri.u64 d10, d3, #18\n\t" + "vshl.u64 d11, d7, #30\n\t" + "vsri.u64 d11, d7, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d3, #23\n\t" + 
"vsri.u64 d10, d3, #41\n\t" + "vshl.u64 d11, d7, #25\n\t" + "vsri.u64 d11, d7, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d12, d25\n\t" + "vmov d8, d3\n\t" + "veor d10, d0, d1\n\t" + "vadd.i64 d6, d12\n\t" + "vbsl d8, d4, d5\n\t" + "vbsl d10, d7, d1\n\t" + "vadd.i64 d6, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d2, d6\n\t" + "vadd.i64 d6, d10\n\t" + /* Round 10 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d2, #50\n\t" + "vsri.u64 d8, d2, #14\n\t" + "vshl.u64 d9, d6, #36\n\t" + "vsri.u64 d9, d6, #28\n\t" + "vshl.u64 d10, d2, #46\n\t" + "vsri.u64 d10, d2, #18\n\t" + "vshl.u64 d11, d6, #30\n\t" + "vsri.u64 d11, d6, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d2, #23\n\t" + "vsri.u64 d10, d2, #41\n\t" + "vshl.u64 d11, d6, #25\n\t" + "vsri.u64 d11, d6, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d12, d26\n\t" + "vmov d8, d2\n\t" + "veor d10, d7, d0\n\t" + "vadd.i64 d5, d12\n\t" + "vbsl d8, d3, d4\n\t" + "vbsl d10, d6, d0\n\t" + "vadd.i64 d5, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d1, d5\n\t" + "vadd.i64 d5, d10\n\t" + /* Round 11 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d1, #50\n\t" + "vsri.u64 d8, d1, #14\n\t" + "vshl.u64 d9, d5, #36\n\t" + "vsri.u64 d9, d5, #28\n\t" + "vshl.u64 d10, d1, #46\n\t" + "vsri.u64 d10, d1, #18\n\t" + "vshl.u64 d11, d5, #30\n\t" + "vsri.u64 d11, d5, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d1, #23\n\t" + "vsri.u64 d10, d1, #41\n\t" + "vshl.u64 d11, d5, #25\n\t" + "vsri.u64 d11, d5, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d12, d27\n\t" + "vmov d8, d1\n\t" + "veor d10, d6, d7\n\t" + "vadd.i64 d4, d12\n\t" + "vbsl d8, d2, d3\n\t" + "vbsl d10, d5, d7\n\t" + "vadd.i64 d4, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d0, d4\n\t" + "vadd.i64 d4, d10\n\t" + /* Round 12 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d0, #50\n\t" + "vsri.u64 
d8, d0, #14\n\t" + "vshl.u64 d9, d4, #36\n\t" + "vsri.u64 d9, d4, #28\n\t" + "vshl.u64 d10, d0, #46\n\t" + "vsri.u64 d10, d0, #18\n\t" + "vshl.u64 d11, d4, #30\n\t" + "vsri.u64 d11, d4, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d0, #23\n\t" + "vsri.u64 d10, d0, #41\n\t" + "vshl.u64 d11, d4, #25\n\t" + "vsri.u64 d11, d4, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d12, d28\n\t" + "vmov d8, d0\n\t" + "veor d10, d5, d6\n\t" + "vadd.i64 d3, d12\n\t" + "vbsl d8, d1, d2\n\t" + "vbsl d10, d4, d6\n\t" + "vadd.i64 d3, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d7, d3\n\t" + "vadd.i64 d3, d10\n\t" + /* Round 13 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d7, #50\n\t" + "vsri.u64 d8, d7, #14\n\t" + "vshl.u64 d9, d3, #36\n\t" + "vsri.u64 d9, d3, #28\n\t" + "vshl.u64 d10, d7, #46\n\t" + "vsri.u64 d10, d7, #18\n\t" + "vshl.u64 d11, d3, #30\n\t" + "vsri.u64 d11, d3, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d7, #23\n\t" + "vsri.u64 d10, d7, #41\n\t" + "vshl.u64 d11, d3, #25\n\t" + "vsri.u64 d11, d3, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d12, d29\n\t" + "vmov d8, d7\n\t" + "veor d10, d4, d5\n\t" + "vadd.i64 d2, d12\n\t" + "vbsl d8, d0, d1\n\t" + "vbsl d10, d3, d5\n\t" + "vadd.i64 d2, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d6, d2\n\t" + "vadd.i64 d2, d10\n\t" + /* Round 14 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d6, #50\n\t" + "vsri.u64 d8, d6, #14\n\t" + "vshl.u64 d9, d2, #36\n\t" + "vsri.u64 d9, d2, #28\n\t" + "vshl.u64 d10, d6, #46\n\t" + "vsri.u64 d10, d6, #18\n\t" + "vshl.u64 d11, d2, #30\n\t" + "vsri.u64 d11, d2, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d6, #23\n\t" + "vsri.u64 d10, d6, #41\n\t" + "vshl.u64 d11, d2, #25\n\t" + "vsri.u64 d11, d2, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d12, d30\n\t" + "vmov d8, d6\n\t" + "veor d10, d3, 
d4\n\t" + "vadd.i64 d1, d12\n\t" + "vbsl d8, d7, d0\n\t" + "vbsl d10, d2, d4\n\t" + "vadd.i64 d1, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d5, d1\n\t" + "vadd.i64 d1, d10\n\t" + /* Round 15 */ + "vld1.64 {d12}, [r3:64]!\n\t" + "vshl.u64 d8, d5, #50\n\t" + "vsri.u64 d8, d5, #14\n\t" + "vshl.u64 d9, d1, #36\n\t" + "vsri.u64 d9, d1, #28\n\t" + "vshl.u64 d10, d5, #46\n\t" + "vsri.u64 d10, d5, #18\n\t" + "vshl.u64 d11, d1, #30\n\t" + "vsri.u64 d11, d1, #34\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vshl.u64 d10, d5, #23\n\t" + "vsri.u64 d10, d5, #41\n\t" + "vshl.u64 d11, d1, #25\n\t" + "vsri.u64 d11, d1, #39\n\t" + "veor d8, d10\n\t" + "veor d9, d11\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d12, d31\n\t" + "vmov d8, d5\n\t" + "veor d10, d2, d3\n\t" + "vadd.i64 d0, d12\n\t" + "vbsl d8, d6, d7\n\t" + "vbsl d10, d1, d3\n\t" + "vadd.i64 d0, d8\n\t" + "vadd.i64 d10, d9\n\t" + "vadd.i64 d4, d0\n\t" + "vadd.i64 d0, d10\n\t" + /* Add in digest from start */ + "vldm.64 %[sha512], {d8-d15}\n\t" +#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT + "vadd.i64 q0, q0, q4\n\t" + "vadd.i64 q1, q1, q5\n\t" + "vadd.i64 q2, q2, q6\n\t" + "vadd.i64 q3, q3, q7\n\t" +#else + "vadd.i64 d0, d0, d8\n\t" + "vadd.i64 d1, d1, d9\n\t" + "vadd.i64 d2, d2, d10\n\t" + "vadd.i64 d3, d3, d11\n\t" + "vadd.i64 d4, d4, d12\n\t" + "vadd.i64 d5, d5, d13\n\t" + "vadd.i64 d6, d6, d14\n\t" + "vadd.i64 d7, d7, d15\n\t" +#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ + "vstm.64 %[sha512], {d0-d7}\n\t" + "subs %[len], %[len], #0x80\n\t" + "sub r3, r3, #0x280\n\t" + "bne L_SHA512_transform_neon_len_begin_%=\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c) + : + : "memory", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "cc" + ); +} + +#endif /* !WOLFSSL_ARMASM_NO_NEON */ +#endif /* WOLFSSL_SHA512 */ +#endif /* 
!__aarch64__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 5ec2dca65..d25b2f998 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -1,27 +1,17 @@ /* armv8-aes.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* - * There are two versions one for 64 (Aarch64) and one for 32 bit (Aarch32). + * There are two versions one for 64 (Aarch64) and one for 32 bit (Aarch32). * If changing one check the other. 
*/ @@ -31,15 +21,17 @@ #endif #include +#include #if !defined(NO_AES) && defined(WOLFSSL_ARMASM) -#ifdef HAVE_FIPS -#undef HAVE_FIPS +#if defined(HAVE_FIPS) && !defined(FIPS_NO_WRAPPERS) +#define FIPS_NO_WRAPPERS #endif +#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO + #include -#include #include #ifdef NO_INLINE #include @@ -53,7 +45,6 @@ #pragma warning(disable: 4127) #endif - static const byte rcon[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36 /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ @@ -115,6 +106,7 @@ static const byte rcon[] = { #ifdef HAVE_AESGCM +#if !defined(__aarch64__) || defined(WOLFSSL_AESGCM_STREAM) static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) { int i; @@ -143,6 +135,7 @@ static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) buf[6] = (sz >> 8) & 0xff; buf[7] = sz & 0xff; } +#endif #endif /* HAVE_AESGCM */ @@ -888,536 +881,575 @@ int wc_AesSetIV(Aes* aes, const byte* iv) /* AES-CTR */ #ifdef WOLFSSL_AES_COUNTER - - /* Increment AES counter */ - static WC_INLINE void IncrementAesCounter(byte* inOutCtr) - { - int i; - - /* in network byte order so start at end and work back */ - for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { - if (++inOutCtr[i]) /* we're done unless we overflow */ - return; - } - } - - int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - byte* tmp; - word32 numBlocks; - - if (aes == NULL || out == NULL || in == NULL) { - return BAD_FUNC_ARG; - } - - tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; - - /* consume any unused bytes left in aes->tmp */ - while (aes->left && sz) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - sz--; - } - - /* do as many block size ops as possible */ - numBlocks = sz/AES_BLOCK_SIZE; - if (numBlocks > 0) { - /* pointer needed because it is incremented when read, causing - * an issue with call to encrypt/decrypt leftovers */ - byte* keyPt = (byte*)aes->key; - sz -= numBlocks * AES_BLOCK_SIZE; - switch(aes->rounds) { +static void 
wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, + byte* keyPt, word32 numBlocks) +{ + switch(aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v15.16b, #1 \n" - "USHR v15.2d, v15.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v14.16b, v15.16b, v14.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v15.16b, #1 \n" + "USHR v15.2d, v15.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v14.16b, v15.16b, v14.16b, #8\n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v13.2d}, %[reg] \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v13.2d}, %[reg] \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */ + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v12.2d, v15.2d, #0 \n" + "EXT v12.16b, v14.16b, v12.16b, #8 \n" + "SUB v15.2d, v15.2d, v12.2d \n" + "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v12.2d, 
v13.2d, #0 \n" + "EXT v12.16b, v14.16b, v12.16b, #8 \n" + "SUB v13.2d, v13.2d, v12.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v1.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v1.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v2.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v2.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v3.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v3.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v4.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v4.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v5.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v5.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v10.16b \n" - "AESE v15.16b, v6.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v10.16b \n" + "AESE v15.16b, v6.16b \n" + 
"AESMC v15.16b, v15.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "AESE v15.16b, v7.16b \n" - "AESMC v15.16b, v15.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "AESE v15.16b, v7.16b \n" + "AESMC v15.16b, v15.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v15.16b, v8.16b \n" - "AESMC v15.16b, v15.16b \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v8.16b \n" + "AESMC v15.16b, v15.16b \n" - "EOR v0.16b, v0.16b, v12.16b \n" - "AESE v15.16b, v9.16b \n" - "AESMC v15.16b, v15.16b \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "AESE v15.16b, v9.16b \n" + "AESMC v15.16b, v15.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v15.16b, v10.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v15.16b, v15.16b, v11.16b \n" - "EOR v15.16b, v15.16b, v12.16b \n" - "ST1 {v15.2d}, [%[out]], #16 \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v10.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v15.16b, v15.16b, v11.16b \n" + "EOR v15.16b, v15.16b, v12.16b \n" + "ST1 {v15.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - /* single block */ - "2: \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "LD1 {v12.2d}, 
[%[input]], #16 \n" - "EOR v0.16b, v0.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + /* single block */ + "2: \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v15.2d, v13.2d, #0 \n" + "EXT v15.16b, v14.16b, v15.16b, #8 \n" + "SUB v13.2d, v13.2d, v15.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v13.2d}, %[regOut] \n" + "3: \n" + "#store current counter value at the end \n" + "ST1 {v13.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", 
"v10","v11","v12","v13","v14","v15" + ); + break; #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v16.16b, #1 \n" - "USHR v16.2d, v16.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v16.16b, v16.16b, v14.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v16.16b, #1 \n" + "USHR v16.2d, v16.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v16.16b, v16.16b, v14.16b, #8\n" - "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n" - "LD1 {v15.2d}, %[reg] \n" - "LD1 {v13.16b}, [%[Key]], #16 \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n" + "LD1 {v15.2d}, %[reg] \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v15.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */ - "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */ + "MOV v0.16b, v15.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "CMEQ v14.2d, v17.2d, #0 \n" + "EXT v14.16b, v16.16b, v14.16b, #8 \n" + "SUB v17.2d, v17.2d, v14.2d \n" + "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 
to counter */ + "CMEQ v14.2d, v15.2d, #0 \n" + "EXT v14.16b, v16.16b, v14.16b, #8 \n" + "SUB v15.2d, v15.2d, v14.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v1.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v1.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v2.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v2.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v3.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v3.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v4.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v4.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v5.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v5.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v6.16b \n" - "AESMC v17.16b, v17.16b \n" + 
"AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v6.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v7.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v7.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v12.16b \n" - "AESE v17.16b, v8.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v12.16b \n" + "AESE v17.16b, v8.16b \n" + "AESMC v17.16b, v17.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - "AESE v17.16b, v9.16b \n" - "AESMC v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "AESE v17.16b, v9.16b \n" + "AESMC v17.16b, v17.16b \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "AESE v17.16b, v10.16b \n" - "AESMC v17.16b, v17.16b \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v10.16b \n" + "AESMC v17.16b, v17.16b \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "AESE v17.16b, v11.16b \n" - "AESMC v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "AESE v17.16b, v11.16b \n" + "AESMC v17.16b, v17.16b \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "AESE v17.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v17.16b, v17.16b, v13.16b \n" - "EOR v17.16b, v17.16b, v14.16b \n" - "ST1 {v17.2d}, [%[out]], #16 \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v17.16b, v17.16b, v13.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "ST1 {v17.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - "2: \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "MOV v0.16b, v15.16b \n" + "2: \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "MOV v0.16b, v15.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */ - "AESE v0.16b, 
v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "CMEQ v17.2d, v15.2d, #0 \n" + "EXT v17.16b, v16.16b, v17.16b, #8 \n" + "SUB v15.2d, v15.2d, v17.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v15.2d}, %[regOut] \n" 
+ "3: \n" + "#store current counter value at the end \n" + "ST1 {v15.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", - "v16", "v17" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17" + ); + break; #endif /* WOLFSSL_AES_192 */ #ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v18.16b, #1 \n" - "USHR v18.2d, v18.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v19.16b, v19.16b, v19.16b \n" - "EXT v18.16b, v18.16b, v19.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v18.16b, #1 \n" + "USHR v18.2d, v18.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v19.16b, v19.16b, v19.16b \n" + "EXT v18.16b, v18.16b, v19.16b, #8\n" - "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" - "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" - "LD1 {v17.2d}, %[reg] \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, %[reg] \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v17.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* 
network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */ - "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */ + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v16.2d, v19.2d, #0 \n" + "EXT v16.16b, v18.16b, v16.16b, #8 \n" + "SUB v19.2d, v19.2d, v16.2d \n" + "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v16.2d, v17.2d, #0 \n" + "EXT v16.16b, v18.16b, v16.16b, #8 \n" + "SUB v17.2d, v17.2d, v16.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v19.16b, v19.16b, v19.16b, #8 \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v19.16b, v19.16b \n" /* revert from network order */ - "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v19.16b, v19.16b \n" /* revert from network order */ + "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v1.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v1.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v2.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v2.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v3.16b \n" - 
"AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v3.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v4.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v4.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v5.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v5.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v6.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v6.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v7.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v7.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v8.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v8.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v9.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v9.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v14.16b \n" - "AESE v19.16b, v10.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v14.16b \n" + "AESE v19.16b, v10.16b \n" + "AESMC v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - "AESE v19.16b, v11.16b \n" - "AESMC v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v19.16b, v11.16b \n" + "AESMC v19.16b, v19.16b \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "AESE v19.16b, v12.16b \n" - "AESMC v19.16b, v19.16b \n" + "LD1 {v16.2d}, [%[input]], #16 \n" 
+ "AESE v19.16b, v12.16b \n" + "AESMC v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v16.16b \n" - "AESE v19.16b, v13.16b \n" - "AESMC v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "AESE v19.16b, v13.16b \n" + "AESMC v19.16b, v19.16b \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "AESE v19.16b, v14.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v19.16b, v19.16b, v15.16b \n" - "EOR v19.16b, v19.16b, v16.16b \n" - "ST1 {v19.2d}, [%[out]], #16 \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "AESE v19.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v19.16b, v19.16b, v16.16b \n" + "ST1 {v19.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - "2: \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "MOV v0.16b, v17.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "EOR v0.16b, v0.16b, v16.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + "2: \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "MOV v0.16b, v17.16b \n" + "AESE 
v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v19.2d, v17.2d, #0 \n" + "EXT v19.16b, v18.16b, v19.16b, #8 \n" + "SUB v17.2d, v17.2d, v19.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v17.2d}, %[regOut] \n" + "3: \n" + "#store current counter value at the end \n" + "ST1 {v17.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", - "v16", "v17", "v18", "v19" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", 
"v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17", "v18", "v19" + ); + break; #endif /* WOLFSSL_AES_256 */ - default: - WOLFSSL_MSG("Bad AES-CTR round value"); - return BAD_FUNC_ARG; - } + } +} - aes->left = 0; - } +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + byte* tmp; + word32 numBlocks; - /* handle non block size remaining */ - if (sz) { - wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); - IncrementAesCounter((byte*)aes->reg); + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + switch(aes->rounds) { + #ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + #endif /* WOLFSSL_AES_256 */ + break; + default: + WOLFSSL_MSG("Bad AES-CTR round value"); + return BAD_FUNC_ARG; + } - aes->left = AES_BLOCK_SIZE; - tmp = (byte*)aes->tmp; - while (sz--) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - } - } - return 0; + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + + /* consume any unused bytes left in aes->tmp */ + while ((aes->left != 0) && (sz != 0)) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz / AES_BLOCK_SIZE; + if (numBlocks > 0) { + wc_aes_ctr_encrypt_asm(aes, out, in, (byte*)aes->key, numBlocks); + + sz -= numBlocks * AES_BLOCK_SIZE; + out += numBlocks * AES_BLOCK_SIZE; + in += numBlocks * AES_BLOCK_SIZE; + } + + /* handle non block size remaining */ + if (sz) { + byte zeros[AES_BLOCK_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + wc_aes_ctr_encrypt_asm(aes, (byte*)aes->tmp, zeros, (byte*)aes->key, 1); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; } + } + return 0; +} + +int wc_AesCtrSetKey(Aes* aes, const 
byte* key, word32 len, + const byte* iv, int dir) +{ + (void)dir; + return wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); +} #endif /* WOLFSSL_AES_COUNTER */ @@ -1427,1109 +1459,5714 @@ int wc_AesSetIV(Aes* aes, const byte* iv) * Based from GCM implementation in wolfcrypt/src/aes.c */ +/* START script replace AES-GCM Aarch64 with hardware crypto. */ + /* PMULL and RBIT only with AArch64 */ /* Use ARM hardware for polynomial multiply */ -static void GMULT(byte* X, byte* Y) +void GMULT(byte* X, byte* Y) { __asm__ volatile ( - "LD1 {v0.16b}, [%[inX]] \n" - "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */ + "LD1 {v0.16b}, [%[X]] \n" + "LD1 {v1.16b}, [%[Y]] \n" /* v1 already reflected from set key */ + "MOVI v2.16b, #0x87 \n" "RBIT v0.16b, v0.16b \n" + "USHR v2.2d, v2.2d, #56 \n" + "PMULL v3.1q, v0.1d, v1.1d \n" + "PMULL2 v4.1q, v0.2d, v1.2d \n" + "EXT v5.16b, v1.16b, v1.16b, #8 \n" + "PMULL v6.1q, v0.1d, v5.1d \n" + "PMULL2 v5.1q, v0.2d, v5.2d \n" + "EOR v5.16b, v5.16b, v6.16b \n" + "EXT v6.16b, v3.16b, v4.16b, #8 \n" + "EOR v6.16b, v6.16b, v5.16b \n" + "# Reduce \n" + "PMULL2 v5.1q, v4.2d, v2.2d \n" + "EOR v6.16b, v6.16b, v5.16b \n" + "PMULL2 v5.1q, v6.2d, v2.2d \n" + "MOV v3.D[1], v6.D[0] \n" + "EOR v0.16b, v3.16b, v5.16b \n" - /* Algorithm 1 from Intel GCM white paper. 
- "Carry-Less Multiplication and Its Usage for Computing the GCM Mode" - */ - "PMULL v3.1q, v0.1d, v1.1d \n" /* a0 * b0 = C */ - "PMULL2 v4.1q, v0.2d, v1.2d \n" /* a1 * b1 = D */ - "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */ - "PMULL v6.1q, v0.1d, v5.1d \n" /* a0 * b1 = E */ - "PMULL2 v5.1q, v0.2d, v5.2d \n" /* a1 * b0 = F */ - - "#Set a register to all 0s using EOR \n" - "EOR v7.16b, v7.16b, v7.16b \n" - "EOR v5.16b, v5.16b, v6.16b \n" /* F ^ E */ - "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */ - "EOR v3.16b, v3.16b, v6.16b \n" /* low 128 bits in v3 */ - "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */ - "EOR v4.16b, v4.16b, v6.16b \n" /* high 128 bits in v4 */ - - - /* Based from White Paper "Implementing GCM on ARMv8" - by Conrado P.L. Gouvea and Julio Lopez - reduction on 256bit value using Algorithm 5 */ - "MOVI v8.16b, #0x87 \n" - "USHR v8.2d, v8.2d, #56 \n" - /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/ - "PMULL2 v5.1q, v4.2d, v8.2d \n" - "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */ - "EOR v4.16b, v4.16b, v6.16b \n" - "EXT v6.16b, v7.16b, v5.16b, #8 \n" - "EOR v3.16b, v3.16b, v6.16b \n" - "PMULL v5.1q, v4.1d, v8.1d \n" - "EOR v4.16b, v3.16b, v5.16b \n" - - "RBIT v4.16b, v4.16b \n" - "STR q4, [%[out]] \n" - : [out] "=r" (X), "=r" (Y) - : [inX] "0" (X), [inY] "1" (Y) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8" + "RBIT v0.16b, v0.16b \n" + "STR q0, [%[X]] \n" + : + : [X] "r" (X), [Y] "r" (Y) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } - -void GHASH(Aes* aes, const byte* a, word32 aSz, - const byte* c, word32 cSz, byte* s, word32 sSz) +void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) { - byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; - word32 blocks, partial; - byte* h = aes->H; - XMEMSET(x, 0, AES_BLOCK_SIZE); + __asm__ __volatile__ ( + "LD1 {v3.16b}, %[h] \n" + "MOVI v7.16b, #0x87 \n" + "EOR 
v0.16b, v0.16b, v0.16b \n" + "USHR v7.2d, v7.2d, #56 \n" - /* Hash in A, the Additional Authentication Data */ - if (aSz != 0 && a != NULL) { - blocks = aSz / AES_BLOCK_SIZE; - partial = aSz % AES_BLOCK_SIZE; - /* do as many blocks as possible */ - while (blocks--) { - xorbuf(x, a, AES_BLOCK_SIZE); - GMULT(x, h); - a += AES_BLOCK_SIZE; - } - if (partial != 0) { - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, a, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, h); - } - } + "# AAD \n" + "CBZ %w[aSz], 20f \n" + "MOV w12, %w[aSz] \n" - /* Hash in C, the Ciphertext */ - if (cSz != 0 && c != NULL) { - blocks = cSz / AES_BLOCK_SIZE; - partial = cSz % AES_BLOCK_SIZE; - while (blocks--) { - xorbuf(x, c, AES_BLOCK_SIZE); - GMULT(x, h); - c += AES_BLOCK_SIZE; - } - if (partial != 0) { - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, c, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, h); - } - } + "CMP x12, #64 \n" + "BLT 15f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v11.1q, v3.2d, v3.2d \n" + "PMULL v10.1q, v3.1d, v3.1d \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v11.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v4.16b, v10.16b, v11.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v10.1q, v4.1d, v3.1d \n" + "PMULL2 v11.1q, v4.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v4.1d, v12.1d \n" + "PMULL2 v12.1q, v4.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v5.16b, v10.16b, v12.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v11.1q, v4.2d, v4.2d \n" + "PMULL v10.1q, v4.1d, v4.1d \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EXT v13.16b, 
v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v11.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v6.16b, v10.16b, v11.16b \n" + "14: \n" + "LD1 {v10.2d-v13.2d}, [%[a]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v10.16b, v10.16b \n" + "RBIT v11.16b, v11.16b \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "EOR v10.16b, v10.16b, v0.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v0.1q, v13.1d, v3.1d \n" + "PMULL2 v1.1q, v13.2d, v3.2d \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v2.1q, v13.1d, v3.1d \n" + "PMULL2 v9.1q, v13.2d, v3.2d \n" + "EOR v2.16b, v2.16b, v9.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v8.1q, v12.1d, v4.1d \n" + "PMULL2 v9.1q, v12.2d, v4.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v9.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v12.16b, v9.16b \n" +#else + "EOR v12.16b, v12.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v8.1q, v11.1d, v5.1d \n" + "PMULL2 v9.1q, v11.2d, v5.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v11.16b, v11.16b, v11.16b, #8 \n" + "PMULL v9.1q, v11.1d, v5.1d \n" + "PMULL2 v11.1q, v11.2d, v5.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v11.16b, v9.16b \n" +#else + "EOR v11.16b, v11.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v11.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v8.1q, v10.1d, v6.1d \n" + "PMULL2 v9.1q, v10.2d, v6.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v10.16b, v10.16b, v10.16b, #8 \n" + "PMULL v9.1q, v10.1d, v6.1d \n" + "PMULL2 v10.1q, v10.2d, v6.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v10.16b, v9.16b \n" +#else + "EOR v10.16b, v10.16b, v9.16b \n" 
+ "EOR v2.16b, v2.16b, v10.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v9.16b, v0.16b, v1.16b, #8 \n" + "PMULL2 v8.1q, v1.2d, v7.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v9.16b, v9.16b, v2.16b, v8.16b \n" +#else + "EOR v9.16b, v9.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v9.16b, v9.16b, v8.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v8.1q, v9.2d, v7.2d \n" + "MOV v0.D[1], v9.D[0] \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "CMP x12, #64 \n" + "BGE 14b \n" + "CBZ x12, 20f \n" + "15: \n" + "CMP x12, #16 \n" + "BLT 12f \n" + "11: \n" + "LD1 {v14.2d}, [%[a]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" + "CMP x12, #16 \n" + "BGE 11b \n" + "CBZ x12, 120f \n" + "12: \n" + "# Partial AAD \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "MOV x14, x12 \n" + "ST1 {v14.2d}, [%[scratch]] \n" + "13: \n" + "LDRB w13, [%[a]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 13b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v14.2d}, [%[scratch]] \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# 
Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" - /* Hash in the lengths of A and C in bits */ - FlattenSzInBits(&scratch[0], aSz); - FlattenSzInBits(&scratch[8], cSz); - xorbuf(x, scratch, AES_BLOCK_SIZE); + "20: \n" + "# Cipher Text \n" + "CBZ %w[cSz], 120f \n" + "MOV w12, %w[cSz] \n" - /* Copy the result (minus last GMULT) into s. */ - XMEMCPY(s, x, sSz); + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v11.1q, v3.2d, v3.2d \n" + "PMULL v10.1q, v3.1d, v3.1d \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v11.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v4.16b, v10.16b, v11.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v10.1q, v4.1d, v3.1d \n" + "PMULL2 v11.1q, v4.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v4.1d, v12.1d \n" + "PMULL2 v12.1q, v4.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v5.16b, v10.16b, v12.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v11.1q, v4.2d, v4.2d \n" + "PMULL v10.1q, v4.1d, v4.1d \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v11.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v6.16b, v10.16b, v11.16b \n" + "114: \n" + "LD1 {v10.2d-v13.2d}, [%[c]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v10.16b, v10.16b \n" + "RBIT v11.16b, v11.16b \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "EOR v10.16b, v10.16b, v0.16b \n" + "# x[0-2] = C * H^1 
\n" + "PMULL v0.1q, v13.1d, v3.1d \n" + "PMULL2 v1.1q, v13.2d, v3.2d \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v2.1q, v13.1d, v3.1d \n" + "PMULL2 v9.1q, v13.2d, v3.2d \n" + "EOR v2.16b, v2.16b, v9.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v8.1q, v12.1d, v4.1d \n" + "PMULL2 v9.1q, v12.2d, v4.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v9.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v12.16b, v9.16b \n" +#else + "EOR v12.16b, v12.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v8.1q, v11.1d, v5.1d \n" + "PMULL2 v9.1q, v11.2d, v5.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v11.16b, v11.16b, v11.16b, #8 \n" + "PMULL v9.1q, v11.1d, v5.1d \n" + "PMULL2 v11.1q, v11.2d, v5.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v11.16b, v9.16b \n" +#else + "EOR v11.16b, v11.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v11.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v8.1q, v10.1d, v6.1d \n" + "PMULL2 v9.1q, v10.2d, v6.2d \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "EOR v1.16b, v1.16b, v9.16b \n" + "EXT v10.16b, v10.16b, v10.16b, #8 \n" + "PMULL v9.1q, v10.1d, v6.1d \n" + "PMULL2 v10.1q, v10.2d, v6.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v2.16b, v2.16b, v10.16b, v9.16b \n" +#else + "EOR v10.16b, v10.16b, v9.16b \n" + "EOR v2.16b, v2.16b, v10.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v9.16b, v0.16b, v1.16b, #8 \n" + "PMULL2 v8.1q, v1.2d, v7.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v9.16b, v9.16b, v2.16b, v8.16b \n" +#else + "EOR v9.16b, v9.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v9.16b, v9.16b, v8.16b \n" +#endif /* 
WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v8.1q, v9.2d, v7.2d \n" + "MOV v0.D[1], v9.D[0] \n" + "EOR v0.16b, v0.16b, v8.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v14.2d}, [%[c]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial cipher text \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "MOV x14, x12 \n" + "ST1 {v14.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[c]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v14.2d}, [%[scratch]] \n" + "RBIT v14.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "PMULL v10.1q, v0.1d, v3.1d \n" + "PMULL2 v11.1q, v0.2d, v3.2d \n" + "EXT v12.16b, v3.16b, v3.16b, #8 \n" + "PMULL v13.1q, v0.1d, v12.1d \n" + "PMULL2 v12.1q, v0.2d, v12.2d \n" + "EOR v12.16b, v12.16b, v13.16b \n" + "EXT v13.16b, v10.16b, v11.16b, #8 \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "# Reduce \n" + "PMULL2 v12.1q, v11.2d, v7.2d \n" + "EOR v13.16b, v13.16b, v12.16b \n" + "PMULL2 v12.1q, v13.2d, v7.2d \n" + "MOV v10.D[1], v13.D[0] \n" + "EOR v0.16b, v10.16b, v12.16b \n" + "120: \n" + "RBIT v0.16b, v0.16b \n" + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[cSz], %x[cSz], #3 \n" + "MOV v10.D[0], %x[aSz] \n" + "MOV v10.D[1], %x[cSz] \n" + "REV64 v10.16b, v10.16b \n" + "EOR v0.16b, v0.16b, 
v10.16b \n" + "ST1 {v0.16b}, [%[scratch]] \n" + : [cSz] "+r" (cSz), [c] "+r" (c), [aSz] "+r" (aSz), [a] "+r" (a) + : [scratch] "r" (scratch), [h] "m" (gcm->H) + : "cc", "memory", "w12", "w13", "x14", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14" + ); + + XMEMCPY(s, scratch, sSz); } - #ifdef WOLFSSL_AES_128 /* internal function : see wc_AesGcmEncrypt */ static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { - word32 blocks; - word32 partial; byte counter[AES_BLOCK_SIZE]; - byte initialCounter[AES_BLOCK_SIZE]; - byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; - /* Noticed different optimization levels treated head of array different. - Some cases was stack pointer plus offset others was a regester containing - address. To make uniform for passing in to inline assembly code am using - pointers to the head of each local array. + * Some cases was stack pointer plus offset others was a register containing + * address. To make uniform for passing in to inline assembly code am using + * pointers to the head of each local array. 
*/ byte* ctr = counter; - byte* iCtr = initialCounter; - byte* xPt = x; - byte* sPt = scratch; - byte* keyPt; /* pointer to handle pointer advencment */ + byte* keyPt = (byte*)aes->key; - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + XMEMSET(counter, 0, AES_BLOCK_SIZE); if (ivSz == GCM_NONCE_MID_SZ) { - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = 1; + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); - GMULT(initialCounter, aes->H); - } - XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); - - - /* Hash in the Additional Authentication Data */ - XMEMSET(x, 0, AES_BLOCK_SIZE); - if (authInSz != 0 && authIn != NULL) { - blocks = authInSz / AES_BLOCK_SIZE; - partial = authInSz % AES_BLOCK_SIZE; - /* do as many blocks as possible */ - while (blocks--) { - xorbuf(x, authIn, AES_BLOCK_SIZE); - GMULT(x, aes->H); - authIn += AES_BLOCK_SIZE; - } - if (partial != 0) { - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, authIn, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } - /* do as many blocks as possible */ - blocks = sz / AES_BLOCK_SIZE; - partial = sz % AES_BLOCK_SIZE; - if (blocks > 0) { - keyPt = (byte*)aes->key; - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v13.2d}, [%[ctr]] \n" - - "#Create vector with the value 1 \n" - "MOVI v14.16b, #1 \n" - "USHR v14.2d, v14.2d, #56 \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EXT v14.16b, v14.16b, v22.16b, #8\n" - - - /*************************************************** - Get first out block for GHASH using AES encrypt - ***************************************************/ - "REV64 v13.16b, v13.16b \n" /* network order */ - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "EXT 
v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v16.2d}, %[inY] \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "USHR v23.2d, v23.2d, #56 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v15.16b, v0.16b \n" - - "CBZ w11, 1f \n" /* only one block jump to final GHASH */ - - "LD1 {v12.2d}, [%[input]], #16 \n" - - /*************************************************** - Interweave GHASH and encrypt if more then 1 block - ***************************************************/ - "2: \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "EOR v15.16b, v17.16b, v15.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ - "MOV v0.16b, v13.16b \n" - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, 
v0.16b \n" - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v10.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "MOV v15.16b, v0.16b \n" - "RBIT v17.16b, v19.16b \n" - - "CBZ w11, 1f \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "B 2b \n" - - /*************************************************** - GHASH on last block - ***************************************************/ - "1: \n" - "EOR v15.16b, v17.16b, v15.16b \n" - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */ - - "#store current AES counter value \n" - "ST1 {v13.2d}, [%[ctrOut]] \n" - "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "EOR v20.16b, v20.16b, v21.16b 
\n" /* F ^ E */ - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - - "#Reduce product from multiplication \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "EOR v19.16b, v19.16b, v21.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "RBIT v17.16b, v19.16b \n" - "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ - - :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) - ,[xOut] "=r" (xPt),"=m" (aes->H) - :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (in) - ,[inX] "4" (xPt), [inY] "m" (aes->H) - : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24" - ); - } - - /* take care of partial block sizes leftover */ - if (partial != 0) { - IncrementGcmCounter(counter); - wc_AesEncrypt(aes, counter, scratch); - xorbuf(scratch, in, partial); - XMEMCPY(out, scratch, partial); - - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, out, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } - - /* Hash in the lengths of A and C in bits */ - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - FlattenSzInBits(&scratch[0], authInSz); - FlattenSzInBits(&scratch[8], sz); - xorbuf(x, scratch, AES_BLOCK_SIZE); - XMEMCPY(scratch, x, AES_BLOCK_SIZE); - - keyPt = (byte*)aes->key; __asm__ __volatile__ ( - - "LD1 {v16.16b}, [%[tag]] \n" - "LD1 {v17.16b}, %[h] \n" - "RBIT v16.16b, v16.16b \n" - - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ - "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ 
- "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ - "LD1 {v0.2d}, [%[ctr]] \n" - - "#Set a register to all 0s using EOR \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... \n" "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v17.16b, v17.16b \n" "USHR v23.2d, v23.2d, #56 \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "AESE v0.16b, v10.16b \n" - "RBIT v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "EOR v19.16b, v19.16b, v0.16b \n" - "STR q19, [%[out]] \n" + "CBZ %w[aSz], 120f \n" - :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) - :[tag] "0" (sPt), [Key] "1" (keyPt), - [ctr] "2" (iCtr) , [h] "m" (aes->H) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", 
"v10","v11","v12","v13","v14", - "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24" + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + 
"EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v15.2d}, 
[%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Encrypt plaintext and GHASH ciphertext \n" + "LDR w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, 
v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? 
\n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v10.1d, v16.1d \n" + "PMULL2 v19.1q, v10.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v10.1d, v20.1d \n" + "PMULL2 v20.1q, v10.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v19.16b \n" + + "# First encrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, 
w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" 
+ "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + 
"AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, 
v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, 
v20.2d, v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + 
"AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" 
+#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, 
v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" 
+ "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 
v3.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR 
v0.16b, v0.16b, v3.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First encrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "EOR v27.16b, v27.16b, v11.16b \n" + "AESE v28.16b, v10.16b \n" + "EOR v28.16b, v28.16b, v11.16b \n" + "AESE v29.16b, v10.16b \n" + "EOR v29.16b, v29.16b, v11.16b \n" + "AESE v30.16b, v10.16b \n" + "EOR v30.16b, v30.16b, v11.16b \n" + + "# XOR in input \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" 
+ "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* 
WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" 
+ "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "EOR v27.16b, v27.16b, v11.16b \n" + "AESE v28.16b, v10.16b \n" + "EOR v28.16b, v28.16b, v11.16b \n" + "AESE v29.16b, v10.16b \n" + "EOR v29.16b, v29.16b, v11.16b \n" + "AESE v30.16b, v10.16b \n" + "EOR v30.16b, v30.16b, v11.16b \n" + + "# XOR in input \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, 
#8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + + "10: \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "# Encrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v31.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + 
"AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + + "# When only one full block to encrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + + "# Interweave GHASH and encrypt if more then 1 block \n" + "2: \n" + "RBIT v15.16b, v15.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + "B 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" 
+ "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "MOV x15, #16 \n" + "EOR w14, w14, w14 \n" + "SUB x15, x15, x11 \n" + "25: \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 25b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, 
v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.2d}, [%[ctr]] \n" + "ST1 {v22.2d}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v15.d[0], %x[aSz] \n" + "MOV v15.d[1], %x[sz] \n" + "REV64 v15.16b, v15.16b \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "ST1 {v0.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "ST1 {v0.2d}, [%[scratch]] \n" + 
"MOV x15, %x[tagSz] \n" + "44: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[tag]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 44b \n" + "SUB %[scratch], %[scratch], %x[tagSz] \n" + "41: \n" + + : [out] "+r" (out), [input] "+r" (in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); - if (authTagSz > AES_BLOCK_SIZE) { - XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); - } - else { - /* authTagSz can be smaller than AES_BLOCK_SIZE */ - XMEMCPY(authTag, scratch, authTagSz); - } return 0; } #endif /* WOLFSSL_AES_128 */ - #ifdef WOLFSSL_AES_192 /* internal function : see wc_AesGcmEncrypt */ static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { - word32 blocks; - word32 partial; byte counter[AES_BLOCK_SIZE]; - byte initialCounter[AES_BLOCK_SIZE]; - byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; - /* Noticed different optimization levels treated head of array different. - Some cases was stack pointer plus offset others was a regester containing - address. To make uniform for passing in to inline assembly code am using - pointers to the head of each local array. + * Some cases was stack pointer plus offset others was a register containing + * address. To make uniform for passing in to inline assembly code am using + * pointers to the head of each local array. 
*/ byte* ctr = counter; - byte* iCtr = initialCounter; - byte* xPt = x; - byte* sPt = scratch; - byte* keyPt; /* pointer to handle pointer advencment */ + byte* keyPt = (byte*)aes->key; - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + XMEMSET(counter, 0, AES_BLOCK_SIZE); if (ivSz == GCM_NONCE_MID_SZ) { - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = 1; + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); - GMULT(initialCounter, aes->H); - } - XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); - - - /* Hash in the Additional Authentication Data */ - XMEMSET(x, 0, AES_BLOCK_SIZE); - if (authInSz != 0 && authIn != NULL) { - blocks = authInSz / AES_BLOCK_SIZE; - partial = authInSz % AES_BLOCK_SIZE; - /* do as many blocks as possible */ - while (blocks--) { - xorbuf(x, authIn, AES_BLOCK_SIZE); - GMULT(x, aes->H); - authIn += AES_BLOCK_SIZE; - } - if (partial != 0) { - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, authIn, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } - /* do as many blocks as possible */ - blocks = sz / AES_BLOCK_SIZE; - partial = sz % AES_BLOCK_SIZE; - if (blocks > 0) { - keyPt = (byte*)aes->key; - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v13.2d}, [%[ctr]] \n" - - "#Create vector with the value 1 \n" - "MOVI v14.16b, #1 \n" - "USHR v14.2d, v14.2d, #56 \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EXT v14.16b, v14.16b, v22.16b, #8\n" - - - /*************************************************** - Get first out block for GHASH using AES encrypt - ***************************************************/ - "REV64 v13.16b, v13.16b \n" /* network order */ - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "EXT 
v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v16.2d}, %[inY] \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "USHR v23.2d, v23.2d, #56 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v30.16b \n" - "EOR v0.16b, v0.16b, v31.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v15.16b, v0.16b \n" - - "CBZ w11, 1f \n" /* only one block jump to final GHASH */ - "LD1 {v12.2d}, [%[input]], #16 \n" - - /*************************************************** - Interweave GHASH and encrypt if more then 1 block - ***************************************************/ - "2: \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "EOR v15.16b, v17.16b, v15.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ - "MOV v0.16b, v13.16b \n" - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "AESE 
v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v30.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v0.16b, v0.16b, v31.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "MOV v15.16b, v0.16b \n" - "RBIT v17.16b, v19.16b \n" - - "CBZ w11, 1f \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "B 2b \n" - - /*************************************************** - GHASH on last block - ***************************************************/ - "1: \n" - "EOR v15.16b, v17.16b, v15.16b \n" - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */ - - "#store current AES counter value \n" - "ST1 {v13.2d}, [%[ctrOut]] \n" - "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = 
C */ - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - - "#Reduce product from multiplication \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "EOR v19.16b, v19.16b, v21.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "RBIT v17.16b, v19.16b \n" - "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ - - :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) - ,[xOut] "=r" (xPt),"=m" (aes->H) - :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (in) - ,[inX] "4" (xPt), [inY] "m" (aes->H) - : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", - "v24","v25","v26","v27","v28","v29","v30","v31" - ); - } - - /* take care of partial block sizes leftover */ - if (partial != 0) { - IncrementGcmCounter(counter); - wc_AesEncrypt(aes, counter, scratch); - xorbuf(scratch, in, partial); - XMEMCPY(out, scratch, partial); - - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, out, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } - - /* Hash in the lengths of A and C in bits */ - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - FlattenSzInBits(&scratch[0], authInSz); - FlattenSzInBits(&scratch[8], sz); - xorbuf(x, scratch, AES_BLOCK_SIZE); - XMEMCPY(scratch, x, 
AES_BLOCK_SIZE); - - keyPt = (byte*)aes->key; __asm__ __volatile__ ( - - "LD1 {v16.16b}, [%[tag]] \n" - "LD1 {v17.16b}, %[h] \n" - "RBIT v16.16b, v16.16b \n" - - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ - "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n" - "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ - "LD1 {v0.2d}, [%[ctr]] \n" - - "#Set a register to all 0s using EOR \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... 
\n" "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v17.16b, v17.16b \n" "USHR v23.2d, v23.2d, #56 \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "AESE v0.16b, v30.16b \n" - "RBIT v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v31.16b \n" - "EOR v19.16b, v19.16b, v0.16b \n" - "STR q19, [%[out]] \n" + "CBZ %w[aSz], 120f \n" - :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) - :[tag] "0" (sPt), [Key] "1" (keyPt), - [ctr] "2" (iCtr) , [h] "m" (aes->H) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", - "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24" + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT 
v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + 
"EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v15.2d}, [%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB 
w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Encrypt plaintext and GHASH ciphertext \n" + "LDR w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, 
v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? \n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v10.1d, v16.1d \n" + "PMULL2 v19.1q, v10.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v10.1d, v20.1d \n" + "PMULL2 v20.1q, v10.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v19.16b \n" + 
+ "# First encrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE 
v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC 
v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC 
v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, 
v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b 
\n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "AESE v8.16b, 
v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef 
WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, 
v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, 
v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR 
v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" 
+#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First encrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v12.16b \n" + "EOR v27.16b, v27.16b, v13.16b \n" + "AESE v28.16b, v12.16b \n" + "EOR v28.16b, v28.16b, v13.16b \n" + "AESE v29.16b, v12.16b \n" + "EOR v29.16b, v29.16b, v13.16b \n" + "AESE v30.16b, v12.16b \n" + "EOR v30.16b, v30.16b, v13.16b \n" + + "# XOR in input \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + 
"# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + 
"AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, 
v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v12.16b \n" + "EOR v27.16b, v27.16b, v13.16b \n" + "AESE v28.16b, v12.16b \n" + "EOR v28.16b, v28.16b, v13.16b \n" + "AESE v29.16b, v12.16b \n" + "EOR v29.16b, v29.16b, v13.16b \n" + "AESE v30.16b, v12.16b \n" + "EOR v30.16b, v30.16b, v13.16b \n" + + "# XOR in input \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# 
x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef 
WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + + "10: \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "# Encrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v31.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + + "# When only one full block to encrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + + "# Interweave GHASH and encrypt if more then 1 block \n" + "2: \n" + "RBIT v15.16b, v15.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, 
v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + "B 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC 
v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "MOV x15, #16 \n" + "EOR w14, w14, w14 \n" + "SUB x15, x15, x11 \n" + "25: \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 25b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.2d}, [%[ctr]] \n" + "ST1 {v22.2d}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v15.d[0], %x[aSz] \n" + "MOV v15.d[1], %x[sz] \n" + "REV64 v15.16b, v15.16b \n" + "RBIT v15.16b, v15.16b \n" + "EOR 
v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "ST1 {v0.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV x15, %x[tagSz] \n" + "44: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[tag]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 44b \n" + "SUB %[scratch], %[scratch], %x[tagSz] \n" + "41: \n" + + : [out] "+r" (out), [input] "+r" (in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", 
"v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); - if (authTagSz > AES_BLOCK_SIZE) { - XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); - } - else { - /* authTagSz can be smaller than AES_BLOCK_SIZE */ - XMEMCPY(authTag, scratch, authTagSz); - } - return 0; } #endif /* WOLFSSL_AES_192 */ - #ifdef WOLFSSL_AES_256 /* internal function : see wc_AesGcmEncrypt */ static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { - word32 blocks; - word32 partial; byte counter[AES_BLOCK_SIZE]; - byte initialCounter[AES_BLOCK_SIZE]; - byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; - /* Noticed different optimization levels treated head of array different. - Some cases was stack pointer plus offset others was a regester containing - address. To make uniform for passing in to inline assembly code am using - pointers to the head of each local array. + * Some cases was stack pointer plus offset others was a register containing + * address. To make uniform for passing in to inline assembly code am using + * pointers to the head of each local array. 
*/ byte* ctr = counter; - byte* iCtr = initialCounter; - byte* xPt = x; - byte* sPt = scratch; - byte* keyPt; /* pointer to handle pointer advencment */ + byte* keyPt = (byte*)aes->key; - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + XMEMSET(counter, 0, AES_BLOCK_SIZE); if (ivSz == GCM_NONCE_MID_SZ) { - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = 1; + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); - GMULT(initialCounter, aes->H); - } - XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); - - - /* Hash in the Additional Authentication Data */ - XMEMSET(x, 0, AES_BLOCK_SIZE); - if (authInSz != 0 && authIn != NULL) { - blocks = authInSz / AES_BLOCK_SIZE; - partial = authInSz % AES_BLOCK_SIZE; - /* do as many blocks as possible */ - while (blocks--) { - xorbuf(x, authIn, AES_BLOCK_SIZE); - GMULT(x, aes->H); - authIn += AES_BLOCK_SIZE; - } - if (partial != 0) { - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, authIn, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); } - /* do as many blocks as possible */ - blocks = sz / AES_BLOCK_SIZE; - partial = sz % AES_BLOCK_SIZE; - if (blocks > 0) { - keyPt = (byte*)aes->key; - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v13.2d}, [%[ctr]] \n" - - "#Create vector with the value 1 \n" - "MOVI v14.16b, #1 \n" - "USHR v14.2d, v14.2d, #56 \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EXT v14.16b, v14.16b, v22.16b, #8\n" - - - /*************************************************** - Get first out block for GHASH using AES encrypt - ***************************************************/ - "REV64 v13.16b, v13.16b \n" /* network order */ - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "EXT 
v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v16.2d}, %[inY] \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "USHR v23.2d, v23.2d, #56 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v28.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v29.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v30.16b \n" - "EOR v0.16b, v0.16b, v31.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "MOV v15.16b, v0.16b \n" - - "CBZ w11, 1f \n" /* only one block jump to final GHASH */ - "LD1 {v12.2d}, [%[input]], #16 \n" - - /*************************************************** - Interweave GHASH and encrypt if more then 1 block - ***************************************************/ - "2: \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "EOR v15.16b, v17.16b, v15.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */ - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "PMULL v18.1q, v15.1d, v16.1d \n" 
/* a0 * b0 = C */ - "MOV v0.16b, v13.16b \n" - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v28.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v29.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v30.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v0.16b, v0.16b, v31.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - - "EOR v0.16b, v0.16b, v12.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "MOV v15.16b, v0.16b \n" - "RBIT v17.16b, v19.16b \n" - - "CBZ w11, 1f \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "B 2b \n" - - /*************************************************** - GHASH on last block - ***************************************************/ - "1: \n" - 
"EOR v15.16b, v17.16b, v15.16b \n" - "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */ - - "#store current AES counter value \n" - "ST1 {v13.2d}, [%[ctrOut]] \n" - "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */ - "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */ - "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */ - "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */ - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ - - "#Reduce product from multiplication \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */ - "EOR v19.16b, v19.16b, v21.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "RBIT v17.16b, v19.16b \n" - "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */ - - :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in) - ,[xOut] "=r" (xPt),"=m" (aes->H) - :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (in) - ,[inX] "4" (xPt), [inY] "m" (aes->H) - : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24" - ); - } - - /* take care of partial block sizes leftover */ - if (partial != 0) { - IncrementGcmCounter(counter); - wc_AesEncrypt(aes, counter, scratch); - xorbuf(scratch, in, partial); - XMEMCPY(out, scratch, partial); - - XMEMSET(scratch, 0, AES_BLOCK_SIZE); - XMEMCPY(scratch, out, partial); - xorbuf(x, scratch, AES_BLOCK_SIZE); - GMULT(x, aes->H); - } - - /* Hash in the lengths of A and C in bits */ - 
XMEMSET(scratch, 0, AES_BLOCK_SIZE); - FlattenSzInBits(&scratch[0], authInSz); - FlattenSzInBits(&scratch[8], sz); - xorbuf(x, scratch, AES_BLOCK_SIZE); - XMEMCPY(scratch, x, AES_BLOCK_SIZE); - - keyPt = (byte*)aes->key; __asm__ __volatile__ ( - - "LD1 {v16.16b}, [%[tag]] \n" - "LD1 {v17.16b}, %[h] \n" - "RBIT v16.16b, v16.16b \n" - - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */ - "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */ - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */ - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n" - "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */ - "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */ - "LD1 {v0.2d}, [%[ctr]] \n" - - "#Set a register to all 0s using EOR \n" - "EOR v22.16b, v22.16b, v22.16b \n" - "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */ - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */ - "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */ - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */ + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... 
\n" "MOVI v23.16b, #0x87 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v17.16b, v17.16b \n" "USHR v23.2d, v23.2d, #56 \n" - "PMULL2 v20.1q, v19.2d, v23.2d \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v20.16b, v22.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v19.16b, v19.16b, v21.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v21.16b, v22.16b, v20.16b, #8 \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "EOR v18.16b, v18.16b, v21.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v28.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v29.16b \n" - "AESMC v0.16b, v0.16b \n" - "PMULL v20.1q, v19.1d, v23.1d \n" - "EOR v19.16b, v18.16b, v20.16b \n" - "AESE v0.16b, v30.16b \n" - "RBIT v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v31.16b \n" - "EOR v19.16b, v19.16b, v0.16b \n" - "STR q19, [%[out]] \n" + "CBZ %w[aSz], 120f \n" - :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr) - :[tag] "0" (sPt), [Key] "1" (keyPt), - [ctr] "2" (iCtr) , [h] "m" (aes->H) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", - "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23", - "v24","v25","v26","v27","v28","v29","v30","v31" + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT 
v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 
v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v15.2d}, [%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 
120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Encrypt plaintext and GHASH ciphertext \n" + "LDR w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# 
Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? \n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v10.1d, v16.1d \n" + "PMULL2 v19.1q, v10.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v10.1d, v20.1d \n" + "PMULL2 v20.1q, v10.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, 
#8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v19.16b \n" + + "# First encrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + 
"AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + 
"AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], 
#176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #192] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #208] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, 
v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" 
+ "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 
v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" 
+ "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, 
v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE 
v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #192] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #208] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE 
v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v12.16b, v12.16b, v5.16b \n" + "EOR v13.16b, v13.16b, v6.16b \n" + "EOR v14.16b, v14.16b, v7.16b \n" + "EOR v15.16b, v15.16b, v8.16b \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "ST1 {v12.2d-v15.2d}, [%[out]], #64 \n \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" 
+#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v9.1d \n" + "PMULL2 v3.1q, v15.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v9.1d \n" + "PMULL2 v15.1q, v15.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v10.1d \n" + "PMULL2 v3.1q, v14.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v10.1d \n" + "PMULL2 v14.1q, v14.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, 
v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v11.1d \n" + "PMULL2 v3.1q, v13.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v11.1d \n" + "PMULL2 v13.1q, v13.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v4.1d \n" + "PMULL2 v3.1q, v12.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v4.1d \n" + "PMULL2 v12.1q, v12.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First encrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v12.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v12.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v12.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v12.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + "AESE v27.16b, v13.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v13.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v13.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v13.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v14.16b \n" + "EOR v27.16b, v27.16b, v15.16b \n" + "AESE v28.16b, v14.16b \n" + "EOR v28.16b, v28.16b, v15.16b \n" + "AESE v29.16b, v14.16b \n" + "EOR v29.16b, v29.16b, v15.16b \n" + "AESE v30.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + + "# XOR in input \n" + "EOR 
v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC 
v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + 
"PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v12.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v12.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v12.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v12.16b \n" + "AESMC v30.16b, 
v30.16b \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + "AESE v27.16b, v13.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v13.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v13.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v13.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v14.16b \n" + "EOR v27.16b, v27.16b, v15.16b \n" + "AESE v28.16b, v14.16b \n" + "EOR v28.16b, v28.16b, v15.16b \n" + "AESE v29.16b, v14.16b \n" + "EOR v29.16b, v29.16b, v15.16b \n" + "AESE v30.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + + "# XOR in input \n" + "EOR v18.16b, v18.16b, v27.16b \n" + "EOR v19.16b, v19.16b, v28.16b \n" + "EOR v20.16b, v20.16b, v29.16b \n" + "EOR v21.16b, v21.16b, v30.16b \n" + "# Store cipher text \n" + "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, 
v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + + "10: \n" + "SUB %[Key], %[Key], #32 \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "# Encrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v31.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + 
"AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]] \n" + "SUB %[Key], %[Key], #32 \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + + "# When only one full block to encrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + + "# Interweave GHASH and encrypt if more then 1 block \n" + "2: \n" + "RBIT v15.16b, v15.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b 
\n" + "LD1 {v12.2d, v13.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]] \n" + "SUB %[Key], %[Key], #32 \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[out]], #16 \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "LD1 {v31.2d}, [%[input]], #16 \n" + "B 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + 
"AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]] \n" + "SUB %[Key], %[Key], #32 \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v15.16b, v0.16b, v31.16b \n \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "MOV x15, #16 \n" + "EOR w14, w14, w14 \n" + "SUB x15, x15, x11 \n" + "25: \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 25b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.2d}, [%[ctr]] \n" + "ST1 {v22.2d}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v15.d[0], %x[aSz] \n" + "MOV v15.d[1], %x[sz] \n" + "REV64 v15.16b, v15.16b \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d 
\n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]], #32 \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v12.2d, v13.2d}, [%[Key]] \n" + "SUB %[Key], %[Key], #32 \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "ST1 {v0.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV x15, %x[tagSz] \n" + "44: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[tag]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 44b \n" + "SUB %[scratch], %[scratch], %x[tagSz] \n" + "41: \n" + + : [out] "+r" (out), [input] "+r" (in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", 
"v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); - if (authTagSz > AES_BLOCK_SIZE) { - XMEMCPY(authTag, scratch, AES_BLOCK_SIZE); - } - else { - /* authTagSz can be smaller than AES_BLOCK_SIZE */ - XMEMCPY(authTag, scratch, authTagSz); - } - return 0; } #endif /* WOLFSSL_AES_256 */ - /* aarch64 with PMULL and PMULL2 * Encrypt and tag data using AES with GCM mode. * aes: Aes structure having already been set with set key function @@ -2552,20 +7189,17 @@ static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, * Algorithm 5 */ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { /* sanity checks */ - if (aes == NULL || (iv == NULL && ivSz > 0) || - (authTag == NULL) || - (authIn == NULL && authInSz > 0) || - (ivSz == 0)) { + if ((aes == NULL) || (iv == NULL && ivSz > 0) || (authTag == NULL) || + ((authIn == NULL) && (authInSz > 0)) || (ivSz == 0)) { WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); return BAD_FUNC_ARG; } - if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) { + if ((authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) || (authTagSz > AES_BLOCK_SIZE)) { WOLFSSL_MSG("GcmEncrypt authTagSz error"); return BAD_FUNC_ARG; } @@ -2592,8 +7226,5315 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, } } - #ifdef HAVE_AES_DECRYPT +#ifdef WOLFSSL_AES_128 +/* internal function : see wc_AesGcmDecrypt */ +static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte counter[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + byte *ctr = counter; + byte* keyPt = (byte*)aes->key; + int ret = 0; + + XMEMSET(counter, 0, 
AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); + } + + __asm__ __volatile__ ( + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... \n" + "MOVI v23.16b, #0x87 \n" + "EOR v17.16b, v17.16b, v17.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "CBZ %w[aSz], 120f \n" + + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + 
"EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif 
/* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + "111: \n" + "LD1 {v15.2d}, [%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Decrypt ciphertext and GHASH ciphertext \n" + "LDR 
w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? 
\n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v9.1d, v16.1d \n" + "PMULL2 v19.1q, v9.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v9.1d, v20.1d \n" + "PMULL2 v20.1q, v9.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v19.16b \n" + + "# First decrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, 
w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" 
+ "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + 
"AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, 
v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, 
v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "AESE 
v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 v13.1q, v13.2d, v10.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef 
WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, 
v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" 
+ "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, 
v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, 
v0.16b, v3.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 v13.1q, v13.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First decrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "EOR v27.16b, v27.16b, v11.16b \n" + "AESE v28.16b, v10.16b \n" + "EOR v28.16b, v28.16b, v11.16b \n" + "AESE v29.16b, v10.16b \n" + "EOR v29.16b, v29.16b, v11.16b \n" + "AESE v30.16b, v10.16b \n" + "EOR v30.16b, v30.16b, v11.16b \n" + + "# XOR in input \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" 
+ "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* 
WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" 
+ "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "EOR v27.16b, v27.16b, v11.16b \n" + "AESE v28.16b, v10.16b \n" + "EOR v28.16b, v28.16b, v11.16b \n" + "AESE v29.16b, v10.16b \n" + "EOR v29.16b, v29.16b, v11.16b \n" + "AESE v30.16b, v10.16b \n" + "EOR v30.16b, v30.16b, v11.16b \n" + + "# XOR in input \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, 
#8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + + "10: \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "# Decrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v28.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + 
"AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "# When only one full block to decrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "# Interweave GHASH and decrypt if more then 1 block \n" + "2: \n" + "RBIT v28.16b, v28.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "LD1 {v28.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "CMP w11, #16 \n" + "BGE 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, 
v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "RBIT v31.16b, v31.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v31.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "RBIT v31.16b, v31.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "EOR v0.16b, v0.16b, v31.16b \n \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + 
"LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "SUB %[scratch], %[scratch], x11 \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.16b}, [%[ctr]] \n" + "ST1 {v22.16b}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v28.d[0], %x[aSz] \n" + "MOV v28.d[1], %x[sz] \n" + "REV64 v28.16b, v28.16b \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "LD1 {v1.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "EOR v1.16b, v1.16b, v1.16b \n" + "MOV x15, %x[tagSz] \n" + "ST1 {v1.2d}, [%[scratch]] \n" + "43: \n" + "LDRB w14, [%[tag]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 43b \n" + "SUB %[scratch], %[scratch], 
%x[tagSz] \n" + "LD1 {v1.2d}, [%[scratch]] \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV w14, #16 \n" + "SUB w14, w14, %w[tagSz] \n" + "ADD %[scratch], %[scratch], %x[tagSz] \n" + "44: \n" + "STRB wzr, [%[scratch]], #1 \n" + "SUB w14, w14, #1 \n" + "CBNZ w14, 44b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v0.2d}, [%[scratch]] \n" + "41: \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "MOV v1.D[0], v0.D[1] \n" + "EOR v0.8b, v0.8b, v1.8b \n" + "MOV %x[ret], v0.D[0] \n" + "CMP %x[ret], #0 \n" + "MOV w11, #-180 \n" + "CSETM %w[ret], ne \n" + "AND %w[ret], %w[ret], w11 \n" + + : [out] "+r" (out), [input] "+r" (in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn), + [ret] "+r" (ret) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + ); + + return ret; +} +#endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 +/* internal function : see wc_AesGcmDecrypt */ +static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte counter[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + byte *ctr = counter; + byte* keyPt = (byte*)aes->key; + int ret = 0; + + XMEMSET(counter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); + } + + __asm__ __volatile__ ( + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... 
\n" + "MOVI v23.16b, #0x87 \n" + "EOR v17.16b, v17.16b, v17.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "CBZ %w[aSz], 120f \n" + + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 
v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + 
"111: \n" + "LD1 {v15.2d}, [%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Decrypt ciphertext and GHASH ciphertext \n" + "LDR w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 
\n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? 
\n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v9.1d, v16.1d \n" + "PMULL2 v19.1q, v9.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v9.1d, v20.1d \n" + "PMULL2 v20.1q, v9.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v19.16b \n" + + "# First decrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, 
w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" 
+ "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + 
"AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + 
"AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + 
"# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* 
WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + 
"AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 v13.1q, v13.2d, v10.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, 
v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + 
"AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR 
v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "EOR v17.16b, 
v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v13.16b, 
v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 v13.1q, v13.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First decrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v12.16b \n" + "EOR v27.16b, v27.16b, v13.16b \n" + "AESE v28.16b, v12.16b \n" + "EOR v28.16b, v28.16b, v13.16b \n" + "AESE v29.16b, v12.16b \n" + "EOR v29.16b, v29.16b, v13.16b \n" + "AESE v30.16b, v12.16b \n" + "EOR v30.16b, v30.16b, v13.16b \n" + + "# XOR in input \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + 
"# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + 
"AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, 
v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v12.16b \n" + "EOR v27.16b, v27.16b, v13.16b \n" + "AESE v28.16b, v12.16b \n" + "EOR v28.16b, v28.16b, v13.16b \n" + "AESE v29.16b, v12.16b \n" + "EOR v29.16b, v29.16b, v13.16b \n" + "AESE v30.16b, v12.16b \n" + "EOR v30.16b, v30.16b, v13.16b \n" + + "# XOR in input \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# 
x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef 
WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + + "10: \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "# Decrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v28.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "# When only one full block to decrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "# Interweave GHASH and decrypt if more then 1 block \n" + "2: \n" + "RBIT v28.16b, v28.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + 
"EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "LD1 {v28.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "CMP w11, #16 \n" + "BGE 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "RBIT v31.16b, v31.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v31.16b \n" + "PMULL v18.1q, v17.1d, 
v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "RBIT v31.16b, v31.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "EOR v0.16b, v0.16b, v31.16b \n \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "SUB %[scratch], %[scratch], x11 \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.16b}, [%[ctr]] \n" + "ST1 {v22.16b}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v28.d[0], %x[aSz] \n" + "MOV v28.d[1], %x[sz] \n" + "REV64 v28.16b, v28.16b \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, 
v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "LD1 {v1.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "EOR v1.16b, v1.16b, v1.16b \n" + "MOV x15, %x[tagSz] \n" + "ST1 {v1.2d}, [%[scratch]] \n" + "43: \n" + "LDRB w14, [%[tag]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 43b \n" + "SUB %[scratch], %[scratch], %x[tagSz] \n" + "LD1 {v1.2d}, [%[scratch]] \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV w14, #16 \n" + "SUB w14, w14, %w[tagSz] \n" + "ADD %[scratch], %[scratch], %x[tagSz] \n" + "44: \n" + "STRB wzr, [%[scratch]], #1 \n" + "SUB w14, w14, #1 \n" + "CBNZ w14, 44b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v0.2d}, [%[scratch]] \n" + "41: \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "MOV v1.D[0], v0.D[1] \n" + "EOR v0.8b, v0.8b, v1.8b \n" + "MOV %x[ret], v0.D[0] \n" + "CMP %x[ret], #0 \n" + "MOV w11, #-180 \n" + "CSETM %w[ret], ne \n" + "AND %w[ret], %w[ret], w11 \n" + + : [out] "+r" (out), [input] "+r" 
(in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn), + [ret] "+r" (ret) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + ); + + return ret; +} +#endif /* WOLFSSL_AES_192 */ +#ifdef WOLFSSL_AES_256 +/* internal function : see wc_AesGcmDecrypt */ +static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte counter[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + byte *ctr = counter; + byte* keyPt = (byte*)aes->key; + int ret = 0; + + XMEMSET(counter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(counter, iv, GCM_NONCE_MID_SZ); + counter[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); + } + + __asm__ __volatile__ ( + "LD1 {v16.16b}, %[h] \n" + "# v23 = 0x00000000000000870000000000000087 reflected 0xe1.... 
\n" + "MOVI v23.16b, #0x87 \n" + "EOR v17.16b, v17.16b, v17.16b \n" + "USHR v23.2d, v23.2d, #56 \n" + "CBZ %w[aSz], 120f \n" + + "MOV w12, %w[aSz] \n" + + "# GHASH AAD \n" + "CMP x12, #64 \n" + "BLT 115f \n" + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "114: \n" + "LD1 {v18.2d-v21.2d}, [%[aad]], #64 \n" + "SUB x12, x12, #64 \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v30.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 
v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v30.16b, #8 \n" + "PMULL2 v14.1q, v30.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "CMP x12, #64 \n" + "BGE 114b \n" + "CBZ x12, 120f \n" + "115: \n" + "CMP x12, #16 \n" + "BLT 112f \n" + 
"111: \n" + "LD1 {v15.2d}, [%[aad]], #16 \n" + "SUB x12, x12, #16 \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "CMP x12, #16 \n" + "BGE 111b \n" + "CBZ x12, 120f \n" + "112: \n" + "# Partial AAD \n" + "EOR v15.16b, v15.16b, v15.16b \n" + "MOV x14, x12 \n" + "ST1 {v15.2d}, [%[scratch]] \n" + "113: \n" + "LDRB w13, [%[aad]], #1 \n" + "STRB w13, [%[scratch]], #1 \n" + "SUB x14, x14, #1 \n" + "CBNZ x14, 113b \n" + "SUB %[scratch], %[scratch], x12 \n" + "LD1 {v15.2d}, [%[scratch]] \n" + "RBIT v15.16b, v15.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "120: \n" + + "# Decrypt ciphertext and GHASH ciphertext \n" + "LDR w12, [%[ctr], #12] \n" + "MOV w11, %w[sz] \n" + "REV w12, w12 \n" + "CMP w11, #64 \n" + "BLT 80f \n" + "CMP %w[aSz], #64 \n" + "BGE 82f \n" + + "# Calculate H^[1-4] - GMULT partials \n" + "# Square H => H^2 \n" + "PMULL2 v19.1q, v16.2d, v16.2d \n" + "PMULL v18.1q, v16.1d, v16.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 
\n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v24.16b, v18.16b, v19.16b \n" + "# Multiply H and H^2 => H^3 \n" + "PMULL v18.1q, v24.1d, v16.1d \n" + "PMULL2 v19.1q, v24.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v24.1d, v20.1d \n" + "PMULL2 v20.1q, v24.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v25.16b, v18.16b, v20.16b \n" + "# Square H^2 => H^4 \n" + "PMULL2 v19.1q, v24.2d, v24.2d \n" + "PMULL v18.1q, v24.1d, v24.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v26.16b, v18.16b, v19.16b \n" + "82: \n" + "# Should we do 8 blocks at a time? 
\n" + "CMP w11, #512 \n" + "BLT 80f \n" + + "# Calculate H^[5-8] - GMULT partials \n" + "# Multiply H and H^4 => H^5 \n" + "PMULL v18.1q, v26.1d, v16.1d \n" + "PMULL2 v19.1q, v26.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v26.1d, v20.1d \n" + "PMULL2 v20.1q, v26.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v4.16b, v18.16b, v20.16b \n" + "# Square H^3 - H^6 \n" + "PMULL2 v19.1q, v25.2d, v25.2d \n" + "PMULL v18.1q, v25.1d, v25.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v9.16b, v18.16b, v19.16b \n" + "# Multiply H and H^6 => H^7 \n" + "PMULL v18.1q, v9.1d, v16.1d \n" + "PMULL2 v19.1q, v9.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v9.1d, v20.1d \n" + "PMULL2 v20.1q, v9.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v10.16b, v18.16b, v20.16b \n" + "# Square H^4 => H^8 \n" + "PMULL2 v19.1q, v26.2d, v26.2d \n" + "PMULL v18.1q, v26.1d, v26.1d \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v19.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v11.16b, v18.16b, v19.16b \n" + + "# First decrypt - no GHASH \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, 
w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" 
+ "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + 
"AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b 
\n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #192] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #208] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR 
v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "81: \n" + "LDR q1, [%[Key]] \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "LD1 {v5.2d}, [%[ctr]] \n" + "ADD w14, w12, #2 \n" + "MOV v6.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v7.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v8.16b, v5.16b \n" + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "REV w15, w15 \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "REV w14, w14 \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "REV w13, w13 \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "REV w16, w12 \n" + "MOV v5.S[3], w15 \n" + "MOV v6.S[3], w14 \n" + "MOV v7.S[3], w13 \n" + "MOV v8.S[3], w16 \n" + "# Calculate next 4 counters (+5-8) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v5.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v5.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v5.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v5.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "REV w15, w15 \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "REV w14, w14 \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "REV w13, w13 \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 8 counters \n" + "LDR q22, [%[Key], #16] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, 
v7.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "LDR q1, [%[Key], #32] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR 
v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "LDR q22, [%[Key], #48] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "LDR q1, [%[Key], #64] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 v13.1q, v13.2d, v10.2d \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, 
v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" + "LDR q22, [%[Key], #80] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #128 \n" + "LDR q1, [%[Key], #96] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b 
\n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #112] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #128] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #144] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #160] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, 
v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q22, [%[Key], #176] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LDR q1, [%[Key], #192] \n" + "AESE v5.16b, v22.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v22.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v22.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v22.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v22.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v22.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v22.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v22.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v12.2d-v15.2d}, [%[input]], #64 \n" + "LDP q22, q31, [%[Key], #208] \n" + "AESE v5.16b, v1.16b \n" + "AESMC v5.16b, v5.16b \n" + "AESE v6.16b, v1.16b \n" + "AESMC v6.16b, v6.16b \n" + "AESE v7.16b, v1.16b \n" + "AESMC v7.16b, v7.16b \n" + "AESE v8.16b, v1.16b \n" + "AESMC v8.16b, v8.16b \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v5.16b, v22.16b \n" + "EOR v5.16b, v5.16b, v31.16b \n" + "AESE v6.16b, v22.16b \n" + "EOR v6.16b, v6.16b, v31.16b \n" + "AESE v7.16b, v22.16b \n" + "EOR v7.16b, 
v7.16b, v31.16b \n" + "AESE v8.16b, v22.16b \n" + "EOR v8.16b, v8.16b, v31.16b \n" + "AESE v27.16b, v22.16b \n" + "EOR v27.16b, v27.16b, v31.16b \n" + "AESE v28.16b, v22.16b \n" + "EOR v28.16b, v28.16b, v31.16b \n" + "AESE v29.16b, v22.16b \n" + "EOR v29.16b, v29.16b, v31.16b \n" + "AESE v30.16b, v22.16b \n" + "EOR v30.16b, v30.16b, v31.16b \n" + + "# XOR in input \n" + "EOR v5.16b, v5.16b, v12.16b \n" + "EOR v6.16b, v6.16b, v13.16b \n" + "EOR v7.16b, v7.16b, v14.16b \n" + "EOR v8.16b, v8.16b, v15.16b \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "ST1 {v5.2d-v8.2d}, [%[out]], #64 \n \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + + "CMP w11, #128 \n" + "BGE 81b \n" + + "# GHASH - 8 blocks \n" + "RBIT v12.16b, v12.16b \n" + "RBIT v13.16b, v13.16b \n" + "RBIT v14.16b, v14.16b \n" + "RBIT v15.16b, v15.16b \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v12.16b, v12.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v3.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v3.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v2.1q, v20.1d, v24.1d \n" + "PMULL2 v3.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v3.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v3.16b \n" +#else + "EOR v20.16b, v20.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v2.1q, v19.1d, v25.1d \n" + "PMULL2 v3.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v19.16b, 
v19.16b, v19.16b, #8 \n" + "PMULL v3.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v3.16b \n" +#else + "EOR v19.16b, v19.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v2.1q, v18.1d, v26.1d \n" + "PMULL2 v3.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v3.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v3.16b \n" +#else + "EOR v18.16b, v18.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^5 \n" + "PMULL v2.1q, v15.1d, v4.1d \n" + "PMULL2 v3.1q, v15.2d, v4.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "PMULL v3.1q, v15.1d, v4.1d \n" + "PMULL2 v15.1q, v15.2d, v4.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v15.16b, v3.16b \n" +#else + "EOR v15.16b, v15.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^6 \n" + "PMULL v2.1q, v14.1d, v9.1d \n" + "PMULL2 v3.1q, v14.2d, v9.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v14.16b, v14.16b, v14.16b, #8 \n" + "PMULL v3.1q, v14.1d, v9.1d \n" + "PMULL2 v14.1q, v14.2d, v9.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v14.16b, v3.16b \n" +#else + "EOR v14.16b, v14.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^7 \n" + "PMULL v2.1q, v13.1d, v10.1d \n" + "PMULL2 v3.1q, v13.2d, v10.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "PMULL v3.1q, v13.1d, v10.1d \n" + "PMULL2 
v13.1q, v13.2d, v10.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v13.16b, v3.16b \n" +#else + "EOR v13.16b, v13.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v13.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^8 \n" + "PMULL v2.1q, v12.1d, v11.1d \n" + "PMULL2 v3.1q, v12.2d, v11.2d \n" + "EOR v17.16b, v17.16b, v2.16b \n" + "EOR v0.16b, v0.16b, v3.16b \n" + "EXT v12.16b, v12.16b, v12.16b, #8 \n" + "PMULL v3.1q, v12.1d, v11.1d \n" + "PMULL2 v12.1q, v12.2d, v11.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v12.16b, v3.16b \n" +#else + "EOR v12.16b, v12.16b, v3.16b \n" + "EOR v31.16b, v31.16b, v12.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v3.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v2.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v3.16b, v3.16b, v31.16b, v2.16b \n" +#else + "EOR v3.16b, v3.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v3.16b, v3.16b, v2.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v2.1q, v3.2d, v23.2d \n" + "MOV v17.D[1], v3.D[0] \n" + "EOR v17.16b, v17.16b, v2.16b \n" + + "80: \n" + "LD1 {v22.2d}, [%[ctr]] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" + "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" + "LD1 {v14.2d-v15.2d}, [%[Key]] \n" + "# Can we do 4 blocks at a time? 
\n" + "CMP w11, #64 \n" + "BLT 10f \n" + + "# First decrypt - no GHASH \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "REV w15, w15 \n" + "REV w14, w14 \n" + "REV w13, w13 \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v3.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, 
v27.16b \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v12.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v12.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v12.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v12.16b \n" + "AESMC v30.16b, v30.16b \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + "AESE v27.16b, v13.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v13.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v13.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v13.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v14.16b \n" + "EOR v27.16b, v27.16b, v15.16b \n" + "AESE v28.16b, v14.16b \n" + "EOR v28.16b, v28.16b, v15.16b \n" + "AESE v29.16b, v14.16b \n" + "EOR v29.16b, v29.16b, v15.16b \n" + "AESE v30.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + + "# XOR in input \n" + "EOR 
v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BLT 12f \n" + + "11: \n" + "# Calculate next 4 counters (+1-4) \n" + "ADD w15, w12, #1 \n" + "MOV v27.16b, v22.16b \n" + "ADD w14, w12, #2 \n" + "MOV v28.16b, v22.16b \n" + "ADD w13, w12, #3 \n" + "MOV v29.16b, v22.16b \n" + "ADD w12, w12, #4 \n" + "MOV v30.16b, v22.16b \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "REV w15, w15 \n" + "RBIT v19.16b, v19.16b \n" + "REV w14, w14 \n" + "RBIT v20.16b, v20.16b \n" + "REV w13, w13 \n" + "RBIT v21.16b, v21.16b \n" + "REV w16, w12 \n" + "MOV v27.S[3], w15 \n" + "MOV v28.S[3], w14 \n" + "MOV v29.S[3], w13 \n" + "MOV v30.S[3], w16 \n" + + "# Encrypt 4 counters \n" + "AESE v27.16b, v1.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "AESE v28.16b, v1.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "AESE v29.16b, v1.16b \n" + "AESMC v29.16b, v29.16b \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "AESE v30.16b, v1.16b \n" + "AESMC v30.16b, v30.16b \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "AESE v27.16b, v2.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "AESE v28.16b, v2.16b \n" + "AESMC v28.16b, v28.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "AESE v29.16b, v2.16b \n" + "AESMC v29.16b, v29.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v30.16b, v2.16b \n" + "AESMC v30.16b, v30.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "AESE v27.16b, v3.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" + "AESE v28.16b, v3.16b \n" + "AESMC 
v28.16b, v28.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v29.16b, v3.16b \n" + "AESMC v29.16b, v29.16b \n" + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "AESE v30.16b, v3.16b \n" + "AESMC v30.16b, v30.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v27.16b, v4.16b \n" + "AESMC v27.16b, v27.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "AESE v28.16b, v4.16b \n" + "AESMC v28.16b, v28.16b \n" + "PMULL v15.1q, v19.1d, v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" + "AESE v29.16b, v4.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v4.16b \n" + "AESMC v30.16b, v30.16b \n" + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "AESE v27.16b, v5.16b \n" + "AESMC v27.16b, v27.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v28.16b, v5.16b \n" + "AESMC v28.16b, v28.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "AESE v29.16b, v5.16b \n" + "AESMC v29.16b, v29.16b \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" + "AESE v30.16b, v5.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "SUB w11, w11, #64 \n" + "AESE v27.16b, v6.16b \n" + "AESMC v27.16b, v27.16b \n" + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "AESE v28.16b, v6.16b \n" + "AESMC v28.16b, v28.16b \n" + 
"PMULL2 v14.1q, v0.2d, v23.2d \n" + "AESE v29.16b, v6.16b \n" + "AESMC v29.16b, v29.16b \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v30.16b, v6.16b \n" + "AESMC v30.16b, v30.16b \n" +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "AESE v27.16b, v7.16b \n" + "AESMC v27.16b, v27.16b \n" + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "AESE v28.16b, v7.16b \n" + "AESMC v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "AESE v29.16b, v7.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v7.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v8.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v8.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v8.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v8.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v9.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v9.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v9.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v9.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v10.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v10.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v10.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v10.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v11.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v11.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v11.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v11.16b \n" + "AESMC v30.16b, v30.16b \n" + "# Load plaintext \n" + "LD1 {v18.2d-v21.2d}, [%[input]], #64 \n" + "AESE v27.16b, v12.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v12.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v12.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v12.16b \n" + "AESMC v30.16b, 
v30.16b \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + "AESE v27.16b, v13.16b \n" + "AESMC v27.16b, v27.16b \n" + "AESE v28.16b, v13.16b \n" + "AESMC v28.16b, v28.16b \n" + "AESE v29.16b, v13.16b \n" + "AESMC v29.16b, v29.16b \n" + "AESE v30.16b, v13.16b \n" + "AESMC v30.16b, v30.16b \n" + "AESE v27.16b, v14.16b \n" + "EOR v27.16b, v27.16b, v15.16b \n" + "AESE v28.16b, v14.16b \n" + "EOR v28.16b, v28.16b, v15.16b \n" + "AESE v29.16b, v14.16b \n" + "EOR v29.16b, v29.16b, v15.16b \n" + "AESE v30.16b, v14.16b \n" + "EOR v30.16b, v30.16b, v15.16b \n" + + "# XOR in input \n" + "EOR v27.16b, v27.16b, v18.16b \n" + "EOR v28.16b, v28.16b, v19.16b \n" + "EOR v29.16b, v29.16b, v20.16b \n" + "EOR v30.16b, v30.16b, v21.16b \n" + "# Store cipher text \n" + "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" + "CMP w11, #64 \n" + "BGE 11b \n" + + "12: \n" + "# GHASH - 4 blocks \n" + "RBIT v18.16b, v18.16b \n" + "RBIT v19.16b, v19.16b \n" + "RBIT v20.16b, v20.16b \n" + "RBIT v21.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v17.16b \n" + "# x[0-2] = C * H^1 \n" + "PMULL v17.1q, v21.1d, v16.1d \n" + "PMULL2 v0.1q, v21.2d, v16.2d \n" + "EXT v21.16b, v21.16b, v21.16b, #8 \n" + "PMULL v31.1q, v21.1d, v16.1d \n" + "PMULL2 v15.1q, v21.2d, v16.2d \n" + "EOR v31.16b, v31.16b, v15.16b \n" + "# x[0-2] += C * H^2 \n" + "PMULL v14.1q, v20.1d, v24.1d \n" + "PMULL2 v15.1q, v20.2d, v24.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v20.16b, v20.16b, v20.16b, #8 \n" + "PMULL v15.1q, v20.1d, v24.1d \n" + "PMULL2 v20.1q, v20.2d, v24.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v20.16b, v15.16b \n" +#else + "EOR v20.16b, v20.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v20.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^3 \n" + "PMULL v14.1q, v19.1d, v25.1d \n" + "PMULL2 v15.1q, v19.2d, v25.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "PMULL v15.1q, v19.1d, 
v25.1d \n" + "PMULL2 v19.1q, v19.2d, v25.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v19.16b, v15.16b \n" +#else + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v19.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# x[0-2] += C * H^4 \n" + "PMULL v14.1q, v18.1d, v26.1d \n" + "PMULL2 v15.1q, v18.2d, v26.2d \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "EXT v18.16b, v18.16b, v18.16b, #8 \n" + "PMULL v15.1q, v18.1d, v26.1d \n" + "PMULL2 v18.1q, v18.2d, v26.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v31.16b, v31.16b, v18.16b, v15.16b \n" +#else + "EOR v18.16b, v18.16b, v15.16b \n" + "EOR v31.16b, v31.16b, v18.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "# Reduce X = x[0-2] \n" + "EXT v15.16b, v17.16b, v0.16b, #8 \n" + "PMULL2 v14.1q, v0.2d, v23.2d \n" +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR3 v15.16b, v15.16b, v31.16b, v14.16b \n" +#else + "EOR v15.16b, v15.16b, v31.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA3 + "EOR v15.16b, v15.16b, v14.16b \n" +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ + "PMULL2 v14.1q, v15.2d, v23.2d \n" + "MOV v17.D[1], v15.D[0] \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + + "10: \n" + "CBZ w11, 30f \n" + "CMP w11, #16 \n" + "BLT 20f \n" + "LD1 {v14.2d, v15.2d}, [%[Key]] \n" + "# Decrypt first block for GHASH \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "LD1 {v28.2d}, 
[%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + + "# When only one full block to decrypt go straight to GHASH \n" + "CMP w11, 16 \n" + "BLT 1f \n" + + "# Interweave GHASH and decrypt if more then 1 block \n" + "2: \n" + "RBIT v28.16b, v28.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "SUB w11, w11, #16 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "LD1 {v28.2d}, [%[input]], #16 \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE 
v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n \n" + "EOR v0.16b, v0.16b, v28.16b \n \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "CMP w11, #16 \n" + "BGE 2b \n" + + "# GHASH on last block \n" + "1: \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "MOV v18.D[1], v21.D[0] \n" + "EOR v17.16b, v18.16b, v20.16b \n" + + "20: \n" + "CBZ w11, 30f \n" + "EOR v31.16b, v31.16b, v31.16b \n" + "MOV x15, x11 \n" + "ST1 {v31.2d}, [%[scratch]] \n" + "23: \n" + "LDRB w14, [%[input]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 23b \n" + "SUB %[scratch], %[scratch], x11 \n" + "LD1 {v31.2d}, [%[scratch]] \n" + "RBIT v31.16b, v31.16b \n" + "ADD w12, w12, #1 \n" + "MOV v0.16b, v22.16b \n" + "REV w13, w12 \n" + "MOV v0.S[3], w13 \n" + "EOR v17.16b, v17.16b, v31.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE 
v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "RBIT v31.16b, v31.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n \n" + "EOR v0.16b, v0.16b, v31.16b \n \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV x15, x11 \n" + "24: \n" + "LDRB w14, [%[scratch]], #1 \n" + "STRB w14, [%[out]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 24b \n" + "SUB %[scratch], %[scratch], x11 \n" + + "30: \n" + "# store current counter value at the end \n" + "REV w13, w12 \n" + "MOV v22.S[3], w13 \n" + "LD1 {v0.16b}, [%[ctr]] \n" + "ST1 {v22.16b}, [%[ctr]] \n" + + "LSL %x[aSz], %x[aSz], #3 \n" + "LSL %x[sz], %x[sz], #3 \n" + "MOV v28.d[0], %x[aSz] \n" + "MOV v28.d[1], %x[sz] \n" + "REV64 v28.16b, v28.16b \n" + "RBIT v28.16b, v28.16b \n" + "EOR v17.16b, v17.16b, v28.16b \n" + "PMULL v18.1q, v17.1d, v16.1d \n" + "PMULL2 v19.1q, v17.2d, v16.2d \n" + "EXT v20.16b, v16.16b, v16.16b, #8 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "PMULL v21.1q, v17.1d, v20.1d \n" + "PMULL2 v20.1q, v17.2d, v20.2d \n" + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v20.16b, v20.16b, v21.16b \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v21.16b, v18.16b, v19.16b, #8 \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "# Reduce \n" + "PMULL2 v20.1q, v19.2d, v23.2d \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v21.16b, v21.16b, v20.16b \n" + "AESE v0.16b, v7.16b \n" 
+ "AESMC v0.16b, v0.16b \n" + "PMULL2 v20.1q, v21.2d, v23.2d \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "MOV v18.D[1], v21.D[0] \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "EOR v17.16b, v18.16b, v20.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n \n" + "RBIT v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v17.16b \n \n" + "CMP %w[tagSz], #16 \n" + "BNE 40f \n" + "LD1 {v1.2d}, [%[tag]] \n" + "B 41f \n" + "40: \n" + "EOR v1.16b, v1.16b, v1.16b \n" + "MOV x15, %x[tagSz] \n" + "ST1 {v1.2d}, [%[scratch]] \n" + "43: \n" + "LDRB w14, [%[tag]], #1 \n" + "STRB w14, [%[scratch]], #1 \n" + "SUB x15, x15, #1 \n" + "CBNZ x15, 43b \n" + "SUB %[scratch], %[scratch], %x[tagSz] \n" + "LD1 {v1.2d}, [%[scratch]] \n" + "ST1 {v0.2d}, [%[scratch]] \n" + "MOV w14, #16 \n" + "SUB w14, w14, %w[tagSz] \n" + "ADD %[scratch], %[scratch], %x[tagSz] \n" + "44: \n" + "STRB wzr, [%[scratch]], #1 \n" + "SUB w14, w14, #1 \n" + "CBNZ w14, 44b \n" + "SUB %[scratch], %[scratch], #16 \n" + "LD1 {v0.2d}, [%[scratch]] \n" + "41: \n" + "EOR v0.16b, v0.16b, v1.16b \n" + "MOV v1.D[0], v0.D[1] \n" + "EOR v0.8b, v0.8b, v1.8b \n" + "MOV %x[ret], v0.D[0] \n" + "CMP %x[ret], #0 \n" + "MOV w11, #-180 \n" + "CSETM %w[ret], ne \n" + "AND %w[ret], %w[ret], w11 \n" + + : [out] "+r" (out), [input] "+r" (in), [Key] "+r" (keyPt), + [aSz] "+r" (authInSz), [sz] "+r" (sz), [aad] "+r" (authIn), + [ret] "+r" (ret) + : [ctr] "r" (ctr), [scratch] "r" (scratch), + [h] "m" (aes->gcm.H), [tag] "r" (authTag), [tagSz] "r" (authTagSz) + : "cc", "memory", "x11", "x12", "w13", "x14", "x15", "w16", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + 
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + ); + + return ret; +} +#endif /* WOLFSSL_AES_256 */ /* * Check tag and decrypt data using AES with GCM mode. * aes: Aes structure having already been set with set key function @@ -2607,295 +12548,44 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, * authIn: additional data buffer * authInSz: size of additional data buffer */ -int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - const byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { - word32 blocks = sz / AES_BLOCK_SIZE; - word32 partial = sz % AES_BLOCK_SIZE; - const byte* c = in; - byte* p = out; - byte counter[AES_BLOCK_SIZE]; - byte initialCounter[AES_BLOCK_SIZE]; - byte *ctr ; - byte scratch[AES_BLOCK_SIZE]; - - ctr = counter ; - /* sanity checks */ - if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) || - authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 || - ivSz == 0) { + if ((aes == NULL) || (iv == NULL) || (authTag == NULL) || + (authTagSz > AES_BLOCK_SIZE) || (authTagSz == 0) || (ivSz == 0) || + ((sz != 0) && ((in == NULL) || (out == NULL)))) { WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); return BAD_FUNC_ARG; } - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); - if (ivSz == GCM_NONCE_MID_SZ) { - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = 1; - } - else { - GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); - GMULT(initialCounter, aes->H); - } - XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); - - /* Calculate the authTag again using the received auth data and the - * cipher text. 
*/ - { - byte Tprime[AES_BLOCK_SIZE]; - byte EKY0[AES_BLOCK_SIZE]; - - GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); - GMULT(Tprime, aes->H); - wc_AesEncrypt(aes, ctr, EKY0); - xorbuf(Tprime, EKY0, sizeof(Tprime)); - - if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { - return AES_GCM_AUTH_E; - } - } - - /* do as many blocks as possible */ - if (blocks > 0) { - /* pointer needed because it is incremented when read, causing - * an issue with call to encrypt/decrypt leftovers */ - byte* keyPt = (byte*)aes->key; - switch(aes->rounds) { + switch (aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - - "#Create vector with the value 1 \n" - "MOVI v14.16b, #1 \n" - "USHR v14.2d, v14.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v13.16b, v13.16b, v13.16b \n" - "EXT v14.16b, v14.16b, v13.16b, #8 \n" - - "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" - "LD1 {v12.2d}, [%[ctr]] \n" - "LD1 {v13.2d}, [%[input]], #16 \n" - - "1: \n" - "REV64 v12.16b, v12.16b \n" /* network order */ - "EXT v12.16b, v12.16b, v12.16b, #8 \n" - "ADD v12.2d, v12.2d, v14.2d \n" /* add 1 to counter */ - "EXT v12.16b, v12.16b, v12.16b, #8 \n" - "REV64 v12.16b, v12.16b \n" /* revert from network order */ - "MOV v0.16b, v12.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - - "EOR v0.16b, v0.16b, v13.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" 
- - "CBZ w11, 2f \n" - "LD1 {v13.2d}, [%[input]], #16 \n" - "B 1b \n" - - "2: \n" - "#store current counter value at the end \n" - "ST1 {v12.16b}, [%[ctrOut]] \n" - - :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) - :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (c) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ); - break; + case 10: + return Aes128GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); #endif #ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - - "#Create vector with the value 1 \n" - "MOVI v16.16b, #1 \n" - "USHR v16.2d, v16.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v16.16b, v16.16b, v14.16b, #8 \n" - - "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" - "LD1 {v13.2d}, [%[Key]], #16 \n" - "LD1 {v14.2d}, [%[ctr]] \n" - "LD1 {v15.2d}, [%[input]], #16 \n" - - "1: \n" - "REV64 v14.16b, v14.16b \n" /* network order */ - "EXT v14.16b, v14.16b, v14.16b, #8 \n" - "ADD v14.2d, v14.2d, v16.2d \n" /* add 1 to counter */ - "EXT v14.16b, v14.16b, v14.16b, #8 \n" - "REV64 v14.16b, v14.16b \n" /* revert from network order */ - "MOV v0.16b, v14.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - 
"AESE v0.16b, v12.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - - "EOR v0.16b, v0.16b, v15.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - - "CBZ w11, 2f \n" - "LD1 {v15.2d}, [%[input]], #16 \n" - "B 1b \n" - - "2: \n" - "#store current counter value at the end \n" - "ST1 {v14.2d}, [%[ctrOut]] \n" - - :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) - :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (c) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", - "v16" - ); - break; -#endif /* WOLFSSL_AES_192 */ + case 12: + return Aes192GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +#endif #ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - - "#Create vector with the value 1 \n" - "MOVI v18.16b, #1 \n" - "USHR v18.2d, v18.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v19.16b, v19.16b, v19.16b \n" - "EXT v18.16b, v18.16b, v19.16b, #8 \n" - - "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" - "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" - "LD1 {v17.2d}, [%[ctr]] \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - - "1: \n" - "REV64 v17.16b, v17.16b \n" /* network order */ - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "MOV v0.16b, v17.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC 
v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - - "EOR v0.16b, v0.16b, v16.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - - "CBZ w11, 2f \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "B 1b \n" - - "2: \n" - "#store current counter value at the end \n" - "ST1 {v17.2d}, [%[ctrOut]] \n" - - :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c) - :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks), - [input] "3" (c) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", - "v16", "v17", "v18", "v19" - ); - break; -#endif /* WOLFSSL_AES_256 */ + case 14: + return Aes256GcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); +#endif default: - WOLFSSL_MSG("Bad AES-GCM round value"); + WOLFSSL_MSG("AES-GCM invalid round number"); return BAD_FUNC_ARG; - } } - if (partial != 0) { - IncrementGcmCounter(ctr); - wc_AesEncrypt(aes, ctr, scratch); - - /* check if pointer is null after main AES-GCM blocks - * helps static analysis */ - if (p == NULL || c == NULL) { - return BAD_STATE_E; - } - xorbuf(scratch, c, partial); - XMEMCPY(p, scratch, partial); - } - return 0; } #endif /* HAVE_AES_DECRYPT */ + +/* END script replace AES-GCM Aarch64 with hardware crypto. 
*/ + #endif /* HAVE_AESGCM */ @@ -3550,544 +13240,636 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, /* AES-CTR */ #ifdef WOLFSSL_AES_COUNTER +static void wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, + word32 numBlocks) +{ + word32* keyPt = aes->key; + word32* regPt = aes->reg; - /* Increment AES counter */ - static WC_INLINE void IncrementAesCounter(byte* inOutCtr) - { - int i; - - /* in network byte order so start at end and work back */ - for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { - if (++inOutCtr[i]) /* we're done unless we overflow */ - return; - } - } - - int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - byte* tmp; - word32 numBlocks; - - if (aes == NULL || out == NULL || in == NULL) { - return BAD_FUNC_ARG; - } - - tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; - - /* consume any unused bytes left in aes->tmp */ - while (aes->left && sz) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - sz--; - } - - /* do as many block size ops as possible */ - numBlocks = sz/AES_BLOCK_SIZE; - if (numBlocks > 0) { - /* pointer needed because it is incremented when read, causing - * an issue with call to encrypt/decrypt leftovers */ - word32* keyPt = aes->key; - word32* regPt = aes->reg; - sz -= numBlocks * AES_BLOCK_SIZE; - switch(aes->rounds) { + switch(aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV r11, %[blocks] \n" - "VLDM %[Key]!, {q1-q4} \n" + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" - "#Create vector with the value 1 \n" - "VMOV.u32 q15, #1 \n" - "VSHR.u64 q15, q15, #32 \n" - "VLDM %[Key]!, {q5-q8} \n" - "VEOR.32 q14, q14, q14 \n" - "VLDM %[Key]!, {q9-q11} \n" - "VEXT.8 q14, q15, q14, #8\n" + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VLDM %[Key]!, {q9-q11} \n" + 
"VEXT.8 q14, q15, q14, #8\n" - "VLD1.32 {q13}, [%[reg]]\n" + "VLD1.32 {q13}, [%[reg]]\n" - /* double block */ - "1: \n" - "CMP r11, #1 \n" - "BEQ 2f \n" - "CMP r11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" - "VMOV.32 q0, q13 \n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "SUB r11, r11, #2 \n" - "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ - "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q15, q15, q15, #8 \n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q15, q15\n" /* revert from network order */ - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q1\n" - "AESMC.8 q15, q15\n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q2\n" - "AESMC.8 q15, q15\n" + /* Comparison value to check whether carry is going to happen */ + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q13, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q13, q14, q12, #12 \n" + "VAND.32 d27, d27, d24 \n" + "VSUB.i32 q15, q15, q13 \n" - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q3\n" - "AESMC.8 q15, q15\n" + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q15, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d25, d24, d25, #4 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d24, d29, d24, #4 \n" + "VSUB.i32 q13, q13, q12 \n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - 
"AESE.8 q15, q4\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q5\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q10\n" - "AESE.8 q15, q6\n" - "AESMC.8 q15, q15\n" - "VEOR.32 q0, q0, q11\n" + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q7\n" - "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "AESE.8 q15, q8\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" - "VEOR.32 q0, q0, q12\n" - "AESE.8 q15, q9\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "AESE.8 q15, q10\n" - "VST1.32 {q0}, [%[out]]! \n" - "VEOR.32 q15, q15, q11\n" - "VEOR.32 q15, q15, q12\n" - "VST1.32 {q15}, [%[out]]! \n" + "AESE.8 q0, q10\n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" + "VEOR.32 q0, q0, q11\n" - "B 1b \n" + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q12}, [%[input]]! 
\n" + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" - /* single block */ - "2: \n" - "VMOV.32 q0, q13 \n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "SUB r11, r11, #1 \n" - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q10\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VEOR.32 q0, q0, q11\n" - "#CTR operations, increment counter and xorbuf \n" - "VEOR.32 q0, q0, q12\n" - "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q0, q0, q12\n" + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" - "3: \n" - "#store current counter qalue at the end \n" - "VST1.32 {q13}, [%[regOut]] \n" + "VLD1.32 {q12}, [%[input]]! \n" + "AESE.8 q15, q10\n" + "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q15, q15, q11\n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! 
\n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "2" (regPt) - : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", - "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14", "q15" - ); - break; + "B 1b \n" + + /* single block */ + "2: \n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + + "VMOV.u32 q15, #0xffffffff \n" + "VCEQ.i32 q12, q13, q15 \n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q15, q14, q12, #12 \n" + "VAND.32 d31, d31, d24 \n" + "VSUB.i32 q13, q13, q15 \n" + + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "SUB r11, r11, #1 \n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! 
\n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14", "q15" + ); + break; #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV r11, %[blocks] \n" - "VLDM %[Key]!, {q1-q4} \n" + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" - "#Create vector with the value 1 \n" - "VMOV.u32 q15, #1 \n" - "VSHR.u64 q15, q15, #32 \n" - "VLDM %[Key]!, {q5-q8} \n" - "VEOR.32 q14, q14, q14 \n" - "VEXT.8 q14, q15, q14, #8\n" + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VEXT.8 q14, q15, q14, #8\n" - "VLDM %[Key]!, {q9-q10} \n" - "VLD1.32 {q13}, [%[reg]]\n" + "VLDM %[Key]!, {q9-q10} \n" + "VLD1.32 {q13}, [%[reg]]\n" - /* double block */ - "1: \n" - "CMP r11, #1 \n" - "BEQ 2f \n" - "CMP r11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" - "VMOV.32 q0, q13\n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "SUB r11, r11, #2 \n" - "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ - "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q15, q15, q15, #8 \n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q15, q15\n" /* revert from network order */ - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q1\n" - "AESMC.8 
q15, q15\n" + "VMOV.32 q0, q13\n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q2\n" - "AESMC.8 q15, q15\n" + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q13, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q13, q14, q12, #12 \n" + "VAND.32 d27, d27, d24 \n" + "VSUB.i32 q15, q15, q13 \n" - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q3\n" - "AESMC.8 q15, q15\n" + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q15, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d25, d24, d25, #4 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d24, d29, d24, #4 \n" + "VSUB.i32 q13, q13, q12 \n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q4\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q5\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q10\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q11}, [%[Key]]! 
\n" - "AESE.8 q15, q6\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q11\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q7\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q8\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q15, q9\n" - "AESMC.8 q15, q15\n" - "AESE.8 q15, q10\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q11\n" - "AESMC.8 q15, q15\n" - "VLD1.32 {q11}, [%[Key]] \n" - "AESE.8 q0, q12\n" - "AESE.8 q15, q12\n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VEOR.32 q0, q0, q11\n" - "VEOR.32 q15, q15, q11\n" - "VEOR.32 q0, q0, q12\n" + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VST1.32 {q0}, [%[out]]! \n" - "VEOR.32 q15, q15, q12\n" - "VST1.32 {q15}, [%[out]]! \n" - "SUB %[Key], %[Key], #32 \n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" + "AESE.8 q15, q10\n" + "AESMC.8 q15, q15\n" - "B 1b \n" + "AESE.8 q15, q11\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" + "AESE.8 q15, q12\n" + + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "VEOR.32 q15, q15, q11\n" + "VEOR.32 q0, q0, q12\n" + + "VLD1.32 {q12}, [%[input]]! \n" + "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! \n" + "SUB %[Key], %[Key], #32 \n" + + "B 1b \n" - /* single block */ - "2: \n" - "VLD1.32 {q11}, [%[Key]]! 
\n" - "VMOV.32 q0, q13 \n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "SUB r11, r11, #1 \n" - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q10\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q0, q11\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q11}, [%[Key]] \n" - "AESE.8 q0, q12\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VEOR.32 q0, q0, q11\n" - "#CTR operations, increment counter and xorbuf \n" - "VEOR.32 q0, q0, q12\n" - "VST1.32 {q0}, [%[out]]! \n" + /* single block */ + "2: \n" + "VLD1.32 {q11}, [%[Key]]! 
\n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" - "3: \n" - "#store current counter qalue at the end \n" - "VST1.32 {q13}, [%[regOut]] \n" + "VMOV.u32 q15, #0xffffffff \n" + "VCEQ.i32 q12, q13, q15 \n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q15, q14, q12, #12 \n" + "VAND.32 d31, d31, d24 \n" + "VSUB.i32 q13, q13, q15 \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "2" (regPt) - : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", - "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" - ); - break; + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "SUB r11, r11, #1 \n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! 
\n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" + ); + break; #endif /* WOLFSSL_AES_192 */ #ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV r11, %[blocks] \n" - "VLDM %[Key]!, {q1-q4} \n" + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" - "#Create vector with the value 1 \n" - "VMOV.u32 q15, #1 \n" - "VSHR.u64 q15, q15, #32 \n" - "VLDM %[Key]!, {q5-q8} \n" - "VEOR.32 q14, q14, q14 \n" - "VEXT.8 q14, q15, q14, #8\n" + "#Create vector with the value 1 \n" + "VMOV.u32 q15, #1 \n" + "VSHR.u64 q15, q15, #32 \n" + "VLDM %[Key]!, {q5-q8} \n" + "VEOR.32 q14, q14, q14 \n" + "VEXT.8 q14, q15, q14, #8\n" - "VLDM %[Key]!, {q9-q10} \n" - "VLD1.32 {q13}, [%[reg]]\n" + "VLDM %[Key]!, {q9-q10} \n" + "VLD1.32 {q13}, [%[reg]]\n" - /* double block */ - "1: \n" - "CMP r11, #1 \n" - "BEQ 2f \n" - "CMP r11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP r11, #1 \n" + "BEQ 2f \n" + "CMP r11, #0 \n" + "BEQ 3f \n" - "VMOV.32 q0, q13 \n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "SUB r11, r11, #2 \n" - "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ - "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q15, q15, q15, #8 \n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q15, q15\n" /* revert from network order */ - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q15, q1\n" - "AESMC.8 q15, 
q15\n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "SUB r11, r11, #2 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q2\n" - "AESMC.8 q15, q15\n" + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q13, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q13, q14, q12, #12 \n" + "VAND.32 d27, d27, d24 \n" + "VSUB.i32 q15, q15, q13 \n" - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q3\n" - "AESMC.8 q15, q15\n" + "VMOV.u32 q12, #0xffffffff \n" + "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */ + /* Carry across 32-bit lanes */ + "VCEQ.i32 q12, q15, q12 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d25, d24, d25, #4 \n" + "VAND.32 d25, d25, d24 \n" + "VEXT.8 d24, d29, d24, #4 \n" + "VSUB.i32 q13, q13, q12 \n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q4\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q15, q15, q15, #8 \n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q15, q15\n" /* revert from network order */ + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q15, q1\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q5\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q2\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q10\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q11}, [%[Key]]! 
\n" - "AESE.8 q15, q6\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q3\n" + "AESMC.8 q15, q15\n" - "AESE.8 q0, q11\n" - "AESMC.8 q0, q0\n" - "AESE.8 q15, q7\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q4\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q8\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q5\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q9\n" - "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q15, q10\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q15, q6\n" + "AESMC.8 q15, q15\n" - "AESE.8 q15, q11\n" - "AESMC.8 q15, q15\n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "AESE.8 q15, q7\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q11}, [%[Key]]! \n" - "AESE.8 q0, q12\n" /* rnd 12*/ - "AESMC.8 q0, q0\n" - "AESE.8 q15, q12\n" /* rnd 12 */ - "AESMC.8 q15, q15\n" + "AESE.8 q15, q8\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q0, q11\n" /* rnd 13 */ - "AESMC.8 q0, q0\n" - "AESE.8 q15, q11\n" /* rnd 13 */ - "AESMC.8 q15, q15\n" + "AESE.8 q15, q9\n" + "AESMC.8 q15, q15\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q15, q10\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q11}, [%[Key]] \n" - "AESE.8 q0, q12\n" /* rnd 14 */ - "AESE.8 q15, q12\n" /* rnd 14 */ + "AESE.8 q15, q11\n" + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VEOR.32 q0, q0, q11\n" /* rnd 15 */ - "VEOR.32 q15, q15, q11\n" /* rnd 15 */ - "VEOR.32 q0, q0, q12\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q0, q12\n" /* rnd 12*/ + "AESMC.8 q0, q0\n" + "AESE.8 q15, q12\n" /* rnd 12 */ + "AESMC.8 q15, q15\n" - "VLD1.32 {q12}, [%[input]]! \n" - "VST1.32 {q0}, [%[out]]! \n" - "VEOR.32 q15, q15, q12\n" - "VST1.32 {q15}, [%[out]]! \n" - "SUB %[Key], %[Key], #64 \n" + "VLD1.32 {q12}, [%[Key]]! 
\n" + "AESE.8 q0, q11\n" /* rnd 13 */ + "AESMC.8 q0, q0\n" + "AESE.8 q15, q11\n" /* rnd 13 */ + "AESMC.8 q15, q15\n" - /* single block */ - "B 1b \n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" /* rnd 14 */ + "AESE.8 q15, q12\n" /* rnd 14 */ - "2: \n" - "VLD1.32 {q11}, [%[Key]]! \n" - "VMOV.32 q0, q13 \n" - "AESE.8 q0, q1\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13 \n" /* network order */ - "AESE.8 q0, q2\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q3\n" - "AESMC.8 q0, q0\n" - "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ - "AESE.8 q0, q4\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q5\n" - "AESMC.8 q0, q0\n" - "VEXT.8 q13, q13, q13, #8 \n" - "AESE.8 q0, q6\n" - "AESMC.8 q0, q0\n" - "VREV64.8 q13, q13\n" /* revert from network order */ - "AESE.8 q0, q7\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q8\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q9\n" - "AESMC.8 q0, q0\n" - "AESE.8 q0, q10\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q0, q11\n" - "AESMC.8 q0, q0\n" - "VLD1.32 {q11}, [%[Key]]! \n" - "AESE.8 q0, q12\n" /* rnd 12 */ - "AESMC.8 q0, q0\n" - "VLD1.32 {q12}, [%[Key]]! \n" - "AESE.8 q0, q11\n" /* rnd 13 */ - "AESMC.8 q0, q0\n" - "VLD1.32 {q11}, [%[Key]] \n" - "AESE.8 q0, q12\n" /* rnd 14 */ - "VLD1.32 {q12}, [%[input]]! \n" - "VEOR.32 q0, q0, q11\n" /* rnd 15 */ - "#CTR operations, increment counter and xorbuf \n" - "VEOR.32 q0, q0, q12\n" - "VST1.32 {q0}, [%[out]]! \n" + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" /* rnd 15 */ + "VEOR.32 q15, q15, q11\n" /* rnd 15 */ + "VEOR.32 q0, q0, q12\n" - "3: \n" - "#store current counter qalue at the end \n" - "VST1.32 {q13}, [%[regOut]] \n" + "VLD1.32 {q12}, [%[input]]! \n" + "VST1.32 {q0}, [%[out]]! \n" + "VEOR.32 q15, q15, q12\n" + "VST1.32 {q15}, [%[out]]! 
\n" + "SUB %[Key], %[Key], #64 \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "2" (regPt) - : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", - "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" - ); - break; + /* single block */ + "B 1b \n" + + "2: \n" + "VLD1.32 {q11}, [%[Key]]! \n" + "VMOV.32 q0, q13 \n" + "AESE.8 q0, q1\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13 \n" /* network order */ + "AESE.8 q0, q2\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q3\n" + "AESMC.8 q0, q0\n" + + "VMOV.u32 q15, #0xffffffff \n" + "VCEQ.i32 q12, q13, q15 \n" + "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */ + "VAND.32 d25, d25, d24 \n" + "VEXT.8 q15, q14, q12, #12 \n" + "VAND.32 d31, d31, d24 \n" + "VSUB.i32 q13, q13, q15 \n" + + "AESE.8 q0, q4\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q5\n" + "AESMC.8 q0, q0\n" + "VEXT.8 q13, q13, q13, #8 \n" + "AESE.8 q0, q6\n" + "AESMC.8 q0, q0\n" + "VREV64.8 q13, q13\n" /* revert from network order */ + "AESE.8 q0, q7\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q8\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q9\n" + "AESMC.8 q0, q0\n" + "AESE.8 q0, q10\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]]! \n" + "AESE.8 q0, q12\n" /* rnd 12 */ + "AESMC.8 q0, q0\n" + "VLD1.32 {q12}, [%[Key]]! \n" + "AESE.8 q0, q11\n" /* rnd 13 */ + "AESMC.8 q0, q0\n" + "VLD1.32 {q11}, [%[Key]] \n" + "AESE.8 q0, q12\n" /* rnd 14 */ + "VLD1.32 {q12}, [%[input]]! \n" + "VEOR.32 q0, q0, q11\n" /* rnd 15 */ + "#CTR operations, increment counter and xorbuf \n" + "VEOR.32 q0, q0, q12\n" + "VST1.32 {q0}, [%[out]]! 
\n" + + "3: \n" + "#store current counter qalue at the end \n" + "VST1.32 {q13}, [%[regOut]] \n" + + :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "2" (regPt) + : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5", + "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14" + ); + break; #endif /* WOLFSSL_AES_256 */ - default: - WOLFSSL_MSG("Bad AES-CTR round qalue"); - return BAD_FUNC_ARG; - } + } +} - aes->left = 0; - } +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + byte* tmp; + word32 numBlocks; - /* handle non block size remaining */ - if (sz) { - wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); - IncrementAesCounter((byte*)aes->reg); + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + switch(aes->rounds) { + #ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + #endif /* WOLFSSL_AES_256 */ + break; + default: + WOLFSSL_MSG("Bad AES-CTR round value"); + return BAD_FUNC_ARG; + } - aes->left = AES_BLOCK_SIZE; - tmp = (byte*)aes->tmp; - while (sz--) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - } - } + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; - return 0; + /* consume any unused bytes left in aes->tmp */ + while ((aes->left != 0) && (sz != 0)) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz / AES_BLOCK_SIZE; + if (numBlocks > 0) { + wc_aes_ctr_encrypt_asm(aes, out, in, numBlocks); + + sz -= numBlocks * AES_BLOCK_SIZE; + out += numBlocks * AES_BLOCK_SIZE; + in += numBlocks * AES_BLOCK_SIZE; + } + + /* handle non block size remaining */ + if (sz) { + byte zeros[AES_BLOCK_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + 
wc_aes_ctr_encrypt_asm(aes, (byte*)aes->tmp, zeros, 1); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; } + } + return 0; +} + +int wc_AesCtrSetKey(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) +{ + (void)dir; + return wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); +} #endif /* WOLFSSL_AES_COUNTER */ @@ -4097,7 +13879,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, * on ARMv8". Shifting left to account for bit reflection is based on * "Carry-Less Multiplication and Its Usage for Computing the GCM mode" */ -static void GMULT(byte* X, byte* Y) +void GMULT(byte* X, byte* Y) { __asm__ __volatile__ ( "VLD1.32 {q0}, [%[x]] \n" @@ -4156,13 +13938,13 @@ static void GMULT(byte* X, byte* Y) } -void GHASH(Aes* aes, const byte* a, word32 aSz, - const byte* c, word32 cSz, byte* s, word32 sSz) +void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz, + byte* s, word32 sSz) { byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; word32 blocks, partial; - byte* h = aes->H; + byte* h = gcm->H; XMEMSET(x, 0, AES_BLOCK_SIZE); @@ -4259,7 +14041,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, initialCounter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); } XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); @@ -4280,7 +14062,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, } - GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + GHASH(&aes->gcm, authIn, authInSz, out, sz, authTag, authTagSz); wc_AesEncrypt(aes, initialCounter, scratch); if (authTagSz > AES_BLOCK_SIZE) { xorbuf(authTag, scratch, AES_BLOCK_SIZE); @@ -4336,7 +14118,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, initialCounter[AES_BLOCK_SIZE - 1] = 1; } else { - GHASH(aes, NULL, 0, iv, 
ivSz, initialCounter, AES_BLOCK_SIZE); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); } XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); @@ -4346,7 +14128,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, byte Tprime[AES_BLOCK_SIZE]; byte EKY0[AES_BLOCK_SIZE]; - GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); + GHASH(&aes->gcm, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); wc_AesEncrypt(aes, ctr, EKY0); xorbuf(Tprime, EKY0, sizeof(Tprime)); @@ -4405,7 +14187,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, #define GHASH_ONE_BLOCK(aes, block) \ do { \ xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ - GMULT(AES_TAG(aes), aes->H); \ + GMULT(AES_TAG(aes), aes->gcm.H); \ } \ while (0) @@ -4520,7 +14302,7 @@ static void GHASH_UPDATE(Aes* aes, const byte* a, word32 aSz, const byte* c, sz = cSz; } XMEMCPY(AES_LASTGBLOCK(aes) + aes->cOver, c, sz); - /* Update count of unsed encrypted counter. */ + /* Update count of unused encrypted counter. */ aes->cOver += sz; if (aes->cOver == AES_BLOCK_SIZE) { /* We have filled up the block and can process. */ @@ -4564,7 +14346,7 @@ static void GHASH_FINAL(Aes* aes, byte* s, word32 sSz) if (aes->cOver > 0) { /* Cipher text block incomplete. */ - over = aes->cOver; + over = aes->cOver; } if (over > 0) { /* Zeroize the unused part of the block. */ @@ -4598,13 +14380,13 @@ static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz) else { /* Counter is GHASH of IV. 
*/ #ifdef OPENSSL_EXTRA - word32 aadTemp = aes->aadLen; - aes->aadLen = 0; + word32 aadTemp = aes->gcm.aadLen; + aes->gcm.aadLen = 0; #endif - GHASH(aes, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); - GMULT(counter, aes->H); + GHASH(&aes->gcm, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + GMULT(counter, aes->gcm.H); #ifdef OPENSSL_EXTRA - aes->aadLen = aadTemp; + aes->gcm.aadLen = aadTemp; #endif } @@ -4693,7 +14475,7 @@ static void AesGcmFinal_C(Aes* aes, byte* authTag, word32 authTagSz) xorbuf(authTag, AES_INITCTR(aes), authTagSz); #ifdef OPENSSL_EXTRA /* store AAD size for next call */ - aes->aadLen = aes->aSz; + aes->gcm.aadLen = aes->aSz; #endif /* Zeroize last block to protect sensitive data. */ ForceZero(AES_LASTBLOCK(aes), AES_BLOCK_SIZE); @@ -4720,8 +14502,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, /* Check validity of parameters. */ if ((aes == NULL) || ((len > 0) && (key == NULL)) || - ((ivSz == 0) && (iv != NULL)) || (ivSz > AES_BLOCK_SIZE) || - ((ivSz > 0) && (iv == NULL))) { + ((ivSz == 0) && (iv != NULL)) || ((ivSz > 0) && (iv == NULL))) { ret = BAD_FUNC_ARG; } @@ -4742,14 +14523,14 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, } if (ret == 0) { - /* Setup with IV if needed. */ - if (iv != NULL) { - /* Cache the IV in AES GCM object. */ - XMEMCPY((byte*)aes->reg, iv, ivSz); + /* Set the IV passed in if it is smaller than a block. */ + if ((iv != NULL) && (ivSz <= AES_BLOCK_SIZE)) { + XMEMMOVE((byte*)aes->reg, iv, ivSz); aes->nonceSz = ivSz; } - else if (aes->nonceSz != 0) { - /* Copy out the cached copy. */ + /* No IV passed in, check for cached IV. */ + if ((iv == NULL) && (aes->nonceSz != 0)) { + /* Use the cached copy. */ iv = (byte*)aes->reg; ivSz = aes->nonceSz; } @@ -4854,7 +14635,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Encrypt the plaintext. 
*/ AesGcmCryptUpdate_C(aes, out, in, sz); - /* Update the authenication tag with any authentication data and the + /* Update the authentication tag with any authentication data and the * new cipher text. */ GHASH_UPDATE(aes, authIn, authInSz, out, sz); } @@ -4963,7 +14744,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Decrypt with AAD and/or cipher text. */ - /* Update the authenication tag with any authentication data and + /* Update the authentication tag with any authentication data and * cipher text. */ GHASH_UPDATE(aes, authIn, authInSz, in, sz); /* Decrypt the cipher text. */ @@ -5098,6 +14879,1928 @@ static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz) } +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; + byte lenSz; + word32 i; + byte mask = 0xFF; + word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || (inSz != 0 && (in == NULL || out == NULL)) || + nonce == NULL || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + if (wc_AesCcmCheckTagSize(authTagSz) != 0) { + return BAD_FUNC_ARG; + } + + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + B[0] = (authInSz > 0 ? 
64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, in, inSz, A); + XMEMCPY(authTag, A, authTagSz); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + xorbuf(authTag, A, authTagSz); + + B[15] = 1; + while (inSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(out, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + inSz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, inSz); + XMEMCPY(out, A, inSz); + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + + return 0; +} + +#ifdef HAVE_AES_DECRYPT +int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte A[AES_BLOCK_SIZE]; + byte B[AES_BLOCK_SIZE]; + byte* o; + byte lenSz; + word32 i, oSz; + int result = 0; + byte mask = 0xFF; + word32 wordSz = (word32)sizeof(word32); + + /* sanity check on arguments */ + if (aes == NULL || (inSz != 0 && (in == NULL || out == NULL)) || + nonce == NULL || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + if (wc_AesCcmCheckTagSize(authTagSz) != 0) { + return BAD_FUNC_ARG; + } + + o = out; + oSz = inSz; + XMEMCPY(B+1, nonce, nonceSz); + lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + B[15] = 1; + + while (oSz >= AES_BLOCK_SIZE) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, AES_BLOCK_SIZE); + XMEMCPY(o, A, AES_BLOCK_SIZE); + + AesCcmCtrInc(B, lenSz); + oSz -= 
AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + o += AES_BLOCK_SIZE; + } + if (inSz > 0) { + wc_AesEncrypt(aes, B, A); + xorbuf(A, in, oSz); + XMEMCPY(o, A, oSz); + } + + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, A); + + o = out; + oSz = inSz; + + B[0] = (authInSz > 0 ? 64 : 0) + + (8 * (((byte)authTagSz - 2) / 2)) + + (lenSz - 1); + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); + + if (authInSz > 0) + roll_auth(aes, authIn, authInSz, A); + if (inSz > 0) + roll_x(aes, o, oSz, A); + + B[0] = lenSz - 1; + for (i = 0; i < lenSz; i++) + B[AES_BLOCK_SIZE - 1 - i] = 0; + wc_AesEncrypt(aes, B, B); + xorbuf(A, B, authTagSz); + + if (ConstantCompare(A, authTag, authTagSz) != 0) { + /* If the authTag check fails, don't keep the decrypted data. + * Unfortunately, you need the decrypted data to calculate the + * check value. */ + XMEMSET(out, 0, inSz); + result = AES_CCM_AUTH_E; + } + + ForceZero(A, AES_BLOCK_SIZE); + ForceZero(B, AES_BLOCK_SIZE); + o = NULL; + + return result; +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESCCM */ + + + +#ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */ +int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) +{ + int ret; + byte iv[AES_BLOCK_SIZE]; + + if (!((len == 16) || (len == 24) || (len == 32))) + return BAD_FUNC_ARG; + + XMEMSET(iv, 0, AES_BLOCK_SIZE); + ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); + + if (ret == 0) { +#ifdef WOLFSSL_AESGCM_STREAM + aes->gcmKeySet = 1; +#endif + + wc_AesEncrypt(aes, iv, aes->gcm.H); + #if defined(__aarch64__) + { + word32* pt = (word32*)aes->gcm.H; + __asm__ volatile ( + "LD1 {v0.16b}, [%[h]] \n" + "RBIT v0.16b, v0.16b \n" + "ST1 {v0.16b}, [%[out]] \n" + : [out] "=r" (pt) + : [h] "0" (pt) + : "cc", "memory", "v0" + ); + } + #else + { + word32* pt = (word32*)aes->gcm.H; + __asm__ volatile ( + "VLD1.32 {q0}, [%[h]] 
\n" + "VREV64.8 q0, q0 \n" + "VSWP.8 d0, d1 \n" + "VST1.32 {q0}, [%[out]] \n" + : [out] "=r" (pt) + : [h] "0" (pt) + : "cc", "memory", "q0" + ); + } + #endif + } + + return ret; +} + +#endif /* HAVE_AESGCM */ + +/* AES-DIRECT */ +#if defined(WOLFSSL_AES_DIRECT) + /* Allow direct access to one block encrypt */ + int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect"); + return BAD_FUNC_ARG; + } + return wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect"); + return BAD_FUNC_ARG; + } + return wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_DIRECT */ + +#ifdef WOLFSSL_AES_XTS + +#ifdef __aarch64__ + +#define AES_ENCRYPT_UPDATE_TWEAK(label) \ + "AESE v0.16b, v1.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AND x11, x19, x10, ASR #63\n" \ + "AESE v0.16b, v2.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v3.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "EXTR x10, x10, x9, #63 \n" \ + "AESE v0.16b, v4.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "AESE v0.16b, v5.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "EOR x9, x11, x9, LSL #1 \n" \ + "AESE v0.16b, v6.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v7.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v8.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "AESE v0.16b, v9.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESE v0.16b, v10.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v11.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESE v0.16b, v12.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v13.16b 
\n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + #label ": \n" \ + "AESE v0.16b, v14.16b \n" \ + "EOR v0.16b, v0.16b, v15.16b \n" + +#define AES_ENCRYPT(label) \ + "AESE v0.16b, v1.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v2.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v3.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v4.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "AESE v0.16b, v5.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v6.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v7.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v8.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "AESE v0.16b, v9.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESE v0.16b, v10.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v11.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESE v0.16b, v12.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + "AESE v0.16b, v13.16b \n" \ + "AESMC v0.16b, v0.16b \n" \ + \ + #label ": \n" \ + "AESE v0.16b, v14.16b \n" \ + "EOR v0.16b, v0.16b, v15.16b \n" + +#define AES_DECRYPT_UPDATE_TWEAK(label) \ + "AESD v0.16b, v1.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AND x11, x19, x10, ASR #63\n" \ + "AESD v0.16b, v2.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v3.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "EXTR x10, x10, x9, #63 \n" \ + "AESD v0.16b, v4.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "AESD v0.16b, v5.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "EOR x9, x11, x9, LSL #1 \n" \ + "AESD v0.16b, v6.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v7.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v8.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "AESD v0.16b, v9.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESD v0.16b, v10.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v11.16b \n" \ + 
"AESIMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESD v0.16b, v12.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v13.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + #label ": \n" \ + "AESD v0.16b, v14.16b \n" \ + "EOR v0.16b, v0.16b, v15.16b \n" + +#define AES_DECRYPT(label) \ + "AESD v0.16b, v1.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v2.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v3.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v4.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "AESD v0.16b, v5.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v6.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v7.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v8.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "AESD v0.16b, v9.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESD v0.16b, v10.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v11.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + "SUBS WZR, %w[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESD v0.16b, v12.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + "AESD v0.16b, v13.16b \n" \ + "AESIMC v0.16b, v0.16b \n" \ + \ + #label ": \n" \ + "AESD v0.16b, v14.16b \n" \ + "EOR v0.16b, v0.16b, v15.16b \n" + +/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. 
+ * + * returns 0 on success + */ +int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte tmp[AES_BLOCK_SIZE]; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + __asm__ __volatile__ ( + "MOV x19, 0x87 \n" + + "# Load tweak calculation key\n" + "LD1 {v0.16b}, [%[i]] \n" + "MOV x10, %[key2] \n" + "LD1 {v1.2d-v4.2d}, [x10], #64 \n" + "LD1 {v5.2d-v8.2d}, [x10], #64 \n" + "LD1 {v9.2d-v12.2d}, [x10], #64 \n" + "LD1 {v13.2d-v15.2d}, [x10] \n" + + "# Put last 2 blocks of keys based on rounds into v14, v15\n" + "SUBS WZR, %w[rounds], #14 \n" + "BEQ 40f \n" + "SUBS WZR, %w[rounds], #12 \n" + "MOV v14.16b, v12.16b \n" + "MOV v15.16b, v13.16b \n" + "BEQ 40f \n" + "MOV v14.16b, v10.16b \n" + "MOV v15.16b, v11.16b \n" + "40: \n" + + AES_ENCRYPT(10) + + "MOV x9, v0.d[0] \n" + "MOV x10, v0.d[1] \n" + "MOV v20.16b, v0.16b \n" + + "# Load encryption key\n" + "MOV x11, %[key] \n" + "LD1 {v1.2d-v4.2d}, [x11], #64 \n" + "LD1 {v5.2d-v8.2d}, [x11], #64 \n" + "LD1 {v9.2d-v12.2d}, [x11], #64 \n" + "LD1 {v13.2d-v15.2d}, [x11] \n" + + "# Put last 2 blocks of keys based on rounds into v14, v15\n" + "SUBS WZR, %w[rounds], #14 \n" + "BEQ 41f \n" + "SUBS WZR, %w[rounds], #10 \n" + "MOV v14.16b, v10.16b \n" + "MOV v15.16b, v11.16b \n" + "BEQ 41f \n" + "MOV v14.16b, v12.16b \n" + "MOV v15.16b, v13.16b \n" + "41: \n" + + "SUBS WZR, %w[blocks], #4 \n" + "BLT 1f \n" + + "AND %w[sz], %w[sz], 0x3f \n" + + "AND x17, x19, x10, ASR #63\n" + "EXTR x12, x10, x9, #63 \n" + "EOR x11, x17, x9, LSL #1 \n" + + "AND x17, x19, x12, ASR #63\n" + "EXTR x14, x12, x11, #63 \n" + "EOR x13, x17, x11, LSL #1 \n" + + "AND x17, x19, x14, ASR #63\n" + "EXTR x16, x14, x13, #63 \n" + "EOR x15, x17, x13, LSL #1 \n" + 
+ "SUB %w[blocks], %w[blocks], #4 \n" + + "#Four blocks at a time\n" + "20:\n" + + "LD1 {v16.16b-v19.16b}, [%[in]], #64 \n" + + "MOV v21.d[0], x11 \n" + "MOV v21.d[1], x12 \n" + "MOV v22.d[0], x13 \n" + "MOV v22.d[1], x14 \n" + "MOV v23.d[0], x15 \n" + "MOV v23.d[1], x16 \n" + + "EOR v16.16b, v16.16b, v20.16b \n" + "EOR v17.16b, v17.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v22.16b \n" + "EOR v19.16b, v19.16b, v23.16b \n" + + "AESE v16.16b, v1.16b \n" + "AESMC v16.16b, v16.16b \n" + "AND x17, x19, x16, ASR #63\n" + "AESE v17.16b, v1.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v1.16b \n" + "AESMC v18.16b, v18.16b \n" + "EXTR x10, x16, x15, #63 \n" + "AESE v19.16b, v1.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v2.16b \n" + "AESMC v16.16b, v16.16b \n" + "EOR x9, x17, x15, LSL #1 \n" + "AESE v17.16b, v2.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v2.16b \n" + "AESMC v18.16b, v18.16b \n" + "AND x17, x19, x10, ASR #63\n" + "AESE v19.16b, v2.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v3.16b \n" + "AESMC v16.16b, v16.16b \n" + "EXTR x12, x10, x9, #63 \n" + "AESE v17.16b, v3.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v3.16b \n" + "AESMC v18.16b, v18.16b \n" + "EOR x11, x17, x9, LSL #1 \n" + "AESE v19.16b, v3.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v4.16b \n" + "AESMC v16.16b, v16.16b \n" + "AND x17, x19, x12, ASR #63\n" + "AESE v17.16b, v4.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v4.16b \n" + "AESMC v18.16b, v18.16b \n" + "EXTR x14, x12, x11, #63 \n" + "AESE v19.16b, v4.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v16.16b, v5.16b \n" + "AESMC v16.16b, v16.16b \n" + "EOR x13, x17, x11, LSL #1 \n" + "AESE v17.16b, v5.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v5.16b \n" + "AESMC v18.16b, v18.16b \n" + "AND x17, x19, x14, ASR #63\n" + "AESE v19.16b, v5.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v6.16b \n" + "AESMC v16.16b, v16.16b \n" + "EXTR x16, x14, x13, #63 \n" 
+ "AESE v17.16b, v6.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v6.16b \n" + "AESMC v18.16b, v18.16b \n" + "EOR x15, x17, x13, LSL #1 \n" + "AESE v19.16b, v6.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v7.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v7.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v7.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v7.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v8.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v8.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v8.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v8.16b \n" + "AESMC v19.16b, v19.16b \n" + + "AESE v16.16b, v9.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v9.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v9.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v9.16b \n" + "AESMC v19.16b, v19.16b \n" + + "SUBS WZR, %w[rounds], #10 \n" + "BEQ 21f \n" + "AESE v16.16b, v10.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v10.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v10.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v10.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v11.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v11.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v11.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v11.16b \n" + "AESMC v19.16b, v19.16b \n" + + "SUBS WZR, %w[rounds], #12 \n" + "BEQ 21f \n" + "AESE v16.16b, v12.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v12.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v12.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v12.16b \n" + "AESMC v19.16b, v19.16b \n" + "AESE v16.16b, v13.16b \n" + "AESMC v16.16b, v16.16b \n" + "AESE v17.16b, v13.16b \n" + "AESMC v17.16b, v17.16b \n" + "AESE v18.16b, v13.16b \n" + "AESMC v18.16b, v18.16b \n" + "AESE v19.16b, v13.16b \n" + "AESMC v19.16b, v19.16b \n" + + "21: \n" + "AESE v16.16b, v14.16b \n" + "EOR 
v16.16b, v16.16b, v15.16b \n" + "AESE v17.16b, v14.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "AESE v18.16b, v14.16b \n" + "EOR v18.16b, v18.16b, v15.16b \n" + "AESE v19.16b, v14.16b \n" + "EOR v19.16b, v19.16b, v15.16b \n" + + "EOR v16.16b, v16.16b, v20.16b \n" + "EOR v17.16b, v17.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v22.16b \n" + "EOR v19.16b, v19.16b, v23.16b \n" + "MOV v20.d[0], x9 \n" + "MOV v20.d[1], x10 \n" + + "ST1 {v16.16b-v19.16b}, [%[out]], #64 \n" + + "SUBS %w[blocks], %w[blocks], #4 \n" + "BGE 20b \n" + "ADD %w[blocks], %w[blocks], #4 \n" + + "CBZ %w[sz], 3f \n" + + "CBZ %w[blocks], 30f \n" + + "1: \n" + "LD1 {v0.16b}, [%[in]], #16 \n" + + "MOV x9, v20.d[0] \n" + "MOV x10, v20.d[1] \n" + + "EOR v0.16b, v0.16b, v20.16b \n" + + AES_ENCRYPT_UPDATE_TWEAK(2) + + "EOR v0.16b, v0.16b, v20.16b \n" + + "ST1 {v0.16b}, [%[out]], #16 \n" + + "MOV v20.d[0], x9 \n" + "MOV v20.d[1], x10 \n" + + "SUBS %w[blocks], %w[blocks], #1 \n" + "SUB %w[sz], %w[sz], #16 \n" + "BGT 1b \n" + + "CBZ %w[sz], 3f \n" + + "30: \n" + "#Partial block \n" + "SUB %[out], %[out], #16 \n" + "LD1 {v0.16b}, [%[out]], #16 \n" + "ST1 {v0.16b}, [%[tmp]] \n" + + "MOV w12, %w[sz] \n" + "4: \n" + "LDRB w13, [%[tmp]] \n" + "LDRB w14, [%[in]], #1 \n" + "STRB w13, [%[out]], #1 \n" + "STRB w14, [%[tmp]], #1 \n" + "SUBS w12, w12, #1 \n" + "BGT 4b \n" + + "SUB %[out], %[out], %x[sz] \n" + "SUB %[tmp], %[tmp], %x[sz] \n" + "SUB %[out], %[out], #16 \n" + + "LD1 {v0.16b}, [%[tmp]] \n" + + "EOR v0.16b, v0.16b, v20.16b \n" + + AES_ENCRYPT(5) + + "EOR v0.16b, v0.16b, v20.16b \n" + + "STR q0, [%[out]] \n" + + "3: \n" + + : [blocks] "+r" (blocks), [in] "+r" (in), [out] "+r" (out), + [sz] "+r" (sz) + : [key] "r" (xaes->aes.key), [rounds] "r" (xaes->aes.rounds), + [key2] "r" (xaes->tweak.key), [i] "r" (i), + [tmp] "r" (tmp) + : "cc", "memory", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", + "x17", "x19", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", 
"v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23" + ); + + return ret; +} + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte tmp[AES_BLOCK_SIZE]; + byte stl = (sz % AES_BLOCK_SIZE); + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + /* if Stealing then break out of loop one block early to handle special + * case */ + blocks -= (stl > 0); + + __asm__ __volatile__ ( + "MOV x19, 0x87 \n" + + "LD1 {v0.16b}, [%[i]] \n" + "MOV x10, %[key2] \n" + "LD1 {v1.2d-v4.2d}, [x10], #64 \n" + "LD1 {v5.2d-v8.2d}, [x10], #64 \n" + "LD1 {v9.2d-v12.2d}, [x10], #64 \n" + "LD1 {v13.2d-v15.2d}, [x10] \n" + + "SUBS WZR, %w[rounds], #14 \n" + "BEQ 40f \n" + "SUBS WZR, %w[rounds], #12 \n" + "MOV v14.16b, v12.16b \n" + "MOV v15.16b, v13.16b \n" + "BEQ 40f \n" + "MOV v14.16b, v10.16b \n" + "MOV v15.16b, v11.16b \n" + "40: \n" + + AES_ENCRYPT(10) + + "MOV x9, v0.d[0] \n" + "MOV x10, v0.d[1] \n" + "MOV v20.16b, v0.16b \n" + + "MOV x11, %[key] \n" + "LD1 {v1.2d-v4.2d}, [x11], #64 \n" + "LD1 {v5.2d-v8.2d}, [x11], #64 \n" + "LD1 {v9.2d-v12.2d}, [x11], #64 \n" + "LD1 {v13.2d-v15.2d}, [x11] \n" + + "SUBS WZR, %w[rounds], #14 \n" + "BEQ 41f \n" + "SUBS WZR, %w[rounds], #12 \n" + "MOV v14.16b, v12.16b \n" + "MOV v15.16b, 
v13.16b \n" + "BEQ 41f \n" + "MOV v14.16b, v10.16b \n" + "MOV v15.16b, v11.16b \n" + "41: \n" + + "CBZ %w[blocks], 3f \n" + + "SUBS WZR, %w[blocks], #4 \n" + "BLT 1f \n" + + "AND x17, x19, x10, ASR #63\n" + "EXTR x12, x10, x9, #63 \n" + "EOR x11, x17, x9, LSL #1 \n" + + "AND x17, x19, x12, ASR #63\n" + "EXTR x14, x12, x11, #63 \n" + "EOR x13, x17, x11, LSL #1 \n" + + "AND x17, x19, x14, ASR #63\n" + "EXTR x16, x14, x13, #63 \n" + "EOR x15, x17, x13, LSL #1 \n" + + "SUB %w[blocks], %w[blocks], #4 \n" + + "#Four blocks at a time\n" + "20:\n" + + "LD1 {v16.16b-v19.16b}, [%[in]], #64 \n" + + "MOV v21.d[0], x11 \n" + "MOV v21.d[1], x12 \n" + "MOV v22.d[0], x13 \n" + "MOV v22.d[1], x14 \n" + "MOV v23.d[0], x15 \n" + "MOV v23.d[1], x16 \n" + + "EOR v16.16b, v16.16b, v20.16b \n" + "EOR v17.16b, v17.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v22.16b \n" + "EOR v19.16b, v19.16b, v23.16b \n" + + "AESD v16.16b, v1.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AND x17, x19, x16, ASR #63\n" + "AESD v17.16b, v1.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v1.16b \n" + "AESIMC v18.16b, v18.16b \n" + "EXTR x10, x16, x15, #63 \n" + "AESD v19.16b, v1.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v2.16b \n" + "AESIMC v16.16b, v16.16b \n" + "EOR x9, x17, x15, LSL #1 \n" + "AESD v17.16b, v2.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v2.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AND x17, x19, x10, ASR #63\n" + "AESD v19.16b, v2.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v3.16b \n" + "AESIMC v16.16b, v16.16b \n" + "EXTR x12, x10, x9, #63 \n" + "AESD v17.16b, v3.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v3.16b \n" + "AESIMC v18.16b, v18.16b \n" + "EOR x11, x17, x9, LSL #1 \n" + "AESD v19.16b, v3.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v4.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AND x17, x19, x12, ASR #63\n" + "AESD v17.16b, v4.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v4.16b \n" + "AESIMC v18.16b, v18.16b 
\n" + "EXTR x14, x12, x11, #63 \n" + "AESD v19.16b, v4.16b \n" + "AESIMC v19.16b, v19.16b \n" + + "AESD v16.16b, v5.16b \n" + "AESIMC v16.16b, v16.16b \n" + "EOR x13, x17, x11, LSL #1 \n" + "AESD v17.16b, v5.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v5.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AND x17, x19, x14, ASR #63\n" + "AESD v19.16b, v5.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v6.16b \n" + "AESIMC v16.16b, v16.16b \n" + "EXTR x16, x14, x13, #63 \n" + "AESD v17.16b, v6.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v6.16b \n" + "AESIMC v18.16b, v18.16b \n" + "EOR x15, x17, x13, LSL #1 \n" + "AESD v19.16b, v6.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v7.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v7.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v7.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v7.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v8.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v8.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v8.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v8.16b \n" + "AESIMC v19.16b, v19.16b \n" + + "AESD v16.16b, v9.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v9.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v9.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v9.16b \n" + "AESIMC v19.16b, v19.16b \n" + + "SUBS WZR, %w[rounds], #10 \n" + "BEQ 21f \n" + "AESD v16.16b, v10.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v10.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v10.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v10.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v11.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v11.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v11.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v11.16b \n" + "AESIMC v19.16b, v19.16b \n" + + "SUBS WZR, %w[rounds], #12 \n" + "BEQ 21f \n" + "AESD 
v16.16b, v12.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v12.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v12.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v12.16b \n" + "AESIMC v19.16b, v19.16b \n" + "AESD v16.16b, v13.16b \n" + "AESIMC v16.16b, v16.16b \n" + "AESD v17.16b, v13.16b \n" + "AESIMC v17.16b, v17.16b \n" + "AESD v18.16b, v13.16b \n" + "AESIMC v18.16b, v18.16b \n" + "AESD v19.16b, v13.16b \n" + "AESIMC v19.16b, v19.16b \n" + + "21: \n" + "AESD v16.16b, v14.16b \n" + "EOR v16.16b, v16.16b, v15.16b \n" + "AESD v17.16b, v14.16b \n" + "EOR v17.16b, v17.16b, v15.16b \n" + "AESD v18.16b, v14.16b \n" + "EOR v18.16b, v18.16b, v15.16b \n" + "AESD v19.16b, v14.16b \n" + "EOR v19.16b, v19.16b, v15.16b \n" + + "EOR v16.16b, v16.16b, v20.16b \n" + "EOR v17.16b, v17.16b, v21.16b \n" + "EOR v18.16b, v18.16b, v22.16b \n" + "EOR v19.16b, v19.16b, v23.16b \n" + "MOV v20.d[0], x9 \n" + "MOV v20.d[1], x10 \n" + + "ST1 {v16.16b-v19.16b}, [%[out]], #64 \n" + + "SUBS %w[blocks], %w[blocks], #4 \n" + "SUB %w[sz], %w[sz], #64 \n" + "BGE 20b \n" + "ADD %w[blocks], %w[blocks], #4 \n" + + "CBZ %w[sz], 4f \n" + + "CBZ %w[blocks], 3f \n" + + "1: \n" + "LD1 {v0.16b}, [%[in]], #16 \n" + + "EOR v0.16b, v0.16b, v20.16b \n" + + AES_DECRYPT_UPDATE_TWEAK(2) + + "EOR v0.16b, v0.16b, v20.16b \n" + + "ST1 {v0.16b}, [%[out]], #16 \n" + + "MOV v20.d[0], x9 \n" + "MOV v20.d[1], x10 \n" + + "SUBS %w[blocks], %w[blocks], #1 \n" + "SUB %w[sz], %w[sz], #16 \n" + "BGT 1b \n" + + "CBZ %w[sz], 4f \n" + + "3: \n" + + "AND x11, x19, x10, ASR #63\n" + "EXTR x10, x10, x9, #63 \n" + "EOR x9, x11, x9, LSL #1 \n" + "MOV v21.d[0], x9 \n" + "MOV v21.d[1], x10 \n" + + "LD1 {v0.16b}, [%[in]], #16 \n" + + "EOR v0.16b, v0.16b, v21.16b \n" + + AES_DECRYPT(5) + + "EOR v0.16b, v0.16b, v21.16b \n" + + "SUB %w[sz], %w[sz], #16 \n" + + "ST1 {v0.16b}, [%[tmp]] \n" + "ADD %[out], %[out], #16 \n" + "MOV w12, %w[sz] \n" + "6: \n" + "LDRB w13, [%[tmp]] \n" + "LDRB w14, [%[in]], #1 \n" + "STRB 
w13, [%[out]], #1 \n" + "STRB w14, [%[tmp]], #1 \n" + "SUBS w12, w12, #1 \n" + "BGT 6b \n" + "SUB %[out], %[out], %x[sz] \n" + "SUB %[tmp], %[tmp], %x[sz] \n" + "SUB %[out], %[out], #16 \n" + + "LD1 {v0.16b}, [%[tmp]] \n" + + "EOR v0.16b, v0.16b, v20.16b \n" + + AES_DECRYPT(7) + + "EOR v0.16b, v0.16b, v20.16b \n" + + "ST1 {v0.16b}, [%[out]] \n" + + "4: \n" + + : [blocks] "+r" (blocks), [in] "+r" (in), [out] "+r" (out), + [sz] "+r" (sz) + : [key] "r" (xaes->aes.key), [rounds] "r" (xaes->aes.rounds), + [key2] "r" (xaes->tweak.key), [i] "r" (i), + [tmp] "r" (tmp) + : "cc", "memory", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", + "x17", "x19", + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23" + ); + + return ret; +} +#else + +#define AES_ENCRYPT_UPDATE_TWEAK(label) \ + "AESE.8 q0, q1 \n" \ + "AESMC.8 q0, q0 \n" \ + "AND %[i], r14, r12, ASR #31 \n" \ + "AESE.8 q0, q2 \n" \ + "AESMC.8 q0, q0 \n" \ + "LSL r12, r12, #1 \n" \ + "AESE.8 q0, q3 \n" \ + "AESMC.8 q0, q0 \n" \ + "ORR r12, r12, r11, LSR #31 \n" \ + "AESE.8 q0, q4 \n" \ + "AESMC.8 q0, q0 \n" \ + "LSL r11, r11, #1 \n" \ + \ + "AESE.8 q0, q5 \n" \ + "AESMC.8 q0, q0 \n" \ + "ORR r11, r11, r10, LSR #31 \n" \ + "AESE.8 q0, q6 \n" \ + "AESMC.8 q0, q0 \n" \ + "LSL r10, r10, #1 \n" \ + "AESE.8 q0, q7 \n" \ + "AESMC.8 q0, q0 \n" \ + "ORR r10, r10, r9, LSR #31 \n" \ + "AESE.8 q0, q8 \n" \ + "AESMC.8 q0, q0 \n" \ + "EOR r9, %[i], r9, LSL #1 \n" \ + \ + "AESE.8 q0, q9 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESE.8 q0, q10 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q11 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! 
\n" \ + \ + "CMP %[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESE.8 q0, q10 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q11 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + #label ": \n" \ + "AESE.8 q0, q10 \n" \ + "VEOR q0, q0, q11 \n" + +#define AES_ENCRYPT(label) \ + "AESE.8 q0, q1 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q2 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q3 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q4 \n" \ + "AESMC.8 q0, q0 \n" \ + \ + "AESE.8 q0, q5 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q6 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q7 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q8 \n" \ + "AESMC.8 q0, q0 \n" \ + \ + "AESE.8 q0, q9 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESE.8 q0, q10 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q11 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESE.8 q0, q10 \n" \ + "AESMC.8 q0, q0 \n" \ + "AESE.8 q0, q11 \n" \ + "AESMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! 
\n" \ + \ + #label ": \n" \ + "AESE.8 q0, q10 \n" \ + "VEOR q0, q0, q11 \n" + +#define AES_DECRYPT_UPDATE_TWEAK(label) \ + "AESD.8 q0, q1 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AND %[i], r14, r12, ASR #31 \n" \ + "AESD.8 q0, q2 \n" \ + "AESIMC.8 q0, q0 \n" \ + "LSL r12, r12, #1 \n" \ + "AESD.8 q0, q3 \n" \ + "AESIMC.8 q0, q0 \n" \ + "ORR r12, r12, r11, LSR #31 \n" \ + "AESD.8 q0, q4 \n" \ + "AESIMC.8 q0, q0 \n" \ + "LSL r11, r11, #1 \n" \ + \ + "AESD.8 q0, q5 \n" \ + "AESIMC.8 q0, q0 \n" \ + "ORR r11, r11, r10, LSR #31 \n" \ + "AESD.8 q0, q6 \n" \ + "AESIMC.8 q0, q0 \n" \ + "LSL r10, r10, #1 \n" \ + "AESD.8 q0, q7 \n" \ + "AESIMC.8 q0, q0 \n" \ + "ORR r10, r10, r9, LSR #31 \n" \ + "AESD.8 q0, q8 \n" \ + "AESIMC.8 q0, q0 \n" \ + "EOR r9, %[i], r9, LSL #1 \n" \ + \ + "AESD.8 q0, q9 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESD.8 q0, q10 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q11 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESD.8 q0, q10 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q11 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + #label ": \n" \ + "AESD.8 q0, q10 \n" \ + "VEOR q0, q0, q11 \n" + +#define AES_DECRYPT(label) \ + "AESD.8 q0, q1 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q2 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q3 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q4 \n" \ + "AESIMC.8 q0, q0 \n" \ + \ + "AESD.8 q0, q5 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q6 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q7 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q8 \n" \ + "AESIMC.8 q0, q0 \n" \ + \ + "AESD.8 q0, q9 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! 
\n" \ + \ + "CMP %[rounds], #10 \n" \ + "BLE " #label "f \n" \ + "AESD.8 q0, q10 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q11 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + "CMP %[rounds], #12 \n" \ + "BLE " #label "f \n" \ + "AESD.8 q0, q10 \n" \ + "AESIMC.8 q0, q0 \n" \ + "AESD.8 q0, q11 \n" \ + "AESIMC.8 q0, q0 \n" \ + "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ + \ + #label ": \n" \ + "AESD.8 q0, q10 \n" \ + "VEOR q0, q0, q11 \n" + +/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte tmp[AES_BLOCK_SIZE]; + word32* key2 = xaes->tweak.key; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + __asm__ __volatile__ ( + "MOV r14, #0x87 \n" + + "# Load tweak calculation key\n" + "VLD1.32 {q0}, [%[i]] \n" + "VLD1.32 {d2, d3, d4, d5}, [%[key2]]! \n" + "VLD1.32 {d6, d7, d8, d9}, [%[key2]]! \n" + "VLD1.32 {d10, d11, d12, d13}, [%[key2]]! \n" + "VLD1.32 {d14, d15, d16, d17}, [%[key2]]! \n" + "VLD1.32 {d18, d19}, [%[key2]]! 
\n" + + AES_ENCRYPT(10) + + "VMOV.32 r9, d0[0] \n" + "VMOV.32 r10, d0[1] \n" + "VMOV.32 r11, d1[0] \n" + "VMOV.32 r12, d1[1] \n" + "VMOV q14, q0 \n" + + "# Load encryption key\n" + "MOV %[key2], %[key] \n" + "VLD1.32 {d2, d3, d4, d5}, [%[key2]]! \n" + "VLD1.32 {d6, d7, d8, d9}, [%[key2]]! \n" + "VLD1.32 {d10, d11, d12, d13}, [%[key2]]! \n" + "VLD1.32 {d14, d15, d16, d17}, [%[key2]]! \n" + "VLD1.32 {d18, d19}, [%[key2]]! \n" + + "1: \n" + "VLD1.32 {q0}, [%[in]]! \n" + "ADD %[key2], %[key], #144 \n" + + "VMOV.32 r9, d28[0] \n" + "VMOV.32 r10, d28[1] \n" + "VMOV.32 r11, d29[0] \n" + "VMOV.32 r12, d29[1] \n" + + "VEOR q0, q0, q14 \n" + + AES_ENCRYPT_UPDATE_TWEAK(2) + + "VEOR q0, q0, q14 \n" + + "VST1.32 {q0}, [%[out]]! \n" + + "VMOV.32 d28[0], r9 \n" + "VMOV.32 d28[1], r10 \n" + "VMOV.32 d29[0], r11 \n" + "VMOV.32 d29[1], r12 \n" + + "SUBS %[blocks], %[blocks], #1 \n" + "SUB %[sz], %[sz], #16 \n" + "BGT 1b \n" + + "CMP %[sz], #0 \n" + "BEQ 3f \n" + + "30: \n" + "#Partial block \n" + "SUB %[out], %[out], #16 \n" + "VLD1.32 {q0}, [%[out]]! \n" + "VST1.32 {q0}, [%[tmp]] \n" + + "MOV r9, %[sz] \n" + "4: \n" + "LDRB r10, [%[tmp]] \n" + "LDRB r11, [%[in]], #1 \n" + "STRB r10, [%[out]], #1 \n" + "STRB r11, [%[tmp]], #1 \n" + "SUBS r9, r9, #1 \n" + "BGT 4b \n" + + "SUB %[out], %[out], %[sz] \n" + "SUB %[tmp], %[tmp], %[sz] \n" + "SUB %[out], %[out], #16 \n" + + "VLD1.32 {q0}, [%[tmp]] \n" + "ADD %[key2], %[key], #144 \n" + + "VEOR q0, q0, q14 \n" + + AES_ENCRYPT(5) + + "VEOR q0, q0, q14 \n" + + "VST1.32 {q0}, [%[out]] \n" + + "3: \n" + + : [blocks] "+r" (blocks), [in] "+r" (in), [out] "+r" (out), + [sz] "+r" (sz), [i] "+r" (i), [key2] "+r" (key2) + : [key] "r" (xaes->aes.key), [rounds] "r" (xaes->aes.rounds), + [tmp] "r" (tmp) + : "cc", "memory", + "r9", "r10", "r11", "r12", "r14", + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q14" + ); + + return ret; +} + +/* Same process as encryption but Aes key is AES_DECRYPTION type. 
+ * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte tmp[AES_BLOCK_SIZE]; + byte stl = (sz % AES_BLOCK_SIZE); + word32* key2 = xaes->tweak.key; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks == 0) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + /* if Stealing then break out of loop one block early to handle special + * case */ + blocks -= (stl > 0); + + __asm__ __volatile__ ( + "MOV r14, #0x87 \n" + + "VLD1.32 {q0}, [%[i]] \n" + "VLD1.32 {d2, d3, d4, d5}, [%[key2]]! \n" + "VLD1.32 {d6, d7, d8, d9}, [%[key2]]! \n" + "VLD1.32 {d10, d11, d12, d13}, [%[key2]]! \n" + "VLD1.32 {d14, d15, d16, d17}, [%[key2]]! \n" + "VLD1.32 {d18, d19}, [%[key2]]! \n" + + AES_ENCRYPT(10) + + "VMOV.32 r9, d0[0] \n" + "VMOV.32 r10, d0[1] \n" + "VMOV.32 r11, d1[0] \n" + "VMOV.32 r12, d1[1] \n" + "VMOV q14, q0 \n" + + "# Load decryption key\n" + "MOV %[key2], %[key] \n" + "VLD1.32 {d2, d3, d4, d5}, [%[key2]]! \n" + "VLD1.32 {d6, d7, d8, d9}, [%[key2]]! \n" + "VLD1.32 {d10, d11, d12, d13}, [%[key2]]! \n" + "VLD1.32 {d14, d15, d16, d17}, [%[key2]]! \n" + "VLD1.32 {d18, d19}, [%[key2]]! \n" + + "CMP %[blocks], #0 \n" + "BEQ 3f \n" + + "1: \n" + "VLD1.32 {q0}, [%[in]]! \n" + "ADD %[key2], %[key], #144 \n" + + "VEOR q0, q0, q14 \n" + + AES_DECRYPT_UPDATE_TWEAK(2) + + "VEOR q0, q0, q14 \n" + + "VST1.32 {q0}, [%[out]]! 
\n" + + "VMOV.32 d28[0], r9 \n" + "VMOV.32 d28[1], r10 \n" + "VMOV.32 d29[0], r11 \n" + "VMOV.32 d29[1], r12 \n" + + "SUBS %[blocks], %[blocks], #1 \n" + "SUB %[sz], %[sz], #16 \n" + "BGT 1b \n" + + "CMP %[sz], #0 \n" + "BEQ 4f \n" + + "3: \n" + + "AND %[i], r14, r12, ASR #31 \n" + "LSL r12, r12, #1 \n" + "ORR r12, r12, r11, LSR #31 \n" + "LSL r11, r11, #1 \n" + "ORR r11, r11, r10, LSR #31 \n" + "LSL r10, r10, #1 \n" + "ORR r10, r10, r9, LSR #31 \n"\ + "EOR r9, %[i], r9, LSL #1 \n" + "VMOV.32 d30[0], r9 \n" + "VMOV.32 d30[1], r10 \n" + "VMOV.32 d31[0], r11 \n" + "VMOV.32 d31[1], r12 \n" + + "VLD1.32 {q0}, [%[in]]! \n" + "ADD %[key2], %[key], #144 \n" + + "VEOR q0, q0, q15 \n" + + AES_DECRYPT(5) + + "VEOR q0, q0, q15 \n" + + "SUB %[sz], %[sz], #16 \n" + + "VST1.32 {q0}, [%[tmp]] \n" + "ADD %[out], %[out], #16 \n" + "MOV r9, %[sz] \n" + "6: \n" + "LDRB r10, [%[tmp]] \n" + "LDRB r11, [%[in]], #1 \n" + "STRB r10, [%[out]], #1 \n" + "STRB r11, [%[tmp]], #1 \n" + "SUBS r9, r9, #1 \n" + "BGT 6b \n" + "SUB %[out], %[out], %[sz] \n" + "SUB %[tmp], %[tmp], %[sz] \n" + "SUB %[out], %[out], #16 \n" + + "VLD1.32 {q0}, [%[tmp]] \n" + "ADD %[key2], %[key], #144 \n" + + "VEOR q0, q0, q14 \n" + + AES_DECRYPT(7) + + "VEOR q0, q0, q14 \n" + + "VST1.32 {q0}, [%[out]] \n" + + "4: \n" + + : [blocks] "+r" (blocks), [in] "+r" (in), [out] "+r" (out), + [sz] "+r" (sz), [i] "+r" (i), [key2] "+r" (key2) + : [key] "r" (xaes->aes.key), [rounds] "r" (xaes->aes.rounds), + [tmp] "r" (tmp) + : "cc", "memory", + "r9", "r10", "r11", "r12", "r14", + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q14", "q15" + ); + + return ret; +} + +#endif /* __aach64__ */ +#endif /* WOLFSSL_AES_XTS */ + +#else /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ + +#include +#include +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +extern void AES_set_encrypt_key(const unsigned char* key, word32 len, + unsigned char* ks); +extern void AES_invert_key(unsigned char* 
ks, word32 rounds); +extern void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr); +extern void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr); +extern void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +extern void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +extern void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +/* in pre-C2x C, constness conflicts for dimensioned arrays can't be resolved. */ +extern void GCM_gmult_len(byte* x, /* const */ byte m[32][AES_BLOCK_SIZE], + const unsigned char* data, unsigned long len); +extern void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); + +int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ +#if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); +#endif + + if (((keylen != 16) && (keylen != 24) && (keylen != 32)) || + (aes == NULL) || (userKey == NULL)) { + return BAD_FUNC_ARG; + } + +#if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } +#endif + +#ifdef WOLFSSL_AES_COUNTER + aes->left = 0; +#endif /* WOLFSSL_AES_COUNTER */ + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + AES_set_encrypt_key(userKey, keylen * 8, (byte*)aes->key); +#ifdef HAVE_AES_DECRYPT + if (dir == AES_DECRYPTION) { + AES_invert_key((byte*)aes->key, aes->rounds); + } +#else + (void)dir; +#endif + + return wc_AesSetIV(aes, iv); +} + +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +int wc_AesSetKeyDirect(Aes* aes, 
const byte* userKey, word32 keylen, + const byte* iv, int dir) +{ + return wc_AesSetKey(aes, userKey, keylen, iv, dir); +} +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ + +/* wc_AesSetIV is shared between software and hardware */ +int wc_AesSetIV(Aes* aes, const byte* iv) +{ + if (aes == NULL) + return BAD_FUNC_ARG; + + if (iv) + XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); + + return 0; +} + +#if defined(HAVE_AESCCM) || defined(WOLFSSL_AES_DIRECT) +static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) +{ + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds); + return 0; +} +#endif /* HAVE_AESCCM && WOLFSSL_AES_DIRECT */ + +#if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) +static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) +{ + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds); + return 0; +} +#endif /* HAVE_AES_DECRYPT && WOLFSSL_AES_DIRECT */ + +/* AES-DIRECT */ +#if defined(WOLFSSL_AES_DIRECT) +/* Allow direct access to one block encrypt */ +int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) +{ + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect"); + return BAD_FUNC_ARG; + } + return wc_AesEncrypt(aes, in, out); +} + +#ifdef HAVE_AES_DECRYPT +/* Allow direct access to one block decrypt */ +int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) +{ + if (aes == NULL || out == NULL || in == NULL) { + WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect"); + return BAD_FUNC_ARG; + } + return wc_AesDecrypt(aes, in, out); +} +#endif /* HAVE_AES_DECRYPT 
*/ +#endif /* WOLFSSL_AES_DIRECT */ + +#ifdef HAVE_AES_CBC +int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + if (sz == 0) { + return 0; + } + if (sz % AES_BLOCK_SIZE) { +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + return BAD_LENGTH_E; +#else + return BAD_FUNC_ARG; +#endif + } + + AES_CBC_encrypt(in, out, sz, (const unsigned char*)aes->key, aes->rounds, + (unsigned char*)aes->reg); + + return 0; +} + +#ifdef HAVE_AES_DECRYPT +int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + if (sz == 0) { + return 0; + } + if (sz % AES_BLOCK_SIZE) { +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + return BAD_LENGTH_E; +#else + return BAD_FUNC_ARG; +#endif + } + + AES_CBC_decrypt(in, out, sz, (const unsigned char*)aes->key, aes->rounds, + (unsigned char*)aes->reg); + + return 0; +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AES_CBC */ + +#ifdef WOLFSSL_AES_COUNTER +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + byte* tmp; + word32 numBlocks; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + /* consume any unused bytes left in aes->tmp */ + while ((aes->left != 0) && (sz != 0)) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz / AES_BLOCK_SIZE; + if (numBlocks > 0) { + AES_CTR_encrypt(in, out, numBlocks * 
AES_BLOCK_SIZE, (byte*)aes->key, + aes->rounds, (byte*)aes->reg); + + sz -= numBlocks * AES_BLOCK_SIZE; + out += numBlocks * AES_BLOCK_SIZE; + in += numBlocks * AES_BLOCK_SIZE; + } + + /* handle non block size remaining */ + if (sz) { + byte zeros[AES_BLOCK_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + AES_CTR_encrypt(zeros, (byte*)aes->tmp, AES_BLOCK_SIZE, (byte*)aes->key, + aes->rounds, (byte*)aes->reg); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + } + } + return 0; +} + +int wc_AesCtrSetKey(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) +{ + (void)dir; + return wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); +} +#endif /* WOLFSSL_AES_COUNTER */ + +#ifdef HAVE_AESCCM +/* Software version of AES-CCM from wolfcrypt/src/aes.c + * Gets some speed up from hardware acceleration of wc_AesEncrypt */ + +static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + /* process the bulk of the data */ + while (inSz >= AES_BLOCK_SIZE) { + xorbuf(out, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + inSz -= AES_BLOCK_SIZE; + + wc_AesEncrypt(aes, out, out); + } + + /* process remainder of the data */ + if (inSz > 0) { + xorbuf(out, in, inSz); + wc_AesEncrypt(aes, out, out); + } +} + + +static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out) +{ + word32 authLenSz; + word32 remainder; + + /* encode the length in */ + if (inSz <= 0xFEFF) { + authLenSz = 2; + out[0] ^= ((inSz & 0xFF00) >> 8); + out[1] ^= (inSz & 0x00FF); + } + else if (inSz <= 0xFFFFFFFF) { + authLenSz = 6; + out[0] ^= 0xFF; out[1] ^= 0xFE; + out[2] ^= ((inSz & 0xFF000000) >> 24); + out[3] ^= ((inSz & 0x00FF0000) >> 16); + out[4] ^= ((inSz & 0x0000FF00) >> 8); + out[5] ^= (inSz & 0x000000FF); + } + /* Note, the protocol handles auth data up to 2^64, but we are + * using 32-bit sizes right now, so the bigger data isn't handled + * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} 
*/ + else + return; + + /* start fill out the rest of the first block */ + remainder = AES_BLOCK_SIZE - authLenSz; + if (inSz >= remainder) { + /* plenty of bulk data to fill the remainder of this block */ + xorbuf(out + authLenSz, in, remainder); + inSz -= remainder; + in += remainder; + } + else { + /* not enough bulk data, copy what is available, and pad zero */ + xorbuf(out + authLenSz, in, inSz); + inSz = 0; + } + wc_AesEncrypt(aes, out, out); + + if (inSz > 0) + roll_x(aes, in, inSz, out); +} + + +static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz) +{ + word32 i; + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return; + } +} + + /* return 0 on success */ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, const byte* nonce, word32 nonceSz, @@ -5264,80 +16967,362 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, #endif /* HAVE_AES_DECRYPT */ #endif /* HAVE_AESCCM */ +#ifdef HAVE_AESGCM +static WC_INLINE void RIGHTSHIFTX(byte* x) +{ + int i; + int carryIn = 0; + byte borrow = (0x00 - (x[15] & 0x01)) & 0xE1; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + int carryOut = (x[i] & 0x01) << 7; + x[i] = (byte) ((x[i] >> 1) | carryIn); + carryIn = carryOut; + } + x[0] ^= borrow; +} + +void GenerateM0(Gcm* gcm) +{ + int i; + byte (*m)[AES_BLOCK_SIZE] = gcm->M0; + + /* 0 times -> 0x0 */ + XMEMSET(m[0x0], 0, AES_BLOCK_SIZE); + /* 1 times -> 0x8 */ + XMEMCPY(m[0x8], gcm->H, AES_BLOCK_SIZE); + /* 2 times -> 0x4 */ + XMEMCPY(m[0x4], m[0x8], AES_BLOCK_SIZE); + RIGHTSHIFTX(m[0x4]); + /* 4 times -> 0x2 */ + XMEMCPY(m[0x2], m[0x4], AES_BLOCK_SIZE); + RIGHTSHIFTX(m[0x2]); + /* 8 times -> 0x1 */ + XMEMCPY(m[0x1], m[0x2], AES_BLOCK_SIZE); + RIGHTSHIFTX(m[0x1]); + + /* 0x3 */ + XMEMCPY(m[0x3], m[0x2], AES_BLOCK_SIZE); + xorbuf (m[0x3], m[0x1], AES_BLOCK_SIZE); + + /* 0x5 -> 0x7 */ + XMEMCPY(m[0x5], m[0x4], AES_BLOCK_SIZE); + xorbuf (m[0x5], m[0x1], AES_BLOCK_SIZE); + XMEMCPY(m[0x6], m[0x4], AES_BLOCK_SIZE); + 
xorbuf (m[0x6], m[0x2], AES_BLOCK_SIZE); + XMEMCPY(m[0x7], m[0x4], AES_BLOCK_SIZE); + xorbuf (m[0x7], m[0x3], AES_BLOCK_SIZE); + + /* 0x9 -> 0xf */ + XMEMCPY(m[0x9], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0x9], m[0x1], AES_BLOCK_SIZE); + XMEMCPY(m[0xa], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xa], m[0x2], AES_BLOCK_SIZE); + XMEMCPY(m[0xb], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xb], m[0x3], AES_BLOCK_SIZE); + XMEMCPY(m[0xc], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xc], m[0x4], AES_BLOCK_SIZE); + XMEMCPY(m[0xd], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xd], m[0x5], AES_BLOCK_SIZE); + XMEMCPY(m[0xe], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xe], m[0x6], AES_BLOCK_SIZE); + XMEMCPY(m[0xf], m[0x8], AES_BLOCK_SIZE); + xorbuf (m[0xf], m[0x7], AES_BLOCK_SIZE); + + for (i = 0; i < 16; i++) { + word32* m32 = (word32*)gcm->M0[i]; + m32[0] = ByteReverseWord32(m32[0]); + m32[1] = ByteReverseWord32(m32[1]); + m32[2] = ByteReverseWord32(m32[2]); + m32[3] = ByteReverseWord32(m32[3]); + } +} -#ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) { int ret; byte iv[AES_BLOCK_SIZE]; - if (!((len == 16) || (len == 24) || (len == 32))) + if (aes == NULL) { return BAD_FUNC_ARG; + } + + if ((len != 16) && (len != 24) && (len != 32)) { + return BAD_FUNC_ARG; + } XMEMSET(iv, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); if (ret == 0) { -#ifdef WOLFSSL_AESGCM_STREAM - aes->gcmKeySet = 1; -#endif - - wc_AesEncrypt(aes, iv, aes->H); - #if defined(__aarch64__) - { - word32* pt = (word32*)aes->H; - __asm__ volatile ( - "LD1 {v0.16b}, [%[h]] \n" - "RBIT v0.16b, v0.16b \n" - "ST1 {v0.16b}, [%[out]] \n" - : [out] "=r" (pt) - : [h] "0" (pt) - : "cc", "memory", "v0" - ); - } - #else - { - word32* pt = (word32*)aes->H; - __asm__ volatile ( - "VLD1.32 {q0}, [%[h]] \n" - "VREV64.8 q0, q0 \n" - "VSWP.8 d0, d1 \n" - "VST1.32 {q0}, [%[out]] \n" - : [out] "=r" (pt) - : [h] "0" (pt) - : "cc", "memory", "q0" - ); - } - #endif + 
AES_ECB_encrypt(iv, aes->gcm.H, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds); + GenerateM0(&aes->gcm); } return ret; } +static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + +static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) +{ + /* Multiply the sz by 8 */ + word32 szHi = (sz >> (8*sizeof(sz) - 3)); + sz <<= 3; + + /* copy over the words of the sz into the destination buffer */ + buf[0] = (szHi >> 24) & 0xff; + buf[1] = (szHi >> 16) & 0xff; + buf[2] = (szHi >> 8) & 0xff; + buf[3] = szHi & 0xff; + buf[4] = (sz >> 24) & 0xff; + buf[5] = (sz >> 16) & 0xff; + buf[6] = (sz >> 8) & 0xff; + buf[7] = sz & 0xff; +} + +static void gcm_ghash_arm32(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) +{ + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + word32 blocks, partial; + + if (aes == NULL) { + return; + } + + XMEMSET(x, 0, AES_BLOCK_SIZE); + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + if (blocks > 0) { + GCM_gmult_len(x, aes->gcm.M0, a, blocks * AES_BLOCK_SIZE); + a += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, a, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + } + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + if (blocks > 0) { + GCM_gmult_len(x, aes->gcm.M0, c, blocks * AES_BLOCK_SIZE); + c += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, c, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + } + } + + /* Hash in the 
lengths of A and C in bits */ + FlattenSzInBits(&scratch[0], aSz); + FlattenSzInBits(&scratch[8], cSz); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + + /* Copy the result into s. */ + XMEMCPY(s, x, sSz); +} + +int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks; + word32 partial; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte x[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + + /* sanity checks */ + if (aes == NULL || (iv == NULL && ivSz > 0) || (authTag == NULL) || + (authIn == NULL && authInSz > 0) || (ivSz == 0)) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) { + WOLFSSL_MSG("GcmEncrypt authTagSz error"); + return BAD_FUNC_ARG; + } + + if (aes->rounds != 10 && aes->rounds != 12 && aes->rounds != 14) { + WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); + return KEYUSAGE_E; + } + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + } + XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); + + /* Hash in the Additional Authentication Data */ + XMEMSET(x, 0, AES_BLOCK_SIZE); + if (authInSz != 0 && authIn != NULL) { + blocks = authInSz / AES_BLOCK_SIZE; + partial = authInSz % AES_BLOCK_SIZE; + if (blocks > 0) { + GCM_gmult_len(x, aes->gcm.M0, authIn, blocks * AES_BLOCK_SIZE); + authIn += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, authIn, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + } + } + + /* do as many blocks as possible */ + blocks = sz / AES_BLOCK_SIZE; + partial = sz % AES_BLOCK_SIZE; + 
if (blocks > 0) { + AES_GCM_encrypt(in, out, blocks * AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds, counter); + GCM_gmult_len(x, aes->gcm.M0, out, blocks * AES_BLOCK_SIZE); + in += blocks * AES_BLOCK_SIZE; + out += blocks * AES_BLOCK_SIZE; + } + + /* take care of partial block sizes leftover */ + if (partial != 0) { + AES_GCM_encrypt(in, scratch, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds, counter); + XMEMCPY(out, scratch, partial); + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, out, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + } + + /* Hash in the lengths of A and C in bits */ + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + FlattenSzInBits(&scratch[0], authInSz); + FlattenSzInBits(&scratch[8], sz); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + if (authTagSz > AES_BLOCK_SIZE) { + XMEMCPY(authTag, x, AES_BLOCK_SIZE); + } + else { + /* authTagSz can be smaller than AES_BLOCK_SIZE */ + XMEMCPY(authTag, x, authTagSz); + } + + /* Auth tag calculation. 
*/ + AES_ECB_encrypt(initialCounter, scratch, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds); + xorbuf(authTag, scratch, authTagSz); + + return 0; +} + +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + word32 blocks; + word32 partial; + byte counter[AES_BLOCK_SIZE]; + byte initialCounter[AES_BLOCK_SIZE]; + byte scratch[AES_BLOCK_SIZE]; + byte x[AES_BLOCK_SIZE]; + + /* sanity checks */ + if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) || + authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 || + ivSz == 0) { + WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0"); + return BAD_FUNC_ARG; + } + + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { + gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); + } + XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE); + + XMEMSET(x, 0, AES_BLOCK_SIZE); + /* Hash in the Additional Authentication Data */ + if (authInSz != 0 && authIn != NULL) { + blocks = authInSz / AES_BLOCK_SIZE; + partial = authInSz % AES_BLOCK_SIZE; + if (blocks > 0) { + GCM_gmult_len(x, aes->gcm.M0, authIn, blocks * AES_BLOCK_SIZE); + authIn += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, authIn, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + } + } + + blocks = sz / AES_BLOCK_SIZE; + partial = sz % AES_BLOCK_SIZE; + /* do as many blocks as possible */ + if (blocks > 0) { + GCM_gmult_len(x, aes->gcm.M0, in, blocks * AES_BLOCK_SIZE); + + AES_GCM_encrypt(in, out, blocks * AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds, counter); + in += blocks * AES_BLOCK_SIZE; + out += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + 
XMEMSET(scratch, 0, AES_BLOCK_SIZE); + XMEMCPY(scratch, in, partial); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + + AES_GCM_encrypt(in, scratch, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds, counter); + XMEMCPY(out, scratch, partial); + } + + XMEMSET(scratch, 0, AES_BLOCK_SIZE); + FlattenSzInBits(&scratch[0], authInSz); + FlattenSzInBits(&scratch[8], sz); + GCM_gmult_len(x, aes->gcm.M0, scratch, AES_BLOCK_SIZE); + AES_ECB_encrypt(initialCounter, scratch, AES_BLOCK_SIZE, + (const unsigned char*)aes->key, aes->rounds); + xorbuf(x, scratch, authTagSz); + if (authTag != NULL) { + if (ConstantCompare(authTag, x, authTagSz) != 0) { + return AES_GCM_AUTH_E; + } + } + + return 0; +} #endif /* HAVE_AESGCM */ -/* AES-DIRECT */ -#if defined(WOLFSSL_AES_DIRECT) - /* Allow direct access to one block encrypt */ - void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) - { - if (aes == NULL || out == NULL || in == NULL) { - WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect"); - return; - } - wc_AesEncrypt(aes, in, out); - } - #ifdef HAVE_AES_DECRYPT - /* Allow direct access to one block decrypt */ - void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) - { - if (aes == NULL || out == NULL || in == NULL) { - WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect"); - return; - } - wc_AesDecrypt(aes, in, out); - } - #endif /* HAVE_AES_DECRYPT */ -#endif /* WOLFSSL_AES_DIRECT */ +#endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #endif /* !NO_AES && WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index 05da4cd91..f774b26cd 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -1,22 +1,12 @@ /* armv8-sha256.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. 
* - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ @@ -44,7 +34,15 @@ #include #endif +#if defined(FREESCALE_MMCAU_SHA) + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + #include "cau_api.h" + #else + #include "fsl_mmcau.h" + #endif +#endif +#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO static const ALIGN32 word32 K[64] = { 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L, @@ -60,6 +58,7 @@ static const ALIGN32 word32 K[64] = { 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L, 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L }; +#endif static int InitSha256(wc_Sha256* sha256) @@ -70,6 +69,17 @@ static int InitSha256(wc_Sha256* sha256) return BAD_FUNC_ARG; } +#ifdef FREESCALE_MMCAU_SHA + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_initialize_output(sha256->digest); + #else + MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } +#else sha256->digest[0] = 0x6A09E667L; sha256->digest[1] = 0xBB67AE85L; sha256->digest[2] = 0x3C6EF372L; @@ -78,11 +88,16 @@ static int InitSha256(wc_Sha256* sha256) sha256->digest[5] = 0x9B05688CL; 
sha256->digest[6] = 0x1F83D9ABL; sha256->digest[7] = 0x5BE0CD19L; +#endif sha256->buffLen = 0; sha256->loLen = 0; sha256->hiLen = 0; +#ifdef WOLFSSL_HASH_FLAGS + sha256->flags = 0; +#endif + return ret; } @@ -94,6 +109,8 @@ static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len) } +#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO + #ifdef __aarch64__ /* First block is in sha256->buffer and rest in data. */ @@ -322,6 +339,7 @@ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 le static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) { byte* local; + const word32* k; local = (byte*)sha256->buffer; AddLength(sha256, sha256->buffLen); /* before adding pads */ @@ -333,6 +351,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen); sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; + k = K; __asm__ volatile ( "LD1 {v4.2d-v7.2d}, %[buffer] \n" "MOV v0.16b, v4.16b \n" @@ -474,8 +493,8 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "ADD v17.4s, v17.4s, v21.4s \n" "STP q16, q17, %[out] \n" - : [out] "=m" (sha256->digest) - : [k] "r" (K), [digest] "m" (sha256->digest), + : [out] "=m" (sha256->digest), [k] "+r" (k) + : [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer) : "cc", "memory", "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11" , "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -510,6 +529,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, sizeof(word32)); + k = K; __asm__ volatile ( "#load in message and schedule updates \n" "LD1 {v4.2d-v7.2d}, %[buffer] \n" @@ -652,8 +672,8 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "REV32 v17.16b, v17.16b \n" #endif "ST1 {v17.16b}, [%[hashOut]] \n" - : [hashOut] "=r" (hash) - : [k] "r" (K), [digest] "m" (sha256->digest), + : [hashOut] "=r" (hash), [k] "+r" (k) + : 
[digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), "0" (hash) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", @@ -1306,6 +1326,157 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) #endif /* __aarch64__ */ +#else /* WOLFSSL_ARMASM_NO_HW_CRYPTO */ + +#if defined(FREESCALE_MMCAU_SHA) + + #ifndef WC_HASH_DATA_ALIGNMENT + /* these hardware API's require 4 byte (word32) alignment */ + #define WC_HASH_DATA_ALIGNMENT 4 + #endif + + static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, + word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0 + if ((wc_ptr_t)data % WC_HASH_DATA_ALIGNMENT) { + /* data pointer is NOT aligned, + * so copy and perform one block at a time */ + byte* local = (byte*)sha256->buffer; + while (len >= WC_SHA256_BLOCK_SIZE) { + XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n(local, 1, sha256->digest); + #else + MMCAU_SHA256_HashN(local, 1, (uint32_t*)sha256->digest); + #endif + data += WC_SHA256_BLOCK_SIZE; + len -= WC_SHA256_BLOCK_SIZE; + } + } + else + #endif + { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); + #else + MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE, + (uint32_t*)sha256->digest); + #endif + } + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + +#else /* */ + +extern void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, + word32 len); + +#endif + +/* ARMv8 hardware acceleration Aarch32 and Thumb2 */ +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +{ + int ret = 0; + /* do block size increments */ + byte* local = (byte*)sha256->buffer; + word32 blocksLen; + + /* check that internal buffLen is valid */ + if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) + return BUFFER_E; + + AddLength(sha256, len); + + if 
(sha256->buffLen > 0) { + word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + if (add > 0) { + XMEMCPY(&local[sha256->buffLen], data, add); + + sha256->buffLen += add; + data += add; + len -= add; + } + + if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { + Transform_Sha256_Len(sha256, (const byte*)sha256->buffer, + WC_SHA256_BLOCK_SIZE); + sha256->buffLen = 0; + } + } + + blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. */ + Transform_Sha256_Len(sha256, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } + + if (len > 0) { + XMEMCPY(local, data, len); + sha256->buffLen = len; + } + + return ret; +} + +static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) +{ + byte* local = (byte*)sha256->buffer; + + if (sha256 == NULL) { + return BAD_FUNC_ARG; + } + + local[sha256->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha256->buffLen > WC_SHA256_PAD_SIZE) { + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - + sha256->buffLen); + sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; + Transform_Sha256_Len(sha256, (const byte*)sha256->buffer, + WC_SHA256_BLOCK_SIZE); + + sha256->buffLen = 0; + } + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + + /* put lengths in bits */ + sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) + + (sha256->hiLen << 3); + sha256->loLen = sha256->loLen << 3; + + /* store lengths */ + /* ! length ordering dependent on digest endian type ! 
*/ + + sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2] = sha256->hiLen; + sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 1] = sha256->loLen; + + ByteReverseWords( + &(sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2]), + &(sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2]), + WC_SHA256_BLOCK_SIZE - WC_SHA256_PAD_SIZE); + Transform_Sha256_Len(sha256, (const byte*)sha256->buffer, + WC_SHA256_BLOCK_SIZE); + +#ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords((word32*)hash, sha256->digest, WC_SHA256_DIGEST_SIZE); +#else + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); +#endif + + return 0; +} + +#endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ + #ifndef NO_SHA256 @@ -1315,6 +1486,9 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) return BAD_FUNC_ARG; sha256->heap = heap; +#ifdef WOLF_CRYPTO_CB + sha256->devId = devId; +#endif (void)devId; return InitSha256(sha256); @@ -1430,7 +1604,11 @@ int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data) #else XMEMCPY(sha256->buffer, data, WC_SHA256_BLOCK_SIZE); #endif +#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO Sha256Transform(sha256, data, 1); +#else + Transform_Sha256_Len(sha256, data, WC_SHA256_BLOCK_SIZE); +#endif return 0; } #endif @@ -1461,6 +1639,9 @@ int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data) sha224->loLen = 0; sha224->hiLen = 0; + #ifdef WOLFSSL_HASH_FLAGS + sha224->flags = 0; + #endif return ret; } diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-sha3-asm.S new file mode 100644 index 000000000..da0a00746 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm.S @@ -0,0 +1,207 @@ +/* armv8-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. 
+ * + * https://www.wolfssl.com + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.S + */ +#ifdef WOLFSSL_ARMASM +#ifdef __aarch64__ +#ifndef WOLFSSL_ARMASM_INLINE +#ifdef WOLFSSL_SHA3 +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 +#ifndef __APPLE__ + .text + .type L_SHA3_transform_crypto_r, %object + .section .rodata + .size L_SHA3_transform_crypto_r, 192 +#else + .section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ +L_SHA3_transform_crypto_r: + .xword 0x1 + .xword 0x8082 + .xword 0x800000000000808a + .xword 0x8000000080008000 + .xword 0x808b + .xword 0x80000001 + .xword 0x8000000080008081 + .xword 0x8000000000008009 + .xword 0x8a + .xword 0x88 + .xword 0x80008009 + .xword 0x8000000a + .xword 0x8000808b + .xword 0x800000000000008b + .xword 0x8000000000008089 + .xword 0x8000000000008003 + .xword 0x8000000000008002 + .xword 0x8000000000000080 + .xword 0x800a + .xword 0x800000008000000a + .xword 0x8000000080008081 + .xword 0x8000000000008080 + .xword 0x80000001 + .xword 0x8000000080008008 +#ifndef __APPLE__ +.text +.globl BlockSha3 +.type BlockSha3,@function +.align 2 +BlockSha3: +#else +.section __TEXT,__text +.globl _BlockSha3 +.p2align 2 +_BlockSha3: +#endif /* __APPLE__ */ + stp x29, x30, [sp, #-80]! 
+ add x29, sp, #0 + stp d8, d9, [x29, #16] + stp d10, d11, [x29, #32] + stp d12, d13, [x29, #48] + stp d14, d15, [x29, #64] +#ifdef __APPLE__ +.arch_extension sha3 +#endif /* __APPLE__ */ +#ifndef __APPLE__ + adrp x1, L_SHA3_transform_crypto_r + add x1, x1, :lo12:L_SHA3_transform_crypto_r +#else + adrp x1, L_SHA3_transform_crypto_r@PAGE + add x1, x1, :lo12:L_SHA3_transform_crypto_r@PAGEOFF +#endif /* __APPLE__ */ + ld4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32 + ld4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32 + ld4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32 + ld4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32 + ld4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32 + ld4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32 + ld1 {v24.1d}, [x0] + sub x0, x0, #0xc0 + mov x2, #24 + # Start of 24 rounds +L_sha3_crypto_begin: + # Col Mix + eor3 v31.16b, v0.16b, v5.16b, v10.16b + eor3 v27.16b, v1.16b, v6.16b, v11.16b + eor3 v28.16b, v2.16b, v7.16b, v12.16b + eor3 v29.16b, v3.16b, v8.16b, v13.16b + eor3 v30.16b, v4.16b, v9.16b, v14.16b + eor3 v31.16b, v31.16b, v15.16b, v20.16b + eor3 v27.16b, v27.16b, v16.16b, v21.16b + eor3 v28.16b, v28.16b, v17.16b, v22.16b + eor3 v29.16b, v29.16b, v18.16b, v23.16b + eor3 v30.16b, v30.16b, v19.16b, v24.16b + rax1 v25.2d, v30.2d, v27.2d + rax1 v26.2d, v31.2d, v28.2d + rax1 v27.2d, v27.2d, v29.2d + rax1 v28.2d, v28.2d, v30.2d + rax1 v29.2d, v29.2d, v31.2d + eor v0.16b, v0.16b, v25.16b + xar v30.2d, v1.2d, v26.2d, #63 + xar v1.2d, v6.2d, v26.2d, #20 + xar v6.2d, v9.2d, v29.2d, #44 + xar v9.2d, v22.2d, v27.2d, #3 + xar v22.2d, v14.2d, v29.2d, #25 + xar v14.2d, v20.2d, v25.2d, #46 + xar v20.2d, v2.2d, v27.2d, #2 + xar v2.2d, v12.2d, v27.2d, #21 + xar v12.2d, v13.2d, v28.2d, #39 + xar v13.2d, v19.2d, v29.2d, #56 + xar v19.2d, v23.2d, v28.2d, #8 + xar v23.2d, v15.2d, v25.2d, #23 + xar v15.2d, v4.2d, v29.2d, #37 + xar v4.2d, v24.2d, v29.2d, #50 + xar v24.2d, v21.2d, v26.2d, #62 + xar v21.2d, v8.2d, v28.2d, #9 + xar v8.2d, v16.2d, v26.2d, #19 + xar v16.2d, v5.2d, v25.2d, #28 + 
xar v5.2d, v3.2d, v28.2d, #36 + xar v3.2d, v18.2d, v28.2d, #43 + xar v18.2d, v17.2d, v27.2d, #49 + xar v17.2d, v11.2d, v26.2d, #54 + xar v11.2d, v7.2d, v27.2d, #58 + xar v7.2d, v10.2d, v25.2d, #61 + # Row Mix + mov v25.16b, v0.16b + mov v26.16b, v1.16b + bcax v0.16b, v25.16b, v2.16b, v26.16b + bcax v1.16b, v26.16b, v3.16b, v2.16b + bcax v2.16b, v2.16b, v4.16b, v3.16b + bcax v3.16b, v3.16b, v25.16b, v4.16b + bcax v4.16b, v4.16b, v26.16b, v25.16b + mov v25.16b, v5.16b + mov v26.16b, v6.16b + bcax v5.16b, v25.16b, v7.16b, v26.16b + bcax v6.16b, v26.16b, v8.16b, v7.16b + bcax v7.16b, v7.16b, v9.16b, v8.16b + bcax v8.16b, v8.16b, v25.16b, v9.16b + bcax v9.16b, v9.16b, v26.16b, v25.16b + mov v26.16b, v11.16b + bcax v10.16b, v30.16b, v12.16b, v26.16b + bcax v11.16b, v26.16b, v13.16b, v12.16b + bcax v12.16b, v12.16b, v14.16b, v13.16b + bcax v13.16b, v13.16b, v30.16b, v14.16b + bcax v14.16b, v14.16b, v26.16b, v30.16b + mov v25.16b, v15.16b + mov v26.16b, v16.16b + bcax v15.16b, v25.16b, v17.16b, v26.16b + bcax v16.16b, v26.16b, v18.16b, v17.16b + bcax v17.16b, v17.16b, v19.16b, v18.16b + bcax v18.16b, v18.16b, v25.16b, v19.16b + bcax v19.16b, v19.16b, v26.16b, v25.16b + mov v25.16b, v20.16b + mov v26.16b, v21.16b + bcax v20.16b, v25.16b, v22.16b, v26.16b + bcax v21.16b, v26.16b, v23.16b, v22.16b + bcax v22.16b, v22.16b, v24.16b, v23.16b + bcax v23.16b, v23.16b, v25.16b, v24.16b + bcax v24.16b, v24.16b, v26.16b, v25.16b + ld1r {v30.2d}, [x1], #8 + subs x2, x2, #1 + eor v0.16b, v0.16b, v30.16b + bne L_sha3_crypto_begin + st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32 + st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32 + st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32 + st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32 + st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32 + st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32 + st1 {v24.1d}, [x0] + ldp d8, d9, [x29, #16] + ldp d10, d11, [x29, #32] + ldp d12, d13, [x29, #48] + ldp d14, d15, [x29, #64] + ldp x29, x30, [sp], #0x50 + ret +#ifndef __APPLE__ + 
.size BlockSha3,.-BlockSha3 +#endif /* __APPLE__ */ +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#endif /* WOLFSSL_SHA3 */ +#endif /* __aarch64__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c new file mode 100644 index 000000000..ea343734d --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -0,0 +1,178 @@ +/* armv8-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. + * + * https://www.wolfssl.com + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.c + */ +#ifdef WOLFSSL_ARMASM +#ifdef __aarch64__ +#ifdef WOLFSSL_ARMASM_INLINE +#include + +#ifdef WOLFSSL_SHA3 +#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 +static const uint64_t L_SHA3_transform_crypto_r[] = { + 0x1UL, + 0x8082UL, + 0x800000000000808aUL, + 0x8000000080008000UL, + 0x808bUL, + 0x80000001UL, + 0x8000000080008081UL, + 0x8000000000008009UL, + 0x8aUL, + 0x88UL, + 0x80008009UL, + 0x8000000aUL, + 0x8000808bUL, + 0x800000000000008bUL, + 0x8000000000008089UL, + 0x8000000000008003UL, + 0x8000000000008002UL, + 0x8000000000000080UL, + 0x800aUL, + 0x800000008000000aUL, + 0x8000000080008081UL, + 0x8000000000008080UL, + 0x80000001UL, + 0x8000000080008008UL, +}; + +void BlockSha3(unsigned long* state) +{ + __asm__ __volatile__ ( +#ifdef __APPLE__ + ".arch_extension sha3\n\t" +#endif /* __APPLE__ */ +#ifndef __APPLE__ + "adrp x1, %[L_SHA3_transform_crypto_r]\n\t" + "add x1, x1, :lo12:%[L_SHA3_transform_crypto_r]\n\t" +#else + "adrp x1, %[L_SHA3_transform_crypto_r]@PAGE\n\t" + "add x1, x1, 
%[L_SHA3_transform_crypto_r]@PAGEOFF\n\t" +#endif /* __APPLE__ */ + "ld4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t" + "ld4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t" + "ld4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t" + "ld4 {v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t" + "ld4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t" + "ld4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t" + "ld1 {v24.1d}, [%x[state]]\n\t" + "sub %x[state], %x[state], #0xc0\n\t" + "mov x2, #24\n\t" + /* Start of 24 rounds */ + "\n" + "L_sha3_crypto_begin_%=: \n\t" + /* Col Mix */ + "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t" + "eor3 v27.16b, v1.16b, v6.16b, v11.16b\n\t" + "eor3 v28.16b, v2.16b, v7.16b, v12.16b\n\t" + "eor3 v29.16b, v3.16b, v8.16b, v13.16b\n\t" + "eor3 v30.16b, v4.16b, v9.16b, v14.16b\n\t" + "eor3 v31.16b, v31.16b, v15.16b, v20.16b\n\t" + "eor3 v27.16b, v27.16b, v16.16b, v21.16b\n\t" + "eor3 v28.16b, v28.16b, v17.16b, v22.16b\n\t" + "eor3 v29.16b, v29.16b, v18.16b, v23.16b\n\t" + "eor3 v30.16b, v30.16b, v19.16b, v24.16b\n\t" + "rax1 v25.2d, v30.2d, v27.2d\n\t" + "rax1 v26.2d, v31.2d, v28.2d\n\t" + "rax1 v27.2d, v27.2d, v29.2d\n\t" + "rax1 v28.2d, v28.2d, v30.2d\n\t" + "rax1 v29.2d, v29.2d, v31.2d\n\t" + "eor v0.16b, v0.16b, v25.16b\n\t" + "xar v30.2d, v1.2d, v26.2d, #63\n\t" + "xar v1.2d, v6.2d, v26.2d, #20\n\t" + "xar v6.2d, v9.2d, v29.2d, #44\n\t" + "xar v9.2d, v22.2d, v27.2d, #3\n\t" + "xar v22.2d, v14.2d, v29.2d, #25\n\t" + "xar v14.2d, v20.2d, v25.2d, #46\n\t" + "xar v20.2d, v2.2d, v27.2d, #2\n\t" + "xar v2.2d, v12.2d, v27.2d, #21\n\t" + "xar v12.2d, v13.2d, v28.2d, #39\n\t" + "xar v13.2d, v19.2d, v29.2d, #56\n\t" + "xar v19.2d, v23.2d, v28.2d, #8\n\t" + "xar v23.2d, v15.2d, v25.2d, #23\n\t" + "xar v15.2d, v4.2d, v29.2d, #37\n\t" + "xar v4.2d, v24.2d, v29.2d, #50\n\t" + "xar v24.2d, v21.2d, v26.2d, #62\n\t" + "xar v21.2d, v8.2d, v28.2d, #9\n\t" + "xar v8.2d, v16.2d, v26.2d, #19\n\t" + "xar v16.2d, v5.2d, v25.2d, #28\n\t" + 
"xar v5.2d, v3.2d, v28.2d, #36\n\t" + "xar v3.2d, v18.2d, v28.2d, #43\n\t" + "xar v18.2d, v17.2d, v27.2d, #49\n\t" + "xar v17.2d, v11.2d, v26.2d, #54\n\t" + "xar v11.2d, v7.2d, v27.2d, #58\n\t" + "xar v7.2d, v10.2d, v25.2d, #61\n\t" + /* Row Mix */ + "mov v25.16b, v0.16b\n\t" + "mov v26.16b, v1.16b\n\t" + "bcax v0.16b, v25.16b, v2.16b, v26.16b\n\t" + "bcax v1.16b, v26.16b, v3.16b, v2.16b\n\t" + "bcax v2.16b, v2.16b, v4.16b, v3.16b\n\t" + "bcax v3.16b, v3.16b, v25.16b, v4.16b\n\t" + "bcax v4.16b, v4.16b, v26.16b, v25.16b\n\t" + "mov v25.16b, v5.16b\n\t" + "mov v26.16b, v6.16b\n\t" + "bcax v5.16b, v25.16b, v7.16b, v26.16b\n\t" + "bcax v6.16b, v26.16b, v8.16b, v7.16b\n\t" + "bcax v7.16b, v7.16b, v9.16b, v8.16b\n\t" + "bcax v8.16b, v8.16b, v25.16b, v9.16b\n\t" + "bcax v9.16b, v9.16b, v26.16b, v25.16b\n\t" + "mov v26.16b, v11.16b\n\t" + "bcax v10.16b, v30.16b, v12.16b, v26.16b\n\t" + "bcax v11.16b, v26.16b, v13.16b, v12.16b\n\t" + "bcax v12.16b, v12.16b, v14.16b, v13.16b\n\t" + "bcax v13.16b, v13.16b, v30.16b, v14.16b\n\t" + "bcax v14.16b, v14.16b, v26.16b, v30.16b\n\t" + "mov v25.16b, v15.16b\n\t" + "mov v26.16b, v16.16b\n\t" + "bcax v15.16b, v25.16b, v17.16b, v26.16b\n\t" + "bcax v16.16b, v26.16b, v18.16b, v17.16b\n\t" + "bcax v17.16b, v17.16b, v19.16b, v18.16b\n\t" + "bcax v18.16b, v18.16b, v25.16b, v19.16b\n\t" + "bcax v19.16b, v19.16b, v26.16b, v25.16b\n\t" + "mov v25.16b, v20.16b\n\t" + "mov v26.16b, v21.16b\n\t" + "bcax v20.16b, v25.16b, v22.16b, v26.16b\n\t" + "bcax v21.16b, v26.16b, v23.16b, v22.16b\n\t" + "bcax v22.16b, v22.16b, v24.16b, v23.16b\n\t" + "bcax v23.16b, v23.16b, v25.16b, v24.16b\n\t" + "bcax v24.16b, v24.16b, v26.16b, v25.16b\n\t" + "ld1r {v30.2d}, [x1], #8\n\t" + "subs x2, x2, #1\n\t" + "eor v0.16b, v0.16b, v30.16b\n\t" + "bne L_sha3_crypto_begin_%=\n\t" + "st4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t" + "st4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t" + "st4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t" + "st4 {v12.d, 
v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t" + "st4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t" + "st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t" + "st1 {v24.1d}, [%x[state]]\n\t" + : [state] "+r" (state) + : [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r) + : "memory", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ +#endif /* WOLFSSL_SHA3 */ +#endif /* __aarch64__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-sha512-asm.S index 136df9472..9f5ec0de8 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm.S @@ -1,22 +1,12 @@ /* armv8-sha512-asm * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ #ifdef HAVE_CONFIG_H @@ -30,6 +20,7 @@ */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ +#ifndef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_SHA512 #ifndef WOLFSSL_ARMASM_CRYPTO_SHA512 #ifndef __APPLE__ @@ -1201,20 +1192,15 @@ Transform_Sha512_Len_crypto: .p2align 2 _Transform_Sha512_Len_crypto: #endif /* __APPLE__ */ - stp x29, x30, [sp, #-208]! + stp x29, x30, [sp, #-80]! add x29, sp, #0 stp d8, d9, [x29, #16] stp d10, d11, [x29, #32] stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] - stp d16, d17, [x29, #80] - stp d18, d19, [x29, #96] - stp d20, d21, [x29, #112] - stp d22, d23, [x29, #128] - stp d24, d25, [x29, #144] - stp d26, d27, [x29, #160] - stp d28, d29, [x29, #176] - stp d30, d31, [x29, #192] +#ifdef __APPLE__ +.arch_extension sha3 +#endif /* __APPLE__ */ #ifndef __APPLE__ adrp x4, L_SHA512_transform_crypto_len_k add x4, x4, :lo12:L_SHA512_transform_crypto_len_k @@ -1222,7 +1208,7 @@ _Transform_Sha512_Len_crypto: adrp x4, L_SHA512_transform_crypto_len_k@PAGE add x4, x4, :lo12:L_SHA512_transform_crypto_len_k@PAGEOFF #endif /* __APPLE__ */ - # Load first 16 64-bit words of K permantly + # Load first 16 64-bit words of K permanently ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x4], #0x40 ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x4], #0x40 # Load digest into working vars @@ -1731,15 +1717,7 @@ L_sha512_len_crypto_begin: ldp d10, d11, [x29, #32] ldp d12, d13, [x29, #48] ldp d14, d15, [x29, #64] - ldp d16, d17, [x29, #80] - ldp d18, d19, [x29, #96] - ldp d20, d21, [x29, #112] - ldp d22, d23, [x29, #128] - ldp d24, d25, [x29, #144] - ldp d26, d27, [x29, #160] - ldp d28, d29, [x29, #176] - ldp d30, d31, [x29, #192] - ldp x29, x30, [sp], #0xd0 + ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ .size 
Transform_Sha512_Len_crypto,.-Transform_Sha512_Len_crypto @@ -1752,3 +1730,4 @@ L_sha512_len_crypto_begin: #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c new file mode 100644 index 000000000..4db6e522e --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -0,0 +1,1660 @@ +/* armv8-sha512-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. + * + * This file is part of wolfSSL. + * + * Contact licensing@wolfssl.com with any questions or comments. + * + * https://www.wolfssl.com + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c + */ +#ifdef WOLFSSL_ARMASM +#ifdef __aarch64__ +#ifdef WOLFSSL_ARMASM_INLINE +#include + +#ifdef WOLFSSL_SHA512 +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512 +static const uint64_t L_SHA512_transform_neon_len_k[] = { + 0x428a2f98d728ae22UL, + 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, + 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, + 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, + 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, + 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, + 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, + 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, + 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, + 0xefbe4786384f25e3UL, + 0xfc19dc68b8cd5b5UL, + 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, + 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, + 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, + 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, + 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, + 0xd5a79147930aa725UL, + 0x6ca6351e003826fUL, + 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, + 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, + 0x53380d139d95b3dfUL, + 
0x650a73548baf63deUL, + 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, + 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, + 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, + 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, + 0xd69906245565a910UL, + 0xf40e35855771202aUL, + 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, + 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, + 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, + 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, + 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, + 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, + 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, + 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, + 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, + 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, + 0xf57d4f7fee6ed178UL, + 0x6f067aa72176fbaUL, + 0xa637dc5a2c898a6UL, + 0x113f9804bef90daeUL, + 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, + 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, + 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, + 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, + 0x6c44198c4a475817UL, +}; + +static const uint64_t L_SHA512_transform_neon_len_ror8[] = { + 0x7060504030201UL, + 0x80f0e0d0c0b0a09UL, +}; + +void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) +{ + __asm__ __volatile__ ( +#ifndef __APPLE__ + "adrp x3, %[L_SHA512_transform_neon_len_k]\n\t" + "add x3, x3, :lo12:%[L_SHA512_transform_neon_len_k]\n\t" +#else + "adrp x3, %[L_SHA512_transform_neon_len_k]@PAGE\n\t" + "add x3, x3, %[L_SHA512_transform_neon_len_k]@PAGEOFF\n\t" +#endif /* __APPLE__ */ +#ifndef __APPLE__ + "adrp x27, %[L_SHA512_transform_neon_len_ror8]\n\t" + "add x27, x27, :lo12:%[L_SHA512_transform_neon_len_ror8]\n\t" +#else + "adrp x27, %[L_SHA512_transform_neon_len_ror8]@PAGE\n\t" + "add x27, x27, %[L_SHA512_transform_neon_len_ror8]@PAGEOFF\n\t" +#endif /* __APPLE__ */ + "ld1 {v11.16b}, [x27]\n\t" + /* Load digest into working vars */ + "ldp x4, x5, [%x[sha512]]\n\t" + "ldp x6, x7, [%x[sha512], #16]\n\t" + "ldp x8, x9, 
[%x[sha512], #32]\n\t" + "ldp x10, x11, [%x[sha512], #48]\n\t" + /* Start of loop processing a block */ + "\n" + "L_sha512_len_neon_begin_%=: \n\t" + /* Load W */ + /* Copy digest to add in at end */ + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t" + "mov x19, x4\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t" + "mov x20, x5\n\t" + "rev64 v0.16b, v0.16b\n\t" + "mov x21, x6\n\t" + "rev64 v1.16b, v1.16b\n\t" + "mov x22, x7\n\t" + "rev64 v2.16b, v2.16b\n\t" + "mov x23, x8\n\t" + "rev64 v3.16b, v3.16b\n\t" + "mov x24, x9\n\t" + "rev64 v4.16b, v4.16b\n\t" + "mov x25, x10\n\t" + "rev64 v5.16b, v5.16b\n\t" + "mov x26, x11\n\t" + "rev64 v6.16b, v6.16b\n\t" + "rev64 v7.16b, v7.16b\n\t" + /* Pre-calc: b ^ c */ + "eor x16, x5, x6\n\t" + "mov x27, #4\n\t" + /* Start of 16 rounds */ + "\n" + "L_sha512_len_neon_start_%=: \n\t" + /* Round 0 */ + "mov x13, v0.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 1 */ + "mov x13, v0.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v0.16b, v1.16b, #8\n\t" + "ror x12, x7, #14\n\t" + "shl v8.2d, v7.2d, #45\n\t" + "ror x14, x11, #28\n\t" + "sri v8.2d, v7.2d, #19\n\t" + "eor x12, x12, x7, ror 18\n\t" + "shl v9.2d, v7.2d, #3\n\t" + "eor x14, x14, x11, ror 34\n\t" + "sri v9.2d, v7.2d, #61\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x11, ror 39\n\t" + "ushr v8.2d, v7.2d, #6\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x11, x4\n\t" + 
"add v0.2d, v0.2d, v9.2d\n\t" + "eor x12, x8, x9\n\t" + "ext v9.16b, v4.16b, v5.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v0.2d, v0.2d, v9.2d\n\t" + "and x12, x12, x7\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x10, x10, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x9\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x10, x10, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x4\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v0.2d, v0.2d, v9.2d\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 2 */ + "mov x13, v1.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 3 */ + "mov x13, v1.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v1.16b, v2.16b, #8\n\t" + "ror x12, x5, #14\n\t" + "shl v8.2d, v0.2d, #45\n\t" + "ror x14, x9, #28\n\t" + "sri v8.2d, v0.2d, #19\n\t" + "eor x12, x12, x5, ror 18\n\t" + "shl v9.2d, v0.2d, #3\n\t" + "eor x14, x14, x9, ror 34\n\t" + "sri v9.2d, v0.2d, #61\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x9, ror 39\n\t" + "ushr v8.2d, v0.2d, #6\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x9, x10\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "eor x12, x6, x7\n\t" + "ext v9.16b, v5.16b, v6.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "and x12, x12, x5\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x8, x8, x13\n\t" + "sri v8.2d, v10.2d, 
#1\n\t" + "eor x12, x12, x7\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x8, x8, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x10\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v1.2d, v1.2d, v9.2d\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 4 */ + "mov x13, v2.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 5 */ + "mov x13, v2.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v2.16b, v3.16b, #8\n\t" + "ror x12, x11, #14\n\t" + "shl v8.2d, v1.2d, #45\n\t" + "ror x14, x7, #28\n\t" + "sri v8.2d, v1.2d, #19\n\t" + "eor x12, x12, x11, ror 18\n\t" + "shl v9.2d, v1.2d, #3\n\t" + "eor x14, x14, x7, ror 34\n\t" + "sri v9.2d, v1.2d, #61\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x7, ror 39\n\t" + "ushr v8.2d, v1.2d, #6\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x7, x8\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "eor x12, x4, x5\n\t" + "ext v9.16b, v6.16b, v7.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "and x12, x12, x11\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x6, x6, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x5\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x6, x6, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x8\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, 
x14, x17\n\t" + "add v2.2d, v2.2d, v9.2d\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 6 */ + "mov x13, v3.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 7 */ + "mov x13, v3.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v3.16b, v4.16b, #8\n\t" + "ror x12, x9, #14\n\t" + "shl v8.2d, v2.2d, #45\n\t" + "ror x14, x5, #28\n\t" + "sri v8.2d, v2.2d, #19\n\t" + "eor x12, x12, x9, ror 18\n\t" + "shl v9.2d, v2.2d, #3\n\t" + "eor x14, x14, x5, ror 34\n\t" + "sri v9.2d, v2.2d, #61\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x5, ror 39\n\t" + "ushr v8.2d, v2.2d, #6\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x5, x6\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "eor x12, x10, x11\n\t" + "ext v9.16b, v7.16b, v0.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "and x12, x12, x9\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x4, x4, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x11\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x4, x4, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x6\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v3.2d, v3.2d, v9.2d\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + /* Round 8 */ + "mov x13, v4.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, 
x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 9 */ + "mov x13, v4.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v4.16b, v5.16b, #8\n\t" + "ror x12, x7, #14\n\t" + "shl v8.2d, v3.2d, #45\n\t" + "ror x14, x11, #28\n\t" + "sri v8.2d, v3.2d, #19\n\t" + "eor x12, x12, x7, ror 18\n\t" + "shl v9.2d, v3.2d, #3\n\t" + "eor x14, x14, x11, ror 34\n\t" + "sri v9.2d, v3.2d, #61\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x11, ror 39\n\t" + "ushr v8.2d, v3.2d, #6\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x11, x4\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "eor x12, x8, x9\n\t" + "ext v9.16b, v0.16b, v1.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "and x12, x12, x7\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x10, x10, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x9\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x10, x10, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x4\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x10, x10, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v4.2d, v4.2d, v9.2d\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 10 */ + "mov x13, v5.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" 
+ "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 11 */ + "mov x13, v5.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v5.16b, v6.16b, #8\n\t" + "ror x12, x5, #14\n\t" + "shl v8.2d, v4.2d, #45\n\t" + "ror x14, x9, #28\n\t" + "sri v8.2d, v4.2d, #19\n\t" + "eor x12, x12, x5, ror 18\n\t" + "shl v9.2d, v4.2d, #3\n\t" + "eor x14, x14, x9, ror 34\n\t" + "sri v9.2d, v4.2d, #61\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x9, ror 39\n\t" + "ushr v8.2d, v4.2d, #6\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x9, x10\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "eor x12, x6, x7\n\t" + "ext v9.16b, v1.16b, v2.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "and x12, x12, x5\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x8, x8, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x7\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x8, x8, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x10\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x8, x8, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v5.2d, v5.2d, v9.2d\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 12 */ + "mov x13, v6.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 13 */ + "mov x13, v6.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, 
v6.16b, v7.16b, #8\n\t" + "ror x12, x11, #14\n\t" + "shl v8.2d, v5.2d, #45\n\t" + "ror x14, x7, #28\n\t" + "sri v8.2d, v5.2d, #19\n\t" + "eor x12, x12, x11, ror 18\n\t" + "shl v9.2d, v5.2d, #3\n\t" + "eor x14, x14, x7, ror 34\n\t" + "sri v9.2d, v5.2d, #61\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x7, ror 39\n\t" + "ushr v8.2d, v5.2d, #6\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x7, x8\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "eor x12, x4, x5\n\t" + "ext v9.16b, v2.16b, v3.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "and x12, x12, x11\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x6, x6, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x5\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x6, x6, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x8\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x6, x6, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v6.2d, v6.2d, v9.2d\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 14 */ + "mov x13, v7.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 15 */ + "mov x13, v7.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ext v10.16b, v7.16b, v0.16b, #8\n\t" + "ror x12, x9, #14\n\t" + "shl v8.2d, v6.2d, #45\n\t" + "ror x14, x5, #28\n\t" + "sri v8.2d, v6.2d, #19\n\t" + "eor x12, x12, x9, ror 18\n\t" + "shl v9.2d, v6.2d, #3\n\t" + "eor x14, x14, x5, ror 34\n\t" + "sri v9.2d, v6.2d, 
#61\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x14, x14, x5, ror 39\n\t" + "ushr v8.2d, v6.2d, #6\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x16, x5, x6\n\t" + "add v7.2d, v7.2d, v9.2d\n\t" + "eor x12, x10, x11\n\t" + "ext v9.16b, v3.16b, v4.16b, #8\n\t" + "and x17, x16, x17\n\t" + "add v7.2d, v7.2d, v9.2d\n\t" + "and x12, x12, x9\n\t" + "shl v8.2d, v10.2d, #63\n\t" + "add x4, x4, x13\n\t" + "sri v8.2d, v10.2d, #1\n\t" + "eor x12, x12, x11\n\t" + "tbl v9.16b, {v10.16b}, v11.16b\n\t" + "add x4, x4, x15\n\t" + "eor v9.16b, v9.16b, v8.16b\n\t" + "eor x17, x17, x6\n\t" + "ushr v10.2d, v10.2d, #7\n\t" + "add x4, x4, x12\n\t" + "eor v9.16b, v9.16b, v10.16b\n\t" + "add x14, x14, x17\n\t" + "add v7.2d, v7.2d, v9.2d\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + "subs x27, x27, #1\n\t" + "bne L_sha512_len_neon_start_%=\n\t" + /* Round 0 */ + "mov x13, v0.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + "add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 1 */ + "mov x13, v0.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x7, #14\n\t" + "ror x14, x11, #28\n\t" + "eor x12, x12, x7, ror 18\n\t" + "eor x14, x14, x11, ror 34\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor x14, x14, x11, ror 39\n\t" + "add x10, x10, x12\n\t" + "eor x16, x11, x4\n\t" + "eor x12, x8, x9\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x7\n\t" + "add x10, x10, x13\n\t" + "eor x12, x12, x9\n\t" + "add x10, x10, x15\n\t" + "eor x17, x17, x4\n\t" + "add x10, x10, x12\n\t" + "add x14, 
x14, x17\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 2 */ + "mov x13, v1.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 3 */ + "mov x13, v1.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x5, #14\n\t" + "ror x14, x9, #28\n\t" + "eor x12, x12, x5, ror 18\n\t" + "eor x14, x14, x9, ror 34\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor x14, x14, x9, ror 39\n\t" + "add x8, x8, x12\n\t" + "eor x16, x9, x10\n\t" + "eor x12, x6, x7\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x5\n\t" + "add x8, x8, x13\n\t" + "eor x12, x12, x7\n\t" + "add x8, x8, x15\n\t" + "eor x17, x17, x10\n\t" + "add x8, x8, x12\n\t" + "add x14, x14, x17\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 4 */ + "mov x13, v2.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 5 */ + "mov x13, v2.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x11, #14\n\t" + "ror x14, x7, #28\n\t" + "eor x12, x12, x11, ror 18\n\t" + "eor x14, x14, x7, ror 34\n\t" + "eor x12, x12, x11, ror 41\n\t" + 
"eor x14, x14, x7, ror 39\n\t" + "add x6, x6, x12\n\t" + "eor x16, x7, x8\n\t" + "eor x12, x4, x5\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x11\n\t" + "add x6, x6, x13\n\t" + "eor x12, x12, x5\n\t" + "add x6, x6, x15\n\t" + "eor x17, x17, x8\n\t" + "add x6, x6, x12\n\t" + "add x14, x14, x17\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 6 */ + "mov x13, v3.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 7 */ + "mov x13, v3.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x9, #14\n\t" + "ror x14, x5, #28\n\t" + "eor x12, x12, x9, ror 18\n\t" + "eor x14, x14, x5, ror 34\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor x14, x14, x5, ror 39\n\t" + "add x4, x4, x12\n\t" + "eor x16, x5, x6\n\t" + "eor x12, x10, x11\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x9\n\t" + "add x4, x4, x13\n\t" + "eor x12, x12, x11\n\t" + "add x4, x4, x15\n\t" + "eor x17, x17, x6\n\t" + "add x4, x4, x12\n\t" + "add x14, x14, x17\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + /* Round 8 */ + "mov x13, v4.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x8, #14\n\t" + "ror x14, x4, #28\n\t" + "eor x12, x12, x8, ror 18\n\t" + "eor x14, x14, x4, ror 34\n\t" + "eor x12, x12, x8, ror 41\n\t" + "eor x14, x14, x4, ror 39\n\t" + "add x11, x11, x12\n\t" + "eor x17, x4, x5\n\t" + "eor x12, x9, x10\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x8\n\t" + "add x11, x11, x13\n\t" + "eor x12, x12, x10\n\t" + "add x11, x11, x15\n\t" + "eor x16, x16, x5\n\t" + "add x11, x11, x12\n\t" + 
"add x14, x14, x16\n\t" + "add x7, x7, x11\n\t" + "add x11, x11, x14\n\t" + /* Round 9 */ + "mov x13, v4.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x7, #14\n\t" + "ror x14, x11, #28\n\t" + "eor x12, x12, x7, ror 18\n\t" + "eor x14, x14, x11, ror 34\n\t" + "eor x12, x12, x7, ror 41\n\t" + "eor x14, x14, x11, ror 39\n\t" + "add x10, x10, x12\n\t" + "eor x16, x11, x4\n\t" + "eor x12, x8, x9\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x7\n\t" + "add x10, x10, x13\n\t" + "eor x12, x12, x9\n\t" + "add x10, x10, x15\n\t" + "eor x17, x17, x4\n\t" + "add x10, x10, x12\n\t" + "add x14, x14, x17\n\t" + "add x6, x6, x10\n\t" + "add x10, x10, x14\n\t" + /* Round 10 */ + "mov x13, v5.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x6, #14\n\t" + "ror x14, x10, #28\n\t" + "eor x12, x12, x6, ror 18\n\t" + "eor x14, x14, x10, ror 34\n\t" + "eor x12, x12, x6, ror 41\n\t" + "eor x14, x14, x10, ror 39\n\t" + "add x9, x9, x12\n\t" + "eor x17, x10, x11\n\t" + "eor x12, x7, x8\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x6\n\t" + "add x9, x9, x13\n\t" + "eor x12, x12, x8\n\t" + "add x9, x9, x15\n\t" + "eor x16, x16, x11\n\t" + "add x9, x9, x12\n\t" + "add x14, x14, x16\n\t" + "add x5, x5, x9\n\t" + "add x9, x9, x14\n\t" + /* Round 11 */ + "mov x13, v5.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x5, #14\n\t" + "ror x14, x9, #28\n\t" + "eor x12, x12, x5, ror 18\n\t" + "eor x14, x14, x9, ror 34\n\t" + "eor x12, x12, x5, ror 41\n\t" + "eor x14, x14, x9, ror 39\n\t" + "add x8, x8, x12\n\t" + "eor x16, x9, x10\n\t" + "eor x12, x6, x7\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x5\n\t" + "add x8, x8, x13\n\t" + "eor x12, x12, x7\n\t" + "add x8, x8, x15\n\t" + "eor x17, x17, x10\n\t" + "add x8, x8, x12\n\t" + "add x14, x14, x17\n\t" + "add x4, x4, x8\n\t" + "add x8, x8, x14\n\t" + /* Round 12 */ + "mov x13, v6.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x4, #14\n\t" + "ror x14, x8, #28\n\t" + "eor x12, x12, x4, ror 18\n\t" + "eor x14, x14, x8, ror 34\n\t" + "eor x12, 
x12, x4, ror 41\n\t" + "eor x14, x14, x8, ror 39\n\t" + "add x7, x7, x12\n\t" + "eor x17, x8, x9\n\t" + "eor x12, x5, x6\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x4\n\t" + "add x7, x7, x13\n\t" + "eor x12, x12, x6\n\t" + "add x7, x7, x15\n\t" + "eor x16, x16, x9\n\t" + "add x7, x7, x12\n\t" + "add x14, x14, x16\n\t" + "add x11, x11, x7\n\t" + "add x7, x7, x14\n\t" + /* Round 13 */ + "mov x13, v6.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x11, #14\n\t" + "ror x14, x7, #28\n\t" + "eor x12, x12, x11, ror 18\n\t" + "eor x14, x14, x7, ror 34\n\t" + "eor x12, x12, x11, ror 41\n\t" + "eor x14, x14, x7, ror 39\n\t" + "add x6, x6, x12\n\t" + "eor x16, x7, x8\n\t" + "eor x12, x4, x5\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x11\n\t" + "add x6, x6, x13\n\t" + "eor x12, x12, x5\n\t" + "add x6, x6, x15\n\t" + "eor x17, x17, x8\n\t" + "add x6, x6, x12\n\t" + "add x14, x14, x17\n\t" + "add x10, x10, x6\n\t" + "add x6, x6, x14\n\t" + /* Round 14 */ + "mov x13, v7.d[0]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x10, #14\n\t" + "ror x14, x6, #28\n\t" + "eor x12, x12, x10, ror 18\n\t" + "eor x14, x14, x6, ror 34\n\t" + "eor x12, x12, x10, ror 41\n\t" + "eor x14, x14, x6, ror 39\n\t" + "add x5, x5, x12\n\t" + "eor x17, x6, x7\n\t" + "eor x12, x11, x4\n\t" + "and x16, x17, x16\n\t" + "and x12, x12, x10\n\t" + "add x5, x5, x13\n\t" + "eor x12, x12, x4\n\t" + "add x5, x5, x15\n\t" + "eor x16, x16, x7\n\t" + "add x5, x5, x12\n\t" + "add x14, x14, x16\n\t" + "add x9, x9, x5\n\t" + "add x5, x5, x14\n\t" + /* Round 15 */ + "mov x13, v7.d[1]\n\t" + "ldr x15, [x3], #8\n\t" + "ror x12, x9, #14\n\t" + "ror x14, x5, #28\n\t" + "eor x12, x12, x9, ror 18\n\t" + "eor x14, x14, x5, ror 34\n\t" + "eor x12, x12, x9, ror 41\n\t" + "eor x14, x14, x5, ror 39\n\t" + "add x4, x4, x12\n\t" + "eor x16, x5, x6\n\t" + "eor x12, x10, x11\n\t" + "and x17, x16, x17\n\t" + "and x12, x12, x9\n\t" + "add x4, x4, x13\n\t" + "eor x12, x12, x11\n\t" + "add x4, x4, x15\n\t" + "eor x17, x17, x6\n\t" + 
"add x4, x4, x12\n\t" + "add x14, x14, x17\n\t" + "add x8, x8, x4\n\t" + "add x4, x4, x14\n\t" + "add x11, x11, x26\n\t" + "add x10, x10, x25\n\t" + "add x9, x9, x24\n\t" + "add x8, x8, x23\n\t" + "add x7, x7, x22\n\t" + "add x6, x6, x21\n\t" + "add x5, x5, x20\n\t" + "add x4, x4, x19\n\t" +#ifndef __APPLE__ + "adrp x3, %[L_SHA512_transform_neon_len_k]\n\t" + "add x3, x3, :lo12:%[L_SHA512_transform_neon_len_k]\n\t" +#else + "adrp x3, %[L_SHA512_transform_neon_len_k]@PAGE\n\t" + "add x3, x3, %[L_SHA512_transform_neon_len_k]@PAGEOFF\n\t" +#endif /* __APPLE__ */ + "subs %w[len], %w[len], #0x80\n\t" + "bne L_sha512_len_neon_begin_%=\n\t" + "stp x4, x5, [%x[sha512]]\n\t" + "stp x6, x7, [%x[sha512], #16]\n\t" + "stp x8, x9, [%x[sha512], #32]\n\t" + "stp x10, x11, [%x[sha512], #48]\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "cc" + ); +} + +#else +static const uint64_t L_SHA512_transform_crypto_len_k[] = { + 0x428a2f98d728ae22UL, + 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, + 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, + 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, + 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, + 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, + 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, + 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, + 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, + 0xefbe4786384f25e3UL, + 0xfc19dc68b8cd5b5UL, + 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, + 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, + 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, + 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, + 
0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, + 0xd5a79147930aa725UL, + 0x6ca6351e003826fUL, + 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, + 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, + 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, + 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, + 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, + 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, + 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, + 0xd69906245565a910UL, + 0xf40e35855771202aUL, + 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, + 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, + 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, + 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, + 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, + 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, + 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, + 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, + 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, + 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, + 0xf57d4f7fee6ed178UL, + 0x6f067aa72176fbaUL, + 0xa637dc5a2c898a6UL, + 0x113f9804bef90daeUL, + 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, + 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, + 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, + 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, + 0x6c44198c4a475817UL, +}; + +void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len); +void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len) +{ + __asm__ __volatile__ ( +#ifdef __APPLE__ + ".arch_extension sha3\n\t" +#endif /* __APPLE__ */ +#ifndef __APPLE__ + "adrp x4, %[L_SHA512_transform_crypto_len_k]\n\t" + "add x4, x4, :lo12:%[L_SHA512_transform_crypto_len_k]\n\t" +#else + "adrp x4, %[L_SHA512_transform_crypto_len_k]@PAGE\n\t" + "add x4, x4, %[L_SHA512_transform_crypto_len_k]@PAGEOFF\n\t" +#endif /* __APPLE__ */ + /* Load first 16 64-bit words of K permanently */ + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x4], #0x40\n\t" + "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x4], #0x40\n\t" + /* Load 
digest into working vars */ + "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" + /* Start of loop processing a block */ + "\n" + "L_sha512_len_crypto_begin_%=: \n\t" + "mov x3, x4\n\t" + /* Load W */ + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t" + "rev64 v0.16b, v0.16b\n\t" + "rev64 v1.16b, v1.16b\n\t" + "rev64 v2.16b, v2.16b\n\t" + "rev64 v3.16b, v3.16b\n\t" + "rev64 v4.16b, v4.16b\n\t" + "rev64 v5.16b, v5.16b\n\t" + "rev64 v6.16b, v6.16b\n\t" + "rev64 v7.16b, v7.16b\n\t" + /* Copy digest to add in at end */ + "mov v28.16b, v24.16b\n\t" + "mov v29.16b, v25.16b\n\t" + "mov v30.16b, v26.16b\n\t" + "mov v31.16b, v27.16b\n\t" + /* Start of 16 rounds */ + /* Round 0 */ + "add v20.2d, v0.2d, v8.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 1 */ + "add v20.2d, v1.2d, v9.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 2 */ + "add v20.2d, v2.2d, v10.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Round 3 */ + "add v20.2d, v3.2d, v11.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* 
Round 4 */ + "add v20.2d, v4.2d, v12.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 5 */ + "add v20.2d, v5.2d, v13.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 6 */ + "add v20.2d, v6.2d, v14.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 7 */ + "add v20.2d, v7.2d, v15.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 8 */ + "sha512su0 v0.2d, v1.2d\n\t" + "ext v21.16b, v4.16b, v5.16b, #8\n\t" + "sha512su1 v0.2d, v7.2d, v21.2d\n\t" + "add v20.2d, v0.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 9 */ + "sha512su0 v1.2d, v2.2d\n\t" + "ext v21.16b, v5.16b, v6.16b, #8\n\t" + "sha512su1 v1.2d, v0.2d, v21.2d\n\t" + "add v20.2d, v1.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, 
v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 10 */ + "sha512su0 v2.2d, v3.2d\n\t" + "ext v21.16b, v6.16b, v7.16b, #8\n\t" + "sha512su1 v2.2d, v1.2d, v21.2d\n\t" + "add v20.2d, v2.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 11 */ + "sha512su0 v3.2d, v4.2d\n\t" + "ext v21.16b, v7.16b, v0.16b, #8\n\t" + "sha512su1 v3.2d, v2.2d, v21.2d\n\t" + "add v20.2d, v3.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 12 */ + "sha512su0 v4.2d, v5.2d\n\t" + "ext v21.16b, v0.16b, v1.16b, #8\n\t" + "sha512su1 v4.2d, v3.2d, v21.2d\n\t" + "add v20.2d, v4.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Round 13 */ + "sha512su0 v5.2d, v6.2d\n\t" + "ext v21.16b, v1.16b, v2.16b, #8\n\t" + "sha512su1 v5.2d, v4.2d, v21.2d\n\t" + "add v20.2d, v5.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 
14 */ + "sha512su0 v6.2d, v7.2d\n\t" + "ext v21.16b, v2.16b, v3.16b, #8\n\t" + "sha512su1 v6.2d, v5.2d, v21.2d\n\t" + "add v20.2d, v6.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 15 */ + "sha512su0 v7.2d, v0.2d\n\t" + "ext v21.16b, v3.16b, v4.16b, #8\n\t" + "sha512su1 v7.2d, v6.2d, v21.2d\n\t" + "add v20.2d, v7.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 16 */ + "sha512su0 v0.2d, v1.2d\n\t" + "ext v21.16b, v4.16b, v5.16b, #8\n\t" + "sha512su1 v0.2d, v7.2d, v21.2d\n\t" + "add v20.2d, v0.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 17 */ + "sha512su0 v1.2d, v2.2d\n\t" + "ext v21.16b, v5.16b, v6.16b, #8\n\t" + "sha512su1 v1.2d, v0.2d, v21.2d\n\t" + "add v20.2d, v1.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Round 18 */ + "sha512su0 v2.2d, v3.2d\n\t" + "ext v21.16b, v6.16b, v7.16b, #8\n\t" + "sha512su1 v2.2d, v1.2d, v21.2d\n\t" + "add v20.2d, v2.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, 
v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 19 */ + "sha512su0 v3.2d, v4.2d\n\t" + "ext v21.16b, v7.16b, v0.16b, #8\n\t" + "sha512su1 v3.2d, v2.2d, v21.2d\n\t" + "add v20.2d, v3.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 20 */ + "sha512su0 v4.2d, v5.2d\n\t" + "ext v21.16b, v0.16b, v1.16b, #8\n\t" + "sha512su1 v4.2d, v3.2d, v21.2d\n\t" + "add v20.2d, v4.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 21 */ + "sha512su0 v5.2d, v6.2d\n\t" + "ext v21.16b, v1.16b, v2.16b, #8\n\t" + "sha512su1 v5.2d, v4.2d, v21.2d\n\t" + "add v20.2d, v5.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 22 */ + "sha512su0 v6.2d, v7.2d\n\t" + "ext v21.16b, v2.16b, v3.16b, #8\n\t" + "sha512su1 v6.2d, v5.2d, v21.2d\n\t" + "add v20.2d, v6.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + 
/* Round 23 */ + "sha512su0 v7.2d, v0.2d\n\t" + "ext v21.16b, v3.16b, v4.16b, #8\n\t" + "sha512su1 v7.2d, v6.2d, v21.2d\n\t" + "add v20.2d, v7.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 24 */ + "sha512su0 v0.2d, v1.2d\n\t" + "ext v21.16b, v4.16b, v5.16b, #8\n\t" + "sha512su1 v0.2d, v7.2d, v21.2d\n\t" + "add v20.2d, v0.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 25 */ + "sha512su0 v1.2d, v2.2d\n\t" + "ext v21.16b, v5.16b, v6.16b, #8\n\t" + "sha512su1 v1.2d, v0.2d, v21.2d\n\t" + "add v20.2d, v1.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 26 */ + "sha512su0 v2.2d, v3.2d\n\t" + "ext v21.16b, v6.16b, v7.16b, #8\n\t" + "sha512su1 v2.2d, v1.2d, v21.2d\n\t" + "add v20.2d, v2.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 27 */ + "sha512su0 v3.2d, v4.2d\n\t" + "ext v21.16b, v7.16b, v0.16b, #8\n\t" + "sha512su1 v3.2d, v2.2d, v21.2d\n\t" + "add v20.2d, v3.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext 
v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 28 */ + "sha512su0 v4.2d, v5.2d\n\t" + "ext v21.16b, v0.16b, v1.16b, #8\n\t" + "sha512su1 v4.2d, v3.2d, v21.2d\n\t" + "add v20.2d, v4.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 29 */ + "sha512su0 v5.2d, v6.2d\n\t" + "ext v21.16b, v1.16b, v2.16b, #8\n\t" + "sha512su1 v5.2d, v4.2d, v21.2d\n\t" + "add v20.2d, v5.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 30 */ + "sha512su0 v6.2d, v7.2d\n\t" + "ext v21.16b, v2.16b, v3.16b, #8\n\t" + "sha512su1 v6.2d, v5.2d, v21.2d\n\t" + "add v20.2d, v6.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Round 31 */ + "sha512su0 v7.2d, v0.2d\n\t" + "ext v21.16b, v3.16b, v4.16b, #8\n\t" + "sha512su1 v7.2d, v6.2d, v21.2d\n\t" + "add v20.2d, v7.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, 
v27.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 32 */ + "sha512su0 v0.2d, v1.2d\n\t" + "ext v21.16b, v4.16b, v5.16b, #8\n\t" + "sha512su1 v0.2d, v7.2d, v21.2d\n\t" + "add v20.2d, v0.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Round 33 */ + "sha512su0 v1.2d, v2.2d\n\t" + "ext v21.16b, v5.16b, v6.16b, #8\n\t" + "sha512su1 v1.2d, v0.2d, v21.2d\n\t" + "add v20.2d, v1.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 34 */ + "sha512su0 v2.2d, v3.2d\n\t" + "ext v21.16b, v6.16b, v7.16b, #8\n\t" + "sha512su1 v2.2d, v1.2d, v21.2d\n\t" + "add v20.2d, v2.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + /* Round 35 */ + "sha512su0 v3.2d, v4.2d\n\t" + "ext v21.16b, v7.16b, v0.16b, #8\n\t" + "sha512su1 v3.2d, v2.2d, v21.2d\n\t" + "add v20.2d, v3.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v26.16b, v27.16b, #8\n\t" + "ext v22.16b, v25.16b, v26.16b, #8\n\t" + "add v27.2d, v27.2d, v20.2d\n\t" + "sha512h q27, q21, v22.2d\n\t" + "add v23.2d, v25.2d, v27.2d\n\t" + "sha512h2 q27, q25, v24.2d\n\t" + /* Load next 8 64-bit words of K */ + "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" + /* Round 36 */ + "sha512su0 v4.2d, v5.2d\n\t" + "ext v21.16b, v0.16b, v1.16b, #8\n\t" + 
"sha512su1 v4.2d, v3.2d, v21.2d\n\t" + "add v20.2d, v4.2d, v16.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v23.16b, v26.16b, #8\n\t" + "ext v22.16b, v24.16b, v23.16b, #8\n\t" + "add v26.2d, v26.2d, v20.2d\n\t" + "sha512h q26, q21, v22.2d\n\t" + "add v25.2d, v24.2d, v26.2d\n\t" + "sha512h2 q26, q24, v27.2d\n\t" + /* Round 37 */ + "sha512su0 v5.2d, v6.2d\n\t" + "ext v21.16b, v1.16b, v2.16b, #8\n\t" + "sha512su1 v5.2d, v4.2d, v21.2d\n\t" + "add v20.2d, v5.2d, v17.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v25.16b, v23.16b, #8\n\t" + "ext v22.16b, v27.16b, v25.16b, #8\n\t" + "add v23.2d, v23.2d, v20.2d\n\t" + "sha512h q23, q21, v22.2d\n\t" + "add v24.2d, v27.2d, v23.2d\n\t" + "sha512h2 q23, q27, v26.2d\n\t" + /* Round 38 */ + "sha512su0 v6.2d, v7.2d\n\t" + "ext v21.16b, v2.16b, v3.16b, #8\n\t" + "sha512su1 v6.2d, v5.2d, v21.2d\n\t" + "add v20.2d, v6.2d, v18.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v24.16b, v25.16b, #8\n\t" + "ext v22.16b, v26.16b, v24.16b, #8\n\t" + "add v25.2d, v25.2d, v20.2d\n\t" + "sha512h q25, q21, v22.2d\n\t" + "add v27.2d, v26.2d, v25.2d\n\t" + "sha512h2 q25, q26, v23.2d\n\t" + /* Round 39 */ + "sha512su0 v7.2d, v0.2d\n\t" + "ext v21.16b, v3.16b, v4.16b, #8\n\t" + "sha512su1 v7.2d, v6.2d, v21.2d\n\t" + "add v20.2d, v7.2d, v19.2d\n\t" + "ext v20.16b, v20.16b, v20.16b, #8\n\t" + "ext v21.16b, v27.16b, v24.16b, #8\n\t" + "ext v22.16b, v23.16b, v27.16b, #8\n\t" + "add v24.2d, v24.2d, v20.2d\n\t" + "sha512h q24, q21, v22.2d\n\t" + "add v26.2d, v23.2d, v24.2d\n\t" + "sha512h2 q24, q23, v25.2d\n\t" + "add v27.2d, v27.2d, v31.2d\n\t" + "add v26.2d, v26.2d, v30.2d\n\t" + "add v25.2d, v25.2d, v29.2d\n\t" + "add v24.2d, v24.2d, v28.2d\n\t" + "subs %w[len], %w[len], #0x80\n\t" + "bne L_sha512_len_crypto_begin_%=\n\t" + /* Store digest back */ + "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) + : 
[L_SHA512_transform_crypto_len_k] "S" (L_SHA512_transform_crypto_len_k) + : "memory", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_CRYPTO_SHA512 */ +#endif /* WOLFSSL_SHA512 */ +#endif /* __aarch64__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-sha512.c b/wolfcrypt/src/port/arm/armv8-sha512.c index c6c0147da..ace00a301 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512.c +++ b/wolfcrypt/src/port/arm/armv8-sha512.c @@ -1,22 +1,12 @@ /* sha512.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ #ifdef HAVE_CONFIG_H @@ -146,18 +136,6 @@ static int InitSha512_256(wc_Sha512* sha512) #ifdef WOLFSSL_SHA512 -#ifdef WOLFSSL_ARMASM -#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512 - extern void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, - word32 len); - #define Transform_Sha512_Len Transform_Sha512_Len_neon -#else - extern void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, - word32 len); - #define Transform_Sha512_Len Transform_Sha512_Len_crypto -#endif -#endif - static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId, enum wc_HashType type) { @@ -467,7 +445,25 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); if (blocksLen > 0) { /* Byte reversal performed in function if required. */ - Transform_Sha512_Len(sha512, data, blocksLen); + #ifndef WOLFSSL_ARMASM_NO_NEON + /* Data must be 64-bit aligned to be passed to Transform_Sha512_Len(). + * 64 bits is 8 bytes. 
+ */ + if (((size_t)data & 0x7) != 0) { + word32 i; + + for (i = 0; i < blocksLen; i += WC_SHA512_BLOCK_SIZE) { + word64 buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64)]; + XMEMCPY(buffer, data + i, WC_SHA512_BLOCK_SIZE); + Transform_Sha512_Len(sha512, (const byte*)buffer, + WC_SHA512_BLOCK_SIZE); + } + } + else + #endif + { + Transform_Sha512_Len(sha512, data, blocksLen); + } data += blocksLen; len -= blocksLen; } @@ -792,6 +788,8 @@ void wc_Sha384Free(wc_Sha384* sha384) #ifdef WOLFSSL_SHA512 +#if !defined(WOLFSSL_NOSHA512_224) || !defined(WOLFSSL_NOSHA512_256) + static int Sha512_Family_GetHash(wc_Sha512* sha512, byte* hash, enum wc_HashType type ) { @@ -828,6 +826,8 @@ static int Sha512_Family_GetHash(wc_Sha512* sha512, byte* hash, return ret; } +#endif /* !WOLFSSL_NOSHA512_224 || !WOLFSSL_NOSHA512_256 */ + int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash) { int ret; diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 97dbcb030..917b38585 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -1,22 +1,12 @@ /* sp.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* Implementation by Sean Parkinson. */ @@ -45,20 +35,39 @@ #endif #endif +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#undef WOLFSSL_SP_SMALL_STACK +#define WOLFSSL_SP_SMALL_STACK +#endif + #include +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif + #ifdef WOLFSSL_SP_ARM32_ASM -#define SP_PRINT_NUM(var, name, total, words, bits) \ - do { \ - int ii; \ - fprintf(stderr, name "=0x"); \ - for (ii = words - 1; ii >= 0; ii--) \ - fprintf(stderr, SP_PRINT_FMT, (var)[ii]); \ - fprintf(stderr, "\n"); \ +#define SP_PRINT_NUM(var, name, total, words, bits) \ + do { \ + int ii; \ + fprintf(stderr, name "=0x"); \ + for (ii = ((bits + 31) / 32) - 1; ii >= 0; ii--) \ + fprintf(stderr, SP_PRINT_FMT, (var)[ii]); \ + fprintf(stderr, "\n"); \ } while (0) -#define SP_PRINT_VAL(var, name) \ +#define SP_PRINT_VAL(var, name) \ fprintf(stderr, name "=0x" SP_PRINT_FMT "\n", var) + +#define SP_PRINT_INT(var, name) \ + fprintf(stderr, name "=%d\n", var) + #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) #ifndef WOLFSSL_SP_NO_2048 /* Read big endian unsigned byte array into r. 
@@ -71,27 +80,30 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; } - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } @@ -105,20 +117,23 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -148,12 +163,12 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -190,34 +205,13 @@ static void 
sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_2048_to_bin_64(sp_digit* r, byte* a) { int i; - int j; - int s = 0; - int b; + int j = 0; - j = 2048 / 8 - 1; - a[j] = 0; - for (i=0; i<64 && j>=0; i++) { - b = 0; - /* lint allow cast of mismatch sp_digit and int */ - a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ - b += 8 - s; - if (j < 0) { - break; - } - while (b < 32) { - a[j--] = (byte)(r[i] >> b); - b += 8; - if (j < 0) { - break; - } - } - s = 8 - (b - 32); - if (j >= 0) { - a[j] = 0; - } - if (s != 0) { - j++; - } + for (i = 63; i >= 0; i--) { + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; } } @@ -236,441 +230,1987 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) #define sp_2048_norm_64(a) #ifndef WOLFSSL_SP_SMALL +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ -static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" - "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" - "umull r3, r4, r11, r12\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" "mov r5, #0\n\t" "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" 
+ "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "adc 
r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl 
r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, 
r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[2] */ "ldr 
r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, 
r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
"str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, 
r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [sp, #28]\n\t" - "# A[7] * B[1]\n\t" + /* A[7] * B[1] */ "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[r], #32]\n\t" - "# A[2] * B[7]\n\t" + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs 
r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[r], #36]\n\t" - "# A[7] * B[3]\n\t" + /* A[7] * B[3] */ "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, 
r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[r], #40]\n\t" - "# A[4] * B[7]\n\t" + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r11, r9\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[r], #44]\n\t" - "# A[7] * B[5]\n\t" - "umull r6, r7, r8, r12\n\t" + /* A[7] * B[5] */ + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" - "umull r6, r7, r11, r12\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[r], #48]\n\t" - "# A[6] * B[7]\n\t" - "umull r6, r7, r11, r9\n\t" + /* A[6] * B[7] */ + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, r8, r12\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[r], #52]\n\t" - "# A[7] * B[7]\n\t" - "umull r6, r7, r8, r9\n\t" + /* A[7] * B[7] */ + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" "str r5, [%[r], #56]\n\t" @@ -679,404 +2219,531 @@ static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #32\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" ); } -/* Square a and put result in r. (r = a * a) +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. 
*/ -static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #32\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" - "umull r8, r3, r10, r10\n\t" - "mov r4, #0\n\t" - "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" - "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" - "ldr r10, [%[a], #4]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, 
r12\n\t" - "# A[1] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr 
r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r2, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r2, r2, r7\n\t" - "str r3, [sp, #28]\n\t" - "# A[1] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[2] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [%[r], #32]\n\t" - "# A[2] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[3] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" - "ldr r10, [%[a], 
#20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [%[r], #36]\n\t" - "# A[3] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[4] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[5] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [%[r], #40]\n\t" - "# A[4] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[5] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "str r4, [%[r], #44]\n\t" - "# A[5] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[6] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "str r2, [%[r], #48]\n\t" - "# A[6] * A[7]\n\t" - "ldr 
r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [%[r], #52]\n\t" - "# A[7] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adc r2, r2, r9\n\t" - "str r4, [%[r], #56]\n\t" - "str r2, [%[r], #60]\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #32\n\t" + "sub sp, sp, #36\n\t" + "str %[r], [sp, #32]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * B[0] */ + "ldr lr, [%[b]]\n\t" + "umull r3, r4, r12, lr\n\t" + /* A[0] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "umull r5, r6, r12, lr\n\t" + /* A[0] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "umull r7, r8, r12, lr\n\t" + /* A[0] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "umull r9, r10, r12, lr\n\t" + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "mov r11, %[r]\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[0] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adcs r6, r6, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[0] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adcs r8, r8, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[0] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adcs r10, r10, #0\n\t" + "adc r3, %[r], #0\n\t" + "umlal r10, r3, r12, lr\n\t" + /* A[1] * B[0] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "str r4, [sp, #4]\n\t" + "adds r5, r5, r11\n\t" + /* A[1] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[1] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" 
+ "adds r7, r7, r11\n\t" + /* A[1] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[1] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* A[2] * B[0] */ + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "str r5, [sp, #8]\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[2] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[2] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * B[0] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[3] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, 
r8, r11\n\t" + /* A[3] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[3] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[3] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * B[0] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[4] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[4] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[4] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[4] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[4] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * B[0] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" 
+ /* A[5] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[5] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[5] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[5] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[5] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + /* A[6] * B[0] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[6] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[6] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[6] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[6] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[6] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[6] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r9, %[r], #0\n\t" + "umlal r8, r9, r12, lr\n\t" + /* A[7] * B[0] */ + "ldr r12, [%[a], #28]\n\t" + "ldr 
lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[7] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[7] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[7] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[7] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[7] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[7] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[7] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r10, %[r], #0\n\t" + "umlal r9, r10, r12, lr\n\t" + "ldr %[r], [sp, #32]\n\t" + "add %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #36\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" ); } +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #44\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[r], [sp, #36]\n\t" + "str %[a], [sp, #40]\n\t" +#else + "strd %[r], %[a], [sp, #36]\n\t" +#endif + "mov lr, %[b]\n\t" + "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm lr!, {r4, r5, r6}\n\t" + "umull r10, r11, %[r], r4\n\t" + "umull r12, r7, %[a], r4\n\t" + "umaal r11, r12, %[r], r5\n\t" + "umull r8, r9, %[b], r4\n\t" + "umaal r12, r8, %[a], r5\n\t" + "umaal r12, r7, %[r], r6\n\t" + "umaal r8, r9, r3, r4\n\t" + "stm sp, {r10, r11, r12}\n\t" + "umaal r7, r8, %[b], r5\n\t" + "ldm lr!, {r4}\n\t" + "umull r10, r11, %[a], r6\n\t" + "umaal r8, r9, %[b], r6\n\t" + "umaal r7, r10, %[r], r4\n\t" + "umaal r8, r11, r3, r5\n\t" + "str r7, [sp, #12]\n\t" + "umaal r8, r10, %[a], r4\n\t" + "umaal r9, r11, r3, r6\n\t" + "umaal r9, r10, %[b], r4\n\t" + "umaal r10, r11, r3, r4\n\t" + "ldm lr, {r4, r5, r6, r7}\n\t" + "mov r12, #0\n\t" + "umlal r8, r12, %[r], r4\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r10, r12, %[b], r4\n\t" + "umaal r11, r12, r3, r4\n\t" + "mov r4, #0\n\t" + "umlal r9, r4, %[r], r5\n\t" + "umaal r10, r4, %[a], r5\n\t" + "umaal r11, r4, %[b], r5\n\t" + "umaal r12, r4, r3, r5\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, %[r], r6\n\t" + "umaal r11, r5, %[a], r6\n\t" + "umaal r12, r5, %[b], r6\n\t" + "umaal r4, r5, r3, r6\n\t" + "mov r6, #0\n\t" + "umlal r11, r6, %[r], r7\n\t" + "ldr %[r], [sp, #40]\n\t" + "umaal r12, r6, %[a], r7\n\t" + "add %[r], %[r], #16\n\t" + "umaal r4, r6, %[b], r7\n\t" + "sub lr, lr, #16\n\t" + "umaal r5, r6, r3, r7\n\t" + "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "str r6, [sp, #32]\n\t" + "ldm lr!, {r6}\n\t" + "mov r7, #0\n\t" + "umlal r8, r7, %[r], r6\n\t" + 
"umaal r9, r7, %[a], r6\n\t" + "str r8, [sp, #16]\n\t" + "umaal r10, r7, %[b], r6\n\t" + "umaal r11, r7, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r8, #0\n\t" + "umlal r9, r8, %[r], r6\n\t" + "umaal r10, r8, %[a], r6\n\t" + "str r9, [sp, #20]\n\t" + "umaal r11, r8, %[b], r6\n\t" + "umaal r12, r8, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r9, #0\n\t" + "umlal r10, r9, %[r], r6\n\t" + "umaal r11, r9, %[a], r6\n\t" + "str r10, [sp, #24]\n\t" + "umaal r12, r9, %[b], r6\n\t" + "umaal r4, r9, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r10, #0\n\t" + "umlal r11, r10, %[r], r6\n\t" + "umaal r12, r10, %[a], r6\n\t" + "str r11, [sp, #28]\n\t" + "umaal r4, r10, %[b], r6\n\t" + "umaal r5, r10, r3, r6\n\t" + "ldm lr!, {r11}\n\t" + "umaal r12, r7, %[r], r11\n\t" + "umaal r4, r7, %[a], r11\n\t" + "ldr r6, [sp, #32]\n\t" + "umaal r5, r7, %[b], r11\n\t" + "umaal r6, r7, r3, r11\n\t" + "ldm lr!, {r11}\n\t" + "umaal r4, r8, %[r], r11\n\t" + "umaal r5, r8, %[a], r11\n\t" + "umaal r6, r8, %[b], r11\n\t" + "umaal r7, r8, r3, r11\n\t" + "ldm lr, {r11, lr}\n\t" + "umaal r5, r9, %[r], r11\n\t" + "umaal r6, r10, %[r], lr\n\t" + "umaal r6, r9, %[a], r11\n\t" + "umaal r7, r10, %[a], lr\n\t" + "umaal r7, r9, %[b], r11\n\t" + "umaal r8, r10, %[b], lr\n\t" + "umaal r8, r9, r3, r11\n\t" + "umaal r9, r10, r3, lr\n\t" + "mov r3, r12\n\t" + "ldr lr, [sp, #36]\n\t" + "add lr, lr, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub lr, lr, #32\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #44\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" + ); +} + +#endif /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a 
into a. (a -= b) @@ -1084,82 +2751,46 @@ static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, [%[a], 
#40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -1168,84 +2799,48 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -1287,7 +2882,7 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, sp_digit z1[16]; sp_digit a1[8]; sp_digit b1[8]; - sp_digit z2[16]; + sp_digit* z2 = r + 16; sp_digit u; sp_digit ca; sp_digit cb; @@ -1295,45 +2890,22 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, ca = sp_2048_add_8(a1, a, &a[8]); cb = sp_2048_add_8(b1, b, &b[8]); u = ca & cb; - sp_2048_mul_8(z1, a1, b1); + sp_2048_mul_8(z2, &a[8], &b[8]); sp_2048_mul_8(z0, a, b); - sp_2048_mask_8(r + 16, a1, 0 - cb); + sp_2048_mul_8(z1, a1, b1); + + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_sub_in_place_16(z1, z2); + sp_2048_mask_8(a1, a1, 0 - cb); + u += sp_2048_add_8(z1 + 8, z1 + 8, a1); sp_2048_mask_8(b1, b1, 0 - ca); - u += sp_2048_add_8(r + 16, r + 16, b1); - u += sp_2048_sub_in_place_16(z1, z2); - u += sp_2048_sub_in_place_16(z1, z0); - u += sp_2048_add_16(r + 8, r + 8, z1); - r[24] = u; - XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - (void)sp_2048_add_16(r + 16, r + 16, z2); -} + u += sp_2048_add_8(z1 + 8, z1 + 8, b1); -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[16]; - sp_digit z1[16]; - sp_digit a1[8]; - sp_digit u; - - u = sp_2048_add_8(a1, a, &a[8]); - sp_2048_sqr_8(z1, a1); - sp_2048_sqr_8(z2, &a[8]); - sp_2048_sqr_8(z0, a); - sp_2048_mask_8(r + 16, a1, 0 - u); - u += sp_2048_add_8(r + 16, r + 16, r + 16); - u += sp_2048_sub_in_place_16(z1, z2); - u += sp_2048_sub_in_place_16(z1, z0); u += sp_2048_add_16(r + 8, r + 8, z1); - r[24] = u; - XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - (void)sp_2048_add_16(r + 16, r + 16, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (8 - 1)); + a1[0] = u; + (void)sp_2048_add_8(r + 24, r + 24, a1); } /* Sub b from a into a. 
(a -= b) @@ -1341,146 +2913,74 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, 
[%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" 
"sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -1489,148 +2989,76 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs 
r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -1676,7 +3104,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, sp_digit z1[32]; sp_digit a1[16]; sp_digit b1[16]; - sp_digit z2[32]; + sp_digit* z2 = r + 32; sp_digit u; sp_digit ca; sp_digit cb; @@ -1684,45 +3112,22 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, ca = sp_2048_add_16(a1, a, &a[16]); cb = sp_2048_add_16(b1, b, &b[16]); u = ca & cb; - sp_2048_mul_16(z1, a1, b1); + sp_2048_mul_16(z2, &a[16], &b[16]); sp_2048_mul_16(z0, a, b); - sp_2048_mask_16(r + 32, a1, 0 - cb); + sp_2048_mul_16(z1, a1, b1); + + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_sub_in_place_32(z1, z2); + sp_2048_mask_16(a1, a1, 0 - cb); + u += sp_2048_add_16(z1 + 16, z1 + 16, a1); sp_2048_mask_16(b1, b1, 0 - ca); - u += sp_2048_add_16(r + 32, r + 32, b1); - u += sp_2048_sub_in_place_32(z1, z2); - u += sp_2048_sub_in_place_32(z1, z0); - u += sp_2048_add_32(r + 16, r + 16, z1); - r[48] = u; - XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - (void)sp_2048_add_32(r + 32, r + 32, z2); -} + u += sp_2048_add_16(z1 + 16, z1 + 16, b1); -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[32]; - sp_digit z1[32]; - sp_digit a1[16]; - sp_digit u; - - u = sp_2048_add_16(a1, a, &a[16]); - sp_2048_sqr_16(z1, a1); - sp_2048_sqr_16(z2, &a[16]); - sp_2048_sqr_16(z0, a); - sp_2048_mask_16(r + 32, a1, 0 - u); - u += sp_2048_add_16(r + 32, r + 32, r + 32); - u += sp_2048_sub_in_place_32(z1, z2); - u += sp_2048_sub_in_place_32(z1, z0); u += sp_2048_add_32(r + 16, r + 16, z1); - r[48] = u; - XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - (void)sp_2048_add_32(r + 32, r + 32, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (16 - 1)); + a1[0] = u; + (void)sp_2048_add_16(r + 48, r + 48, a1); } /* Sub b from a into a. 
(a -= b) @@ -1730,274 +3135,130 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, 
[%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" 
"sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "ldr r2, [%[a], #128]\n\t" - "ldr r3, [%[a], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "ldr r8, [%[b], #136]\n\t" - "ldr r9, [%[b], #140]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #128]\n\t" - "str r3, [%[a], #132]\n\t" - "str r4, [%[a], #136]\n\t" - "str r5, [%[a], #140]\n\t" - "ldr r2, [%[a], #144]\n\t" - "ldr r3, [%[a], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "ldr r8, [%[b], #152]\n\t" - "ldr r9, [%[b], #156]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #144]\n\t" - "str r3, [%[a], #148]\n\t" - "str r4, [%[a], #152]\n\t" - "str r5, [%[a], #156]\n\t" - "ldr r2, [%[a], #160]\n\t" - "ldr r3, [%[a], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[a], #172]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "ldr r8, 
[%[b], #168]\n\t" - "ldr r9, [%[b], #172]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #160]\n\t" - "str r3, [%[a], #164]\n\t" - "str r4, [%[a], #168]\n\t" - "str r5, [%[a], #172]\n\t" - "ldr r2, [%[a], #176]\n\t" - "ldr r3, [%[a], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "ldr r8, [%[b], #184]\n\t" - "ldr r9, [%[b], #188]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #176]\n\t" - "str r3, [%[a], #180]\n\t" - "str r4, [%[a], #184]\n\t" - "str r5, [%[a], #188]\n\t" - "ldr r2, [%[a], #192]\n\t" - "ldr r3, [%[a], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[a], #204]\n\t" - "ldr r6, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" - "ldr r8, [%[b], #200]\n\t" - "ldr r9, [%[b], #204]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #192]\n\t" - "str r3, [%[a], #196]\n\t" - "str r4, [%[a], #200]\n\t" - "str r5, [%[a], #204]\n\t" - "ldr r2, [%[a], #208]\n\t" - "ldr r3, [%[a], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[a], #220]\n\t" - "ldr r6, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "ldr r8, [%[b], #216]\n\t" - "ldr r9, [%[b], #220]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #208]\n\t" - "str r3, [%[a], #212]\n\t" - "str r4, [%[a], #216]\n\t" - "str r5, [%[a], #220]\n\t" - "ldr r2, 
[%[a], #224]\n\t" - "ldr r3, [%[a], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[a], #236]\n\t" - "ldr r6, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "ldr r8, [%[b], #232]\n\t" - "ldr r9, [%[b], #236]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #224]\n\t" - "str r3, [%[a], #228]\n\t" - "str r4, [%[a], #232]\n\t" - "str r5, [%[a], #236]\n\t" - "ldr r2, [%[a], #240]\n\t" - "ldr r3, [%[a], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[a], #252]\n\t" - "ldr r6, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "ldr r8, [%[b], #248]\n\t" - "ldr r9, [%[b], #252]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #240]\n\t" - "str r3, [%[a], #244]\n\t" - "str r4, [%[a], #248]\n\t" - "str r5, [%[a], #252]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -2006,276 +3267,132 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs 
r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[a], #136]\n\t" - "ldr r7, [%[a], #140]\n\t" - "ldr r8, [%[b], #128]\n\t" - "ldr r9, [%[b], #132]\n\t" - "ldr r10, [%[b], #136]\n\t" - "ldr r14, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" - "str r6, [%[r], #136]\n\t" - "str r7, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[a], #152]\n\t" - "ldr r7, [%[a], #156]\n\t" - "ldr r8, [%[b], #144]\n\t" - "ldr r9, [%[b], #148]\n\t" - "ldr r10, [%[b], #152]\n\t" - "ldr r14, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" - "str r6, [%[r], #152]\n\t" - "str r7, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, 
[%[a], #168]\n\t" - "ldr r7, [%[a], #172]\n\t" - "ldr r8, [%[b], #160]\n\t" - "ldr r9, [%[b], #164]\n\t" - "ldr r10, [%[b], #168]\n\t" - "ldr r14, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" - "str r6, [%[r], #168]\n\t" - "str r7, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[a], #184]\n\t" - "ldr r7, [%[a], #188]\n\t" - "ldr r8, [%[b], #176]\n\t" - "ldr r9, [%[b], #180]\n\t" - "ldr r10, [%[b], #184]\n\t" - "ldr r14, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #176]\n\t" - "str r5, [%[r], #180]\n\t" - "str r6, [%[r], #184]\n\t" - "str r7, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[a], #196]\n\t" - "ldr r6, [%[a], #200]\n\t" - "ldr r7, [%[a], #204]\n\t" - "ldr r8, [%[b], #192]\n\t" - "ldr r9, [%[b], #196]\n\t" - "ldr r10, [%[b], #200]\n\t" - "ldr r14, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #192]\n\t" - "str r5, [%[r], #196]\n\t" - "str r6, [%[r], #200]\n\t" - "str r7, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[a], #212]\n\t" - "ldr r6, [%[a], #216]\n\t" - "ldr r7, [%[a], #220]\n\t" - "ldr r8, [%[b], #208]\n\t" - "ldr r9, [%[b], #212]\n\t" - "ldr r10, [%[b], #216]\n\t" - "ldr r14, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" 
+ "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #208]\n\t" - "str r5, [%[r], #212]\n\t" - "str r6, [%[r], #216]\n\t" - "str r7, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[a], #228]\n\t" - "ldr r6, [%[a], #232]\n\t" - "ldr r7, [%[a], #236]\n\t" - "ldr r8, [%[b], #224]\n\t" - "ldr r9, [%[b], #228]\n\t" - "ldr r10, [%[b], #232]\n\t" - "ldr r14, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #224]\n\t" - "str r5, [%[r], #228]\n\t" - "str r6, [%[r], #232]\n\t" - "str r7, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[a], #244]\n\t" - "ldr r6, [%[a], #248]\n\t" - "ldr r7, [%[a], #252]\n\t" - "ldr r8, [%[b], #240]\n\t" - "ldr r9, [%[b], #244]\n\t" - "ldr r10, [%[b], #248]\n\t" - "ldr r14, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #240]\n\t" - "str r5, [%[r], #244]\n\t" - "str r6, [%[r], #248]\n\t" - "str r7, [%[r], #252]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* 
AND m into each word of a and store in r. @@ -2321,7 +3438,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, sp_digit z1[64]; sp_digit a1[32]; sp_digit b1[32]; - sp_digit z2[64]; + sp_digit* z2 = r + 64; sp_digit u; sp_digit ca; sp_digit cb; @@ -2329,18 +3446,1825 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, ca = sp_2048_add_32(a1, a, &a[32]); cb = sp_2048_add_32(b1, b, &b[32]); u = ca & cb; - sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); sp_2048_mul_32(z0, a, b); - sp_2048_mask_32(r + 64, a1, 0 - cb); - sp_2048_mask_32(b1, b1, 0 - ca); - u += sp_2048_add_32(r + 64, r + 64, b1); - u += sp_2048_sub_in_place_64(z1, z2); + sp_2048_mul_32(z1, a1, b1); + u += sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_sub_in_place_64(z1, z2); + sp_2048_mask_32(a1, a1, 0 - cb); + u += sp_2048_add_32(z1 + 32, z1 + 32, a1); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(z1 + 32, z1 + 32, b1); + u += sp_2048_add_64(r + 32, r + 32, z1); - r[96] = u; - XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - (void)sp_2048_add_64(r + 64, r + 64, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (32 - 1)); + a1[0] = u; + (void)sp_2048_add_32(r + 96, r + 96, a1); +} + +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + /* A[0] * A[1] */ + "ldr r10, [%[a], #4]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "str r3, [sp, #4]\n\t" + /* A[0] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * A[1] */ + "ldr r10, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "str r4, [sp, #8]\n\t" + /* A[0] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "str r2, [sp, #12]\n\t" + /* A[0] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr 
r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[1] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[2] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "str r3, [sp, #16]\n\t" + /* A[0] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc 
r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + /* A[0] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + /* A[0] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + /* A[1] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + 
"lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[2] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[4] * A[4] */ + "ldr r10, [%[a], 
#16]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #32]\n\t" + /* A[2] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[3] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* 
A[4] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #36]\n\t" + /* A[3] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[4] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr 
r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[5] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "str r3, [%[r], #40]\n\t" + /* A[4] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, 
#0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #20]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #44]\n\t" + /* A[5] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #20]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" 
+ "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "str r2, [%[r], #48]\n\t" + /* A[6] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #24]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, 
r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "str r3, [%[r], #52]\n\t" + /* A[7] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "str r4, [%[r], #56]\n\t" + "str r2, [%[r], #60]\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + ); +} + +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[r], [sp, #64]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * A[1] */ + "ldr lr, [%[a], #4]\n\t" + "umull r4, r5, r12, lr\n\t" + /* A[0] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "umull r6, r7, r12, lr\n\t" + /* A[0] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "umull r8, r9, r12, lr\n\t" + /* A[0] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "umull r10, r3, r12, lr\n\t" + /* A[0] * A[2] */ + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[0] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[0] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + "adcs r3, r3, #0\n\t" + "str r4, [sp, #4]\n\t" + "str r5, [sp, #8]\n\t" + /* A[1] * A[2] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[1] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[1] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* A[2] * A[3] */ + "ldr r12, [%[a], 
#8]\n\t" + "ldr lr, [%[a], #12]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * A[4] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[a], #16]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * A[5] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[a], #20]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * A[6] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[a], #24]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + /* A[6] * A[7] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" + "mov r9, #0\n\t" + "umlal r8, r9, r12, lr\n\t" + "add lr, sp, #32\n\t" + 
"stm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "stm lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "adcs r3, r3, r3\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, %[r], #0\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "mov lr, sp\n\t" + /* A[0] * A[0] */ + "ldr r12, [%[a]]\n\t" + "umull r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[1] * A[1] */ + "ldr r12, [%[a], #4]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * A[2] */ + "ldr r12, [%[a], #8]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* A[3] * A[3] */ + "ldr r12, [%[a], #12]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, r12\n\t" + "adds r10, r10, r11\n\t" + "stm lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* A[4] * A[4] */ + "ldr r12, [%[a], #16]\n\t" + "adcs r3, r3, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * A[5] */ + "ldr r12, [%[a], #20]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * A[6] */ + "ldr r12, [%[a], #24]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* A[7] * A[7] */ + "ldr r12, [%[a], #28]\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r10, #0\n\t" + "umlal r9, r10, r12, r12\n\t" + "ldr %[r], [sp, 
#64]\n\t" + "add %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "str %[r], [sp, #28]\n\t" + "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "umull r9, r10, %[r], %[r]\n\t" + "umull r11, r12, %[r], %[a]\n\t" + "adds r11, r11, r11\n\t" + "mov lr, #0\n\t" + "umaal r10, r11, lr, lr\n\t" + "stm sp, {r9, r10}\n\t" + "mov r8, lr\n\t" + "umaal r8, r12, %[r], r2\n\t" + "adcs r8, r8, r8\n\t" + "umaal r8, r11, %[a], %[a]\n\t" + "umull r9, r10, %[r], r3\n\t" + "umaal r9, r12, %[a], r2\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #8]\n\t" + "str r9, [sp, #12]\n\t" +#else + "strd r8, r9, [sp, #8]\n\t" +#endif + "mov r9, lr\n\t" + "umaal r9, r10, %[r], r4\n\t" + "umaal r9, r12, %[a], r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r2, r2\n\t" + "str r9, [sp, #16]\n\t" + "umull r9, r8, %[r], r5\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r9, r10, r2, r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" + "str r9, [sp, #20]\n\t" + "mov r9, lr\n\t" + "umaal r9, r8, %[r], r6\n\t" + "umaal r9, r12, %[a], r5\n\t" + "umaal r9, r10, r2, r4\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r3, r3\n\t" + "str r9, [sp, #24]\n\t" + "umull %[r], r9, %[r], r7\n\t" + "umaal %[r], r8, %[a], r6\n\t" + "umaal %[r], r12, r2, 
r5\n\t" + "umaal %[r], r10, r3, r4\n\t" + "adcs %[r], %[r], %[r]\n\t" + "umaal %[r], r11, lr, lr\n\t" + /* R[7] = r0 */ + "umaal r9, r8, %[a], r7\n\t" + "umaal r9, r10, r2, r6\n\t" + "umaal r12, r9, r3, r5\n\t" + "adcs r12, r12, r12\n\t" + "umaal r12, r11, r4, r4\n\t" + /* R[8] = r12 */ + "umaal r9, r8, r2, r7\n\t" + "umaal r10, r9, r3, r6\n\t" + "mov r2, lr\n\t" + "umaal r10, r2, r4, r5\n\t" + "adcs r10, r10, r10\n\t" + "umaal r11, r10, lr, lr\n\t" + /* R[9] = r11 */ + "umaal r2, r8, r3, r7\n\t" + "umaal r2, r9, r4, r6\n\t" + "adcs r3, r2, r2\n\t" + "umaal r10, r3, r5, r5\n\t" + /* R[10] = r10 */ + "mov %[a], lr\n\t" + "umaal %[a], r8, r4, r7\n\t" + "umaal %[a], r9, r5, r6\n\t" + "adcs r4, %[a], %[a]\n\t" + "umaal r3, r4, lr, lr\n\t" + /* R[11] = r3 */ + "umaal r8, r9, r5, r7\n\t" + "adcs r8, r8, r8\n\t" + "umaal r4, r8, r6, r6\n\t" + /* R[12] = r4 */ + "mov r5, lr\n\t" + "umaal r5, r9, r6, r7\n\t" + "adcs r5, r5, r5\n\t" + "umaal r8, r5, lr, lr\n\t" + /* R[13] = r8 */ + "adcs r9, r9, r9\n\t" + "umaal r9, r5, r7, r7\n\t" + "adcs r7, r5, lr\n\t" + /* R[14] = r9 */ + /* R[15] = r7 */ + "ldr lr, [sp, #28]\n\t" + "add lr, lr, #28\n\t" + "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r11}\n\t" + "stm lr!, {r10}\n\t" + "stm lr!, {r3, r4, r8, r9}\n\t" + "stm lr!, {r7}\n\t" + "sub lr, lr, #0x40\n\t" + "ldm sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "stm lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "add sp, sp, #32\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); +} + +#endif +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit* z2 = r + 16; + sp_digit z1[16]; + sp_digit* a1 = z1; + sp_digit zero[8]; + sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 8); + + mask = sp_2048_sub_8(a1, a, &a[8]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_8(a1, p1, p2); + + sp_2048_sqr_8(z2, &a[8]); + sp_2048_sqr_8(z0, a); + sp_2048_sqr_8(z1, a1); + + u = 0; + u -= sp_2048_sub_in_place_16(z1, z2); + u -= sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_sub_in_place_16(r + 8, z1); + zero[0] = u; + (void)sp_2048_add_8(r + 24, r + 24, zero); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit* z2 = r + 32; + sp_digit z1[32]; + sp_digit* a1 = z1; + sp_digit zero[16]; + sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 16); + + mask = sp_2048_sub_16(a1, a, &a[16]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_16(a1, p1, p2); + + sp_2048_sqr_16(z2, &a[16]); + sp_2048_sqr_16(z0, a); + sp_2048_sqr_16(z1, a1); + + u = 0; + u -= sp_2048_sub_in_place_32(z1, z2); + u -= sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_sub_in_place_32(r + 16, z1); + zero[0] = u; + (void)sp_2048_add_16(r + 48, r + 48, zero); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, 
r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; } /* Square a and put result in r. 
(r = a * a) @@ -2351,23 +5275,32 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[64]; + sp_digit* z2 = r + 64; sp_digit z1[64]; - sp_digit a1[32]; + sp_digit* a1 = z1; + sp_digit zero[32]; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 32); + + mask = sp_2048_sub_32(a1, a, &a[32]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_32(a1, p1, p2); - u = sp_2048_add_32(a1, a, &a[32]); - sp_2048_sqr_32(z1, a1); sp_2048_sqr_32(z2, &a[32]); sp_2048_sqr_32(z0, a); - sp_2048_mask_32(r + 64, a1, 0 - u); - u += sp_2048_add_32(r + 64, r + 64, r + 64); - u += sp_2048_sub_in_place_64(z1, z2); - u += sp_2048_sub_in_place_64(z1, z0); - u += sp_2048_add_64(r + 32, r + 32, z1); - r[96] = u; - XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - (void)sp_2048_add_64(r + 64, r + 64, z2); + sp_2048_sqr_32(z1, a1); + + u = 0; + u -= sp_2048_sub_in_place_64(z1, z2); + u -= sp_2048_sub_in_place_64(z1, z0); + u += sp_2048_sub_in_place_64(r + 32, z1); + zero[0] = u; + (void)sp_2048_add_32(r + 96, r + 96, zero); } #endif /* !WOLFSSL_SP_SMALL */ @@ -2378,41 +5311,35 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #256\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x100\n\t" + "\n" + "L_sp_2048_add_64_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_add_64_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -2422,40 +5349,33 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #256\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x100\n\t" + "\n" + "L_sp_2048_sub_in_pkace_64_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -2466,57 +5386,196 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #512\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x200\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #252\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_2048_mul_64_outer_%=: \n\t" + "subs r3, r5, #0xfc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_2048_mul_64_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #256\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_2048_mul_64_inner_done_%=\n\t" + "blt L_sp_2048_mul_64_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_mul_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, 
r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #504\n\t" - "ble 1b\n\t" + "cmp r5, #0x1f4\n\t" + "ble L_sp_2048_mul_64_outer_%=\n\t" + "ldr lr, [%[a], #252]\n\t" + "ldr r11, [%[b], #252]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_2048_mul_64_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_2048_mul_64_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -2525,73 +5584,155 @@ static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #512\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x200\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #252\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_2048_sqr_64_outer_%=: \n\t" + "subs r3, r5, #0xfc\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_2048_sqr_64_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + 
"adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #256\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_2048_sqr_64_inner_done_%=\n\t" + "blt L_sp_2048_sqr_64_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_sqr_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #504\n\t" - "ble 1b\n\t" + "cmp r5, #0x1f4\n\t" + "ble L_sp_2048_sqr_64_outer_%=\n\t" + "ldr lr, [%[a], #252]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" 
+#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_2048_sqr_64_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_2048_sqr_64_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -2621,41 +5762,35 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x80\n\t" + "\n" + "L_sp_2048_add_32_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_add_32_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -2665,40 +5800,33 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x80\n\t" + "\n" + "L_sp_2048_sub_in_pkace_32_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -2709,57 +5837,196 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x100\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_2048_mul_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_2048_mul_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_2048_mul_32_inner_done_%=\n\t" + "blt L_sp_2048_mul_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_mul_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, 
r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf4\n\t" + "ble L_sp_2048_mul_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" + "ldr r11, [%[b], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_2048_mul_32_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_2048_mul_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -2768,80 +6035,162 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x100\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_2048_sqr_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_2048_sqr_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + 
"adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_2048_sqr_32_inner_done_%=\n\t" + "blt L_sp_2048_sqr_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf4\n\t" + "ble L_sp_2048_sqr_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif 
"str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_2048_sqr_32_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_2048_sqr_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } #endif /* WOLFSSL_SP_SMALL */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -2861,499 +6210,2170 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_2048_mul_d_64_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #256\n\t" - "blt 1b\n\t" + "cmp r9, #0x100\n\t" + "blt L_sp_2048_mul_d_64_word_%=\n\t" "str r3, [%[r], #256]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" - "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" - "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" - "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" - "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" - "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" - "umull r6, r7, %[b], r8\n\t" 
- "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" - "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" - "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" - "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" - "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" - "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" - "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" - "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" - "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" - "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" - "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" - "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" - "umull r6, r7, %[b], 
r8\n\t" - "adds r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r3, [%[r], #252]\n\t" - "str r4, [%[r], #256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, 
#0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, 
r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, 
r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[32] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[33] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[34] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[35] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[36] * 
B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[37] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[38] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, 
r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[39] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[40] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[41] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[42] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[43] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, 
r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[44] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[45] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[46] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[47] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[48] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[49] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[50] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[51] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[52] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[53] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[54] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[55] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[56] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[57] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[58] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[59] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[60] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[61] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[62] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[63] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "str r4, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. @@ -3369,6 +8389,7 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) sp_2048_sub_in_place_32(r, m); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. 
* @@ -3377,526 +8398,1657 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_2048_cond_sub_32_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x80\n\t" + "blt L_sp_2048_cond_sub_32_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and 
r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, 
[%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", 
"r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, 
%[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* 
Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, 
r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, 
r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, 
r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, 
r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + 
/* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull 
r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" 
- "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], 
#100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], 
#112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #124]\n\t" +#else "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #124]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" - "ldr r9, [%[a], #128]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #128]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #128\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" 
(ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x80\n\t" + "blt L_sp_2048_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + 
"adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr 
r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + 
"adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc 
r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_2048_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] 
* mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] 
+= m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #128]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #124]\n\t" + "adds r3, 
r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #128]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_2048_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -3904,9 +10056,9 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_32(r, a, b); @@ -3918,346 +10070,1353 @@ static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_32(r, a); sp_2048_mont_reduce_32(r, m, mp); } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_2048_mul_d_32_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #128\n\t" - "blt 1b\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_2048_mul_d_32_word_%=\n\t" "str r3, [%[r], #128]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" - "str r4, [%[r], #124]\n\t" - "str r5, [%[r], #128]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, 
r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], 
#16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, 
{r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl 
r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, 
r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + 
"lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_2048_word_32_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_2048_word_32_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, 
r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), 
[d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Compare a with b in constant time. * * a A single precision integer. @@ -4265,400 +11424,395 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #124\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0x7c\n\t" + "\n" + "L_sp_2048_cmp_32_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_2048_cmp_32_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + 
"subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + 
"movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, 
lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - 
"subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" 
"it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + 
"subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -4670,8 +11824,8 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. 
* returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -4679,12 +11833,15 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig (void)m; - div = d[31]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); - for (i=31; i>=0; i--) { - sp_digit hi = t1[32 + i] - (t1[32 + i] == div); + r1 = sp_2048_cmp_32(&t1[32], d) >= 0; + sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); + for (i = 31; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[32 + i] == div); + sp_digit hi = t1[32 + i] + mask; r1 = div_2048_word_32(hi, t1[32 + i - 1], div); + r1 |= mask; sp_2048_mul_d_32(t2, d, r1); t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); @@ -4708,7 +11865,8 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_32(a, m, NULL, r); } @@ -4721,12 +11879,14 @@ static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 64]; @@ -4741,11 +11901,17 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 64), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 64), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -4805,6 +11971,10 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -4846,7 +12016,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_32(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -4861,12 +12031,14 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[32 * 64]; @@ -4881,11 +12053,17 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 64), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 64), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -4961,6 +12139,10 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -5003,7 +12185,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_32(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -5030,6 +12212,7 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -5038,974 +12221,3113 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. 
*/ -static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_2048_cond_sub_64_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #256\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x100\n\t" + "blt L_sp_2048_cond_sub_64_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, 
[%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, 
r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, [%[a], 
#136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, [%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" - "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r6, [%[a], #196]\n\t" - "ldr r5, [%[b], #192]\n\t" - "ldr r7, [%[b], 
#196]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #192]\n\t" - "str r6, [%[r], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r5, [%[b], #200]\n\t" - "ldr r7, [%[b], #204]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r6, [%[a], #212]\n\t" - "ldr r5, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #208]\n\t" - "str r6, [%[r], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r5, [%[b], #216]\n\t" - "ldr r7, [%[b], #220]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r6, [%[a], #228]\n\t" - "ldr r5, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #224]\n\t" - "str r6, [%[r], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r5, [%[b], #232]\n\t" - "ldr r7, [%[b], #236]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r6, [%[a], #244]\n\t" - "ldr r5, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #240]\n\t" - "str r6, [%[r], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r5, [%[b], #248]\n\t" - "ldr r7, [%[b], #252]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs 
r6, r6, r7\n\t" - "str r4, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and 
r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + 
"and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" 
+ "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - 
"ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, 
r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], 
#32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + 
"lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, 
r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, 
r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, 
r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr 
r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, 
r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" 
+ "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - 
"adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" 
- "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], #200]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, 
r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" 
+ "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + 
"mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + 
/* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "lsr r11, 
r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, 
r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, 
r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #252]\n\t" +#else "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #252]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" - "ldr r9, [%[a], #256]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #256]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, 
r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" + "ldr r10, [%[a], #256]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #256\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x100\n\t" + "blt L_sp_2048_mont_reduce_64_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc 
r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + 
"ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, 
r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, 
[%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* 
a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], 
#216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" + 
"umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" + "ldr r10, [%[a], #256]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x100\n\t" + "blt L_sp_2048_mont_reduce_64_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu 
*/ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], 
#72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, 
[%[a], #124]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r10, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #128]\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r10, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #132]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r10, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #136]\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r10, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #140]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r10, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #144]\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r10, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #148]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r10, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #152]\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r10, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #156]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r10, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #160]\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r10, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #164]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r10, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #168]\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r10, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #172]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r10, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #176]\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r10, 
[%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #180]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r10, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #184]\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r10, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #188]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r10, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #192]\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r10, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #196]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r10, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #200]\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r10, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #204]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r10, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #208]\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r10, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #212]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r10, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #216]\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r10, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #220]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r10, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #224]\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r10, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #228]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r10, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" + "umaal r9, 
r3, r11, r10\n\t" + "str r9, [%[a], #232]\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r10, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #236]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r10, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #240]\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r10, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #244]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r10, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #248]\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r10, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #256]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #252]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #256]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x100\n\t" + "blt L_sp_2048_mont_reduce_64_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -6013,9 +15335,9 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_64(r, a, b); @@ -6027,9 +15349,9 @@ static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_64(r, a); @@ -6043,40 +15365,34 @@ static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #256\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x100\n\t" + "\n" + "L_sp_2048_sub_64_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" 
(r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_64_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -6086,340 +15402,332 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], 
#28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r7, [%[b], #32]\n\t" - "ldr r8, [%[b], #36]\n\t" - "ldr r9, [%[b], #40]\n\t" - "ldr r10, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #32]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r7, [%[b], #48]\n\t" - "ldr r8, [%[b], #52]\n\t" - "ldr r9, [%[b], #56]\n\t" - "ldr r10, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #48]\n\t" - "str r4, [%[r], #52]\n\t" - "str r5, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r7, [%[b], #64]\n\t" - "ldr r8, [%[b], #68]\n\t" - "ldr r9, [%[b], #72]\n\t" - "ldr r10, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #64]\n\t" - "str r4, [%[r], #68]\n\t" - "str r5, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r7, [%[b], #80]\n\t" - "ldr r8, [%[b], #84]\n\t" - "ldr r9, [%[b], #88]\n\t" - "ldr r10, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs 
r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #80]\n\t" - "str r4, [%[r], #84]\n\t" - "str r5, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r7, [%[b], #96]\n\t" - "ldr r8, [%[b], #100]\n\t" - "ldr r9, [%[b], #104]\n\t" - "ldr r10, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #96]\n\t" - "str r4, [%[r], #100]\n\t" - "str r5, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r7, [%[b], #112]\n\t" - "ldr r8, [%[b], #116]\n\t" - "ldr r9, [%[b], #120]\n\t" - "ldr r10, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #112]\n\t" - "str r4, [%[r], #116]\n\t" - "str r5, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r3, [%[a], #128]\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r7, [%[b], #128]\n\t" - "ldr r8, [%[b], #132]\n\t" - "ldr r9, [%[b], #136]\n\t" - "ldr r10, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #128]\n\t" - "str r4, [%[r], #132]\n\t" - "str r5, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r3, [%[a], #144]\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r7, [%[b], #144]\n\t" - "ldr r8, [%[b], #148]\n\t" - "ldr r9, [%[b], #152]\n\t" - "ldr 
r10, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #144]\n\t" - "str r4, [%[r], #148]\n\t" - "str r5, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r3, [%[a], #160]\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r7, [%[b], #160]\n\t" - "ldr r8, [%[b], #164]\n\t" - "ldr r9, [%[b], #168]\n\t" - "ldr r10, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #160]\n\t" - "str r4, [%[r], #164]\n\t" - "str r5, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r3, [%[a], #176]\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r7, [%[b], #176]\n\t" - "ldr r8, [%[b], #180]\n\t" - "ldr r9, [%[b], #184]\n\t" - "ldr r10, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #176]\n\t" - "str r4, [%[r], #180]\n\t" - "str r5, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r3, [%[a], #192]\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r7, [%[b], #192]\n\t" - "ldr r8, [%[b], #196]\n\t" - "ldr r9, [%[b], #200]\n\t" - "ldr r10, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #192]\n\t" - "str r4, [%[r], #196]\n\t" - "str r5, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r3, [%[a], 
#208]\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r7, [%[b], #208]\n\t" - "ldr r8, [%[b], #212]\n\t" - "ldr r9, [%[b], #216]\n\t" - "ldr r10, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #208]\n\t" - "str r4, [%[r], #212]\n\t" - "str r5, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r3, [%[a], #224]\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r7, [%[b], #224]\n\t" - "ldr r8, [%[b], #228]\n\t" - "ldr r9, [%[b], #232]\n\t" - "ldr r10, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #224]\n\t" - "str r4, [%[r], #228]\n\t" - "str r5, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r3, [%[a], #240]\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r7, [%[b], #240]\n\t" - "ldr r8, [%[b], #244]\n\t" - "ldr r9, [%[b], #248]\n\t" - "ldr r10, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #240]\n\t" - "str r4, [%[r], #244]\n\t" - "str r5, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } 
#endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, 
#1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_2048_word_64_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_2048_word_64_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, 
%[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -6429,8 +15737,8 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -6438,12 +15746,22 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s (void)m; - div = d[63]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); - for (i=63; i>=0; i--) { - sp_digit hi = t1[64 + i] - (t1[64 + i] == div); - r1 = div_2048_word_64(hi, t1[64 + i - 1], div); + for (i = 63; i > 0; i--) { + if (t1[i + 64] != d[i]) + break; + } + if (t1[i + 64] >= d[i]) { + sp_2048_sub_in_place_64(&t1[64], d); + } + for (i = 63; i >= 0; i--) { + if (t1[64 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div); + } sp_2048_mul_d_64(t2, d, r1); t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); @@ -6476,7 +15794,8 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_64_cond(a, m, NULL, r); } @@ -6520,752 +15839,747 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ -static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #252\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0xfc\n\t" + "\n" + "L_sp_2048_cmp_64_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_2048_cmp_64_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, [%[b], #248]\n\t" + "and r12, r12, r3\n\t" + 
"and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[b], #228]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it 
lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #192]\n\t" + "ldr lr, 
[%[b], #192]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - 
"ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi 
%[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, 
r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" 
+ "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, 
r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" 
- "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, 
lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne 
r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - 
"ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -7277,8 +16591,8 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -7286,12 +16600,15 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig (void)m; - div = d[63]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); - for (i=63; i>=0; i--) { - sp_digit hi = t1[64 + i] - (t1[64 + i] == div); + r1 = sp_2048_cmp_64(&t1[64], d) >= 0; + sp_2048_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1); + for (i = 63; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[64 + i] == div); + sp_digit hi = t1[64 + i] + mask; r1 = div_2048_word_64(hi, t1[64 + i - 1], div); + r1 |= mask; sp_2048_mul_d_64(t2, d, r1); t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2); @@ -7315,7 +16632,8 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_64(a, m, NULL, r); } @@ -7331,12 +16649,14 @@ static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[8 * 128]; @@ -7351,11 +16671,17 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 128), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 128), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -7407,6 +16733,10 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -7447,7 +16777,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -7462,12 +16792,14 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 128]; @@ -7482,11 +16814,17 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -7546,6 +16884,10 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -7587,7 +16929,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -7614,7 +16956,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[64 * 5]; @@ -7636,7 +16978,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -7646,9 +16988,9 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 64; r = a + 64 * 2; m = r + 64 * 2; - ah = a + 64; sp_2048_from_bin(ah, 64, in, inLen); #if DIGIT_BIT >= 32 @@ -7666,7 +17008,38 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_2048_from_mp(m, 64, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_2048_mod_64_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_2048_mont_sqr_64(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_2048_mont_mul_64(r, r, ah, m, mp); + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_2048_sub_in_place_64(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_2048_sqr_64(r, ah); err = sp_2048_mod_64_cond(r, r, m); @@ -7694,7 +17067,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 64); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_2048_mont_sqr_64(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_2048_mont_mul_64(r, r, a, m, mp); @@ -7720,7 +17093,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -7729,6 +17102,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -7737,204 +17111,176 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov lr, #0\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_2048_cond_add_32_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_2048_cond_add_32_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr 
r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs 
r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "adc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally add a and b using the mask m. 
+ * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov r8, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -7957,7 +17303,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[64 * 4]; @@ -7991,7 +17337,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -8016,21 +17362,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 64); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[32 * 11]; @@ -8058,8 +17404,14 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -8107,12 +17459,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, 
sizeof(sp_digit) * 32 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -8243,398 +17595,401 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 -static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) +static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register byte n asm ("r2") = (byte)n_p; + __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "ldr r3, [%[a], #252]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #248]\n\t" - "str r4, [%[r], #256]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #244]\n\t" - "str r3, [%[r], #252]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #252]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #248]\n\t" + "str r6, [%[r], #256]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #240]\n\t" - "str r2, [%[r], #248]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #236]\n\t" - "str r4, [%[r], #244]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #232]\n\t" - "str r3, [%[r], #240]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #244]\n\t" + "str r5, [%[r], #252]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #240]\n\t" + "str r4, [%[r], #248]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" 
+ "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #236]\n\t" + "str r6, [%[r], #244]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #228]\n\t" - "str r2, [%[r], #236]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #224]\n\t" - "str r4, [%[r], #232]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #220]\n\t" - "str r3, [%[r], #228]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #232]\n\t" + "str r5, [%[r], #240]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #228]\n\t" + "str r4, [%[r], #236]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #224]\n\t" + "str r6, [%[r], #232]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #216]\n\t" - "str r2, [%[r], #224]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #212]\n\t" - "str r4, [%[r], #220]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #208]\n\t" - "str r3, [%[r], #216]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #220]\n\t" + "str r5, [%[r], #228]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #216]\n\t" + "str r4, [%[r], #224]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #212]\n\t" + "str r6, [%[r], #220]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], 
#204]\n\t" - "str r2, [%[r], #212]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #200]\n\t" - "str r4, [%[r], #208]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #196]\n\t" - "str r3, [%[r], #204]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #208]\n\t" + "str r5, [%[r], #216]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #204]\n\t" + "str r4, [%[r], #212]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #200]\n\t" + "str r6, [%[r], #208]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #192]\n\t" - "str r2, [%[r], #200]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #188]\n\t" - "str r4, [%[r], #196]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #184]\n\t" - "str r3, [%[r], #192]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #196]\n\t" + "str r5, [%[r], #204]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #192]\n\t" + "str r4, [%[r], #200]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #188]\n\t" + "str r6, [%[r], #196]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #180]\n\t" - "str r2, [%[r], #188]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #176]\n\t" - "str r4, [%[r], #184]\n\t" - "lsr r5, r2, #1\n\t" - "lsl 
r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #172]\n\t" - "str r3, [%[r], #180]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #184]\n\t" + "str r5, [%[r], #192]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #180]\n\t" + "str r4, [%[r], #188]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #176]\n\t" + "str r6, [%[r], #184]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #168]\n\t" - "str r2, [%[r], #176]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #164]\n\t" - "str r4, [%[r], #172]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #160]\n\t" - "str r3, [%[r], #168]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #172]\n\t" + "str r5, [%[r], #180]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #168]\n\t" + "str r4, [%[r], #176]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #164]\n\t" + "str r6, [%[r], #172]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #156]\n\t" - "str r2, [%[r], #164]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #152]\n\t" - "str r4, [%[r], #160]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #148]\n\t" - "str r3, [%[r], #156]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #160]\n\t" + 
"str r5, [%[r], #168]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #156]\n\t" + "str r4, [%[r], #164]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #152]\n\t" + "str r6, [%[r], #160]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #144]\n\t" - "str r2, [%[r], #152]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #140]\n\t" - "str r4, [%[r], #148]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #136]\n\t" - "str r3, [%[r], #144]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #148]\n\t" + "str r5, [%[r], #156]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #144]\n\t" + "str r4, [%[r], #152]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #140]\n\t" + "str r6, [%[r], #148]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #132]\n\t" - "str r2, [%[r], #140]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #128]\n\t" - "str r4, [%[r], #136]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #124]\n\t" - "str r3, [%[r], #132]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #136]\n\t" + "str r5, [%[r], #144]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #132]\n\t" + "str r4, [%[r], #140]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, 
%[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #128]\n\t" + "str r6, [%[r], #136]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #120]\n\t" - "str r2, [%[r], #128]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #116]\n\t" - "str r4, [%[r], #124]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #112]\n\t" - "str r3, [%[r], #120]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #124]\n\t" + "str r5, [%[r], #132]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #120]\n\t" + "str r4, [%[r], #128]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #116]\n\t" + "str r6, [%[r], #124]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #108]\n\t" - "str r2, [%[r], #116]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #104]\n\t" - "str r4, [%[r], #112]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #100]\n\t" - "str r3, [%[r], #108]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #112]\n\t" + "str r5, [%[r], #120]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #108]\n\t" + "str r4, [%[r], #116]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #104]\n\t" + "str r6, [%[r], #112]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, 
[%[a], #96]\n\t" - "str r2, [%[r], #104]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #92]\n\t" - "str r4, [%[r], #100]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #88]\n\t" - "str r3, [%[r], #96]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #100]\n\t" + "str r5, [%[r], #108]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #96]\n\t" + "str r4, [%[r], #104]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #92]\n\t" + "str r6, [%[r], #100]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #84]\n\t" - "str r2, [%[r], #92]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #80]\n\t" - "str r4, [%[r], #88]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #76]\n\t" - "str r3, [%[r], #84]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #88]\n\t" + "str r5, [%[r], #96]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #84]\n\t" + "str r4, [%[r], #92]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #80]\n\t" + "str r6, [%[r], #88]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #72]\n\t" - "str r2, [%[r], #80]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #68]\n\t" - "str r4, [%[r], #76]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, 
%[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #64]\n\t" - "str r3, [%[r], #72]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #76]\n\t" + "str r5, [%[r], #84]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #72]\n\t" + "str r4, [%[r], #80]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #68]\n\t" + "str r6, [%[r], #76]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #64]\n\t" + "str r5, [%[r], #72]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r6, [%[r], #64]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #52]\n\t" + "str r5, [%[r], #60]\n\t" + "lsr 
r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r6, [%[r], #52]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #40]\n\t" + "str r5, [%[r], #48]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r6, [%[r], #40]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #28]\n\t" + "str r5, [%[r], #36]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr 
r4, [%[a], #20]\n\t" + "str r6, [%[r], #28]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #16]\n\t" + "str r5, [%[r], #24]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r6, [%[r], #16]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "str r3, [%[r]]\n\t" - "str r4, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #4]\n\t" + "str r5, [%[r], #12]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a]]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "str r5, [%[r]]\n\t" + "str r6, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12", "cc" ); } @@ -8644,12 +17999,14 @@ static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. 
* m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[193]; @@ -8665,11 +18022,17 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -8698,6 +18061,10 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -8744,7 +18111,7 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, sp_2048_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -8886,27 +18253,30 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; } - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } @@ -8920,20 +18290,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= 
((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -8963,12 +18336,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -9005,34 +18378,13 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_3072_to_bin_96(sp_digit* r, byte* a) { int i; - int j; - int s = 0; - int b; + int j = 0; - j = 3072 / 8 - 1; - a[j] = 0; - for (i=0; i<96 && j>=0; i++) { - b = 0; - /* lint allow cast of mismatch sp_digit and int */ - a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ - b += 8 - s; - if (j < 0) { - break; - } - while (b < 32) { - a[j--] = (byte)(r[i] >> b); - b += 8; - if (j < 0) { - break; - } - } - s = 8 - (b - 32); - if (j >= 0) { - a[j] = 0; - } - if (s != 0) { - j++; - } + for (i = 95; i >= 0; i--) { + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; } } @@ -9057,985 +18409,5482 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" - "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, 
r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, 
r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, 
[%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + 
"lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, 
r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs 
r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" 
+ "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" 
+ "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" 
+ "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + 
"adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - 
"adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[11] * B[1]\n\t" + /* A[11] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[2] * B[11]\n\t" + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - 
"adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, 
r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[11] * B[3]\n\t" + /* A[11] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #56]\n\t" - "# A[4] * B[11]\n\t" + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" + "adc r5, r5, #0\n\t" 
+#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #16]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #60]\n\t" - "# A[11] * B[5]\n\t" + /* A[11] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[10] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl 
r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #64]\n\t" - "# A[6] * B[11]\n\t" + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[11] * B[7]\n\t" + /* A[11] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, 
r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, [%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" 
"adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif 
"str r3, [%[r], #72]\n\t" - "# A[8] * B[11]\n\t" + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #76]\n\t" - "# A[11] * B[9]\n\t" + /* A[11] * B[9] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[10] * B[11]\n\t" + /* A[10] * B[11] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[11] * B[11]\n\t" - "umull r6, r7, r8, r9\n\t" + /* A[11] * B[11] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, r8, r9\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -10044,689 +23893,9 @@ static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" - ); -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) -{ - __asm__ __volatile__ ( - "sub sp, sp, #48\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" - "umull r8, r3, r10, r10\n\t" - "mov r4, #0\n\t" - "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" - "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" - "ldr r10, [%[a], #4]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" 
- "# A[2] * A[2]\n\t" - "ldr r10, [%[a], #8]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" - "ldr r10, [%[a], #12]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r2, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, 
r12\n\t" - "# A[2] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r2, r2, r7\n\t" - "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" - "ldr r10, [%[a], #16]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, 
r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r2, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" - "ldr r10, [%[a], #20]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r2, r2, r7\n\t" - "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], 
#4]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [sp, #44]\n\t" - "# A[1] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[2] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" - "ldr r10, [%[a], #24]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, 
r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [%[r], #48]\n\t" - "# A[2] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r2, #0\n\t" - "mov r7, #0\n\t" - "# A[3] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r2, r2, r7\n\t" - "str r3, [%[r], #52]\n\t" - "# A[3] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[4] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" - "ldr r10, [%[a], #28]\n\t" - "umull r8, 
r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [%[r], #56]\n\t" - "# A[4] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r4, #0\n\t" - "mov r7, #0\n\t" - "# A[5] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r2, r2, r5\n\t" - "adcs r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r2, [%[r], #60]\n\t" - "# A[5] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r2, #0\n\t" - "mov r7, #0\n\t" - "# A[6] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" - "ldr r10, [%[a], #32]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r2, r2, r7\n\t" - "str r3, [%[r], #64]\n\t" - "# A[6] * A[11]\n\t" - "ldr r10, [%[a], 
#44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" - "# A[7] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "adds r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adc r7, r7, r7\n\t" - "adds r4, r4, r5\n\t" - "adcs r2, r2, r6\n\t" - "adc r3, r3, r7\n\t" - "str r4, [%[r], #68]\n\t" - "# A[7] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[8] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[9] * A[9]\n\t" - "ldr r10, [%[a], #36]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "str r2, [%[r], #72]\n\t" - "# A[8] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[9] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "str r3, [%[r], #76]\n\t" - "# A[9] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r4, r4, 
r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[10] * A[10]\n\t" - "ldr r10, [%[a], #40]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "str r4, [%[r], #80]\n\t" - "# A[10] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "str r2, [%[r], #84]\n\t" - "# A[11] * A[11]\n\t" - "ldr r10, [%[a], #44]\n\t" - "umull r8, r9, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adc r4, r4, r9\n\t" - "str r3, [%[r], #88]\n\t" - "str r4, [%[r], #92]\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #48\n\t" - : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" ); } @@ -10736,68 +23905,41 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), 
[a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a into a. (a -= b) @@ -10805,114 +23947,60 @@ static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - 
"str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, [%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, 
r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -10921,116 +24009,62 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, 
r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, 
[%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -11076,7 +24110,7 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, sp_digit z1[24]; sp_digit a1[12]; sp_digit b1[12]; - sp_digit z2[24]; + sp_digit* z2 = r + 24; sp_digit u; sp_digit ca; sp_digit cb; @@ -11084,45 +24118,22 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, ca = sp_3072_add_12(a1, a, &a[12]); cb = sp_3072_add_12(b1, b, &b[12]); u = ca & cb; - sp_3072_mul_12(z1, a1, b1); + sp_3072_mul_12(z2, &a[12], &b[12]); sp_3072_mul_12(z0, a, b); - sp_3072_mask_12(r + 24, a1, 0 - cb); + sp_3072_mul_12(z1, a1, b1); + + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_sub_in_place_24(z1, z2); + sp_3072_mask_12(a1, a1, 0 - cb); + u += sp_3072_add_12(z1 + 12, z1 + 12, a1); sp_3072_mask_12(b1, b1, 0 - ca); - u += sp_3072_add_12(r + 24, r + 24, b1); - u += sp_3072_sub_in_place_24(z1, z2); - u += sp_3072_sub_in_place_24(z1, z0); - u += sp_3072_add_24(r + 12, r + 12, z1); - r[36] = u; - XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - (void)sp_3072_add_24(r + 24, r + 24, z2); -} + u += sp_3072_add_12(z1 + 12, z1 + 12, b1); -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[24]; - sp_digit z1[24]; - sp_digit a1[12]; - sp_digit u; - - u = sp_3072_add_12(a1, a, &a[12]); - sp_3072_sqr_12(z1, a1); - sp_3072_sqr_12(z2, &a[12]); - sp_3072_sqr_12(z0, a); - sp_3072_mask_12(r + 24, a1, 0 - u); - u += sp_3072_add_12(r + 24, r + 24, r + 24); - u += sp_3072_sub_in_place_24(z1, z2); - u += sp_3072_sub_in_place_24(z1, z0); u += sp_3072_add_24(r + 12, r + 12, z1); - r[36] = u; - XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - (void)sp_3072_add_24(r + 24, r + 24, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (12 - 1)); + a1[0] = u; + (void)sp_3072_add_12(r + 36, r + 36, a1); } /* Sub b from a into a. 
(a -= b) @@ -11130,210 +24141,102 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, 
[%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" 
"sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "ldr r2, [%[a], #128]\n\t" - "ldr r3, [%[a], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "ldr r8, [%[b], #136]\n\t" - "ldr r9, [%[b], #140]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #128]\n\t" - "str r3, [%[a], #132]\n\t" - "str r4, [%[a], #136]\n\t" - "str r5, [%[a], #140]\n\t" - "ldr r2, [%[a], #144]\n\t" - "ldr r3, [%[a], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "ldr r8, [%[b], #152]\n\t" - "ldr r9, [%[b], #156]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #144]\n\t" - "str r3, [%[a], #148]\n\t" - "str r4, [%[a], #152]\n\t" - "str r5, [%[a], #156]\n\t" - "ldr r2, [%[a], #160]\n\t" - "ldr r3, [%[a], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[a], #172]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "ldr r8, 
[%[b], #168]\n\t" - "ldr r9, [%[b], #172]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #160]\n\t" - "str r3, [%[a], #164]\n\t" - "str r4, [%[a], #168]\n\t" - "str r5, [%[a], #172]\n\t" - "ldr r2, [%[a], #176]\n\t" - "ldr r3, [%[a], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "ldr r8, [%[b], #184]\n\t" - "ldr r9, [%[b], #188]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #176]\n\t" - "str r3, [%[a], #180]\n\t" - "str r4, [%[a], #184]\n\t" - "str r5, [%[a], #188]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -11342,212 +24245,104 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs 
r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[a], #136]\n\t" - "ldr r7, [%[a], #140]\n\t" - "ldr r8, [%[b], #128]\n\t" - "ldr r9, [%[b], #132]\n\t" - "ldr r10, [%[b], #136]\n\t" - "ldr r14, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" - "str r6, [%[r], #136]\n\t" - "str r7, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[a], #152]\n\t" - "ldr r7, [%[a], #156]\n\t" - "ldr r8, [%[b], #144]\n\t" - "ldr r9, [%[b], #148]\n\t" - "ldr r10, [%[b], #152]\n\t" - "ldr r14, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" - "str r6, [%[r], #152]\n\t" - "str r7, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, 
[%[a], #168]\n\t" - "ldr r7, [%[a], #172]\n\t" - "ldr r8, [%[b], #160]\n\t" - "ldr r9, [%[b], #164]\n\t" - "ldr r10, [%[b], #168]\n\t" - "ldr r14, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" - "str r6, [%[r], #168]\n\t" - "str r7, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[a], #184]\n\t" - "ldr r7, [%[a], #188]\n\t" - "ldr r8, [%[b], #176]\n\t" - "ldr r9, [%[b], #180]\n\t" - "ldr r10, [%[b], #184]\n\t" - "ldr r14, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #176]\n\t" - "str r5, [%[r], #180]\n\t" - "str r6, [%[r], #184]\n\t" - "str r7, [%[r], #188]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -11593,7 +24388,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, sp_digit z1[48]; sp_digit a1[24]; sp_digit b1[24]; - sp_digit z2[48]; + sp_digit* z2 = r + 48; sp_digit u; sp_digit ca; sp_digit cb; @@ -11601,45 +24396,22 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, ca = sp_3072_add_24(a1, a, &a[24]); cb = sp_3072_add_24(b1, b, &b[24]); u = ca & cb; - sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); sp_3072_mul_24(z0, a, b); - sp_3072_mask_24(r + 48, a1, 0 - cb); + sp_3072_mul_24(z1, a1, b1); + + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_sub_in_place_48(z1, z2); + sp_3072_mask_24(a1, a1, 0 - cb); + u += sp_3072_add_24(z1 + 24, z1 + 24, a1); sp_3072_mask_24(b1, b1, 0 - ca); - u += sp_3072_add_24(r + 48, r + 48, b1); - u += sp_3072_sub_in_place_48(z1, z2); - u += sp_3072_sub_in_place_48(z1, z0); - u += sp_3072_add_48(r + 24, r + 24, z1); - r[72] = u; - XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - (void)sp_3072_add_48(r + 48, r + 48, z2); -} + u += sp_3072_add_24(z1 + 24, z1 + 24, b1); -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[48]; - sp_digit z1[48]; - sp_digit a1[24]; - sp_digit u; - - u = sp_3072_add_24(a1, a, &a[24]); - sp_3072_sqr_24(z1, a1); - sp_3072_sqr_24(z2, &a[24]); - sp_3072_sqr_24(z0, a); - sp_3072_mask_24(r + 48, a1, 0 - u); - u += sp_3072_add_24(r + 48, r + 48, r + 48); - u += sp_3072_sub_in_place_48(z1, z2); - u += sp_3072_sub_in_place_48(z1, z0); u += sp_3072_add_48(r + 24, r + 24, z1); - r[72] = u; - XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - (void)sp_3072_add_48(r + 48, r + 48, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (24 - 1)); + a1[0] = u; + (void)sp_3072_add_24(r + 72, r + 72, a1); } /* Sub b from a into a. 
(a -= b) @@ -11647,402 +24419,186 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, 
[%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" 
"sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "ldr r2, [%[a], #128]\n\t" - "ldr r3, [%[a], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "ldr r8, [%[b], #136]\n\t" - "ldr r9, [%[b], #140]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #128]\n\t" - "str r3, [%[a], #132]\n\t" - "str r4, [%[a], #136]\n\t" - "str r5, [%[a], #140]\n\t" - "ldr r2, [%[a], #144]\n\t" - "ldr r3, [%[a], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "ldr r8, [%[b], #152]\n\t" - "ldr r9, [%[b], #156]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #144]\n\t" - "str r3, [%[a], #148]\n\t" - "str r4, [%[a], #152]\n\t" - "str r5, [%[a], #156]\n\t" - "ldr r2, [%[a], #160]\n\t" - "ldr r3, [%[a], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[a], #172]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "ldr r8, 
[%[b], #168]\n\t" - "ldr r9, [%[b], #172]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #160]\n\t" - "str r3, [%[a], #164]\n\t" - "str r4, [%[a], #168]\n\t" - "str r5, [%[a], #172]\n\t" - "ldr r2, [%[a], #176]\n\t" - "ldr r3, [%[a], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "ldr r8, [%[b], #184]\n\t" - "ldr r9, [%[b], #188]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #176]\n\t" - "str r3, [%[a], #180]\n\t" - "str r4, [%[a], #184]\n\t" - "str r5, [%[a], #188]\n\t" - "ldr r2, [%[a], #192]\n\t" - "ldr r3, [%[a], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[a], #204]\n\t" - "ldr r6, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" - "ldr r8, [%[b], #200]\n\t" - "ldr r9, [%[b], #204]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #192]\n\t" - "str r3, [%[a], #196]\n\t" - "str r4, [%[a], #200]\n\t" - "str r5, [%[a], #204]\n\t" - "ldr r2, [%[a], #208]\n\t" - "ldr r3, [%[a], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[a], #220]\n\t" - "ldr r6, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "ldr r8, [%[b], #216]\n\t" - "ldr r9, [%[b], #220]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #208]\n\t" - "str r3, [%[a], #212]\n\t" - "str r4, [%[a], #216]\n\t" - "str r5, [%[a], #220]\n\t" - "ldr r2, 
[%[a], #224]\n\t" - "ldr r3, [%[a], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[a], #236]\n\t" - "ldr r6, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "ldr r8, [%[b], #232]\n\t" - "ldr r9, [%[b], #236]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #224]\n\t" - "str r3, [%[a], #228]\n\t" - "str r4, [%[a], #232]\n\t" - "str r5, [%[a], #236]\n\t" - "ldr r2, [%[a], #240]\n\t" - "ldr r3, [%[a], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[a], #252]\n\t" - "ldr r6, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "ldr r8, [%[b], #248]\n\t" - "ldr r9, [%[b], #252]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #240]\n\t" - "str r3, [%[a], #244]\n\t" - "str r4, [%[a], #248]\n\t" - "str r5, [%[a], #252]\n\t" - "ldr r2, [%[a], #256]\n\t" - "ldr r3, [%[a], #260]\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[a], #268]\n\t" - "ldr r6, [%[b], #256]\n\t" - "ldr r7, [%[b], #260]\n\t" - "ldr r8, [%[b], #264]\n\t" - "ldr r9, [%[b], #268]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #256]\n\t" - "str r3, [%[a], #260]\n\t" - "str r4, [%[a], #264]\n\t" - "str r5, [%[a], #268]\n\t" - "ldr r2, [%[a], #272]\n\t" - "ldr r3, [%[a], #276]\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[a], #284]\n\t" - "ldr r6, [%[b], #272]\n\t" - "ldr r7, [%[b], #276]\n\t" - "ldr r8, [%[b], #280]\n\t" - "ldr r9, [%[b], #284]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, 
r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #272]\n\t" - "str r3, [%[a], #276]\n\t" - "str r4, [%[a], #280]\n\t" - "str r5, [%[a], #284]\n\t" - "ldr r2, [%[a], #288]\n\t" - "ldr r3, [%[a], #292]\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[a], #300]\n\t" - "ldr r6, [%[b], #288]\n\t" - "ldr r7, [%[b], #292]\n\t" - "ldr r8, [%[b], #296]\n\t" - "ldr r9, [%[b], #300]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #288]\n\t" - "str r3, [%[a], #292]\n\t" - "str r4, [%[a], #296]\n\t" - "str r5, [%[a], #300]\n\t" - "ldr r2, [%[a], #304]\n\t" - "ldr r3, [%[a], #308]\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[a], #316]\n\t" - "ldr r6, [%[b], #304]\n\t" - "ldr r7, [%[b], #308]\n\t" - "ldr r8, [%[b], #312]\n\t" - "ldr r9, [%[b], #316]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #304]\n\t" - "str r3, [%[a], #308]\n\t" - "str r4, [%[a], #312]\n\t" - "str r5, [%[a], #316]\n\t" - "ldr r2, [%[a], #320]\n\t" - "ldr r3, [%[a], #324]\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[a], #332]\n\t" - "ldr r6, [%[b], #320]\n\t" - "ldr r7, [%[b], #324]\n\t" - "ldr r8, [%[b], #328]\n\t" - "ldr r9, [%[b], #332]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #320]\n\t" - "str r3, [%[a], #324]\n\t" - "str r4, [%[a], #328]\n\t" - "str r5, [%[a], #332]\n\t" - "ldr r2, [%[a], #336]\n\t" - "ldr r3, [%[a], #340]\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[a], #348]\n\t" - "ldr r6, [%[b], #336]\n\t" - "ldr r7, [%[b], #340]\n\t" - "ldr r8, [%[b], #344]\n\t" 
- "ldr r9, [%[b], #348]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #336]\n\t" - "str r3, [%[a], #340]\n\t" - "str r4, [%[a], #344]\n\t" - "str r5, [%[a], #348]\n\t" - "ldr r2, [%[a], #352]\n\t" - "ldr r3, [%[a], #356]\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r5, [%[a], #364]\n\t" - "ldr r6, [%[b], #352]\n\t" - "ldr r7, [%[b], #356]\n\t" - "ldr r8, [%[b], #360]\n\t" - "ldr r9, [%[b], #364]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #352]\n\t" - "str r3, [%[a], #356]\n\t" - "str r4, [%[a], #360]\n\t" - "str r5, [%[a], #364]\n\t" - "ldr r2, [%[a], #368]\n\t" - "ldr r3, [%[a], #372]\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[a], #380]\n\t" - "ldr r6, [%[b], #368]\n\t" - "ldr r7, [%[b], #372]\n\t" - "ldr r8, [%[b], #376]\n\t" - "ldr r9, [%[b], #380]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #368]\n\t" - "str r3, [%[a], #372]\n\t" - "str r4, [%[a], #376]\n\t" - "str r5, [%[a], #380]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -12051,404 +24607,188 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs 
r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[a], #136]\n\t" - "ldr r7, [%[a], #140]\n\t" - "ldr r8, [%[b], #128]\n\t" - "ldr r9, [%[b], #132]\n\t" - "ldr r10, [%[b], #136]\n\t" - "ldr r14, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" - "str r6, [%[r], #136]\n\t" - "str r7, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[a], #152]\n\t" - "ldr r7, [%[a], #156]\n\t" - "ldr r8, [%[b], #144]\n\t" - "ldr r9, [%[b], #148]\n\t" - "ldr r10, [%[b], #152]\n\t" - "ldr r14, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" - "str r6, [%[r], #152]\n\t" - "str r7, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, 
[%[a], #168]\n\t" - "ldr r7, [%[a], #172]\n\t" - "ldr r8, [%[b], #160]\n\t" - "ldr r9, [%[b], #164]\n\t" - "ldr r10, [%[b], #168]\n\t" - "ldr r14, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" - "str r6, [%[r], #168]\n\t" - "str r7, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[a], #184]\n\t" - "ldr r7, [%[a], #188]\n\t" - "ldr r8, [%[b], #176]\n\t" - "ldr r9, [%[b], #180]\n\t" - "ldr r10, [%[b], #184]\n\t" - "ldr r14, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #176]\n\t" - "str r5, [%[r], #180]\n\t" - "str r6, [%[r], #184]\n\t" - "str r7, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[a], #196]\n\t" - "ldr r6, [%[a], #200]\n\t" - "ldr r7, [%[a], #204]\n\t" - "ldr r8, [%[b], #192]\n\t" - "ldr r9, [%[b], #196]\n\t" - "ldr r10, [%[b], #200]\n\t" - "ldr r14, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #192]\n\t" - "str r5, [%[r], #196]\n\t" - "str r6, [%[r], #200]\n\t" - "str r7, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[a], #212]\n\t" - "ldr r6, [%[a], #216]\n\t" - "ldr r7, [%[a], #220]\n\t" - "ldr r8, [%[b], #208]\n\t" - "ldr r9, [%[b], #212]\n\t" - "ldr r10, [%[b], #216]\n\t" - "ldr r14, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" 
+ "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #208]\n\t" - "str r5, [%[r], #212]\n\t" - "str r6, [%[r], #216]\n\t" - "str r7, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[a], #228]\n\t" - "ldr r6, [%[a], #232]\n\t" - "ldr r7, [%[a], #236]\n\t" - "ldr r8, [%[b], #224]\n\t" - "ldr r9, [%[b], #228]\n\t" - "ldr r10, [%[b], #232]\n\t" - "ldr r14, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #224]\n\t" - "str r5, [%[r], #228]\n\t" - "str r6, [%[r], #232]\n\t" - "str r7, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[a], #244]\n\t" - "ldr r6, [%[a], #248]\n\t" - "ldr r7, [%[a], #252]\n\t" - "ldr r8, [%[b], #240]\n\t" - "ldr r9, [%[b], #244]\n\t" - "ldr r10, [%[b], #248]\n\t" - "ldr r14, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #240]\n\t" - "str r5, [%[r], #244]\n\t" - "str r6, [%[r], #248]\n\t" - "str r7, [%[r], #252]\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[a], #260]\n\t" - "ldr r6, [%[a], #264]\n\t" - "ldr r7, [%[a], #268]\n\t" - "ldr r8, [%[b], #256]\n\t" - "ldr r9, [%[b], #260]\n\t" - "ldr r10, [%[b], #264]\n\t" - "ldr r14, [%[b], #268]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #256]\n\t" - "str r5, [%[r], #260]\n\t" - "str r6, [%[r], #264]\n\t" - "str r7, [%[r], #268]\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, 
[%[a], #276]\n\t" - "ldr r6, [%[a], #280]\n\t" - "ldr r7, [%[a], #284]\n\t" - "ldr r8, [%[b], #272]\n\t" - "ldr r9, [%[b], #276]\n\t" - "ldr r10, [%[b], #280]\n\t" - "ldr r14, [%[b], #284]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #272]\n\t" - "str r5, [%[r], #276]\n\t" - "str r6, [%[r], #280]\n\t" - "str r7, [%[r], #284]\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[a], #292]\n\t" - "ldr r6, [%[a], #296]\n\t" - "ldr r7, [%[a], #300]\n\t" - "ldr r8, [%[b], #288]\n\t" - "ldr r9, [%[b], #292]\n\t" - "ldr r10, [%[b], #296]\n\t" - "ldr r14, [%[b], #300]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #288]\n\t" - "str r5, [%[r], #292]\n\t" - "str r6, [%[r], #296]\n\t" - "str r7, [%[r], #300]\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[a], #308]\n\t" - "ldr r6, [%[a], #312]\n\t" - "ldr r7, [%[a], #316]\n\t" - "ldr r8, [%[b], #304]\n\t" - "ldr r9, [%[b], #308]\n\t" - "ldr r10, [%[b], #312]\n\t" - "ldr r14, [%[b], #316]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #304]\n\t" - "str r5, [%[r], #308]\n\t" - "str r6, [%[r], #312]\n\t" - "str r7, [%[r], #316]\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[a], #324]\n\t" - "ldr r6, [%[a], #328]\n\t" - "ldr r7, [%[a], #332]\n\t" - "ldr r8, [%[b], #320]\n\t" - "ldr r9, [%[b], #324]\n\t" - "ldr r10, [%[b], #328]\n\t" - "ldr r14, [%[b], #332]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm 
%[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #320]\n\t" - "str r5, [%[r], #324]\n\t" - "str r6, [%[r], #328]\n\t" - "str r7, [%[r], #332]\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[a], #340]\n\t" - "ldr r6, [%[a], #344]\n\t" - "ldr r7, [%[a], #348]\n\t" - "ldr r8, [%[b], #336]\n\t" - "ldr r9, [%[b], #340]\n\t" - "ldr r10, [%[b], #344]\n\t" - "ldr r14, [%[b], #348]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #336]\n\t" - "str r5, [%[r], #340]\n\t" - "str r6, [%[r], #344]\n\t" - "str r7, [%[r], #348]\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[a], #356]\n\t" - "ldr r6, [%[a], #360]\n\t" - "ldr r7, [%[a], #364]\n\t" - "ldr r8, [%[b], #352]\n\t" - "ldr r9, [%[b], #356]\n\t" - "ldr r10, [%[b], #360]\n\t" - "ldr r14, [%[b], #364]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #352]\n\t" - "str r5, [%[r], #356]\n\t" - "str r6, [%[r], #360]\n\t" - "str r7, [%[r], #364]\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[a], #372]\n\t" - "ldr r6, [%[a], #376]\n\t" - "ldr r7, [%[a], #380]\n\t" - "ldr r8, [%[b], #368]\n\t" - "ldr r9, [%[b], #372]\n\t" - "ldr r10, [%[b], #376]\n\t" - "ldr r14, [%[b], #380]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #368]\n\t" - "str r5, [%[r], #372]\n\t" - "str r6, [%[r], #376]\n\t" - "str r7, [%[r], #380]\n\t" - "adc %[c], 
r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -12494,7 +24834,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, sp_digit z1[96]; sp_digit a1[48]; sp_digit b1[48]; - sp_digit z2[96]; + sp_digit* z2 = r + 96; sp_digit u; sp_digit ca; sp_digit cb; @@ -12502,18 +24842,3366 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, ca = sp_3072_add_48(a1, a, &a[48]); cb = sp_3072_add_48(b1, b, &b[48]); u = ca & cb; - sp_3072_mul_48(z1, a1, b1); + sp_3072_mul_48(z2, &a[48], &b[48]); sp_3072_mul_48(z0, a, b); - sp_3072_mask_48(r + 96, a1, 0 - cb); - sp_3072_mask_48(b1, b1, 0 - ca); - u += sp_3072_add_48(r + 96, r + 96, b1); - u += sp_3072_sub_in_place_96(z1, z2); + sp_3072_mul_48(z1, a1, b1); + u += sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_sub_in_place_96(z1, z2); + sp_3072_mask_48(a1, a1, 0 - cb); + u += sp_3072_add_48(z1 + 48, z1 + 48, a1); + sp_3072_mask_48(b1, b1, 0 - ca); + u += sp_3072_add_48(z1 + 48, z1 + 48, b1); + u += sp_3072_add_96(r + 48, r + 48, z1); - r[144] = u; - XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - (void)sp_3072_add_96(r + 96, r + 96, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (48 - 1)); + a1[0] = u; + (void)sp_3072_add_48(r + 144, r + 144, a1); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #48\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else + "umull r8, r3, r10, r10\n\t" +#endif + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + /* A[0] * A[1] */ + "ldr r10, [%[a], #4]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [sp, #4]\n\t" + /* 
A[0] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ + "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #8]\n\t" + /* 
A[0] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r2, [sp, #12]\n\t" + /* A[0] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ + "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [sp, #16]\n\t" + /* A[0] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl 
r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr 
r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + /* A[0] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, 
r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, 
r7\n\t" + "str r2, [sp, #24]\n\t" + /* A[0] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + 
"lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + /* A[0] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, 
r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + /* A[0] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + 
"adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + /* A[0] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" 
+ "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + /* A[0] * A[11] */ + 
"ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr 
r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + /* A[1] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[2] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + 
"lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #48]\n\t" + /* A[2] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[3] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #52]\n\t" + /* A[3] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, 
r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[4] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ + "ldr r10, [%[a], 
#32]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #56]\n\t" + /* A[4] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, 
#16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[5] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, 
r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #60]\n\t" + /* A[5] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[6] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #64]\n\t" + /* A[6] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[7] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #68]\n\t" + /* A[7] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" 
+ "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, 
#0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * A[9] */ + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r2, [%[r], #72]\n\t" + /* A[8] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, 
r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[9] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [%[r], #76]\n\t" + /* A[9] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * A[10] */ + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #80]\n\t" + /* A[10] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r2, [%[r], #84]\n\t" + /* A[11] * A[11] */ + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" +#endif + "str r3, [%[r], #88]\n\t" + "str r4, [%[r], #92]\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + ); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. 
+ * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit* z2 = r + 24; + sp_digit z1[24]; + sp_digit* a1 = z1; + sp_digit zero[12]; + sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 12); + + mask = sp_3072_sub_12(a1, a, &a[12]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_3072_sub_12(a1, p1, p2); + + sp_3072_sqr_12(z2, &a[12]); + sp_3072_sqr_12(z0, a); + sp_3072_sqr_12(z1, a1); + + u = 0; + u -= sp_3072_sub_in_place_24(z1, z2); + u -= sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_sub_in_place_24(r + 12, z1); + zero[0] = u; + (void)sp_3072_add_12(r + 36, r + 36, zero); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, 
r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit* z2 = r + 48; + sp_digit z1[48]; + sp_digit* a1 = z1; + sp_digit zero[24]; + sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 24); + + mask = sp_3072_sub_24(a1, a, &a[24]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_3072_sub_24(a1, p1, p2); + + sp_3072_sqr_24(z2, &a[24]); + sp_3072_sqr_24(z0, a); + sp_3072_sqr_24(z1, a1); + + u = 0; + u -= sp_3072_sub_in_place_48(z1, z2); + u -= sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_sub_in_place_48(r + 24, z1); + zero[0] = u; + (void)sp_3072_add_24(r + 72, r + 72, zero); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm 
%[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -12524,23 +28212,32 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[96]; + sp_digit* z2 = r + 96; sp_digit z1[96]; - sp_digit a1[48]; + sp_digit* a1 = z1; + sp_digit zero[48]; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 48); + + mask = sp_3072_sub_48(a1, a, &a[48]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_3072_sub_48(a1, p1, p2); - u = sp_3072_add_48(a1, a, &a[48]); - sp_3072_sqr_48(z1, a1); sp_3072_sqr_48(z2, &a[48]); sp_3072_sqr_48(z0, a); - sp_3072_mask_48(r + 96, a1, 0 - u); - u += sp_3072_add_48(r + 96, r + 96, r + 96); - u += sp_3072_sub_in_place_96(z1, z2); - u += sp_3072_sub_in_place_96(z1, z0); - u += sp_3072_add_96(r + 48, r + 48, z1); - r[144] = u; - XMEMSET(r + 144 
+ 1, 0, sizeof(sp_digit) * (48 - 1)); - (void)sp_3072_add_96(r + 96, r + 96, z2); + sp_3072_sqr_48(z1, a1); + + u = 0; + u -= sp_3072_sub_in_place_96(z1, z2); + u -= sp_3072_sub_in_place_96(z1, z0); + u += sp_3072_sub_in_place_96(r + 48, z1); + zero[0] = u; + (void)sp_3072_add_48(r + 144, r + 144, zero); } #endif /* !WOLFSSL_SP_SMALL */ @@ -12551,41 +28248,35 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #384\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x180\n\t" + "\n" + "L_sp_3072_add_96_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_add_96_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", 
"r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -12595,40 +28286,33 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #384\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x180\n\t" + "\n" + "L_sp_3072_sub_in_pkace_96_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -12639,57 +28323,196 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) * a A single precision integer. 
* b A single precision integer. */ -static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #768\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x300\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #380\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_3072_mul_96_outer_%=: \n\t" + "subs r3, r5, #0x17c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_3072_mul_96_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, 
#16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #384\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_mul_96_inner_done_%=\n\t" + "blt L_sp_3072_mul_96_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_mul_96_inner_done_%=: \n\t" "str r6, [sp, 
r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #760\n\t" - "ble 1b\n\t" + "cmp r5, #0x2f4\n\t" + "ble L_sp_3072_mul_96_outer_%=\n\t" + "ldr lr, [%[a], #380]\n\t" + "ldr r11, [%[b], #380]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_3072_mul_96_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_3072_mul_96_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -12698,73 +28521,155 @@ static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #768\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x300\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #380\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_3072_sqr_96_outer_%=: \n\t" + "subs r3, r5, #0x17c\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_3072_sqr_96_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + 
"adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #384\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_3072_sqr_96_inner_done_%=\n\t" + "blt L_sp_3072_sqr_96_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_sqr_96_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #760\n\t" - "ble 1b\n\t" + "cmp r5, #0x2f4\n\t" + "ble L_sp_3072_sqr_96_outer_%=\n\t" + "ldr lr, [%[a], #380]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" 
+#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_3072_sqr_96_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_3072_sqr_96_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -12794,41 +28699,35 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #192\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0xc0\n\t" + "\n" + "L_sp_3072_add_48_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_add_48_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -12838,40 +28737,33 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #192\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0xc0\n\t" + "\n" + "L_sp_3072_sub_in_pkace_48_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -12882,57 +28774,196 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #384\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x180\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #188\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_3072_mul_48_outer_%=: \n\t" + "subs r3, r5, #0xbc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_3072_mul_48_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #192\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_3072_mul_48_inner_done_%=\n\t" + "blt L_sp_3072_mul_48_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_mul_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, 
r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #376\n\t" - "ble 1b\n\t" + "cmp r5, #0x174\n\t" + "ble L_sp_3072_mul_48_outer_%=\n\t" + "ldr lr, [%[a], #188]\n\t" + "ldr r11, [%[b], #188]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_3072_mul_48_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_3072_mul_48_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -12941,80 +28972,162 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #384\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x180\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #188\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_3072_sqr_48_outer_%=: \n\t" + "subs r3, r5, #0xbc\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_3072_sqr_48_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + 
"adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #192\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_3072_sqr_48_inner_done_%=\n\t" + "blt L_sp_3072_sqr_48_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_sqr_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #376\n\t" - "ble 1b\n\t" + "cmp r5, #0x174\n\t" + "ble L_sp_3072_sqr_48_outer_%=\n\t" + "ldr lr, [%[a], #188]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" 
+#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_3072_sqr_48_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_3072_sqr_48_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } #endif /* WOLFSSL_SP_SMALL */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -13034,723 +29147,3194 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_3072_mul_d_96_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #384\n\t" - "blt 1b\n\t" + "cmp r9, #0x180\n\t" + "blt L_sp_3072_mul_d_96_word_%=\n\t" "str r3, [%[r], #384]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" - "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" - "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" - "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" - "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" - "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" - "umull r6, r7, %[b], r8\n\t" 
- "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" - "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" - "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" - "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" - "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" - "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" - "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" - "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" - "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" - "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" - "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" - "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" - "umull r6, r7, %[b], 
r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #252]\n\t" - "# A[64] * B\n\t" - "ldr r8, [%[a], #256]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #256]\n\t" - "# A[65] * B\n\t" - "ldr r8, [%[a], #260]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #260]\n\t" - "# A[66] * B\n\t" - "ldr r8, [%[a], #264]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #264]\n\t" - "# A[67] * B\n\t" - "ldr r8, [%[a], #268]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #268]\n\t" - "# A[68] * B\n\t" - "ldr r8, [%[a], #272]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #272]\n\t" - "# A[69] * B\n\t" - "ldr r8, [%[a], #276]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #276]\n\t" - "# A[70] * B\n\t" - "ldr r8, [%[a], #280]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #280]\n\t" - "# A[71] * B\n\t" - "ldr r8, [%[a], #284]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #284]\n\t" - "# A[72] * B\n\t" - "ldr r8, [%[a], #288]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #288]\n\t" - "# A[73] * B\n\t" - "ldr r8, [%[a], #292]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #292]\n\t" - "# A[74] * B\n\t" - "ldr r8, [%[a], #296]\n\t" - "umull r6, r7, 
%[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #296]\n\t" - "# A[75] * B\n\t" - "ldr r8, [%[a], #300]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #300]\n\t" - "# A[76] * B\n\t" - "ldr r8, [%[a], #304]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #304]\n\t" - "# A[77] * B\n\t" - "ldr r8, [%[a], #308]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #308]\n\t" - "# A[78] * B\n\t" - "ldr r8, [%[a], #312]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #312]\n\t" - "# A[79] * B\n\t" - "ldr r8, [%[a], #316]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #316]\n\t" - "# A[80] * B\n\t" - "ldr r8, [%[a], #320]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #320]\n\t" - "# A[81] * B\n\t" - "ldr r8, [%[a], #324]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #324]\n\t" - "# A[82] * B\n\t" - "ldr r8, [%[a], #328]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #328]\n\t" - "# A[83] * B\n\t" - "ldr r8, [%[a], #332]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #332]\n\t" - "# A[84] * B\n\t" - "ldr r8, [%[a], #336]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #336]\n\t" - "# A[85] * B\n\t" - "ldr r8, [%[a], #340]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #340]\n\t" - "# A[86] * B\n\t" - "ldr r8, [%[a], #344]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #344]\n\t" - "# A[87] * B\n\t" - "ldr r8, [%[a], #348]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #348]\n\t" - "# A[88] * B\n\t" - "ldr r8, [%[a], #352]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #352]\n\t" - "# A[89] * B\n\t" - "ldr r8, [%[a], #356]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #356]\n\t" - "# A[90] * B\n\t" - "ldr r8, [%[a], #360]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #360]\n\t" - "# A[91] * B\n\t" - "ldr r8, [%[a], #364]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #364]\n\t" - "# A[92] * B\n\t" - "ldr r8, [%[a], #368]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #368]\n\t" - "# A[93] * B\n\t" - "ldr r8, [%[a], #372]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #372]\n\t" - "# A[94] * B\n\t" - "ldr r8, [%[a], #376]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #376]\n\t" - "# A[95] * B\n\t" - "ldr r8, [%[a], #380]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" - "str r5, [%[r], #380]\n\t" - "str r3, [%[r], #384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : 
"memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, 
{r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, 
#16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov 
r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[32] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[33] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul 
r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[34] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[35] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[36] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[37] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[38] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, 
r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[39] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[40] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[41] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[42] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[43] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[44] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[45] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[46] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[47] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[48] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[49] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[50] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[51] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[52] * 
B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[53] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[54] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, 
r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[55] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[56] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[57] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[58] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[59] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, 
r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[60] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[61] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[62] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[63] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[64] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[65] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[66] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[67] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[68] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[69] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[70] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[71] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[72] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[73] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[74] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[75] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[76] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[77] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[78] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[79] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[80] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[81] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[82] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[83] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[84] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[85] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[86] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[87] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[88] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[89] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[90] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[91] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[92] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[93] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[94] * B */ + "ldm %[a]!, {r8}\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[95] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. 
@@ -13766,6 +32350,7 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) sp_3072_sub_in_place_48(r, m); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -13774,750 +32359,2385 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_3072_cond_sub_48_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #192\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0xc0\n\t" + "blt L_sp_3072_cond_sub_48_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, 
%[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" 
- "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and 
r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, [%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" 
- "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, 
{r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm 
%[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. 
* m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, 
r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - 
"ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds 
r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, 
r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" 
+ "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += 
m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str 
r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl 
r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + 
"adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - 
"ldr r9, [%[a], #156]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], 
#184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, 
r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + 
"lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + 
"mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + 
/* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #188]\n\t" +#else "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #188]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" - "ldr r9, [%[a], #192]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #192]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "ldr r10, [%[a], #192]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #192\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0xc0\n\t" + "blt L_sp_3072_mont_reduce_48_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. 
+ * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, 
[%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds 
r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" 
+ /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, 
[%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], 
#176]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "ldr r10, [%[a], #192]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0xc0\n\t" + "blt L_sp_3072_mont_reduce_48_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, 
[%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], 
#96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #124]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r10, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #128]\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r10, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #132]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r10, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #136]\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r10, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #140]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r10, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #144]\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r10, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #148]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r10, [%[m], 
#152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #152]\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r10, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #156]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r10, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #160]\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r10, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #164]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r10, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #168]\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r10, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #172]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r10, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #176]\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r10, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #180]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r10, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #184]\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r10, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #192]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #188]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #192]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0xc0\n\t" + "blt L_sp_3072_mont_reduce_48_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", 
"r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -14525,9 +34745,9 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_48(r, a, b); @@ -14539,458 +34759,1865 @@ static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_48(r, a); sp_3072_mont_reduce_48(r, m, mp); } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_3072_mul_d_48_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #192\n\t" - "blt 1b\n\t" + "cmp r9, #0xc0\n\t" + "blt L_sp_3072_mul_d_48_word_%=\n\t" "str r3, [%[r], #192]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" - "str r5, [%[r], #188]\n\t" - "str r3, [%[r], #192]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov 
r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, 
r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, 
r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * 
B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, 
r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[32] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[33] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, 
r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[34] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[35] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[36] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[37] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[38] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[39] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[40] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[41] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[42] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[43] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[44] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[45] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[46] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[47] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + 
"lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_3072_word_48_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_3072_word_48_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, 
r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), 
[d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Compare a with b in constant time. * * a A single precision integer. @@ -14998,576 +36625,571 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #188\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0xbc\n\t" + "\n" + "L_sp_3072_cmp_48_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_3072_cmp_48_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + 
"subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], 
r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + 
"and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], 
#112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + 
"movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr 
lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], 
#56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" 
"it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], 
#20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, 
r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -15579,8 +37201,8 @@ static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -15588,12 +37210,15 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig (void)m; - div = d[47]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); - for (i=47; i>=0; i--) { - sp_digit hi = t1[48 + i] - (t1[48 + i] == div); + r1 = sp_3072_cmp_48(&t1[48], d) >= 0; + sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1); + for (i = 47; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[48 + i] == div); + sp_digit hi = t1[48 + i] + mask; r1 = div_3072_word_48(hi, t1[48 + i - 1], div); + r1 |= mask; sp_3072_mul_d_48(t2, d, r1); t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); @@ -15617,7 +37242,8 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. 
* returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_48(a, m, NULL, r); } @@ -15630,12 +37256,14 @@ static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 96]; @@ -15650,11 +37278,17 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15714,6 +37348,10 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -15755,7 +37393,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_48(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -15770,12 +37408,14 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[32 * 96]; @@ -15790,11 +37430,17 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 96), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 96), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15870,6 +37516,10 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -15912,7 +37562,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_48(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -15939,6 +37589,7 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -15947,1422 +37598,4569 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_3072_cond_sub_96_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #384\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, 
lr, #4\n\t" + "cmp lr, #0x180\n\t" + "blt L_sp_3072_cond_sub_96_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and 
r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr 
r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, 
[%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, [%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" - "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r6, [%[a], #196]\n\t" - "ldr r5, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #192]\n\t" - "str r6, [%[r], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r5, [%[b], #200]\n\t" - "ldr r7, [%[b], #204]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r6, [%[a], #212]\n\t" - "ldr r5, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #208]\n\t" - "str r6, [%[r], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r5, [%[b], #216]\n\t" - "ldr r7, [%[b], #220]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r6, [%[a], #228]\n\t" - "ldr r5, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - 
"sbcs r6, r6, r7\n\t" - "str r4, [%[r], #224]\n\t" - "str r6, [%[r], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r5, [%[b], #232]\n\t" - "ldr r7, [%[b], #236]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r6, [%[a], #244]\n\t" - "ldr r5, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #240]\n\t" - "str r6, [%[r], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r5, [%[b], #248]\n\t" - "ldr r7, [%[b], #252]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r6, [%[a], #260]\n\t" - "ldr r5, [%[b], #256]\n\t" - "ldr r7, [%[b], #260]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #256]\n\t" - "str r6, [%[r], #260]\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r6, [%[a], #268]\n\t" - "ldr r5, [%[b], #264]\n\t" - "ldr r7, [%[b], #268]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #264]\n\t" - "str r6, [%[r], #268]\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r6, [%[a], #276]\n\t" - "ldr r5, [%[b], #272]\n\t" - "ldr r7, [%[b], #276]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #272]\n\t" - "str r6, [%[r], #276]\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r6, [%[a], #284]\n\t" - "ldr r5, [%[b], #280]\n\t" - "ldr r7, [%[b], #284]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #280]\n\t" - "str r6, [%[r], #284]\n\t" - "ldr r4, 
[%[a], #288]\n\t" - "ldr r6, [%[a], #292]\n\t" - "ldr r5, [%[b], #288]\n\t" - "ldr r7, [%[b], #292]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #288]\n\t" - "str r6, [%[r], #292]\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r6, [%[a], #300]\n\t" - "ldr r5, [%[b], #296]\n\t" - "ldr r7, [%[b], #300]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #296]\n\t" - "str r6, [%[r], #300]\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r6, [%[a], #308]\n\t" - "ldr r5, [%[b], #304]\n\t" - "ldr r7, [%[b], #308]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #304]\n\t" - "str r6, [%[r], #308]\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r6, [%[a], #316]\n\t" - "ldr r5, [%[b], #312]\n\t" - "ldr r7, [%[b], #316]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #312]\n\t" - "str r6, [%[r], #316]\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r6, [%[a], #324]\n\t" - "ldr r5, [%[b], #320]\n\t" - "ldr r7, [%[b], #324]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #320]\n\t" - "str r6, [%[r], #324]\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r6, [%[a], #332]\n\t" - "ldr r5, [%[b], #328]\n\t" - "ldr r7, [%[b], #332]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #328]\n\t" - "str r6, [%[r], #332]\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r6, [%[a], #340]\n\t" - "ldr r5, [%[b], #336]\n\t" - "ldr r7, [%[b], #340]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #336]\n\t" - "str r6, [%[r], #340]\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r6, [%[a], #348]\n\t" - "ldr r5, [%[b], #344]\n\t" - "ldr r7, [%[b], 
#348]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #344]\n\t" - "str r6, [%[r], #348]\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r6, [%[a], #356]\n\t" - "ldr r5, [%[b], #352]\n\t" - "ldr r7, [%[b], #356]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #352]\n\t" - "str r6, [%[r], #356]\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r6, [%[a], #364]\n\t" - "ldr r5, [%[b], #360]\n\t" - "ldr r7, [%[b], #364]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #360]\n\t" - "str r6, [%[r], #364]\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r6, [%[a], #372]\n\t" - "ldr r5, [%[b], #368]\n\t" - "ldr r7, [%[b], #372]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #368]\n\t" - "str r6, [%[r], #372]\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r6, [%[a], #380]\n\t" - "ldr r5, [%[b], #376]\n\t" - "ldr r7, [%[b], #380]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #376]\n\t" - "str r6, [%[r], #380]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, 
r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, 
r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs 
r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + 
"sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - 
"ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, 
r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], 
#32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + 
"lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, 
r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, 
r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, 
r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr 
r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, 
r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" 
+ "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - 
"adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" 
- "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], #200]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, 
r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" 
+ "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" - "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+64] += m[64] * mu\n\t" - "ldr r7, [%[m], #256]\n\t" - "ldr r9, [%[a], #256]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #256]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+65] += m[65] * mu\n\t" - "ldr r7, [%[m], #260]\n\t" - "ldr r9, [%[a], #260]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+66] += m[66] * mu\n\t" - "ldr r7, [%[m], #264]\n\t" - "ldr r9, [%[a], #264]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #264]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+67] += m[67] * mu\n\t" - "ldr r7, [%[m], #268]\n\t" - "ldr r9, [%[a], #268]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+68] += m[68] * mu\n\t" - "ldr r7, [%[m], #272]\n\t" - "ldr r9, [%[a], #272]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #272]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+69] += m[69] * mu\n\t" - "ldr r7, [%[m], #276]\n\t" - "ldr r9, [%[a], #276]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, 
r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+70] += m[70] * mu\n\t" - "ldr r7, [%[m], #280]\n\t" - "ldr r9, [%[a], #280]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #280]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+71] += m[71] * mu\n\t" - "ldr r7, [%[m], #284]\n\t" - "ldr r9, [%[a], #284]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+72] += m[72] * mu\n\t" - "ldr r7, [%[m], #288]\n\t" - "ldr r9, [%[a], #288]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #288]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+73] += 
m[73] * mu\n\t" - "ldr r7, [%[m], #292]\n\t" - "ldr r9, [%[a], #292]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+74] += m[74] * mu\n\t" - "ldr r7, [%[m], #296]\n\t" - "ldr r9, [%[a], #296]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #296]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+75] += m[75] * mu\n\t" - "ldr r7, [%[m], #300]\n\t" - "ldr r9, [%[a], #300]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+76] += m[76] * mu\n\t" - "ldr r7, [%[m], #304]\n\t" - "ldr r9, [%[a], #304]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #304]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+77] += m[77] * mu\n\t" - "ldr r7, [%[m], #308]\n\t" - "ldr r9, [%[a], #308]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+78] += m[78] * mu\n\t" - "ldr r7, [%[m], #312]\n\t" - "ldr r9, [%[a], #312]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #312]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+79] += m[79] * mu\n\t" - "ldr r7, [%[m], #316]\n\t" - "ldr r9, [%[a], #316]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #316]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu 
*/ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+80] += m[80] * mu\n\t" - "ldr r7, [%[m], #320]\n\t" - "ldr r9, [%[a], #320]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #320]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+81] += m[81] * mu\n\t" - "ldr r7, [%[m], #324]\n\t" - "ldr r9, [%[a], #324]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+82] += m[82] * mu\n\t" - "ldr r7, [%[m], #328]\n\t" - "ldr r9, [%[a], #328]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #328]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+83] += m[83] * mu\n\t" - "ldr r7, [%[m], #332]\n\t" - "ldr r9, [%[a], #332]\n\t" - "umull r6, 
r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+84] += m[84] * mu\n\t" - "ldr r7, [%[m], #336]\n\t" - "ldr r9, [%[a], #336]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #336]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+85] += m[85] * mu\n\t" - "ldr r7, [%[m], #340]\n\t" - "ldr r9, [%[a], #340]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+86] += m[86] * mu\n\t" - "ldr r7, [%[m], #344]\n\t" - "ldr r9, [%[a], #344]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #344]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+87] += m[87] * mu\n\t" - "ldr r7, [%[m], #348]\n\t" - "ldr r9, [%[a], #348]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+88] += m[88] * mu\n\t" - "ldr r7, [%[m], #352]\n\t" - "ldr r9, [%[a], #352]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #352]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+89] += m[89] * mu\n\t" - "ldr r7, [%[m], #356]\n\t" - "ldr r9, [%[a], #356]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" "adc r4, r4, #0\n\t" - "# a[i+90] += m[90] * mu\n\t" - "ldr r7, [%[m], #360]\n\t" - "ldr r9, [%[a], #360]\n\t" - 
"umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #360]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+91] += m[91] * mu\n\t" - "ldr r7, [%[m], #364]\n\t" - "ldr r9, [%[a], #364]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+92] += m[92] * mu\n\t" - "ldr r7, [%[m], #368]\n\t" - "ldr r9, [%[a], #368]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #368]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+93] += m[93] * mu\n\t" - "ldr r7, [%[m], #372]\n\t" - "ldr r9, [%[a], #372]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - 
"adds r9, r9, r5\n\t" - "str r9, [%[a], #372]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" "adc r4, r4, #0\n\t" - "# a[i+94] += m[94] * mu\n\t" - "ldr r7, [%[m], #376]\n\t" - "ldr r9, [%[a], #376]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #376]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+95] += m[95] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" 
+ /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "lsr r11, 
r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, 
r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, 
r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, [%[a], #304]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, 
r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], 
#316]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, [%[a], #328]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, 
[%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, 
r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+95] += m[95] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #380]\n\t" +#else "ldr r7, [%[m], #380]\n\t" - "ldr r9, [%[a], #380]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #380]\n\t" + "lsl r6, r8, 
#16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #380]\n\t" - "ldr r9, [%[a], #384]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #384]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" + "ldr r10, [%[a], #384]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #384\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x180\n\t" + "blt L_sp_3072_mont_reduce_96_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 3072 bits using Montgomery reduction. 
+ * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += 
m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov 
r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + 
"str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, 
#0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr 
r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], 
#212]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "mov r5, #0\n\t" + "umlal r10, 
r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + 
"str r10, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, [%[a], #304]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], #316]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, 
#0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, [%[a], #328]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+90] += m[90] * mu */ + "ldr 
r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r7, [%[m], #380]\n\t" + "ldr r10, [%[a], #380]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" + "ldr r10, [%[a], #384]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x180\n\t" + "blt L_sp_3072_mont_reduce_96_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. 
+ * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr 
r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] 
* mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #124]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r10, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #128]\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r10, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #132]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r10, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #136]\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r10, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #140]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r10, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #144]\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r10, [%[m], #148]\n\t" + "ldr r9, [%[a], 
#148]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #148]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r10, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #152]\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r10, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #156]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r10, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #160]\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r10, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #164]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r10, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #168]\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r10, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #172]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r10, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #176]\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r10, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #180]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r10, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #184]\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r10, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #188]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r10, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #192]\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r10, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #196]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r10, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], 
#200]\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r10, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #204]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r10, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #208]\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r10, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #212]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r10, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #216]\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r10, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #220]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r10, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #224]\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r10, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #228]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r10, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #232]\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r10, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #236]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r10, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #240]\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r10, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #244]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r10, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #248]\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r10, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #252]\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r10, [%[m], 
#256]\n\t" + "ldr r9, [%[a], #256]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #256]\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r10, [%[m], #260]\n\t" + "ldr r9, [%[a], #260]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #260]\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r10, [%[m], #264]\n\t" + "ldr r9, [%[a], #264]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #264]\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r10, [%[m], #268]\n\t" + "ldr r9, [%[a], #268]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #268]\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r10, [%[m], #272]\n\t" + "ldr r9, [%[a], #272]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #272]\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r10, [%[m], #276]\n\t" + "ldr r9, [%[a], #276]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #276]\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r10, [%[m], #280]\n\t" + "ldr r9, [%[a], #280]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #280]\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r10, [%[m], #284]\n\t" + "ldr r9, [%[a], #284]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #284]\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r10, [%[m], #288]\n\t" + "ldr r9, [%[a], #288]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #288]\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r10, [%[m], #292]\n\t" + "ldr r9, [%[a], #292]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #292]\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r10, [%[m], #296]\n\t" + "ldr r9, [%[a], #296]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #296]\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r10, [%[m], #300]\n\t" + "ldr r9, [%[a], #300]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #300]\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r10, [%[m], #304]\n\t" + "ldr r9, [%[a], #304]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #304]\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r10, [%[m], #308]\n\t" + "ldr r9, [%[a], #308]\n\t" + "umaal r9, r3, r11, 
r10\n\t" + "str r9, [%[a], #308]\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r10, [%[m], #312]\n\t" + "ldr r9, [%[a], #312]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #312]\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r10, [%[m], #316]\n\t" + "ldr r9, [%[a], #316]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #316]\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r10, [%[m], #320]\n\t" + "ldr r9, [%[a], #320]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #320]\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r10, [%[m], #324]\n\t" + "ldr r9, [%[a], #324]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #324]\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r10, [%[m], #328]\n\t" + "ldr r9, [%[a], #328]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #328]\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r10, [%[m], #332]\n\t" + "ldr r9, [%[a], #332]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #332]\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r10, [%[m], #336]\n\t" + "ldr r9, [%[a], #336]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #336]\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r10, [%[m], #340]\n\t" + "ldr r9, [%[a], #340]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #340]\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r10, [%[m], #344]\n\t" + "ldr r9, [%[a], #344]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #344]\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r10, [%[m], #348]\n\t" + "ldr r9, [%[a], #348]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #348]\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r10, [%[m], #352]\n\t" + "ldr r9, [%[a], #352]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #352]\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r10, [%[m], #356]\n\t" + "ldr r9, [%[a], #356]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #356]\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r10, [%[m], #360]\n\t" + "ldr r9, [%[a], #360]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #360]\n\t" + /* a[i+91] += m[91] * 
mu */ + "ldr r10, [%[m], #364]\n\t" + "ldr r9, [%[a], #364]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #364]\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r10, [%[m], #368]\n\t" + "ldr r9, [%[a], #368]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #368]\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r10, [%[m], #372]\n\t" + "ldr r9, [%[a], #372]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #372]\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r10, [%[m], #376]\n\t" + "ldr r9, [%[a], #376]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #376]\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r10, [%[m], #380]\n\t" + "ldr r9, [%[a], #380]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #384]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #380]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #384]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x180\n\t" + "blt L_sp_3072_mont_reduce_96_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -17370,9 +42168,9 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_96(r, a, b); @@ -17384,9 +42182,9 @@ static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_96(r, a); @@ -17400,40 +42198,34 @@ static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #384\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x180\n\t" + "\n" + "L_sp_3072_sub_96_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] 
"+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_96_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -17443,468 +42235,388 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], 
#28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r7, [%[b], #32]\n\t" - "ldr r8, [%[b], #36]\n\t" - "ldr r9, [%[b], #40]\n\t" - "ldr r10, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #32]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r7, [%[b], #48]\n\t" - "ldr r8, [%[b], #52]\n\t" - "ldr r9, [%[b], #56]\n\t" - "ldr r10, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #48]\n\t" - "str r4, [%[r], #52]\n\t" - "str r5, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r7, [%[b], #64]\n\t" - "ldr r8, [%[b], #68]\n\t" - "ldr r9, [%[b], #72]\n\t" - "ldr r10, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #64]\n\t" - "str r4, [%[r], #68]\n\t" - "str r5, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r7, [%[b], #80]\n\t" - "ldr r8, [%[b], #84]\n\t" - "ldr r9, [%[b], #88]\n\t" - "ldr r10, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs 
r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #80]\n\t" - "str r4, [%[r], #84]\n\t" - "str r5, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r7, [%[b], #96]\n\t" - "ldr r8, [%[b], #100]\n\t" - "ldr r9, [%[b], #104]\n\t" - "ldr r10, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #96]\n\t" - "str r4, [%[r], #100]\n\t" - "str r5, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r7, [%[b], #112]\n\t" - "ldr r8, [%[b], #116]\n\t" - "ldr r9, [%[b], #120]\n\t" - "ldr r10, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #112]\n\t" - "str r4, [%[r], #116]\n\t" - "str r5, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r3, [%[a], #128]\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r7, [%[b], #128]\n\t" - "ldr r8, [%[b], #132]\n\t" - "ldr r9, [%[b], #136]\n\t" - "ldr r10, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #128]\n\t" - "str r4, [%[r], #132]\n\t" - "str r5, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r3, [%[a], #144]\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r7, [%[b], #144]\n\t" - "ldr r8, [%[b], #148]\n\t" - "ldr r9, [%[b], #152]\n\t" - "ldr 
r10, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #144]\n\t" - "str r4, [%[r], #148]\n\t" - "str r5, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r3, [%[a], #160]\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r7, [%[b], #160]\n\t" - "ldr r8, [%[b], #164]\n\t" - "ldr r9, [%[b], #168]\n\t" - "ldr r10, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #160]\n\t" - "str r4, [%[r], #164]\n\t" - "str r5, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r3, [%[a], #176]\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r7, [%[b], #176]\n\t" - "ldr r8, [%[b], #180]\n\t" - "ldr r9, [%[b], #184]\n\t" - "ldr r10, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #176]\n\t" - "str r4, [%[r], #180]\n\t" - "str r5, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r3, [%[a], #192]\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r7, [%[b], #192]\n\t" - "ldr r8, [%[b], #196]\n\t" - "ldr r9, [%[b], #200]\n\t" - "ldr r10, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #192]\n\t" - "str r4, [%[r], #196]\n\t" - "str r5, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r3, [%[a], 
#208]\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r7, [%[b], #208]\n\t" - "ldr r8, [%[b], #212]\n\t" - "ldr r9, [%[b], #216]\n\t" - "ldr r10, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #208]\n\t" - "str r4, [%[r], #212]\n\t" - "str r5, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r3, [%[a], #224]\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r7, [%[b], #224]\n\t" - "ldr r8, [%[b], #228]\n\t" - "ldr r9, [%[b], #232]\n\t" - "ldr r10, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #224]\n\t" - "str r4, [%[r], #228]\n\t" - "str r5, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r3, [%[a], #240]\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r7, [%[b], #240]\n\t" - "ldr r8, [%[b], #244]\n\t" - "ldr r9, [%[b], #248]\n\t" - "ldr r10, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #240]\n\t" - "str r4, [%[r], #244]\n\t" - "str r5, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "ldr r3, [%[a], #256]\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[a], #264]\n\t" - "ldr r6, [%[a], #268]\n\t" - "ldr r7, [%[b], #256]\n\t" - "ldr r8, [%[b], #260]\n\t" - "ldr r9, [%[b], #264]\n\t" - "ldr r10, [%[b], #268]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, 
r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #256]\n\t" - "str r4, [%[r], #260]\n\t" - "str r5, [%[r], #264]\n\t" - "str r6, [%[r], #268]\n\t" - "ldr r3, [%[a], #272]\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[a], #280]\n\t" - "ldr r6, [%[a], #284]\n\t" - "ldr r7, [%[b], #272]\n\t" - "ldr r8, [%[b], #276]\n\t" - "ldr r9, [%[b], #280]\n\t" - "ldr r10, [%[b], #284]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #272]\n\t" - "str r4, [%[r], #276]\n\t" - "str r5, [%[r], #280]\n\t" - "str r6, [%[r], #284]\n\t" - "ldr r3, [%[a], #288]\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[a], #296]\n\t" - "ldr r6, [%[a], #300]\n\t" - "ldr r7, [%[b], #288]\n\t" - "ldr r8, [%[b], #292]\n\t" - "ldr r9, [%[b], #296]\n\t" - "ldr r10, [%[b], #300]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #288]\n\t" - "str r4, [%[r], #292]\n\t" - "str r5, [%[r], #296]\n\t" - "str r6, [%[r], #300]\n\t" - "ldr r3, [%[a], #304]\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[a], #312]\n\t" - "ldr r6, [%[a], #316]\n\t" - "ldr r7, [%[b], #304]\n\t" - "ldr r8, [%[b], #308]\n\t" - "ldr r9, [%[b], #312]\n\t" - "ldr r10, [%[b], #316]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #304]\n\t" - "str r4, [%[r], #308]\n\t" - "str r5, [%[r], #312]\n\t" - "str r6, [%[r], #316]\n\t" - "ldr r3, [%[a], #320]\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[a], #328]\n\t" - "ldr r6, [%[a], #332]\n\t" - "ldr r7, [%[b], #320]\n\t" - "ldr r8, [%[b], #324]\n\t" - "ldr r9, 
[%[b], #328]\n\t" - "ldr r10, [%[b], #332]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #320]\n\t" - "str r4, [%[r], #324]\n\t" - "str r5, [%[r], #328]\n\t" - "str r6, [%[r], #332]\n\t" - "ldr r3, [%[a], #336]\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[a], #344]\n\t" - "ldr r6, [%[a], #348]\n\t" - "ldr r7, [%[b], #336]\n\t" - "ldr r8, [%[b], #340]\n\t" - "ldr r9, [%[b], #344]\n\t" - "ldr r10, [%[b], #348]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #336]\n\t" - "str r4, [%[r], #340]\n\t" - "str r5, [%[r], #344]\n\t" - "str r6, [%[r], #348]\n\t" - "ldr r3, [%[a], #352]\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[a], #360]\n\t" - "ldr r6, [%[a], #364]\n\t" - "ldr r7, [%[b], #352]\n\t" - "ldr r8, [%[b], #356]\n\t" - "ldr r9, [%[b], #360]\n\t" - "ldr r10, [%[b], #364]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #352]\n\t" - "str r4, [%[r], #356]\n\t" - "str r5, [%[r], #360]\n\t" - "str r6, [%[r], #364]\n\t" - "ldr r3, [%[a], #368]\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[a], #376]\n\t" - "ldr r6, [%[a], #380]\n\t" - "ldr r7, [%[b], #368]\n\t" - "ldr r8, [%[b], #372]\n\t" - "ldr r9, [%[b], #376]\n\t" - "ldr r10, [%[b], #380]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #368]\n\t" - "str r4, [%[r], #372]\n\t" - "str r5, [%[r], #376]\n\t" - "str r6, [%[r], 
#380]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - 
"sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_3072_word_96_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_3072_word_96_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, 
%[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -17914,8 +42626,8 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -17923,12 +42635,22 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s (void)m; - div = d[95]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 96); - for (i=95; i>=0; i--) { - sp_digit hi = t1[96 + i] - (t1[96 + i] == div); - r1 = div_3072_word_96(hi, t1[96 + i - 1], div); + for (i = 95; i > 0; i--) { + if (t1[i + 96] != d[i]) + break; + } + if (t1[i + 96] >= d[i]) { + sp_3072_sub_in_place_96(&t1[96], d); + } + for (i = 95; i >= 0; i--) { + if (t1[96 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div); + } sp_3072_mul_d_96(t2, d, r1); t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2); @@ -17961,7 +42683,8 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_96_cond(a, m, NULL, r); } @@ -18005,1104 +42728,1105 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ -static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #380\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r4, #0x1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #0x7c\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #380]\n\t" - "ldr r5, [%[b], #380]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[b], #376]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[b], #372]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[b], #368]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it 
hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #364]\n\t" - "ldr r5, [%[b], #364]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r5, [%[b], #360]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[b], #356]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[b], #352]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #348]\n\t" - "ldr r5, [%[b], #348]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[b], #344]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[b], #340]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[b], #336]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" 
- "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #332]\n\t" - "ldr r5, [%[b], #332]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[b], #328]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[b], #324]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[b], #320]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #316]\n\t" - "ldr r5, [%[b], #316]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[b], #312]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[b], #308]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[b], #304]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #300]\n\t" - "ldr 
r5, [%[b], #300]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[b], #296]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[b], #292]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[b], #288]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #284]\n\t" - "ldr r5, [%[b], #284]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[b], #280]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[b], #276]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, [%[b], #272]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #268]\n\t" - "ldr r5, [%[b], #268]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - 
"subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[b], #264]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[b], #260]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[b], #256]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it 
lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[b], #228]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - 
"ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - 
"movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it 
ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" 
- "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - 
"movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, 
r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "mov r4, #0x17c\n\t" #endif - - return r; + "\n" + "L_sp_3072_cmp_96_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_3072_cmp_96_words_%=\n\t" + "eor r2, r2, r3\n\t" +#else + "ldr r12, [%[a], #380]\n\t" + "ldr lr, [%[b], #380]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #376]\n\t" + "ldr lr, [%[b], #376]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #372]\n\t" + "ldr lr, [%[b], #372]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #368]\n\t" + "ldr lr, [%[b], #368]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #364]\n\t" + "ldr lr, [%[b], #364]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], 
#360]\n\t" + "ldr lr, [%[b], #360]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #356]\n\t" + "ldr lr, [%[b], #356]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #352]\n\t" + "ldr lr, [%[b], #352]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #348]\n\t" + "ldr lr, [%[b], #348]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #344]\n\t" + "ldr lr, [%[b], #344]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #340]\n\t" + "ldr lr, [%[b], #340]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #336]\n\t" + "ldr lr, [%[b], #336]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #332]\n\t" + "ldr lr, [%[b], #332]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #328]\n\t" + "ldr lr, [%[b], #328]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs 
r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #324]\n\t" + "ldr lr, [%[b], #324]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #320]\n\t" + "ldr lr, [%[b], #320]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #316]\n\t" + "ldr lr, [%[b], #316]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #312]\n\t" + "ldr lr, [%[b], #312]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #308]\n\t" + "ldr lr, [%[b], #308]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #304]\n\t" + "ldr lr, [%[b], #304]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #300]\n\t" + "ldr lr, [%[b], #300]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #296]\n\t" + "ldr lr, [%[b], #296]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it 
ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #292]\n\t" + "ldr lr, [%[b], #292]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #288]\n\t" + "ldr lr, [%[b], #288]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #284]\n\t" + "ldr lr, [%[b], #284]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #280]\n\t" + "ldr lr, [%[b], #280]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #276]\n\t" + "ldr lr, [%[b], #276]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #272]\n\t" + "ldr lr, [%[b], #272]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #268]\n\t" + "ldr lr, [%[b], #268]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #264]\n\t" + "ldr lr, [%[b], #264]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #260]\n\t" + "ldr lr, [%[b], #260]\n\t" + "and 
r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #256]\n\t" + "ldr lr, [%[b], #256]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, [%[b], #248]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, 
r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, 
[%[a], #192]\n\t" + "ldr lr, [%[b], #192]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" 
+ "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" 
+ "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + 
"and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + 
"it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, 
[%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + ); + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -19114,8 +43838,8 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) * r Remainder from the division. 
* returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -19123,12 +43847,15 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig (void)m; - div = d[95]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 96); - for (i=95; i>=0; i--) { - sp_digit hi = t1[96 + i] - (t1[96 + i] == div); + r1 = sp_3072_cmp_96(&t1[96], d) >= 0; + sp_3072_cond_sub_96(&t1[96], &t1[96], d, (sp_digit)0 - r1); + for (i = 95; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[96 + i] == div); + sp_digit hi = t1[96 + i] + mask; r1 = div_3072_word_96(hi, t1[96 + i - 1], div); + r1 |= mask; sp_3072_mul_d_96(t2, d, r1); t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2); @@ -19152,7 +43879,8 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_96(a, m, NULL, r); } @@ -19168,12 +43896,14 @@ static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[8 * 192]; @@ -19188,11 +43918,17 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 192), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 192), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19244,6 +43980,10 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -19284,7 +44024,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_96(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19299,12 +44039,14 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 192]; @@ -19319,11 +44061,17 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 192), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 192), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19383,6 +44131,10 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -19424,7 +44176,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_96(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19451,7 +44203,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[96 * 5]; @@ -19473,7 +44225,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -19483,9 +44235,9 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 96; r = a + 96 * 2; m = r + 96 * 2; - ah = a + 96; sp_3072_from_bin(ah, 96, in, inLen); #if DIGIT_BIT >= 32 @@ -19503,7 +44255,38 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_3072_from_mp(m, 96, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 96); + err = sp_3072_mod_96_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_3072_mont_sqr_96(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_3072_mont_mul_96(r, r, ah, m, mp); + + for (i = 95; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_3072_sub_in_place_96(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_3072_sqr_96(r, ah); err = sp_3072_mod_96_cond(r, r, m); @@ -19531,7 +44314,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 96); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_3072_mont_sqr_96(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_3072_mont_mul_96(r, r, a, m, mp); @@ -19557,7 +44340,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -19566,6 +44349,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -19574,284 +44358,232 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov lr, #0\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_3072_cond_add_48_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #192\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0xc0\n\t" + "blt L_sp_3072_cond_add_48_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr 
r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs 
r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, 
[%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, [%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" - "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "adc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : 
"memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov r8, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + 
"adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -19874,7 +44606,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[96 * 4]; @@ -19908,7 +44640,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -19933,21 +44665,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 96); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[48 * 11]; @@ -19975,8 +44707,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -20024,12 +44762,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { 
ForceZero(a, sizeof(sp_digit) * 48 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -20160,590 +44898,593 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 -static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) +static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register byte n asm ("r2") = (byte)n_p; + __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "ldr r3, [%[a], #380]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #376]\n\t" - "str r4, [%[r], #384]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #372]\n\t" - "str r3, [%[r], #380]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #380]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #376]\n\t" + "str r6, [%[r], #384]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #368]\n\t" - "str r2, [%[r], #376]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #364]\n\t" - "str r4, [%[r], #372]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #360]\n\t" - "str r3, [%[r], #368]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #372]\n\t" + "str r5, [%[r], #380]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #368]\n\t" + "str r4, [%[r], #376]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, 
r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #364]\n\t" + "str r6, [%[r], #372]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #356]\n\t" - "str r2, [%[r], #364]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #352]\n\t" - "str r4, [%[r], #360]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #348]\n\t" - "str r3, [%[r], #356]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #360]\n\t" + "str r5, [%[r], #368]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #356]\n\t" + "str r4, [%[r], #364]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #352]\n\t" + "str r6, [%[r], #360]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #344]\n\t" - "str r2, [%[r], #352]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #340]\n\t" - "str r4, [%[r], #348]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #336]\n\t" - "str r3, [%[r], #344]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #348]\n\t" + "str r5, [%[r], #356]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #344]\n\t" + "str r4, [%[r], #352]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #340]\n\t" + "str r6, [%[r], #348]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, 
[%[a], #332]\n\t" - "str r2, [%[r], #340]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #328]\n\t" - "str r4, [%[r], #336]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #324]\n\t" - "str r3, [%[r], #332]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #336]\n\t" + "str r5, [%[r], #344]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #332]\n\t" + "str r4, [%[r], #340]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #328]\n\t" + "str r6, [%[r], #336]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #320]\n\t" - "str r2, [%[r], #328]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #316]\n\t" - "str r4, [%[r], #324]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #312]\n\t" - "str r3, [%[r], #320]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #324]\n\t" + "str r5, [%[r], #332]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #320]\n\t" + "str r4, [%[r], #328]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #316]\n\t" + "str r6, [%[r], #324]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #308]\n\t" - "str r2, [%[r], #316]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #304]\n\t" - "str r4, [%[r], #312]\n\t" - "lsr r5, r2, #1\n\t" - 
"lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #300]\n\t" - "str r3, [%[r], #308]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #312]\n\t" + "str r5, [%[r], #320]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #308]\n\t" + "str r4, [%[r], #316]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #304]\n\t" + "str r6, [%[r], #312]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #296]\n\t" - "str r2, [%[r], #304]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #292]\n\t" - "str r4, [%[r], #300]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #288]\n\t" - "str r3, [%[r], #296]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #300]\n\t" + "str r5, [%[r], #308]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #296]\n\t" + "str r4, [%[r], #304]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #292]\n\t" + "str r6, [%[r], #300]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #284]\n\t" - "str r2, [%[r], #292]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #280]\n\t" - "str r4, [%[r], #288]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #276]\n\t" - "str r3, [%[r], #284]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], 
#288]\n\t" + "str r5, [%[r], #296]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #284]\n\t" + "str r4, [%[r], #292]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #280]\n\t" + "str r6, [%[r], #288]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #272]\n\t" - "str r2, [%[r], #280]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #268]\n\t" - "str r4, [%[r], #276]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #264]\n\t" - "str r3, [%[r], #272]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #276]\n\t" + "str r5, [%[r], #284]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #272]\n\t" + "str r4, [%[r], #280]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #268]\n\t" + "str r6, [%[r], #276]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #260]\n\t" - "str r2, [%[r], #268]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #256]\n\t" - "str r4, [%[r], #264]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #252]\n\t" - "str r3, [%[r], #260]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #264]\n\t" + "str r5, [%[r], #272]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #260]\n\t" + "str r4, [%[r], #268]\n\t" + "lsr r3, r5, #1\n\t" + "lsl 
r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #256]\n\t" + "str r6, [%[r], #264]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #248]\n\t" - "str r2, [%[r], #256]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #244]\n\t" - "str r4, [%[r], #252]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #240]\n\t" - "str r3, [%[r], #248]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #252]\n\t" + "str r5, [%[r], #260]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #248]\n\t" + "str r4, [%[r], #256]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #244]\n\t" + "str r6, [%[r], #252]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #236]\n\t" - "str r2, [%[r], #244]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #232]\n\t" - "str r4, [%[r], #240]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #228]\n\t" - "str r3, [%[r], #236]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #240]\n\t" + "str r5, [%[r], #248]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #236]\n\t" + "str r4, [%[r], #244]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #232]\n\t" + "str r6, [%[r], #240]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr 
r3, [%[a], #224]\n\t" - "str r2, [%[r], #232]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #220]\n\t" - "str r4, [%[r], #228]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #216]\n\t" - "str r3, [%[r], #224]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #228]\n\t" + "str r5, [%[r], #236]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #224]\n\t" + "str r4, [%[r], #232]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #220]\n\t" + "str r6, [%[r], #228]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #212]\n\t" - "str r2, [%[r], #220]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #208]\n\t" - "str r4, [%[r], #216]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #204]\n\t" - "str r3, [%[r], #212]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #216]\n\t" + "str r5, [%[r], #224]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #212]\n\t" + "str r4, [%[r], #220]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #208]\n\t" + "str r6, [%[r], #216]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #200]\n\t" - "str r2, [%[r], #208]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #196]\n\t" - "str r4, [%[r], #204]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #192]\n\t" - "str r3, [%[r], #200]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #204]\n\t" + "str r5, [%[r], #212]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #200]\n\t" + "str r4, [%[r], #208]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #196]\n\t" + "str r6, [%[r], #204]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #188]\n\t" - "str r2, [%[r], #196]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #184]\n\t" - "str r4, [%[r], #192]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #180]\n\t" - "str r3, [%[r], #188]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #192]\n\t" + "str r5, [%[r], #200]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #188]\n\t" + "str r4, [%[r], #196]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #184]\n\t" + "str r6, [%[r], #192]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #176]\n\t" - "str r2, [%[r], #184]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #172]\n\t" - "str r4, [%[r], #180]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #168]\n\t" - "str r3, [%[r], #176]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, 
[%[a], #180]\n\t" + "str r5, [%[r], #188]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #176]\n\t" + "str r4, [%[r], #184]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #172]\n\t" + "str r6, [%[r], #180]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #164]\n\t" - "str r2, [%[r], #172]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #160]\n\t" - "str r4, [%[r], #168]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #156]\n\t" - "str r3, [%[r], #164]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #168]\n\t" + "str r5, [%[r], #176]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #164]\n\t" + "str r4, [%[r], #172]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #160]\n\t" + "str r6, [%[r], #168]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #152]\n\t" - "str r2, [%[r], #160]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #148]\n\t" - "str r4, [%[r], #156]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #144]\n\t" - "str r3, [%[r], #152]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #156]\n\t" + "str r5, [%[r], #164]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #152]\n\t" + "str r4, [%[r], #160]\n\t" + "lsr r3, r5, #1\n\t" 
+ "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #148]\n\t" + "str r6, [%[r], #156]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #140]\n\t" - "str r2, [%[r], #148]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #136]\n\t" - "str r4, [%[r], #144]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #132]\n\t" - "str r3, [%[r], #140]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #144]\n\t" + "str r5, [%[r], #152]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #140]\n\t" + "str r4, [%[r], #148]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #136]\n\t" + "str r6, [%[r], #144]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #128]\n\t" - "str r2, [%[r], #136]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #124]\n\t" - "str r4, [%[r], #132]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #120]\n\t" - "str r3, [%[r], #128]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #132]\n\t" + "str r5, [%[r], #140]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #128]\n\t" + "str r4, [%[r], #136]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #124]\n\t" + "str r6, [%[r], #132]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - 
"ldr r3, [%[a], #116]\n\t" - "str r2, [%[r], #124]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #112]\n\t" - "str r4, [%[r], #120]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #108]\n\t" - "str r3, [%[r], #116]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #120]\n\t" + "str r5, [%[r], #128]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #116]\n\t" + "str r4, [%[r], #124]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #112]\n\t" + "str r6, [%[r], #120]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #104]\n\t" - "str r2, [%[r], #112]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #100]\n\t" - "str r4, [%[r], #108]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #96]\n\t" - "str r3, [%[r], #104]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #108]\n\t" + "str r5, [%[r], #116]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #104]\n\t" + "str r4, [%[r], #112]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #100]\n\t" + "str r6, [%[r], #108]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #92]\n\t" - "str r2, [%[r], #100]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #88]\n\t" - "str r4, [%[r], #96]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #84]\n\t" - "str r3, [%[r], #92]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #96]\n\t" + "str r5, [%[r], #104]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #92]\n\t" + "str r4, [%[r], #100]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #88]\n\t" + "str r6, [%[r], #96]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #80]\n\t" - "str r2, [%[r], #88]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #76]\n\t" - "str r4, [%[r], #84]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #72]\n\t" - "str r3, [%[r], #80]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #84]\n\t" + "str r5, [%[r], #92]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #80]\n\t" + "str r4, [%[r], #88]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #76]\n\t" + "str r6, [%[r], #84]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #68]\n\t" - "str r2, [%[r], #76]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #64]\n\t" - "str r4, [%[r], #72]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #60]\n\t" - "str r3, [%[r], #68]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #72]\n\t" + "str r5, 
[%[r], #80]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #68]\n\t" + "str r4, [%[r], #76]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #64]\n\t" + "str r6, [%[r], #72]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" - "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #48]\n\t" - "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #60]\n\t" + "str r5, [%[r], #68]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r6, [%[r], #60]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" - "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #36]\n\t" - "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #48]\n\t" + "str r5, [%[r], #56]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr 
r6, r6, r3\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r6, [%[r], #48]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" - "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #24]\n\t" - "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #36]\n\t" + "str r5, [%[r], #44]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r6, [%[r], #36]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" - "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #12]\n\t" - "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #24]\n\t" + "str r5, [%[r], #32]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r6, [%[r], #24]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" - 
"lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" - "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" - "str r3, [%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #12]\n\t" + "str r5, [%[r], #20]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r6, [%[r], #12]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "str r4, [%[r]]\n\t" - "str r2, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "str r6, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12", "cc" ); } @@ -20753,12 +45494,14 @@ static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[289]; @@ -20774,11 +45517,17 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -20807,6 +45556,10 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -20853,7 +45606,7 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, sp_3072_cond_sub_96(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -20995,27 +45748,30 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; 
} - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } @@ -21029,20 +45785,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -21072,12 +45831,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -21114,34 +45873,13 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_4096_to_bin_128(sp_digit* r, byte* a) { int i; - int j; - int s = 0; - int b; + int j = 0; - j = 4096 / 8 - 1; - a[j] = 0; - for (i=0; i<128 && j>=0; i++) { - b = 0; - /* lint allow cast of mismatch sp_digit and int */ - a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ - b += 8 - s; - if (j < 0) { - break; - } - while (b < 32) { - a[j--] = (byte)(r[i] >> b); - b += 8; - if (j < 0) { - break; - } - } - s = 8 - (b - 32); - if (j >= 0) { - a[j] = 0; - } - if (s != 0) { - j++; - } + for (i = 
127; i >= 0; i--) { + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; } } @@ -21165,530 +45903,242 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" 
"sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, [%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm 
%[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "ldr r2, [%[a], #128]\n\t" - "ldr r3, [%[a], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "ldr r8, [%[b], #136]\n\t" - "ldr r9, [%[b], #140]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #128]\n\t" - "str r3, [%[a], #132]\n\t" - "str r4, [%[a], #136]\n\t" - "str r5, [%[a], #140]\n\t" - "ldr r2, [%[a], #144]\n\t" - "ldr r3, [%[a], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "ldr r8, [%[b], #152]\n\t" - "ldr r9, [%[b], #156]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #144]\n\t" - "str r3, [%[a], #148]\n\t" - "str r4, [%[a], #152]\n\t" - "str r5, [%[a], #156]\n\t" - "ldr r2, [%[a], #160]\n\t" - "ldr r3, [%[a], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, 
[%[a], #172]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "ldr r8, [%[b], #168]\n\t" - "ldr r9, [%[b], #172]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #160]\n\t" - "str r3, [%[a], #164]\n\t" - "str r4, [%[a], #168]\n\t" - "str r5, [%[a], #172]\n\t" - "ldr r2, [%[a], #176]\n\t" - "ldr r3, [%[a], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "ldr r8, [%[b], #184]\n\t" - "ldr r9, [%[b], #188]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #176]\n\t" - "str r3, [%[a], #180]\n\t" - "str r4, [%[a], #184]\n\t" - "str r5, [%[a], #188]\n\t" - "ldr r2, [%[a], #192]\n\t" - "ldr r3, [%[a], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[a], #204]\n\t" - "ldr r6, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" - "ldr r8, [%[b], #200]\n\t" - "ldr r9, [%[b], #204]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #192]\n\t" - "str r3, [%[a], #196]\n\t" - "str r4, [%[a], #200]\n\t" - "str r5, [%[a], #204]\n\t" - "ldr r2, [%[a], #208]\n\t" - "ldr r3, [%[a], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[a], #220]\n\t" - "ldr r6, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "ldr r8, [%[b], #216]\n\t" - "ldr r9, [%[b], #220]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #208]\n\t" - "str r3, 
[%[a], #212]\n\t" - "str r4, [%[a], #216]\n\t" - "str r5, [%[a], #220]\n\t" - "ldr r2, [%[a], #224]\n\t" - "ldr r3, [%[a], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[a], #236]\n\t" - "ldr r6, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "ldr r8, [%[b], #232]\n\t" - "ldr r9, [%[b], #236]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #224]\n\t" - "str r3, [%[a], #228]\n\t" - "str r4, [%[a], #232]\n\t" - "str r5, [%[a], #236]\n\t" - "ldr r2, [%[a], #240]\n\t" - "ldr r3, [%[a], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[a], #252]\n\t" - "ldr r6, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "ldr r8, [%[b], #248]\n\t" - "ldr r9, [%[b], #252]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #240]\n\t" - "str r3, [%[a], #244]\n\t" - "str r4, [%[a], #248]\n\t" - "str r5, [%[a], #252]\n\t" - "ldr r2, [%[a], #256]\n\t" - "ldr r3, [%[a], #260]\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[a], #268]\n\t" - "ldr r6, [%[b], #256]\n\t" - "ldr r7, [%[b], #260]\n\t" - "ldr r8, [%[b], #264]\n\t" - "ldr r9, [%[b], #268]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #256]\n\t" - "str r3, [%[a], #260]\n\t" - "str r4, [%[a], #264]\n\t" - "str r5, [%[a], #268]\n\t" - "ldr r2, [%[a], #272]\n\t" - "ldr r3, [%[a], #276]\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[a], #284]\n\t" - "ldr r6, [%[b], #272]\n\t" - "ldr r7, [%[b], #276]\n\t" - "ldr r8, [%[b], #280]\n\t" - "ldr r9, [%[b], #284]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, 
r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #272]\n\t" - "str r3, [%[a], #276]\n\t" - "str r4, [%[a], #280]\n\t" - "str r5, [%[a], #284]\n\t" - "ldr r2, [%[a], #288]\n\t" - "ldr r3, [%[a], #292]\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[a], #300]\n\t" - "ldr r6, [%[b], #288]\n\t" - "ldr r7, [%[b], #292]\n\t" - "ldr r8, [%[b], #296]\n\t" - "ldr r9, [%[b], #300]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #288]\n\t" - "str r3, [%[a], #292]\n\t" - "str r4, [%[a], #296]\n\t" - "str r5, [%[a], #300]\n\t" - "ldr r2, [%[a], #304]\n\t" - "ldr r3, [%[a], #308]\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[a], #316]\n\t" - "ldr r6, [%[b], #304]\n\t" - "ldr r7, [%[b], #308]\n\t" - "ldr r8, [%[b], #312]\n\t" - "ldr r9, [%[b], #316]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #304]\n\t" - "str r3, [%[a], #308]\n\t" - "str r4, [%[a], #312]\n\t" - "str r5, [%[a], #316]\n\t" - "ldr r2, [%[a], #320]\n\t" - "ldr r3, [%[a], #324]\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[a], #332]\n\t" - "ldr r6, [%[b], #320]\n\t" - "ldr r7, [%[b], #324]\n\t" - "ldr r8, [%[b], #328]\n\t" - "ldr r9, [%[b], #332]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #320]\n\t" - "str r3, [%[a], #324]\n\t" - "str r4, [%[a], #328]\n\t" - "str r5, [%[a], #332]\n\t" - "ldr r2, [%[a], #336]\n\t" - "ldr r3, [%[a], #340]\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[a], #348]\n\t" - 
"ldr r6, [%[b], #336]\n\t" - "ldr r7, [%[b], #340]\n\t" - "ldr r8, [%[b], #344]\n\t" - "ldr r9, [%[b], #348]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #336]\n\t" - "str r3, [%[a], #340]\n\t" - "str r4, [%[a], #344]\n\t" - "str r5, [%[a], #348]\n\t" - "ldr r2, [%[a], #352]\n\t" - "ldr r3, [%[a], #356]\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r5, [%[a], #364]\n\t" - "ldr r6, [%[b], #352]\n\t" - "ldr r7, [%[b], #356]\n\t" - "ldr r8, [%[b], #360]\n\t" - "ldr r9, [%[b], #364]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #352]\n\t" - "str r3, [%[a], #356]\n\t" - "str r4, [%[a], #360]\n\t" - "str r5, [%[a], #364]\n\t" - "ldr r2, [%[a], #368]\n\t" - "ldr r3, [%[a], #372]\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[a], #380]\n\t" - "ldr r6, [%[b], #368]\n\t" - "ldr r7, [%[b], #372]\n\t" - "ldr r8, [%[b], #376]\n\t" - "ldr r9, [%[b], #380]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #368]\n\t" - "str r3, [%[a], #372]\n\t" - "str r4, [%[a], #376]\n\t" - "str r5, [%[a], #380]\n\t" - "ldr r2, [%[a], #384]\n\t" - "ldr r3, [%[a], #388]\n\t" - "ldr r4, [%[a], #392]\n\t" - "ldr r5, [%[a], #396]\n\t" - "ldr r6, [%[b], #384]\n\t" - "ldr r7, [%[b], #388]\n\t" - "ldr r8, [%[b], #392]\n\t" - "ldr r9, [%[b], #396]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #384]\n\t" - "str r3, [%[a], #388]\n\t" - 
"str r4, [%[a], #392]\n\t" - "str r5, [%[a], #396]\n\t" - "ldr r2, [%[a], #400]\n\t" - "ldr r3, [%[a], #404]\n\t" - "ldr r4, [%[a], #408]\n\t" - "ldr r5, [%[a], #412]\n\t" - "ldr r6, [%[b], #400]\n\t" - "ldr r7, [%[b], #404]\n\t" - "ldr r8, [%[b], #408]\n\t" - "ldr r9, [%[b], #412]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #400]\n\t" - "str r3, [%[a], #404]\n\t" - "str r4, [%[a], #408]\n\t" - "str r5, [%[a], #412]\n\t" - "ldr r2, [%[a], #416]\n\t" - "ldr r3, [%[a], #420]\n\t" - "ldr r4, [%[a], #424]\n\t" - "ldr r5, [%[a], #428]\n\t" - "ldr r6, [%[b], #416]\n\t" - "ldr r7, [%[b], #420]\n\t" - "ldr r8, [%[b], #424]\n\t" - "ldr r9, [%[b], #428]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #416]\n\t" - "str r3, [%[a], #420]\n\t" - "str r4, [%[a], #424]\n\t" - "str r5, [%[a], #428]\n\t" - "ldr r2, [%[a], #432]\n\t" - "ldr r3, [%[a], #436]\n\t" - "ldr r4, [%[a], #440]\n\t" - "ldr r5, [%[a], #444]\n\t" - "ldr r6, [%[b], #432]\n\t" - "ldr r7, [%[b], #436]\n\t" - "ldr r8, [%[b], #440]\n\t" - "ldr r9, [%[b], #444]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #432]\n\t" - "str r3, [%[a], #436]\n\t" - "str r4, [%[a], #440]\n\t" - "str r5, [%[a], #444]\n\t" - "ldr r2, [%[a], #448]\n\t" - "ldr r3, [%[a], #452]\n\t" - "ldr r4, [%[a], #456]\n\t" - "ldr r5, [%[a], #460]\n\t" - "ldr r6, [%[b], #448]\n\t" - "ldr r7, [%[b], #452]\n\t" - "ldr r8, [%[b], #456]\n\t" - "ldr r9, [%[b], #460]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm 
%[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #448]\n\t" - "str r3, [%[a], #452]\n\t" - "str r4, [%[a], #456]\n\t" - "str r5, [%[a], #460]\n\t" - "ldr r2, [%[a], #464]\n\t" - "ldr r3, [%[a], #468]\n\t" - "ldr r4, [%[a], #472]\n\t" - "ldr r5, [%[a], #476]\n\t" - "ldr r6, [%[b], #464]\n\t" - "ldr r7, [%[b], #468]\n\t" - "ldr r8, [%[b], #472]\n\t" - "ldr r9, [%[b], #476]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #464]\n\t" - "str r3, [%[a], #468]\n\t" - "str r4, [%[a], #472]\n\t" - "str r5, [%[a], #476]\n\t" - "ldr r2, [%[a], #480]\n\t" - "ldr r3, [%[a], #484]\n\t" - "ldr r4, [%[a], #488]\n\t" - "ldr r5, [%[a], #492]\n\t" - "ldr r6, [%[b], #480]\n\t" - "ldr r7, [%[b], #484]\n\t" - "ldr r8, [%[b], #488]\n\t" - "ldr r9, [%[b], #492]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #480]\n\t" - "str r3, [%[a], #484]\n\t" - "str r4, [%[a], #488]\n\t" - "str r5, [%[a], #492]\n\t" - "ldr r2, [%[a], #496]\n\t" - "ldr r3, [%[a], #500]\n\t" - "ldr r4, [%[a], #504]\n\t" - "ldr r5, [%[a], #508]\n\t" - "ldr r6, [%[b], #496]\n\t" - "ldr r7, [%[b], #500]\n\t" - "ldr r8, [%[b], #504]\n\t" - "ldr r9, [%[b], #508]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #496]\n\t" - "str r3, [%[a], #500]\n\t" - "str r4, [%[a], #504]\n\t" - "str r5, [%[a], #508]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + 
"stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -21697,532 +46147,244 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, 
[%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, 
[%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[a], #136]\n\t" - "ldr r7, [%[a], #140]\n\t" - "ldr r8, [%[b], #128]\n\t" - "ldr r9, [%[b], #132]\n\t" - "ldr r10, [%[b], #136]\n\t" - "ldr r14, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" - "str r6, [%[r], #136]\n\t" - "str r7, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[a], #152]\n\t" - "ldr r7, [%[a], #156]\n\t" - "ldr r8, [%[b], #144]\n\t" - "ldr r9, [%[b], #148]\n\t" - "ldr r10, [%[b], #152]\n\t" - "ldr r14, [%[b], 
#156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" - "str r6, [%[r], #152]\n\t" - "str r7, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, [%[a], #168]\n\t" - "ldr r7, [%[a], #172]\n\t" - "ldr r8, [%[b], #160]\n\t" - "ldr r9, [%[b], #164]\n\t" - "ldr r10, [%[b], #168]\n\t" - "ldr r14, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" - "str r6, [%[r], #168]\n\t" - "str r7, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[a], #184]\n\t" - "ldr r7, [%[a], #188]\n\t" - "ldr r8, [%[b], #176]\n\t" - "ldr r9, [%[b], #180]\n\t" - "ldr r10, [%[b], #184]\n\t" - "ldr r14, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #176]\n\t" - "str r5, [%[r], #180]\n\t" - "str r6, [%[r], #184]\n\t" - "str r7, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[a], #196]\n\t" - "ldr r6, [%[a], #200]\n\t" - "ldr r7, [%[a], #204]\n\t" - "ldr r8, [%[b], #192]\n\t" - "ldr r9, [%[b], #196]\n\t" - "ldr r10, [%[b], #200]\n\t" - "ldr r14, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #192]\n\t" - "str 
r5, [%[r], #196]\n\t" - "str r6, [%[r], #200]\n\t" - "str r7, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[a], #212]\n\t" - "ldr r6, [%[a], #216]\n\t" - "ldr r7, [%[a], #220]\n\t" - "ldr r8, [%[b], #208]\n\t" - "ldr r9, [%[b], #212]\n\t" - "ldr r10, [%[b], #216]\n\t" - "ldr r14, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #208]\n\t" - "str r5, [%[r], #212]\n\t" - "str r6, [%[r], #216]\n\t" - "str r7, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[a], #228]\n\t" - "ldr r6, [%[a], #232]\n\t" - "ldr r7, [%[a], #236]\n\t" - "ldr r8, [%[b], #224]\n\t" - "ldr r9, [%[b], #228]\n\t" - "ldr r10, [%[b], #232]\n\t" - "ldr r14, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #224]\n\t" - "str r5, [%[r], #228]\n\t" - "str r6, [%[r], #232]\n\t" - "str r7, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[a], #244]\n\t" - "ldr r6, [%[a], #248]\n\t" - "ldr r7, [%[a], #252]\n\t" - "ldr r8, [%[b], #240]\n\t" - "ldr r9, [%[b], #244]\n\t" - "ldr r10, [%[b], #248]\n\t" - "ldr r14, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #240]\n\t" - "str r5, [%[r], #244]\n\t" - "str r6, [%[r], #248]\n\t" - "str r7, [%[r], #252]\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[a], #260]\n\t" - "ldr r6, [%[a], #264]\n\t" - "ldr r7, [%[a], #268]\n\t" - "ldr r8, [%[b], #256]\n\t" - "ldr r9, [%[b], #260]\n\t" - "ldr r10, [%[b], 
#264]\n\t" - "ldr r14, [%[b], #268]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #256]\n\t" - "str r5, [%[r], #260]\n\t" - "str r6, [%[r], #264]\n\t" - "str r7, [%[r], #268]\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, [%[a], #276]\n\t" - "ldr r6, [%[a], #280]\n\t" - "ldr r7, [%[a], #284]\n\t" - "ldr r8, [%[b], #272]\n\t" - "ldr r9, [%[b], #276]\n\t" - "ldr r10, [%[b], #280]\n\t" - "ldr r14, [%[b], #284]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #272]\n\t" - "str r5, [%[r], #276]\n\t" - "str r6, [%[r], #280]\n\t" - "str r7, [%[r], #284]\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[a], #292]\n\t" - "ldr r6, [%[a], #296]\n\t" - "ldr r7, [%[a], #300]\n\t" - "ldr r8, [%[b], #288]\n\t" - "ldr r9, [%[b], #292]\n\t" - "ldr r10, [%[b], #296]\n\t" - "ldr r14, [%[b], #300]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #288]\n\t" - "str r5, [%[r], #292]\n\t" - "str r6, [%[r], #296]\n\t" - "str r7, [%[r], #300]\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[a], #308]\n\t" - "ldr r6, [%[a], #312]\n\t" - "ldr r7, [%[a], #316]\n\t" - "ldr r8, [%[b], #304]\n\t" - "ldr r9, [%[b], #308]\n\t" - "ldr r10, [%[b], #312]\n\t" - "ldr r14, [%[b], #316]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str 
r4, [%[r], #304]\n\t" - "str r5, [%[r], #308]\n\t" - "str r6, [%[r], #312]\n\t" - "str r7, [%[r], #316]\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[a], #324]\n\t" - "ldr r6, [%[a], #328]\n\t" - "ldr r7, [%[a], #332]\n\t" - "ldr r8, [%[b], #320]\n\t" - "ldr r9, [%[b], #324]\n\t" - "ldr r10, [%[b], #328]\n\t" - "ldr r14, [%[b], #332]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #320]\n\t" - "str r5, [%[r], #324]\n\t" - "str r6, [%[r], #328]\n\t" - "str r7, [%[r], #332]\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[a], #340]\n\t" - "ldr r6, [%[a], #344]\n\t" - "ldr r7, [%[a], #348]\n\t" - "ldr r8, [%[b], #336]\n\t" - "ldr r9, [%[b], #340]\n\t" - "ldr r10, [%[b], #344]\n\t" - "ldr r14, [%[b], #348]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #336]\n\t" - "str r5, [%[r], #340]\n\t" - "str r6, [%[r], #344]\n\t" - "str r7, [%[r], #348]\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[a], #356]\n\t" - "ldr r6, [%[a], #360]\n\t" - "ldr r7, [%[a], #364]\n\t" - "ldr r8, [%[b], #352]\n\t" - "ldr r9, [%[b], #356]\n\t" - "ldr r10, [%[b], #360]\n\t" - "ldr r14, [%[b], #364]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #352]\n\t" - "str r5, [%[r], #356]\n\t" - "str r6, [%[r], #360]\n\t" - "str r7, [%[r], #364]\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[a], #372]\n\t" - "ldr r6, [%[a], #376]\n\t" - "ldr r7, [%[a], #380]\n\t" - "ldr r8, [%[b], #368]\n\t" - "ldr r9, [%[b], 
#372]\n\t" - "ldr r10, [%[b], #376]\n\t" - "ldr r14, [%[b], #380]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #368]\n\t" - "str r5, [%[r], #372]\n\t" - "str r6, [%[r], #376]\n\t" - "str r7, [%[r], #380]\n\t" - "ldr r4, [%[a], #384]\n\t" - "ldr r5, [%[a], #388]\n\t" - "ldr r6, [%[a], #392]\n\t" - "ldr r7, [%[a], #396]\n\t" - "ldr r8, [%[b], #384]\n\t" - "ldr r9, [%[b], #388]\n\t" - "ldr r10, [%[b], #392]\n\t" - "ldr r14, [%[b], #396]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #384]\n\t" - "str r5, [%[r], #388]\n\t" - "str r6, [%[r], #392]\n\t" - "str r7, [%[r], #396]\n\t" - "ldr r4, [%[a], #400]\n\t" - "ldr r5, [%[a], #404]\n\t" - "ldr r6, [%[a], #408]\n\t" - "ldr r7, [%[a], #412]\n\t" - "ldr r8, [%[b], #400]\n\t" - "ldr r9, [%[b], #404]\n\t" - "ldr r10, [%[b], #408]\n\t" - "ldr r14, [%[b], #412]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #400]\n\t" - "str r5, [%[r], #404]\n\t" - "str r6, [%[r], #408]\n\t" - "str r7, [%[r], #412]\n\t" - "ldr r4, [%[a], #416]\n\t" - "ldr r5, [%[a], #420]\n\t" - "ldr r6, [%[a], #424]\n\t" - "ldr r7, [%[a], #428]\n\t" - "ldr r8, [%[b], #416]\n\t" - "ldr r9, [%[b], #420]\n\t" - "ldr r10, [%[b], #424]\n\t" - "ldr r14, [%[b], #428]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - 
"adcs r7, r7, r14\n\t" - "str r4, [%[r], #416]\n\t" - "str r5, [%[r], #420]\n\t" - "str r6, [%[r], #424]\n\t" - "str r7, [%[r], #428]\n\t" - "ldr r4, [%[a], #432]\n\t" - "ldr r5, [%[a], #436]\n\t" - "ldr r6, [%[a], #440]\n\t" - "ldr r7, [%[a], #444]\n\t" - "ldr r8, [%[b], #432]\n\t" - "ldr r9, [%[b], #436]\n\t" - "ldr r10, [%[b], #440]\n\t" - "ldr r14, [%[b], #444]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #432]\n\t" - "str r5, [%[r], #436]\n\t" - "str r6, [%[r], #440]\n\t" - "str r7, [%[r], #444]\n\t" - "ldr r4, [%[a], #448]\n\t" - "ldr r5, [%[a], #452]\n\t" - "ldr r6, [%[a], #456]\n\t" - "ldr r7, [%[a], #460]\n\t" - "ldr r8, [%[b], #448]\n\t" - "ldr r9, [%[b], #452]\n\t" - "ldr r10, [%[b], #456]\n\t" - "ldr r14, [%[b], #460]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #448]\n\t" - "str r5, [%[r], #452]\n\t" - "str r6, [%[r], #456]\n\t" - "str r7, [%[r], #460]\n\t" - "ldr r4, [%[a], #464]\n\t" - "ldr r5, [%[a], #468]\n\t" - "ldr r6, [%[a], #472]\n\t" - "ldr r7, [%[a], #476]\n\t" - "ldr r8, [%[b], #464]\n\t" - "ldr r9, [%[b], #468]\n\t" - "ldr r10, [%[b], #472]\n\t" - "ldr r14, [%[b], #476]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #464]\n\t" - "str r5, [%[r], #468]\n\t" - "str r6, [%[r], #472]\n\t" - "str r7, [%[r], #476]\n\t" - "ldr r4, [%[a], #480]\n\t" - "ldr r5, [%[a], #484]\n\t" - "ldr r6, [%[a], #488]\n\t" - "ldr r7, [%[a], #492]\n\t" - "ldr r8, [%[b], 
#480]\n\t" - "ldr r9, [%[b], #484]\n\t" - "ldr r10, [%[b], #488]\n\t" - "ldr r14, [%[b], #492]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #480]\n\t" - "str r5, [%[r], #484]\n\t" - "str r6, [%[r], #488]\n\t" - "str r7, [%[r], #492]\n\t" - "ldr r4, [%[a], #496]\n\t" - "ldr r5, [%[a], #500]\n\t" - "ldr r6, [%[a], #504]\n\t" - "ldr r7, [%[a], #508]\n\t" - "ldr r8, [%[b], #496]\n\t" - "ldr r9, [%[b], #500]\n\t" - "ldr r10, [%[b], #504]\n\t" - "ldr r14, [%[b], #508]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #496]\n\t" - "str r5, [%[r], #500]\n\t" - "str r6, [%[r], #504]\n\t" - "str r7, [%[r], #508]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* Multiply a and b into r. 
(r = a * b) @@ -22238,7 +46400,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, sp_digit z1[128]; sp_digit a1[64]; sp_digit b1[64]; - sp_digit z2[128]; + sp_digit* z2 = r + 128; sp_digit u; sp_digit ca; sp_digit cb; @@ -22246,18 +46408,22 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, ca = sp_2048_add_64(a1, a, &a[64]); cb = sp_2048_add_64(b1, b, &b[64]); u = ca & cb; - sp_2048_mul_64(z1, a1, b1); + sp_2048_mul_64(z2, &a[64], &b[64]); sp_2048_mul_64(z0, a, b); - sp_2048_mask_64(r + 128, a1, 0 - cb); - sp_2048_mask_64(b1, b1, 0 - ca); - u += sp_2048_add_64(r + 128, r + 128, b1); - u += sp_4096_sub_in_place_128(z1, z2); + sp_2048_mul_64(z1, a1, b1); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_sub_in_place_128(z1, z2); + sp_2048_mask_64(a1, a1, 0 - cb); + u += sp_2048_add_64(z1 + 64, z1 + 64, a1); + sp_2048_mask_64(b1, b1, 0 - ca); + u += sp_2048_add_64(z1 + 64, z1 + 64, b1); + u += sp_4096_add_128(r + 64, r + 64, z1); - r[192] = u; - XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); - (void)sp_4096_add_128(r + 128, r + 128, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1)); + a1[0] = u; + (void)sp_2048_add_64(r + 192, r + 192, a1); } /* Square a and put result in r. 
(r = a * a) @@ -22268,23 +46434,32 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[128]; + sp_digit* z2 = r + 128; sp_digit z1[128]; - sp_digit a1[64]; + sp_digit* a1 = z1; + sp_digit zero[64]; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 64); + + mask = sp_2048_sub_64(a1, a, &a[64]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_64(a1, p1, p2); - u = sp_2048_add_64(a1, a, &a[64]); - sp_2048_sqr_64(z1, a1); sp_2048_sqr_64(z2, &a[64]); sp_2048_sqr_64(z0, a); - sp_2048_mask_64(r + 128, a1, 0 - u); - u += sp_2048_add_64(r + 128, r + 128, r + 128); - u += sp_4096_sub_in_place_128(z1, z2); - u += sp_4096_sub_in_place_128(z1, z0); - u += sp_4096_add_128(r + 64, r + 64, z1); - r[192] = u; - XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); - (void)sp_4096_add_128(r + 128, r + 128, z2); + sp_2048_sqr_64(z1, a1); + + u = 0; + u -= sp_4096_sub_in_place_128(z1, z2); + u -= sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_sub_in_place_128(r + 64, z1); + zero[0] = u; + (void)sp_2048_add_64(r + 192, r + 192, zero); } #endif /* !WOLFSSL_SP_SMALL */ @@ -22295,41 +46470,35 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #512\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x200\n\t" + "\n" + "L_sp_4096_add_128_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_4096_add_128_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -22339,40 +46508,33 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #512\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x200\n\t" + "\n" + "L_sp_4096_sub_in_pkace_128_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -22383,57 +46545,196 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #1024\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x400\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #508\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_4096_mul_128_outer_%=: \n\t" + "subs r3, r5, #0x1fc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_4096_mul_128_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" 
+ "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #512\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_4096_mul_128_inner_done_%=\n\t" + "blt L_sp_4096_mul_128_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_4096_mul_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, 
r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #1016\n\t" - "ble 1b\n\t" + "cmp r5, #0x3f4\n\t" + "ble L_sp_4096_mul_128_outer_%=\n\t" + "ldr lr, [%[a], #508]\n\t" + "ldr r11, [%[b], #508]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_4096_mul_128_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_4096_mul_128_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -22442,78 +46743,160 @@ static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #1024\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x400\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #508\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_4096_sqr_128_outer_%=: \n\t" + "subs r3, r5, #0x1fc\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_4096_sqr_128_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - 
"\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #512\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_4096_sqr_128_inner_done_%=\n\t" + "blt L_sp_4096_sqr_128_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_4096_sqr_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #1016\n\t" - "ble 1b\n\t" + "cmp r5, #0x3f4\n\t" + "ble L_sp_4096_sqr_128_outer_%=\n\t" + "ldr lr, [%[a], #508]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, 
r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_4096_sqr_128_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_4096_sqr_128_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } #endif /* WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -22533,947 +46916,4218 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_4096_mul_d_128_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, 
r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #512\n\t" - "blt 1b\n\t" + "cmp r9, #0x200\n\t" + "blt L_sp_4096_mul_d_128_word_%=\n\t" "str r3, [%[r], #512]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull 
r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], 
r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" 
- "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "umull r6, r7, %[b], 
r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" - "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" - "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" - "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" - "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" - "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" - "umull r6, r7, 
%[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" - "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" - "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" - "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" - "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" - "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" - "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" - "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" - "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" - "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" - "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" - "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #252]\n\t" - "# A[64] * B\n\t" - "ldr r8, [%[a], #256]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #256]\n\t" - "# A[65] * B\n\t" - "ldr r8, [%[a], #260]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #260]\n\t" - "# A[66] * B\n\t" - "ldr r8, [%[a], #264]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #264]\n\t" - "# A[67] * B\n\t" - "ldr r8, [%[a], #268]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #268]\n\t" - "# A[68] * B\n\t" - "ldr r8, [%[a], #272]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #272]\n\t" - "# A[69] * B\n\t" - "ldr r8, [%[a], #276]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #276]\n\t" - "# A[70] * B\n\t" - "ldr r8, [%[a], #280]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #280]\n\t" - "# A[71] * B\n\t" - "ldr r8, [%[a], #284]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #284]\n\t" - "# A[72] * B\n\t" - "ldr r8, [%[a], #288]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #288]\n\t" - "# A[73] * B\n\t" - "ldr r8, [%[a], #292]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #292]\n\t" - "# A[74] * B\n\t" - "ldr r8, [%[a], #296]\n\t" - "umull 
r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #296]\n\t" - "# A[75] * B\n\t" - "ldr r8, [%[a], #300]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #300]\n\t" - "# A[76] * B\n\t" - "ldr r8, [%[a], #304]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #304]\n\t" - "# A[77] * B\n\t" - "ldr r8, [%[a], #308]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #308]\n\t" - "# A[78] * B\n\t" - "ldr r8, [%[a], #312]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #312]\n\t" - "# A[79] * B\n\t" - "ldr r8, [%[a], #316]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #316]\n\t" - "# A[80] * B\n\t" - "ldr r8, [%[a], #320]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #320]\n\t" - "# A[81] * B\n\t" - "ldr r8, [%[a], #324]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #324]\n\t" - "# A[82] * B\n\t" - "ldr r8, [%[a], #328]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #328]\n\t" - "# A[83] * B\n\t" - "ldr r8, [%[a], #332]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #332]\n\t" - "# A[84] * B\n\t" - "ldr r8, [%[a], #336]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #336]\n\t" - "# A[85] * B\n\t" - "ldr r8, [%[a], #340]\n\t" - 
"umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #340]\n\t" - "# A[86] * B\n\t" - "ldr r8, [%[a], #344]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #344]\n\t" - "# A[87] * B\n\t" - "ldr r8, [%[a], #348]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #348]\n\t" - "# A[88] * B\n\t" - "ldr r8, [%[a], #352]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #352]\n\t" - "# A[89] * B\n\t" - "ldr r8, [%[a], #356]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #356]\n\t" - "# A[90] * B\n\t" - "ldr r8, [%[a], #360]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #360]\n\t" - "# A[91] * B\n\t" - "ldr r8, [%[a], #364]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #364]\n\t" - "# A[92] * B\n\t" - "ldr r8, [%[a], #368]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #368]\n\t" - "# A[93] * B\n\t" - "ldr r8, [%[a], #372]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #372]\n\t" - "# A[94] * B\n\t" - "ldr r8, [%[a], #376]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #376]\n\t" - "# A[95] * B\n\t" - "ldr r8, [%[a], #380]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #380]\n\t" - "# A[96] * B\n\t" - "ldr r8, [%[a], #384]\n\t" 
- "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #384]\n\t" - "# A[97] * B\n\t" - "ldr r8, [%[a], #388]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #388]\n\t" - "# A[98] * B\n\t" - "ldr r8, [%[a], #392]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #392]\n\t" - "# A[99] * B\n\t" - "ldr r8, [%[a], #396]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #396]\n\t" - "# A[100] * B\n\t" - "ldr r8, [%[a], #400]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #400]\n\t" - "# A[101] * B\n\t" - "ldr r8, [%[a], #404]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #404]\n\t" - "# A[102] * B\n\t" - "ldr r8, [%[a], #408]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #408]\n\t" - "# A[103] * B\n\t" - "ldr r8, [%[a], #412]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #412]\n\t" - "# A[104] * B\n\t" - "ldr r8, [%[a], #416]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #416]\n\t" - "# A[105] * B\n\t" - "ldr r8, [%[a], #420]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #420]\n\t" - "# A[106] * B\n\t" - "ldr r8, [%[a], #424]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #424]\n\t" - "# A[107] * B\n\t" - "ldr r8, [%[a], 
#428]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #428]\n\t" - "# A[108] * B\n\t" - "ldr r8, [%[a], #432]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #432]\n\t" - "# A[109] * B\n\t" - "ldr r8, [%[a], #436]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #436]\n\t" - "# A[110] * B\n\t" - "ldr r8, [%[a], #440]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #440]\n\t" - "# A[111] * B\n\t" - "ldr r8, [%[a], #444]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #444]\n\t" - "# A[112] * B\n\t" - "ldr r8, [%[a], #448]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #448]\n\t" - "# A[113] * B\n\t" - "ldr r8, [%[a], #452]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #452]\n\t" - "# A[114] * B\n\t" - "ldr r8, [%[a], #456]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #456]\n\t" - "# A[115] * B\n\t" - "ldr r8, [%[a], #460]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #460]\n\t" - "# A[116] * B\n\t" - "ldr r8, [%[a], #464]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #464]\n\t" - "# A[117] * B\n\t" - "ldr r8, [%[a], #468]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #468]\n\t" - "# A[118] * B\n\t" - 
"ldr r8, [%[a], #472]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #472]\n\t" - "# A[119] * B\n\t" - "ldr r8, [%[a], #476]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #476]\n\t" - "# A[120] * B\n\t" - "ldr r8, [%[a], #480]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #480]\n\t" - "# A[121] * B\n\t" - "ldr r8, [%[a], #484]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #484]\n\t" - "# A[122] * B\n\t" - "ldr r8, [%[a], #488]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #488]\n\t" - "# A[123] * B\n\t" - "ldr r8, [%[a], #492]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #492]\n\t" - "# A[124] * B\n\t" - "ldr r8, [%[a], #496]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #496]\n\t" - "# A[125] * B\n\t" - "ldr r8, [%[a], #500]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #500]\n\t" - "# A[126] * B\n\t" - "ldr r8, [%[a], #504]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #504]\n\t" - "# A[127] * B\n\t" - "ldr r8, [%[a], #508]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" - "str r4, [%[r], #508]\n\t" - "str r5, [%[r], #512]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } +#else +/* Mul a by digit b into r. 
(r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, 
r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov 
r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul 
r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, 
r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[32] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[33] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[34] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[35] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[36] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[37] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[38] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[39] * 
B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[40] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[41] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, 
r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[42] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[43] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[44] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[45] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[46] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, 
r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[47] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[48] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[49] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[50] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[51] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[52] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[53] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[54] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[55] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[56] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[57] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[58] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[59] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[60] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[61] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[62] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[63] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[64] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[65] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[66] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[67] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[68] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[69] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[70] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[71] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[72] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[73] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[74] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[75] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[76] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[77] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[78] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[79] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[80] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[81] * B */ + "ldm %[a]!, {r8}\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[82] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[83] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[84] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[85] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[86] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[87] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[88] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[89] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[90] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[91] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[92] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[93] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[94] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[95] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[96] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[97] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[98] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[99] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[100] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[101] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[102] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[103] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[104] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[105] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[106] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[107] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[108] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[109] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[110] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[111] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[112] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[113] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[114] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[115] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[116] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[117] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[118] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[119] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[120] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[121] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[122] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[123] * B */ + "ldm %[a]!, {r8}\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[124] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[125] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[126] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[127] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || 
defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 4096 bits, just need to subtract. @@ -23490,6 +51144,7 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -23498,1870 +51153,6025 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_4096_cond_sub_128_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #512\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x200\n\t" + "blt L_sp_4096_cond_sub_128_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else 
- __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, 
[%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str 
r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, 
[%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" - "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r6, [%[a], #196]\n\t" - "ldr r5, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #192]\n\t" - "str r6, [%[r], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r5, [%[b], #200]\n\t" - "ldr r7, [%[b], #204]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r6, [%[a], #212]\n\t" - "ldr r5, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #208]\n\t" - "str r6, [%[r], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r5, [%[b], #216]\n\t" - "ldr r7, [%[b], #220]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r6, [%[a], #228]\n\t" - "ldr r5, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #224]\n\t" - "str r6, [%[r], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r5, [%[b], #232]\n\t" - "ldr r7, [%[b], #236]\n\t" - "and r5, r5, 
%[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r6, [%[a], #244]\n\t" - "ldr r5, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #240]\n\t" - "str r6, [%[r], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r5, [%[b], #248]\n\t" - "ldr r7, [%[b], #252]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r6, [%[a], #260]\n\t" - "ldr r5, [%[b], #256]\n\t" - "ldr r7, [%[b], #260]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #256]\n\t" - "str r6, [%[r], #260]\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r6, [%[a], #268]\n\t" - "ldr r5, [%[b], #264]\n\t" - "ldr r7, [%[b], #268]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #264]\n\t" - "str r6, [%[r], #268]\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r6, [%[a], #276]\n\t" - "ldr r5, [%[b], #272]\n\t" - "ldr r7, [%[b], #276]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #272]\n\t" - "str r6, [%[r], #276]\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r6, [%[a], #284]\n\t" - "ldr r5, [%[b], #280]\n\t" - "ldr r7, [%[b], #284]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #280]\n\t" - "str r6, [%[r], #284]\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r6, [%[a], #292]\n\t" - "ldr r5, [%[b], #288]\n\t" - "ldr r7, [%[b], #292]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, 
[%[r], #288]\n\t" - "str r6, [%[r], #292]\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r6, [%[a], #300]\n\t" - "ldr r5, [%[b], #296]\n\t" - "ldr r7, [%[b], #300]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #296]\n\t" - "str r6, [%[r], #300]\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r6, [%[a], #308]\n\t" - "ldr r5, [%[b], #304]\n\t" - "ldr r7, [%[b], #308]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #304]\n\t" - "str r6, [%[r], #308]\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r6, [%[a], #316]\n\t" - "ldr r5, [%[b], #312]\n\t" - "ldr r7, [%[b], #316]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #312]\n\t" - "str r6, [%[r], #316]\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r6, [%[a], #324]\n\t" - "ldr r5, [%[b], #320]\n\t" - "ldr r7, [%[b], #324]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #320]\n\t" - "str r6, [%[r], #324]\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r6, [%[a], #332]\n\t" - "ldr r5, [%[b], #328]\n\t" - "ldr r7, [%[b], #332]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #328]\n\t" - "str r6, [%[r], #332]\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r6, [%[a], #340]\n\t" - "ldr r5, [%[b], #336]\n\t" - "ldr r7, [%[b], #340]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #336]\n\t" - "str r6, [%[r], #340]\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r6, [%[a], #348]\n\t" - "ldr r5, [%[b], #344]\n\t" - "ldr r7, [%[b], #348]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #344]\n\t" - "str r6, [%[r], #348]\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r6, [%[a], 
#356]\n\t" - "ldr r5, [%[b], #352]\n\t" - "ldr r7, [%[b], #356]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #352]\n\t" - "str r6, [%[r], #356]\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r6, [%[a], #364]\n\t" - "ldr r5, [%[b], #360]\n\t" - "ldr r7, [%[b], #364]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #360]\n\t" - "str r6, [%[r], #364]\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r6, [%[a], #372]\n\t" - "ldr r5, [%[b], #368]\n\t" - "ldr r7, [%[b], #372]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #368]\n\t" - "str r6, [%[r], #372]\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r6, [%[a], #380]\n\t" - "ldr r5, [%[b], #376]\n\t" - "ldr r7, [%[b], #380]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #376]\n\t" - "str r6, [%[r], #380]\n\t" - "ldr r4, [%[a], #384]\n\t" - "ldr r6, [%[a], #388]\n\t" - "ldr r5, [%[b], #384]\n\t" - "ldr r7, [%[b], #388]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #384]\n\t" - "str r6, [%[r], #388]\n\t" - "ldr r4, [%[a], #392]\n\t" - "ldr r6, [%[a], #396]\n\t" - "ldr r5, [%[b], #392]\n\t" - "ldr r7, [%[b], #396]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #392]\n\t" - "str r6, [%[r], #396]\n\t" - "ldr r4, [%[a], #400]\n\t" - "ldr r6, [%[a], #404]\n\t" - "ldr r5, [%[b], #400]\n\t" - "ldr r7, [%[b], #404]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #400]\n\t" - "str r6, [%[r], #404]\n\t" - "ldr r4, [%[a], #408]\n\t" - "ldr r6, [%[a], #412]\n\t" - "ldr r5, [%[b], #408]\n\t" - "ldr r7, [%[b], #412]\n\t" - "and r5, r5, %[m]\n\t" - 
"and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #408]\n\t" - "str r6, [%[r], #412]\n\t" - "ldr r4, [%[a], #416]\n\t" - "ldr r6, [%[a], #420]\n\t" - "ldr r5, [%[b], #416]\n\t" - "ldr r7, [%[b], #420]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #416]\n\t" - "str r6, [%[r], #420]\n\t" - "ldr r4, [%[a], #424]\n\t" - "ldr r6, [%[a], #428]\n\t" - "ldr r5, [%[b], #424]\n\t" - "ldr r7, [%[b], #428]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #424]\n\t" - "str r6, [%[r], #428]\n\t" - "ldr r4, [%[a], #432]\n\t" - "ldr r6, [%[a], #436]\n\t" - "ldr r5, [%[b], #432]\n\t" - "ldr r7, [%[b], #436]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #432]\n\t" - "str r6, [%[r], #436]\n\t" - "ldr r4, [%[a], #440]\n\t" - "ldr r6, [%[a], #444]\n\t" - "ldr r5, [%[b], #440]\n\t" - "ldr r7, [%[b], #444]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #440]\n\t" - "str r6, [%[r], #444]\n\t" - "ldr r4, [%[a], #448]\n\t" - "ldr r6, [%[a], #452]\n\t" - "ldr r5, [%[b], #448]\n\t" - "ldr r7, [%[b], #452]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #448]\n\t" - "str r6, [%[r], #452]\n\t" - "ldr r4, [%[a], #456]\n\t" - "ldr r6, [%[a], #460]\n\t" - "ldr r5, [%[b], #456]\n\t" - "ldr r7, [%[b], #460]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #456]\n\t" - "str r6, [%[r], #460]\n\t" - "ldr r4, [%[a], #464]\n\t" - "ldr r6, [%[a], #468]\n\t" - "ldr r5, [%[b], #464]\n\t" - "ldr r7, [%[b], #468]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], 
#464]\n\t" - "str r6, [%[r], #468]\n\t" - "ldr r4, [%[a], #472]\n\t" - "ldr r6, [%[a], #476]\n\t" - "ldr r5, [%[b], #472]\n\t" - "ldr r7, [%[b], #476]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #472]\n\t" - "str r6, [%[r], #476]\n\t" - "ldr r4, [%[a], #480]\n\t" - "ldr r6, [%[a], #484]\n\t" - "ldr r5, [%[b], #480]\n\t" - "ldr r7, [%[b], #484]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #480]\n\t" - "str r6, [%[r], #484]\n\t" - "ldr r4, [%[a], #488]\n\t" - "ldr r6, [%[a], #492]\n\t" - "ldr r5, [%[b], #488]\n\t" - "ldr r7, [%[b], #492]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #488]\n\t" - "str r6, [%[r], #492]\n\t" - "ldr r4, [%[a], #496]\n\t" - "ldr r6, [%[a], #500]\n\t" - "ldr r5, [%[b], #496]\n\t" - "ldr r7, [%[b], #500]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #496]\n\t" - "str r6, [%[r], #500]\n\t" - "ldr r4, [%[a], #504]\n\t" - "ldr r6, [%[a], #508]\n\t" - "ldr r5, [%[b], #504]\n\t" - "ldr r7, [%[b], #508]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #504]\n\t" - "str r6, [%[r], #508]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, 
r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, 
r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs 
r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + 
"sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" 
+ "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, 
r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 4096 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr 
r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, 
#16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* 
a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, 
r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, 
r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + 
"lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, 
r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, 
r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + 
"lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], 
#92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, 
[%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], #200]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" 
+ "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, 
r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" - "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+64] += m[64] * mu\n\t" - "ldr r7, [%[m], #256]\n\t" - "ldr r9, [%[a], #256]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #256]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+65] += m[65] * mu\n\t" - "ldr r7, [%[m], #260]\n\t" - "ldr r9, [%[a], #260]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str 
r9, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+66] += m[66] * mu\n\t" - "ldr r7, [%[m], #264]\n\t" - "ldr r9, [%[a], #264]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #264]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+67] += m[67] * mu\n\t" - "ldr r7, [%[m], #268]\n\t" - "ldr r9, [%[a], #268]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+68] += m[68] * mu\n\t" - "ldr r7, [%[m], #272]\n\t" - "ldr r9, 
[%[a], #272]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #272]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+69] += m[69] * mu\n\t" - "ldr r7, [%[m], #276]\n\t" - "ldr r9, [%[a], #276]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+70] += m[70] * mu\n\t" - "ldr r7, [%[m], #280]\n\t" - "ldr r9, [%[a], #280]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #280]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+71] += m[71] * mu\n\t" - "ldr r7, [%[m], #284]\n\t" - "ldr r9, [%[a], #284]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" 
+ "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+72] += m[72] * mu\n\t" - "ldr r7, [%[m], #288]\n\t" - "ldr r9, [%[a], #288]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #288]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+73] += m[73] * mu\n\t" - "ldr r7, [%[m], #292]\n\t" - "ldr r9, [%[a], #292]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+74] += m[74] * mu\n\t" - "ldr r7, [%[m], #296]\n\t" - "ldr r9, [%[a], #296]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #296]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+75] += m[75] * mu\n\t" - "ldr r7, [%[m], #300]\n\t" - "ldr r9, [%[a], #300]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+76] += m[76] * mu\n\t" - "ldr r7, [%[m], #304]\n\t" - "ldr r9, [%[a], #304]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #304]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+77] += m[77] * mu\n\t" - "ldr r7, [%[m], #308]\n\t" - "ldr r9, [%[a], #308]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+78] += m[78] * mu\n\t" - "ldr r7, [%[m], #312]\n\t" - "ldr r9, [%[a], #312]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #312]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, 
r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+79] += m[79] * mu\n\t" - "ldr r7, [%[m], #316]\n\t" - "ldr r9, [%[a], #316]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #316]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+80] += m[80] * mu\n\t" - "ldr r7, [%[m], #320]\n\t" - "ldr r9, [%[a], #320]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #320]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+81] += m[81] * mu\n\t" - "ldr r7, [%[m], #324]\n\t" - "ldr r9, [%[a], #324]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+82] += m[82] * mu\n\t" - "ldr r7, [%[m], #328]\n\t" - "ldr r9, [%[a], #328]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #328]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], 
#176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+83] += m[83] * mu\n\t" - "ldr r7, [%[m], #332]\n\t" - "ldr r9, [%[a], #332]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+84] += m[84] * mu\n\t" - "ldr r7, [%[m], #336]\n\t" - "ldr r9, [%[a], #336]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #336]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+85] += m[85] * mu\n\t" - "ldr r7, [%[m], #340]\n\t" - "ldr r9, [%[a], #340]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" 
+ "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+86] += m[86] * mu\n\t" - "ldr r7, [%[m], #344]\n\t" - "ldr r9, [%[a], #344]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #344]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+87] += m[87] * mu\n\t" - "ldr r7, [%[m], #348]\n\t" - "ldr r9, [%[a], #348]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+88] += m[88] * mu\n\t" - "ldr r7, [%[m], #352]\n\t" - "ldr r9, [%[a], #352]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], 
#352]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+89] += m[89] * mu\n\t" - "ldr r7, [%[m], #356]\n\t" - "ldr r9, [%[a], #356]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" "adc r4, r4, #0\n\t" - "# a[i+90] += m[90] * mu\n\t" - "ldr r7, [%[m], #360]\n\t" - "ldr r9, [%[a], #360]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #360]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+91] += m[91] * mu\n\t" - "ldr r7, [%[m], #364]\n\t" - "ldr r9, [%[a], #364]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, 
r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+92] += m[92] * mu\n\t" - "ldr r7, [%[m], #368]\n\t" - "ldr r9, [%[a], #368]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #368]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+93] += m[93] * mu\n\t" - "ldr r7, [%[m], #372]\n\t" - "ldr r9, [%[a], #372]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #372]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" "adc r4, r4, #0\n\t" - "# a[i+94] += m[94] * mu\n\t" - "ldr r7, [%[m], #376]\n\t" - "ldr r9, [%[a], #376]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #376]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+95] += m[95] * mu\n\t" - "ldr r7, [%[m], #380]\n\t" - "ldr r9, [%[a], #380]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #380]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + 
"adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+96] += m[96] * mu\n\t" - "ldr r7, [%[m], #384]\n\t" - "ldr r9, [%[a], #384]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #384]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+97] += m[97] * mu\n\t" - "ldr r7, [%[m], #388]\n\t" - "ldr r9, [%[a], #388]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #388]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" "adc r4, r4, #0\n\t" - "# a[i+98] += m[98] * mu\n\t" - "ldr r7, [%[m], #392]\n\t" - "ldr r9, [%[a], #392]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #392]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+99] += m[99] * mu\n\t" - "ldr r7, [%[m], 
#396]\n\t" - "ldr r9, [%[a], #396]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #396]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+100] += m[100] * mu\n\t" - "ldr r7, [%[m], #400]\n\t" - "ldr r9, [%[a], #400]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #400]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+101] += m[101] * mu\n\t" - "ldr r7, [%[m], #404]\n\t" - "ldr r9, [%[a], #404]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #404]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" "adc r4, r4, #0\n\t" - "# a[i+102] += m[102] * mu\n\t" - "ldr r7, [%[m], #408]\n\t" - "ldr r9, [%[a], #408]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #408]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+103] += m[103] * mu\n\t" - "ldr r7, [%[m], #412]\n\t" - "ldr r9, [%[a], #412]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #412]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+104] += m[104] * mu\n\t" - "ldr r7, [%[m], #416]\n\t" - "ldr r9, [%[a], #416]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #416]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+105] += m[105] * mu\n\t" - "ldr r7, [%[m], #420]\n\t" - "ldr r9, [%[a], #420]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #420]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" "adc r4, r4, #0\n\t" - "# a[i+106] += m[106] 
* mu\n\t" - "ldr r7, [%[m], #424]\n\t" - "ldr r9, [%[a], #424]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #424]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+107] += m[107] * mu\n\t" - "ldr r7, [%[m], #428]\n\t" - "ldr r9, [%[a], #428]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #428]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+108] += m[108] * mu\n\t" - "ldr r7, [%[m], #432]\n\t" - "ldr r9, [%[a], #432]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #432]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+109] += m[109] * mu\n\t" - "ldr r7, [%[m], #436]\n\t" - "ldr r9, [%[a], #436]\n\t" - 
"umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #436]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" "adc r4, r4, #0\n\t" - "# a[i+110] += m[110] * mu\n\t" - "ldr r7, [%[m], #440]\n\t" - "ldr r9, [%[a], #440]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #440]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+111] += m[111] * mu\n\t" - "ldr r7, [%[m], #444]\n\t" - "ldr r9, [%[a], #444]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #444]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds 
r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+112] += m[112] * mu\n\t" - "ldr r7, [%[m], #448]\n\t" - "ldr r9, [%[a], #448]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #448]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+113] += m[113] * mu\n\t" - "ldr r7, [%[m], #452]\n\t" - "ldr r9, [%[a], #452]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #452]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" "adc r4, r4, #0\n\t" - "# a[i+114] += m[114] * mu\n\t" - "ldr r7, [%[m], #456]\n\t" - "ldr r9, [%[a], #456]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #456]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+115] += m[115] * mu\n\t" - "ldr r7, [%[m], #460]\n\t" - "ldr r9, [%[a], #460]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #460]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+116] += m[116] * mu\n\t" - "ldr r7, [%[m], #464]\n\t" - "ldr r9, [%[a], #464]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #464]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+117] += m[117] * mu\n\t" - "ldr r7, [%[m], #468]\n\t" - "ldr r9, [%[a], #468]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #468]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" "adc r4, r4, #0\n\t" - "# a[i+118] += m[118] * mu\n\t" - "ldr r7, [%[m], #472]\n\t" - "ldr r9, [%[a], #472]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #472]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+119] += m[119] * mu\n\t" - "ldr r7, [%[m], #476]\n\t" - "ldr r9, [%[a], #476]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, 
#0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #476]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+120] += m[120] * mu\n\t" - "ldr r7, [%[m], #480]\n\t" - "ldr r9, [%[a], #480]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #480]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+121] += m[121] * mu\n\t" - "ldr r7, [%[m], #484]\n\t" - "ldr r9, [%[a], #484]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #484]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" "adc r4, r4, #0\n\t" - "# a[i+122] += m[122] * mu\n\t" - "ldr r7, [%[m], #488]\n\t" - "ldr r9, [%[a], #488]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #488]\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+123] += m[123] * mu\n\t" - "ldr r7, [%[m], #492]\n\t" - "ldr r9, [%[a], #492]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #492]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+124] += m[124] * mu\n\t" - "ldr r7, [%[m], #496]\n\t" - "ldr r9, [%[a], #496]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #496]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+125] += m[125] * mu\n\t" - "ldr r7, [%[m], #500]\n\t" - "ldr r9, [%[a], #500]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #500]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" "adc r4, r4, #0\n\t" - "# a[i+126] += m[126] * mu\n\t" - "ldr r7, [%[m], #504]\n\t" - "ldr r9, [%[a], #504]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, 
r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #504]\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+127] += m[127] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + 
"lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" 
+ "adc r4, r4, #0\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, [%[a], #304]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], #316]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, 
[%[a], #328]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + 
"lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + 
"str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r7, [%[m], #380]\n\t" + "ldr r10, [%[a], #380]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+96] += m[96] * mu */ + "ldr r7, [%[m], #384]\n\t" + "ldr r10, [%[a], #384]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+97] += m[97] * mu */ + "ldr r7, [%[m], #388]\n\t" + "ldr r10, [%[a], #388]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #388]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+98] += m[98] * mu */ + "ldr r7, [%[m], #392]\n\t" + "ldr r10, [%[a], #392]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" 
+ "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #392]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+99] += m[99] * mu */ + "ldr r7, [%[m], #396]\n\t" + "ldr r10, [%[a], #396]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #396]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+100] += m[100] * mu */ + "ldr r7, [%[m], #400]\n\t" + "ldr r10, [%[a], #400]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #400]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+101] += m[101] * mu */ + "ldr r7, 
[%[m], #404]\n\t" + "ldr r10, [%[a], #404]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #404]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+102] += m[102] * mu */ + "ldr r7, [%[m], #408]\n\t" + "ldr r10, [%[a], #408]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #408]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+103] += m[103] * mu */ + "ldr r7, [%[m], #412]\n\t" + "ldr r10, [%[a], #412]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + 
"lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #412]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+104] += m[104] * mu */ + "ldr r7, [%[m], #416]\n\t" + "ldr r10, [%[a], #416]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #416]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+105] += m[105] * mu */ + "ldr r7, [%[m], #420]\n\t" + "ldr r10, [%[a], #420]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #420]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+106] += m[106] * mu */ + "ldr r7, [%[m], #424]\n\t" + "ldr r10, [%[a], #424]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl 
r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #424]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+107] += m[107] * mu */ + "ldr r7, [%[m], #428]\n\t" + "ldr r10, [%[a], #428]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #428]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+108] += m[108] * mu */ + "ldr r7, [%[m], #432]\n\t" + "ldr r10, [%[a], #432]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #432]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+109] += m[109] * mu */ + "ldr r7, [%[m], #436]\n\t" + "ldr r10, [%[a], #436]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, 
r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #436]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+110] += m[110] * mu */ + "ldr r7, [%[m], #440]\n\t" + "ldr r10, [%[a], #440]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #440]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+111] += m[111] * mu */ + "ldr r7, [%[m], #444]\n\t" + "ldr r10, [%[a], #444]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, 
r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #444]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+112] += m[112] * mu */ + "ldr r7, [%[m], #448]\n\t" + "ldr r10, [%[a], #448]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #448]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+113] += m[113] * mu */ + "ldr r7, [%[m], #452]\n\t" + "ldr r10, [%[a], #452]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #452]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+114] += m[114] * mu */ + "ldr r7, [%[m], #456]\n\t" + "ldr r10, [%[a], #456]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #456]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+115] += m[115] * mu */ + "ldr r7, [%[m], #460]\n\t" + "ldr r10, [%[a], #460]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #460]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+116] += m[116] * mu */ + "ldr r7, [%[m], #464]\n\t" + "ldr r10, [%[a], #464]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #464]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+117] += m[117] * mu */ + "ldr r7, [%[m], #468]\n\t" + "ldr r10, [%[a], #468]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #468]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+118] += m[118] * mu */ + "ldr r7, [%[m], #472]\n\t" + "ldr r10, [%[a], #472]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #472]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+119] += m[119] * mu */ + "ldr r7, [%[m], #476]\n\t" + "ldr r10, [%[a], #476]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #476]\n\t" + "adc r4, 
r4, #0\n\t" + /* a[i+120] += m[120] * mu */ + "ldr r7, [%[m], #480]\n\t" + "ldr r10, [%[a], #480]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #480]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+121] += m[121] * mu */ + "ldr r7, [%[m], #484]\n\t" + "ldr r10, [%[a], #484]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #484]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+122] += m[122] * mu */ + "ldr r7, [%[m], #488]\n\t" + "ldr r10, [%[a], #488]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #488]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+123] += m[123] * mu */ + "ldr r7, [%[m], #492]\n\t" + "ldr r10, [%[a], #492]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #492]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+124] += m[124] * mu */ + "ldr r7, [%[m], #496]\n\t" + "ldr r10, [%[a], #496]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #496]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+125] += m[125] * mu */ + "ldr r7, [%[m], #500]\n\t" + "ldr r10, [%[a], #500]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #500]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+126] += m[126] * mu */ + "ldr r7, [%[m], #504]\n\t" + "ldr r10, [%[a], #504]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #504]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+127] += m[127] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #508]\n\t" +#else "ldr r7, [%[m], #508]\n\t" - "ldr r9, [%[a], #508]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #508]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #508]\n\t" - "ldr r9, [%[a], #512]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #512]\n\t" - "adc 
%[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #508]\n\t" + "ldr r10, [%[a], #512]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #512]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #512\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x200\n\t" + "blt L_sp_4096_mont_reduce_128_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + 
"adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], 
#68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal 
r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" 
+ "str r10, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, 
#0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+54] += m[54] * mu */ + "ldr 
r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], 
#252]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umlal r10, 
r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, [%[a], #304]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], #316]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + 
"str r10, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, [%[a], #328]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, 
#0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r7, [%[m], #380]\n\t" + "ldr r10, [%[a], #380]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+96] += m[96] * mu */ + "ldr r7, [%[m], #384]\n\t" + "ldr r10, [%[a], #384]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+97] += m[97] * mu */ + "ldr r7, [%[m], #388]\n\t" + "ldr r10, [%[a], #388]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #388]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+98] += m[98] * mu */ + "ldr r7, [%[m], #392]\n\t" + "ldr r10, [%[a], #392]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #392]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+99] += m[99] * mu */ + "ldr r7, [%[m], #396]\n\t" + "ldr r10, [%[a], #396]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #396]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+100] += m[100] * mu */ + "ldr 
r7, [%[m], #400]\n\t" + "ldr r10, [%[a], #400]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #400]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+101] += m[101] * mu */ + "ldr r7, [%[m], #404]\n\t" + "ldr r10, [%[a], #404]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #404]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+102] += m[102] * mu */ + "ldr r7, [%[m], #408]\n\t" + "ldr r10, [%[a], #408]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #408]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+103] += m[103] * mu */ + "ldr r7, [%[m], #412]\n\t" + "ldr r10, [%[a], #412]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #412]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+104] += m[104] * mu */ + "ldr r7, [%[m], #416]\n\t" + "ldr r10, [%[a], #416]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #416]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+105] += m[105] * mu */ + "ldr r7, [%[m], #420]\n\t" + "ldr r10, [%[a], #420]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #420]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+106] += m[106] * mu */ + "ldr r7, [%[m], #424]\n\t" + "ldr r10, [%[a], #424]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #424]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+107] += m[107] * mu */ + "ldr r7, [%[m], #428]\n\t" + "ldr r10, [%[a], #428]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #428]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+108] += m[108] * mu */ + "ldr r7, [%[m], #432]\n\t" + "ldr r10, [%[a], #432]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #432]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+109] += m[109] * mu */ + "ldr r7, [%[m], #436]\n\t" + "ldr 
r10, [%[a], #436]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #436]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+110] += m[110] * mu */ + "ldr r7, [%[m], #440]\n\t" + "ldr r10, [%[a], #440]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #440]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+111] += m[111] * mu */ + "ldr r7, [%[m], #444]\n\t" + "ldr r10, [%[a], #444]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #444]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+112] += m[112] * mu */ + "ldr r7, [%[m], #448]\n\t" + "ldr r10, [%[a], #448]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #448]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+113] += m[113] * mu */ + "ldr r7, [%[m], #452]\n\t" + "ldr r10, [%[a], #452]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #452]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+114] += m[114] * mu */ + "ldr r7, [%[m], #456]\n\t" + "ldr r10, [%[a], #456]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #456]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+115] += m[115] * mu */ + "ldr r7, [%[m], #460]\n\t" + "ldr r10, [%[a], #460]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #460]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+116] += m[116] * mu */ + "ldr r7, [%[m], #464]\n\t" + "ldr r10, [%[a], #464]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #464]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+117] += m[117] * mu */ + "ldr r7, [%[m], #468]\n\t" + "ldr r10, [%[a], #468]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #468]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+118] += m[118] * mu */ + "ldr r7, [%[m], #472]\n\t" + "ldr r10, [%[a], #472]\n\t" + 
"mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #472]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+119] += m[119] * mu */ + "ldr r7, [%[m], #476]\n\t" + "ldr r10, [%[a], #476]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #476]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+120] += m[120] * mu */ + "ldr r7, [%[m], #480]\n\t" + "ldr r10, [%[a], #480]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #480]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+121] += m[121] * mu */ + "ldr r7, [%[m], #484]\n\t" + "ldr r10, [%[a], #484]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #484]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+122] += m[122] * mu */ + "ldr r7, [%[m], #488]\n\t" + "ldr r10, [%[a], #488]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #488]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+123] += m[123] * mu */ + "ldr r7, [%[m], #492]\n\t" + "ldr r10, [%[a], #492]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #492]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+124] += m[124] * mu */ + "ldr r7, [%[m], #496]\n\t" + "ldr r10, [%[a], #496]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #496]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+125] += m[125] * mu */ + "ldr r7, [%[m], #500]\n\t" + "ldr r10, [%[a], #500]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #500]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+126] += m[126] * mu */ + "ldr r7, [%[m], #504]\n\t" + "ldr r10, [%[a], #504]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #504]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+127] += m[127] * mu */ + "ldr r7, [%[m], #508]\n\t" + "ldr r10, [%[a], #508]\n\t" + "umull r6, r7, r8, r7\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #508]\n\t" + "ldr r10, [%[a], #512]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #512]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x200\n\t" + "blt L_sp_4096_mont_reduce_128_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], 
#16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], #72]\n\t" + "ldr r9, [%[a], 
#72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #124]\n\t" + /* 
a[i+32] += m[32] * mu */ + "ldr r10, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #128]\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r10, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #132]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r10, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #136]\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r10, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #140]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r10, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #144]\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r10, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #148]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r10, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #152]\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r10, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #156]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r10, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #160]\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r10, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #164]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r10, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #168]\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r10, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #172]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r10, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #176]\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r10, [%[m], #180]\n\t" + "ldr 
r9, [%[a], #180]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #180]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r10, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #184]\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r10, [%[m], #188]\n\t" + "ldr r9, [%[a], #188]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #188]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r10, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #192]\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r10, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #196]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r10, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #200]\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r10, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #204]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r10, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #208]\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r10, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #212]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r10, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #216]\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r10, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #220]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r10, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #224]\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r10, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #228]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r10, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str 
r9, [%[a], #232]\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r10, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #236]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r10, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #240]\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r10, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #244]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r10, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #248]\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r10, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #252]\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r10, [%[m], #256]\n\t" + "ldr r9, [%[a], #256]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #256]\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r10, [%[m], #260]\n\t" + "ldr r9, [%[a], #260]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #260]\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r10, [%[m], #264]\n\t" + "ldr r9, [%[a], #264]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #264]\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r10, [%[m], #268]\n\t" + "ldr r9, [%[a], #268]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #268]\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r10, [%[m], #272]\n\t" + "ldr r9, [%[a], #272]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #272]\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r10, [%[m], #276]\n\t" + "ldr r9, [%[a], #276]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #276]\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r10, [%[m], #280]\n\t" + "ldr r9, [%[a], #280]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #280]\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r10, [%[m], #284]\n\t" + "ldr r9, [%[a], #284]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #284]\n\t" + /* a[i+72] += m[72] * mu */ + "ldr 
r10, [%[m], #288]\n\t" + "ldr r9, [%[a], #288]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #288]\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r10, [%[m], #292]\n\t" + "ldr r9, [%[a], #292]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #292]\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r10, [%[m], #296]\n\t" + "ldr r9, [%[a], #296]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #296]\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r10, [%[m], #300]\n\t" + "ldr r9, [%[a], #300]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #300]\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r10, [%[m], #304]\n\t" + "ldr r9, [%[a], #304]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #304]\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r10, [%[m], #308]\n\t" + "ldr r9, [%[a], #308]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #308]\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r10, [%[m], #312]\n\t" + "ldr r9, [%[a], #312]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #312]\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r10, [%[m], #316]\n\t" + "ldr r9, [%[a], #316]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #316]\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r10, [%[m], #320]\n\t" + "ldr r9, [%[a], #320]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #320]\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r10, [%[m], #324]\n\t" + "ldr r9, [%[a], #324]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #324]\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r10, [%[m], #328]\n\t" + "ldr r9, [%[a], #328]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #328]\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r10, [%[m], #332]\n\t" + "ldr r9, [%[a], #332]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #332]\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r10, [%[m], #336]\n\t" + "ldr r9, [%[a], #336]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #336]\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r10, [%[m], #340]\n\t" + "ldr r9, [%[a], #340]\n\t" + "umaal 
r9, r3, r11, r10\n\t" + "str r9, [%[a], #340]\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r10, [%[m], #344]\n\t" + "ldr r9, [%[a], #344]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #344]\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r10, [%[m], #348]\n\t" + "ldr r9, [%[a], #348]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #348]\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r10, [%[m], #352]\n\t" + "ldr r9, [%[a], #352]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #352]\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r10, [%[m], #356]\n\t" + "ldr r9, [%[a], #356]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #356]\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r10, [%[m], #360]\n\t" + "ldr r9, [%[a], #360]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #360]\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r10, [%[m], #364]\n\t" + "ldr r9, [%[a], #364]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #364]\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r10, [%[m], #368]\n\t" + "ldr r9, [%[a], #368]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #368]\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r10, [%[m], #372]\n\t" + "ldr r9, [%[a], #372]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #372]\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r10, [%[m], #376]\n\t" + "ldr r9, [%[a], #376]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #376]\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r10, [%[m], #380]\n\t" + "ldr r9, [%[a], #380]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #380]\n\t" + /* a[i+96] += m[96] * mu */ + "ldr r10, [%[m], #384]\n\t" + "ldr r9, [%[a], #384]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #384]\n\t" + /* a[i+97] += m[97] * mu */ + "ldr r10, [%[m], #388]\n\t" + "ldr r9, [%[a], #388]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #388]\n\t" + /* a[i+98] += m[98] * mu */ + "ldr r10, [%[m], #392]\n\t" + "ldr r9, [%[a], #392]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #392]\n\t" + /* 
a[i+99] += m[99] * mu */ + "ldr r10, [%[m], #396]\n\t" + "ldr r9, [%[a], #396]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #396]\n\t" + /* a[i+100] += m[100] * mu */ + "ldr r10, [%[m], #400]\n\t" + "ldr r9, [%[a], #400]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #400]\n\t" + /* a[i+101] += m[101] * mu */ + "ldr r10, [%[m], #404]\n\t" + "ldr r9, [%[a], #404]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #404]\n\t" + /* a[i+102] += m[102] * mu */ + "ldr r10, [%[m], #408]\n\t" + "ldr r9, [%[a], #408]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #408]\n\t" + /* a[i+103] += m[103] * mu */ + "ldr r10, [%[m], #412]\n\t" + "ldr r9, [%[a], #412]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #412]\n\t" + /* a[i+104] += m[104] * mu */ + "ldr r10, [%[m], #416]\n\t" + "ldr r9, [%[a], #416]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #416]\n\t" + /* a[i+105] += m[105] * mu */ + "ldr r10, [%[m], #420]\n\t" + "ldr r9, [%[a], #420]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #420]\n\t" + /* a[i+106] += m[106] * mu */ + "ldr r10, [%[m], #424]\n\t" + "ldr r9, [%[a], #424]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #424]\n\t" + /* a[i+107] += m[107] * mu */ + "ldr r10, [%[m], #428]\n\t" + "ldr r9, [%[a], #428]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #428]\n\t" + /* a[i+108] += m[108] * mu */ + "ldr r10, [%[m], #432]\n\t" + "ldr r9, [%[a], #432]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #432]\n\t" + /* a[i+109] += m[109] * mu */ + "ldr r10, [%[m], #436]\n\t" + "ldr r9, [%[a], #436]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #436]\n\t" + /* a[i+110] += m[110] * mu */ + "ldr r10, [%[m], #440]\n\t" + "ldr r9, [%[a], #440]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #440]\n\t" + /* a[i+111] += m[111] * mu */ + "ldr r10, [%[m], #444]\n\t" + "ldr r9, [%[a], #444]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #444]\n\t" + /* a[i+112] += m[112] * mu */ + "ldr r10, 
[%[m], #448]\n\t" + "ldr r9, [%[a], #448]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #448]\n\t" + /* a[i+113] += m[113] * mu */ + "ldr r10, [%[m], #452]\n\t" + "ldr r9, [%[a], #452]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #452]\n\t" + /* a[i+114] += m[114] * mu */ + "ldr r10, [%[m], #456]\n\t" + "ldr r9, [%[a], #456]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #456]\n\t" + /* a[i+115] += m[115] * mu */ + "ldr r10, [%[m], #460]\n\t" + "ldr r9, [%[a], #460]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #460]\n\t" + /* a[i+116] += m[116] * mu */ + "ldr r10, [%[m], #464]\n\t" + "ldr r9, [%[a], #464]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #464]\n\t" + /* a[i+117] += m[117] * mu */ + "ldr r10, [%[m], #468]\n\t" + "ldr r9, [%[a], #468]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #468]\n\t" + /* a[i+118] += m[118] * mu */ + "ldr r10, [%[m], #472]\n\t" + "ldr r9, [%[a], #472]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #472]\n\t" + /* a[i+119] += m[119] * mu */ + "ldr r10, [%[m], #476]\n\t" + "ldr r9, [%[a], #476]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #476]\n\t" + /* a[i+120] += m[120] * mu */ + "ldr r10, [%[m], #480]\n\t" + "ldr r9, [%[a], #480]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #480]\n\t" + /* a[i+121] += m[121] * mu */ + "ldr r10, [%[m], #484]\n\t" + "ldr r9, [%[a], #484]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #484]\n\t" + /* a[i+122] += m[122] * mu */ + "ldr r10, [%[m], #488]\n\t" + "ldr r9, [%[a], #488]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #488]\n\t" + /* a[i+123] += m[123] * mu */ + "ldr r10, [%[m], #492]\n\t" + "ldr r9, [%[a], #492]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #492]\n\t" + /* a[i+124] += m[124] * mu */ + "ldr r10, [%[m], #496]\n\t" + "ldr r9, [%[a], #496]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #496]\n\t" + /* a[i+125] += m[125] * mu */ + "ldr r10, [%[m], #500]\n\t" + "ldr r9, [%[a], 
#500]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #500]\n\t" + /* a[i+126] += m[126] * mu */ + "ldr r10, [%[m], #504]\n\t" + "ldr r9, [%[a], #504]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #504]\n\t" + /* a[i+127] += m[127] * mu */ + "ldr r10, [%[m], #508]\n\t" + "ldr r9, [%[a], #508]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #512]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #508]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #512]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x200\n\t" + "blt L_sp_4096_mont_reduce_128_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -25369,9 +57179,9 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_128(r, a, b); @@ -25383,9 +57193,9 @@ static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_128(r, a); @@ -25399,40 +57209,34 @@ static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #512\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x200\n\t" + "\n" + "L_sp_4096_sub_128_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_4096_sub_128_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -25442,596 
+57246,444 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r7, [%[b], #32]\n\t" - "ldr r8, [%[b], #36]\n\t" - "ldr r9, [%[b], #40]\n\t" - "ldr r10, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str 
r3, [%[r], #32]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r7, [%[b], #48]\n\t" - "ldr r8, [%[b], #52]\n\t" - "ldr r9, [%[b], #56]\n\t" - "ldr r10, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #48]\n\t" - "str r4, [%[r], #52]\n\t" - "str r5, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r7, [%[b], #64]\n\t" - "ldr r8, [%[b], #68]\n\t" - "ldr r9, [%[b], #72]\n\t" - "ldr r10, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #64]\n\t" - "str r4, [%[r], #68]\n\t" - "str r5, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r7, [%[b], #80]\n\t" - "ldr r8, [%[b], #84]\n\t" - "ldr r9, [%[b], #88]\n\t" - "ldr r10, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #80]\n\t" - "str r4, [%[r], #84]\n\t" - "str r5, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r7, [%[b], #96]\n\t" - "ldr r8, [%[b], #100]\n\t" - "ldr r9, [%[b], #104]\n\t" - "ldr r10, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, 
r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #96]\n\t" - "str r4, [%[r], #100]\n\t" - "str r5, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r7, [%[b], #112]\n\t" - "ldr r8, [%[b], #116]\n\t" - "ldr r9, [%[b], #120]\n\t" - "ldr r10, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #112]\n\t" - "str r4, [%[r], #116]\n\t" - "str r5, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r3, [%[a], #128]\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r7, [%[b], #128]\n\t" - "ldr r8, [%[b], #132]\n\t" - "ldr r9, [%[b], #136]\n\t" - "ldr r10, [%[b], #140]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #128]\n\t" - "str r4, [%[r], #132]\n\t" - "str r5, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r3, [%[a], #144]\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r7, [%[b], #144]\n\t" - "ldr r8, [%[b], #148]\n\t" - "ldr r9, [%[b], #152]\n\t" - "ldr r10, [%[b], #156]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #144]\n\t" - "str r4, [%[r], #148]\n\t" - "str r5, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r3, [%[a], #160]\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[a], #168]\n\t" - "ldr r6, [%[a], 
#172]\n\t" - "ldr r7, [%[b], #160]\n\t" - "ldr r8, [%[b], #164]\n\t" - "ldr r9, [%[b], #168]\n\t" - "ldr r10, [%[b], #172]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #160]\n\t" - "str r4, [%[r], #164]\n\t" - "str r5, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r3, [%[a], #176]\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r7, [%[b], #176]\n\t" - "ldr r8, [%[b], #180]\n\t" - "ldr r9, [%[b], #184]\n\t" - "ldr r10, [%[b], #188]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #176]\n\t" - "str r4, [%[r], #180]\n\t" - "str r5, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r3, [%[a], #192]\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r7, [%[b], #192]\n\t" - "ldr r8, [%[b], #196]\n\t" - "ldr r9, [%[b], #200]\n\t" - "ldr r10, [%[b], #204]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #192]\n\t" - "str r4, [%[r], #196]\n\t" - "str r5, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r3, [%[a], #208]\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r7, [%[b], #208]\n\t" - "ldr r8, [%[b], #212]\n\t" - "ldr r9, [%[b], #216]\n\t" - "ldr r10, [%[b], #220]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #208]\n\t" - 
"str r4, [%[r], #212]\n\t" - "str r5, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r3, [%[a], #224]\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r7, [%[b], #224]\n\t" - "ldr r8, [%[b], #228]\n\t" - "ldr r9, [%[b], #232]\n\t" - "ldr r10, [%[b], #236]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #224]\n\t" - "str r4, [%[r], #228]\n\t" - "str r5, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r3, [%[a], #240]\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r7, [%[b], #240]\n\t" - "ldr r8, [%[b], #244]\n\t" - "ldr r9, [%[b], #248]\n\t" - "ldr r10, [%[b], #252]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #240]\n\t" - "str r4, [%[r], #244]\n\t" - "str r5, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "ldr r3, [%[a], #256]\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[a], #264]\n\t" - "ldr r6, [%[a], #268]\n\t" - "ldr r7, [%[b], #256]\n\t" - "ldr r8, [%[b], #260]\n\t" - "ldr r9, [%[b], #264]\n\t" - "ldr r10, [%[b], #268]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #256]\n\t" - "str r4, [%[r], #260]\n\t" - "str r5, [%[r], #264]\n\t" - "str r6, [%[r], #268]\n\t" - "ldr r3, [%[a], #272]\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[a], #280]\n\t" - "ldr r6, [%[a], #284]\n\t" - "ldr r7, [%[b], #272]\n\t" - "ldr r8, [%[b], #276]\n\t" - "ldr r9, [%[b], #280]\n\t" - "ldr r10, [%[b], #284]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + 
"ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #272]\n\t" - "str r4, [%[r], #276]\n\t" - "str r5, [%[r], #280]\n\t" - "str r6, [%[r], #284]\n\t" - "ldr r3, [%[a], #288]\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[a], #296]\n\t" - "ldr r6, [%[a], #300]\n\t" - "ldr r7, [%[b], #288]\n\t" - "ldr r8, [%[b], #292]\n\t" - "ldr r9, [%[b], #296]\n\t" - "ldr r10, [%[b], #300]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #288]\n\t" - "str r4, [%[r], #292]\n\t" - "str r5, [%[r], #296]\n\t" - "str r6, [%[r], #300]\n\t" - "ldr r3, [%[a], #304]\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[a], #312]\n\t" - "ldr r6, [%[a], #316]\n\t" - "ldr r7, [%[b], #304]\n\t" - "ldr r8, [%[b], #308]\n\t" - "ldr r9, [%[b], #312]\n\t" - "ldr r10, [%[b], #316]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #304]\n\t" - "str r4, [%[r], #308]\n\t" - "str r5, [%[r], #312]\n\t" - "str r6, [%[r], #316]\n\t" - "ldr r3, [%[a], #320]\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[a], #328]\n\t" - "ldr r6, [%[a], #332]\n\t" - "ldr r7, [%[b], #320]\n\t" - "ldr r8, [%[b], #324]\n\t" - "ldr r9, [%[b], #328]\n\t" - "ldr r10, [%[b], #332]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #320]\n\t" - "str r4, [%[r], #324]\n\t" - "str r5, [%[r], #328]\n\t" - "str r6, [%[r], #332]\n\t" - "ldr r3, [%[a], #336]\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[a], 
#344]\n\t" - "ldr r6, [%[a], #348]\n\t" - "ldr r7, [%[b], #336]\n\t" - "ldr r8, [%[b], #340]\n\t" - "ldr r9, [%[b], #344]\n\t" - "ldr r10, [%[b], #348]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #336]\n\t" - "str r4, [%[r], #340]\n\t" - "str r5, [%[r], #344]\n\t" - "str r6, [%[r], #348]\n\t" - "ldr r3, [%[a], #352]\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[a], #360]\n\t" - "ldr r6, [%[a], #364]\n\t" - "ldr r7, [%[b], #352]\n\t" - "ldr r8, [%[b], #356]\n\t" - "ldr r9, [%[b], #360]\n\t" - "ldr r10, [%[b], #364]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #352]\n\t" - "str r4, [%[r], #356]\n\t" - "str r5, [%[r], #360]\n\t" - "str r6, [%[r], #364]\n\t" - "ldr r3, [%[a], #368]\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[a], #376]\n\t" - "ldr r6, [%[a], #380]\n\t" - "ldr r7, [%[b], #368]\n\t" - "ldr r8, [%[b], #372]\n\t" - "ldr r9, [%[b], #376]\n\t" - "ldr r10, [%[b], #380]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #368]\n\t" - "str r4, [%[r], #372]\n\t" - "str r5, [%[r], #376]\n\t" - "str r6, [%[r], #380]\n\t" - "ldr r3, [%[a], #384]\n\t" - "ldr r4, [%[a], #388]\n\t" - "ldr r5, [%[a], #392]\n\t" - "ldr r6, [%[a], #396]\n\t" - "ldr r7, [%[b], #384]\n\t" - "ldr r8, [%[b], #388]\n\t" - "ldr r9, [%[b], #392]\n\t" - "ldr r10, [%[b], #396]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - 
"str r3, [%[r], #384]\n\t" - "str r4, [%[r], #388]\n\t" - "str r5, [%[r], #392]\n\t" - "str r6, [%[r], #396]\n\t" - "ldr r3, [%[a], #400]\n\t" - "ldr r4, [%[a], #404]\n\t" - "ldr r5, [%[a], #408]\n\t" - "ldr r6, [%[a], #412]\n\t" - "ldr r7, [%[b], #400]\n\t" - "ldr r8, [%[b], #404]\n\t" - "ldr r9, [%[b], #408]\n\t" - "ldr r10, [%[b], #412]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #400]\n\t" - "str r4, [%[r], #404]\n\t" - "str r5, [%[r], #408]\n\t" - "str r6, [%[r], #412]\n\t" - "ldr r3, [%[a], #416]\n\t" - "ldr r4, [%[a], #420]\n\t" - "ldr r5, [%[a], #424]\n\t" - "ldr r6, [%[a], #428]\n\t" - "ldr r7, [%[b], #416]\n\t" - "ldr r8, [%[b], #420]\n\t" - "ldr r9, [%[b], #424]\n\t" - "ldr r10, [%[b], #428]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #416]\n\t" - "str r4, [%[r], #420]\n\t" - "str r5, [%[r], #424]\n\t" - "str r6, [%[r], #428]\n\t" - "ldr r3, [%[a], #432]\n\t" - "ldr r4, [%[a], #436]\n\t" - "ldr r5, [%[a], #440]\n\t" - "ldr r6, [%[a], #444]\n\t" - "ldr r7, [%[b], #432]\n\t" - "ldr r8, [%[b], #436]\n\t" - "ldr r9, [%[b], #440]\n\t" - "ldr r10, [%[b], #444]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #432]\n\t" - "str r4, [%[r], #436]\n\t" - "str r5, [%[r], #440]\n\t" - "str r6, [%[r], #444]\n\t" - "ldr r3, [%[a], #448]\n\t" - "ldr r4, [%[a], #452]\n\t" - "ldr r5, [%[a], #456]\n\t" - "ldr r6, [%[a], #460]\n\t" - "ldr r7, [%[b], #448]\n\t" - "ldr r8, [%[b], #452]\n\t" - "ldr r9, [%[b], #456]\n\t" - "ldr r10, [%[b], #460]\n\t" + "stm 
%[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #448]\n\t" - "str r4, [%[r], #452]\n\t" - "str r5, [%[r], #456]\n\t" - "str r6, [%[r], #460]\n\t" - "ldr r3, [%[a], #464]\n\t" - "ldr r4, [%[a], #468]\n\t" - "ldr r5, [%[a], #472]\n\t" - "ldr r6, [%[a], #476]\n\t" - "ldr r7, [%[b], #464]\n\t" - "ldr r8, [%[b], #468]\n\t" - "ldr r9, [%[b], #472]\n\t" - "ldr r10, [%[b], #476]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #464]\n\t" - "str r4, [%[r], #468]\n\t" - "str r5, [%[r], #472]\n\t" - "str r6, [%[r], #476]\n\t" - "ldr r3, [%[a], #480]\n\t" - "ldr r4, [%[a], #484]\n\t" - "ldr r5, [%[a], #488]\n\t" - "ldr r6, [%[a], #492]\n\t" - "ldr r7, [%[b], #480]\n\t" - "ldr r8, [%[b], #484]\n\t" - "ldr r9, [%[b], #488]\n\t" - "ldr r10, [%[b], #492]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #480]\n\t" - "str r4, [%[r], #484]\n\t" - "str r5, [%[r], #488]\n\t" - "str r6, [%[r], #492]\n\t" - "ldr r3, [%[a], #496]\n\t" - "ldr r4, [%[a], #500]\n\t" - "ldr r5, [%[a], #504]\n\t" - "ldr r6, [%[a], #508]\n\t" - "ldr r7, [%[b], #496]\n\t" - "ldr r8, [%[b], #500]\n\t" - "ldr r9, [%[b], #504]\n\t" - "ldr r10, [%[b], #508]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #496]\n\t" - "str r4, [%[r], #500]\n\t" - "str r5, [%[r], #504]\n\t" - "str r6, [%[r], #508]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] 
"r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull 
r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_4096_word_128_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_4096_word_128_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl 
r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -26041,8 +57693,8 @@ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -26050,12 +57702,22 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, (void)m; - div = d[127]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); - for (i=127; i>=0; i--) { - sp_digit hi = t1[128 + i] - (t1[128 + i] == div); - r1 = div_4096_word_128(hi, t1[128 + i - 1], div); + for (i = 127; i > 0; i--) { + if (t1[i + 128] != d[i]) + break; + } + if (t1[i + 128] >= d[i]) { + sp_4096_sub_in_place_128(&t1[128], d); + } + for (i = 127; i >= 0; i--) { + if (t1[128 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + } sp_4096_mul_d_128(t2, d, r1); t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); @@ -26088,7 +57750,8 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_4096_div_128_cond(a, m, NULL, r); } @@ -26132,1456 +57795,1457 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ -static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #508\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r4, #0x1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #0xfc\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #508]\n\t" - "ldr r5, [%[b], #508]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #504]\n\t" - "ldr r5, [%[b], #504]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #500]\n\t" - "ldr r5, [%[b], #500]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #496]\n\t" - "ldr r5, [%[b], #496]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - 
"it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #492]\n\t" - "ldr r5, [%[b], #492]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #488]\n\t" - "ldr r5, [%[b], #488]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #484]\n\t" - "ldr r5, [%[b], #484]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #480]\n\t" - "ldr r5, [%[b], #480]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #476]\n\t" - "ldr r5, [%[b], #476]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #472]\n\t" - "ldr r5, [%[b], #472]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #468]\n\t" - "ldr r5, [%[b], #468]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #464]\n\t" - "ldr r5, [%[b], #464]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], 
r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #460]\n\t" - "ldr r5, [%[b], #460]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #456]\n\t" - "ldr r5, [%[b], #456]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #452]\n\t" - "ldr r5, [%[b], #452]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #448]\n\t" - "ldr r5, [%[b], #448]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #444]\n\t" - "ldr r5, [%[b], #444]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #440]\n\t" - "ldr r5, [%[b], #440]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #436]\n\t" - "ldr r5, [%[b], #436]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #432]\n\t" - "ldr r5, [%[b], #432]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #428]\n\t" 
- "ldr r5, [%[b], #428]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #424]\n\t" - "ldr r5, [%[b], #424]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #420]\n\t" - "ldr r5, [%[b], #420]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #416]\n\t" - "ldr r5, [%[b], #416]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #412]\n\t" - "ldr r5, [%[b], #412]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #408]\n\t" - "ldr r5, [%[b], #408]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #404]\n\t" - "ldr r5, [%[b], #404]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #400]\n\t" - "ldr r5, [%[b], #400]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #396]\n\t" - "ldr r5, [%[b], #396]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #392]\n\t" - "ldr r5, [%[b], #392]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #388]\n\t" - "ldr r5, [%[b], #388]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #384]\n\t" - "ldr r5, [%[b], #384]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #380]\n\t" - "ldr r5, [%[b], #380]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[b], #376]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[b], #372]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[b], #368]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #364]\n\t" - "ldr r5, [%[b], #364]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], 
%[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r5, [%[b], #360]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[b], #356]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[b], #352]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #348]\n\t" - "ldr r5, [%[b], #348]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[b], #344]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[b], #340]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[b], #336]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #332]\n\t" - "ldr r5, [%[b], #332]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne 
r3, r7\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[b], #328]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[b], #324]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[b], #320]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #316]\n\t" - "ldr r5, [%[b], #316]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[b], #312]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[b], #308]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[b], #304]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #300]\n\t" - "ldr r5, [%[b], #300]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[b], #296]\n\t" - 
"and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[b], #292]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[b], #288]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #284]\n\t" - "ldr r5, [%[b], #284]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[b], #280]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[b], #276]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, [%[b], #272]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #268]\n\t" - "ldr r5, [%[b], #268]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[b], #264]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - 
"it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[b], #260]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[b], #256]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], 
r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[b], #228]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" 
- "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], 
%[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne 
r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], 
%[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "eor 
%[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "mov r4, #0x1fc\n\t" #endif - - return r; + "\n" + "L_sp_4096_cmp_128_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_4096_cmp_128_words_%=\n\t" + "eor r2, r2, r3\n\t" +#else + "ldr r12, [%[a], #508]\n\t" + "ldr lr, [%[b], #508]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #504]\n\t" + "ldr lr, [%[b], #504]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #500]\n\t" + "ldr lr, [%[b], #500]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #496]\n\t" + "ldr lr, [%[b], #496]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #492]\n\t" + "ldr lr, [%[b], #492]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #488]\n\t" + "ldr lr, [%[b], #488]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #484]\n\t" + 
"ldr lr, [%[b], #484]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #480]\n\t" + "ldr lr, [%[b], #480]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #476]\n\t" + "ldr lr, [%[b], #476]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #472]\n\t" + "ldr lr, [%[b], #472]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #468]\n\t" + "ldr lr, [%[b], #468]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #464]\n\t" + "ldr lr, [%[b], #464]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #460]\n\t" + "ldr lr, [%[b], #460]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #456]\n\t" + "ldr lr, [%[b], #456]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #452]\n\t" + "ldr lr, [%[b], #452]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, 
lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #448]\n\t" + "ldr lr, [%[b], #448]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #444]\n\t" + "ldr lr, [%[b], #444]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #440]\n\t" + "ldr lr, [%[b], #440]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #436]\n\t" + "ldr lr, [%[b], #436]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #432]\n\t" + "ldr lr, [%[b], #432]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #428]\n\t" + "ldr lr, [%[b], #428]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #424]\n\t" + "ldr lr, [%[b], #424]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #420]\n\t" + "ldr lr, [%[b], #420]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + 
"movne r3, r5\n\t" + "ldr r12, [%[a], #416]\n\t" + "ldr lr, [%[b], #416]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #412]\n\t" + "ldr lr, [%[b], #412]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #408]\n\t" + "ldr lr, [%[b], #408]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #404]\n\t" + "ldr lr, [%[b], #404]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #400]\n\t" + "ldr lr, [%[b], #400]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #396]\n\t" + "ldr lr, [%[b], #396]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #392]\n\t" + "ldr lr, [%[b], #392]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #388]\n\t" + "ldr lr, [%[b], #388]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #384]\n\t" + "ldr lr, [%[b], #384]\n\t" + "and r12, r12, 
r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #380]\n\t" + "ldr lr, [%[b], #380]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #376]\n\t" + "ldr lr, [%[b], #376]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #372]\n\t" + "ldr lr, [%[b], #372]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #368]\n\t" + "ldr lr, [%[b], #368]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #364]\n\t" + "ldr lr, [%[b], #364]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #360]\n\t" + "ldr lr, [%[b], #360]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #356]\n\t" + "ldr lr, [%[b], #356]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #352]\n\t" + "ldr lr, [%[b], #352]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + 
"it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #348]\n\t" + "ldr lr, [%[b], #348]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #344]\n\t" + "ldr lr, [%[b], #344]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #340]\n\t" + "ldr lr, [%[b], #340]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #336]\n\t" + "ldr lr, [%[b], #336]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #332]\n\t" + "ldr lr, [%[b], #332]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #328]\n\t" + "ldr lr, [%[b], #328]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #324]\n\t" + "ldr lr, [%[b], #324]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #320]\n\t" + "ldr lr, [%[b], #320]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], 
#316]\n\t" + "ldr lr, [%[b], #316]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #312]\n\t" + "ldr lr, [%[b], #312]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #308]\n\t" + "ldr lr, [%[b], #308]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #304]\n\t" + "ldr lr, [%[b], #304]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #300]\n\t" + "ldr lr, [%[b], #300]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #296]\n\t" + "ldr lr, [%[b], #296]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #292]\n\t" + "ldr lr, [%[b], #292]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #288]\n\t" + "ldr lr, [%[b], #288]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #284]\n\t" + "ldr lr, [%[b], #284]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs 
r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #280]\n\t" + "ldr lr, [%[b], #280]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #276]\n\t" + "ldr lr, [%[b], #276]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #272]\n\t" + "ldr lr, [%[b], #272]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #268]\n\t" + "ldr lr, [%[b], #268]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #264]\n\t" + "ldr lr, [%[b], #264]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #260]\n\t" + "ldr lr, [%[b], #260]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #256]\n\t" + "ldr lr, [%[b], #256]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it 
ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, [%[b], #248]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and 
r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #192]\n\t" + "ldr lr, [%[b], #192]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, 
r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, 
[%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" 
+ "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it 
ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" 
+ "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, 
r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + ); + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -27593,8 +59257,8 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -27602,12 +59266,15 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di (void)m; - div = d[127]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); - for (i=127; i>=0; i--) { - sp_digit hi = t1[128 + i] - (t1[128 + i] == div); + r1 = sp_4096_cmp_128(&t1[128], d) >= 0; + sp_4096_cond_sub_128(&t1[128], &t1[128], d, (sp_digit)0 - r1); + for (i = 127; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[128 + i] == div); + sp_digit hi = t1[128 + i] + mask; r1 = div_4096_word_128(hi, t1[128 + i - 1], div); + r1 |= mask; sp_4096_mul_d_128(t2, d, r1); t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); @@ -27631,7 +59298,8 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_4096_div_128(a, m, NULL, r); } @@ -27647,12 +59315,14 @@ static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_di * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[8 * 256]; @@ -27667,11 +59337,17 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 256), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 256), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -27723,6 +59399,10 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -27763,7 +59443,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_cond_sub_128(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -27778,12 +59458,14 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 256]; @@ -27798,11 +59480,17 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 256), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 256), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -27862,6 +59550,10 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -27903,7 +59595,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_cond_sub_128(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -27930,7 +59622,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[128 * 5]; @@ -27952,7 +59644,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -27962,9 +59654,9 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 128; r = a + 128 * 2; m = r + 128 * 2; - ah = a + 128; sp_4096_from_bin(ah, 128, in, inLen); #if DIGIT_BIT >= 32 @@ -27982,7 +59674,38 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_4096_from_mp(m, 128, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 128); + err = sp_4096_mod_128_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_4096_mont_sqr_128(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_4096_mont_mul_128(r, r, ah, m, mp); + + for (i = 127; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_4096_sub_in_place_128(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_4096_sqr_128(r, ah); err = sp_4096_mod_128_cond(r, r, m); @@ -28010,7 +59733,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 128); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_4096_mont_sqr_128(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_4096_mont_mul_128(r, r, a, m, mp); @@ -28036,7 +59759,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -28045,6 +59768,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -28053,364 +59777,288 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov lr, #0\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_4096_cond_add_64_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #256\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x100\n\t" + "blt L_sp_4096_cond_add_64_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr 
r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs 
r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r6, [%[a], #132]\n\t" - "ldr r5, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #128]\n\t" - "str r6, [%[r], #132]\n\t" - "ldr r4, 
[%[a], #136]\n\t" - "ldr r6, [%[a], #140]\n\t" - "ldr r5, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #136]\n\t" - "str r6, [%[r], #140]\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r6, [%[a], #148]\n\t" - "ldr r5, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #144]\n\t" - "str r6, [%[r], #148]\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r6, [%[a], #156]\n\t" - "ldr r5, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #152]\n\t" - "str r6, [%[r], #156]\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r6, [%[a], #164]\n\t" - "ldr r5, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #160]\n\t" - "str r6, [%[r], #164]\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r6, [%[a], #172]\n\t" - "ldr r5, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #168]\n\t" - "str r6, [%[r], #172]\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r6, [%[a], #180]\n\t" - "ldr r5, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #176]\n\t" - "str r6, [%[r], #180]\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r6, [%[a], #188]\n\t" - "ldr r5, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #184]\n\t" - "str r6, [%[r], #188]\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r6, [%[a], #196]\n\t" - "ldr r5, [%[b], #192]\n\t" - "ldr r7, [%[b], 
#196]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #192]\n\t" - "str r6, [%[r], #196]\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r6, [%[a], #204]\n\t" - "ldr r5, [%[b], #200]\n\t" - "ldr r7, [%[b], #204]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #200]\n\t" - "str r6, [%[r], #204]\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r6, [%[a], #212]\n\t" - "ldr r5, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #208]\n\t" - "str r6, [%[r], #212]\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r6, [%[a], #220]\n\t" - "ldr r5, [%[b], #216]\n\t" - "ldr r7, [%[b], #220]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #216]\n\t" - "str r6, [%[r], #220]\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r6, [%[a], #228]\n\t" - "ldr r5, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #224]\n\t" - "str r6, [%[r], #228]\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r6, [%[a], #236]\n\t" - "ldr r5, [%[b], #232]\n\t" - "ldr r7, [%[b], #236]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #232]\n\t" - "str r6, [%[r], #236]\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r6, [%[a], #244]\n\t" - "ldr r5, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #240]\n\t" - "str r6, [%[r], #244]\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r6, [%[a], #252]\n\t" - "ldr r5, [%[b], #248]\n\t" - "ldr r7, [%[b], #252]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs 
r6, r6, r7\n\t" - "str r4, [%[r], #248]\n\t" - "str r6, [%[r], #252]\n\t" - "adc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov r8, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + 
"adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -28433,7 +60081,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[128 * 4]; @@ -28467,7 +60115,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -28492,21 +60140,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 128); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[64 * 11]; @@ -28534,8 +60182,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -28583,12 +60237,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { 
ForceZero(a, sizeof(sp_digit) * 64 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -28719,782 +60373,785 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 -static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register byte n asm ("r2") = (byte)n_p; + __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "ldr r3, [%[a], #508]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #504]\n\t" - "str r4, [%[r], #512]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #500]\n\t" - "str r3, [%[r], #508]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #508]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #504]\n\t" + "str r6, [%[r], #512]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #496]\n\t" - "str r2, [%[r], #504]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #492]\n\t" - "str r4, [%[r], #500]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #488]\n\t" - "str r3, [%[r], #496]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #500]\n\t" + "str r5, [%[r], #508]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #496]\n\t" + "str r4, [%[r], #504]\n\t" + "lsr r3, r5, #1\n\t" + "lsl 
r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #492]\n\t" + "str r6, [%[r], #500]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #484]\n\t" - "str r2, [%[r], #492]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #480]\n\t" - "str r4, [%[r], #488]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #476]\n\t" - "str r3, [%[r], #484]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #488]\n\t" + "str r5, [%[r], #496]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #484]\n\t" + "str r4, [%[r], #492]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #480]\n\t" + "str r6, [%[r], #488]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #472]\n\t" - "str r2, [%[r], #480]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #468]\n\t" - "str r4, [%[r], #476]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #464]\n\t" - "str r3, [%[r], #472]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #476]\n\t" + "str r5, [%[r], #484]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #472]\n\t" + "str r4, [%[r], #480]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #468]\n\t" + "str r6, [%[r], #476]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr 
r3, [%[a], #460]\n\t" - "str r2, [%[r], #468]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #456]\n\t" - "str r4, [%[r], #464]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #452]\n\t" - "str r3, [%[r], #460]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #464]\n\t" + "str r5, [%[r], #472]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #460]\n\t" + "str r4, [%[r], #468]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #456]\n\t" + "str r6, [%[r], #464]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #448]\n\t" - "str r2, [%[r], #456]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #444]\n\t" - "str r4, [%[r], #452]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #440]\n\t" - "str r3, [%[r], #448]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #452]\n\t" + "str r5, [%[r], #460]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #448]\n\t" + "str r4, [%[r], #456]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #444]\n\t" + "str r6, [%[r], #452]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #436]\n\t" - "str r2, [%[r], #444]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #432]\n\t" - "str r4, [%[r], #440]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #428]\n\t" - "str r3, [%[r], #436]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #440]\n\t" + "str r5, [%[r], #448]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #436]\n\t" + "str r4, [%[r], #444]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #432]\n\t" + "str r6, [%[r], #440]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #424]\n\t" - "str r2, [%[r], #432]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #420]\n\t" - "str r4, [%[r], #428]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #416]\n\t" - "str r3, [%[r], #424]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #428]\n\t" + "str r5, [%[r], #436]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #424]\n\t" + "str r4, [%[r], #432]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #420]\n\t" + "str r6, [%[r], #428]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #412]\n\t" - "str r2, [%[r], #420]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #408]\n\t" - "str r4, [%[r], #416]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #404]\n\t" - "str r3, [%[r], #412]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, 
[%[a], #416]\n\t" + "str r5, [%[r], #424]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #412]\n\t" + "str r4, [%[r], #420]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #408]\n\t" + "str r6, [%[r], #416]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #400]\n\t" - "str r2, [%[r], #408]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #396]\n\t" - "str r4, [%[r], #404]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #392]\n\t" - "str r3, [%[r], #400]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #404]\n\t" + "str r5, [%[r], #412]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #400]\n\t" + "str r4, [%[r], #408]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #396]\n\t" + "str r6, [%[r], #404]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #388]\n\t" - "str r2, [%[r], #396]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #384]\n\t" - "str r4, [%[r], #392]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #380]\n\t" - "str r3, [%[r], #388]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #392]\n\t" + "str r5, [%[r], #400]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #388]\n\t" + "str r4, [%[r], #396]\n\t" + "lsr r3, r5, #1\n\t" 
+ "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #384]\n\t" + "str r6, [%[r], #392]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #376]\n\t" - "str r2, [%[r], #384]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #372]\n\t" - "str r4, [%[r], #380]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #368]\n\t" - "str r3, [%[r], #376]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #380]\n\t" + "str r5, [%[r], #388]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #376]\n\t" + "str r4, [%[r], #384]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #372]\n\t" + "str r6, [%[r], #380]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #364]\n\t" - "str r2, [%[r], #372]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #360]\n\t" - "str r4, [%[r], #368]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #356]\n\t" - "str r3, [%[r], #364]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #368]\n\t" + "str r5, [%[r], #376]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #364]\n\t" + "str r4, [%[r], #372]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #360]\n\t" + "str r6, [%[r], #368]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - 
"ldr r3, [%[a], #352]\n\t" - "str r2, [%[r], #360]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #348]\n\t" - "str r4, [%[r], #356]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #344]\n\t" - "str r3, [%[r], #352]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #356]\n\t" + "str r5, [%[r], #364]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #352]\n\t" + "str r4, [%[r], #360]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #348]\n\t" + "str r6, [%[r], #356]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #340]\n\t" - "str r2, [%[r], #348]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #336]\n\t" - "str r4, [%[r], #344]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #332]\n\t" - "str r3, [%[r], #340]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #344]\n\t" + "str r5, [%[r], #352]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #340]\n\t" + "str r4, [%[r], #348]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #336]\n\t" + "str r6, [%[r], #344]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #328]\n\t" - "str r2, [%[r], #336]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #324]\n\t" - "str r4, [%[r], #332]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #320]\n\t" - "str r3, [%[r], #328]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #332]\n\t" + "str r5, [%[r], #340]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #328]\n\t" + "str r4, [%[r], #336]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #324]\n\t" + "str r6, [%[r], #332]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #316]\n\t" - "str r2, [%[r], #324]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #312]\n\t" - "str r4, [%[r], #320]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #308]\n\t" - "str r3, [%[r], #316]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #320]\n\t" + "str r5, [%[r], #328]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #316]\n\t" + "str r4, [%[r], #324]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #312]\n\t" + "str r6, [%[r], #320]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #304]\n\t" - "str r2, [%[r], #312]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #300]\n\t" - "str r4, [%[r], #308]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #296]\n\t" - "str r3, [%[r], #304]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, 
[%[a], #308]\n\t" + "str r5, [%[r], #316]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #304]\n\t" + "str r4, [%[r], #312]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #300]\n\t" + "str r6, [%[r], #308]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #292]\n\t" - "str r2, [%[r], #300]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #288]\n\t" - "str r4, [%[r], #296]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #284]\n\t" - "str r3, [%[r], #292]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #296]\n\t" + "str r5, [%[r], #304]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #292]\n\t" + "str r4, [%[r], #300]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #288]\n\t" + "str r6, [%[r], #296]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #280]\n\t" - "str r2, [%[r], #288]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #276]\n\t" - "str r4, [%[r], #284]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #272]\n\t" - "str r3, [%[r], #280]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #284]\n\t" + "str r5, [%[r], #292]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #280]\n\t" + "str r4, [%[r], #288]\n\t" + "lsr r3, r5, #1\n\t" 
+ "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #276]\n\t" + "str r6, [%[r], #284]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #268]\n\t" - "str r2, [%[r], #276]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #264]\n\t" - "str r4, [%[r], #272]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #260]\n\t" - "str r3, [%[r], #268]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #272]\n\t" + "str r5, [%[r], #280]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #268]\n\t" + "str r4, [%[r], #276]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #264]\n\t" + "str r6, [%[r], #272]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #256]\n\t" - "str r2, [%[r], #264]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #252]\n\t" - "str r4, [%[r], #260]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #248]\n\t" - "str r3, [%[r], #256]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #260]\n\t" + "str r5, [%[r], #268]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #256]\n\t" + "str r4, [%[r], #264]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #252]\n\t" + "str r6, [%[r], #260]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - 
"ldr r3, [%[a], #244]\n\t" - "str r2, [%[r], #252]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #240]\n\t" - "str r4, [%[r], #248]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #236]\n\t" - "str r3, [%[r], #244]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #248]\n\t" + "str r5, [%[r], #256]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #244]\n\t" + "str r4, [%[r], #252]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #240]\n\t" + "str r6, [%[r], #248]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #232]\n\t" - "str r2, [%[r], #240]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #228]\n\t" - "str r4, [%[r], #236]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #224]\n\t" - "str r3, [%[r], #232]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #236]\n\t" + "str r5, [%[r], #244]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #232]\n\t" + "str r4, [%[r], #240]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #228]\n\t" + "str r6, [%[r], #236]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #220]\n\t" - "str r2, [%[r], #228]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #216]\n\t" - "str r4, [%[r], #224]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #212]\n\t" - "str r3, [%[r], #220]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #224]\n\t" + "str r5, [%[r], #232]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #220]\n\t" + "str r4, [%[r], #228]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #216]\n\t" + "str r6, [%[r], #224]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #208]\n\t" - "str r2, [%[r], #216]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #204]\n\t" - "str r4, [%[r], #212]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #200]\n\t" - "str r3, [%[r], #208]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #212]\n\t" + "str r5, [%[r], #220]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #208]\n\t" + "str r4, [%[r], #216]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #204]\n\t" + "str r6, [%[r], #212]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #196]\n\t" - "str r2, [%[r], #204]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #192]\n\t" - "str r4, [%[r], #200]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #188]\n\t" - "str r3, [%[r], #196]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, 
[%[a], #200]\n\t" + "str r5, [%[r], #208]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #196]\n\t" + "str r4, [%[r], #204]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #192]\n\t" + "str r6, [%[r], #200]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #184]\n\t" - "str r2, [%[r], #192]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #180]\n\t" - "str r4, [%[r], #188]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #176]\n\t" - "str r3, [%[r], #184]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #188]\n\t" + "str r5, [%[r], #196]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #184]\n\t" + "str r4, [%[r], #192]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #180]\n\t" + "str r6, [%[r], #188]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #172]\n\t" - "str r2, [%[r], #180]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #168]\n\t" - "str r4, [%[r], #176]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #164]\n\t" - "str r3, [%[r], #172]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #176]\n\t" + "str r5, [%[r], #184]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #172]\n\t" + "str r4, [%[r], #180]\n\t" + "lsr r3, r5, #1\n\t" 
+ "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #168]\n\t" + "str r6, [%[r], #176]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #160]\n\t" - "str r2, [%[r], #168]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #156]\n\t" - "str r4, [%[r], #164]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #152]\n\t" - "str r3, [%[r], #160]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #164]\n\t" + "str r5, [%[r], #172]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #160]\n\t" + "str r4, [%[r], #168]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #156]\n\t" + "str r6, [%[r], #164]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #148]\n\t" - "str r2, [%[r], #156]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #144]\n\t" - "str r4, [%[r], #152]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #140]\n\t" - "str r3, [%[r], #148]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #152]\n\t" + "str r5, [%[r], #160]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #148]\n\t" + "str r4, [%[r], #156]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #144]\n\t" + "str r6, [%[r], #152]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - 
"ldr r3, [%[a], #136]\n\t" - "str r2, [%[r], #144]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #132]\n\t" - "str r4, [%[r], #140]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #128]\n\t" - "str r3, [%[r], #136]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #140]\n\t" + "str r5, [%[r], #148]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #136]\n\t" + "str r4, [%[r], #144]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #132]\n\t" + "str r6, [%[r], #140]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #124]\n\t" - "str r2, [%[r], #132]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #120]\n\t" - "str r4, [%[r], #128]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #116]\n\t" - "str r3, [%[r], #124]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #128]\n\t" + "str r5, [%[r], #136]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #124]\n\t" + "str r4, [%[r], #132]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #120]\n\t" + "str r6, [%[r], #128]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #112]\n\t" - "str r2, [%[r], #120]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #108]\n\t" - "str r4, [%[r], #116]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #104]\n\t" - "str r3, [%[r], #112]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #116]\n\t" + "str r5, [%[r], #124]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #112]\n\t" + "str r4, [%[r], #120]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #108]\n\t" + "str r6, [%[r], #116]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #100]\n\t" - "str r2, [%[r], #108]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #96]\n\t" - "str r4, [%[r], #104]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #92]\n\t" - "str r3, [%[r], #100]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #104]\n\t" + "str r5, [%[r], #112]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #100]\n\t" + "str r4, [%[r], #108]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #96]\n\t" + "str r6, [%[r], #104]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #88]\n\t" - "str r2, [%[r], #96]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #84]\n\t" - "str r4, [%[r], #92]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #80]\n\t" - "str r3, [%[r], #88]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], 
#92]\n\t" + "str r5, [%[r], #100]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #88]\n\t" + "str r4, [%[r], #96]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #84]\n\t" + "str r6, [%[r], #92]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #76]\n\t" - "str r2, [%[r], #84]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #72]\n\t" - "str r4, [%[r], #80]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #68]\n\t" - "str r3, [%[r], #76]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #80]\n\t" + "str r5, [%[r], #88]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #76]\n\t" + "str r4, [%[r], #84]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #72]\n\t" + "str r6, [%[r], #80]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #64]\n\t" - "str r2, [%[r], #72]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #68]\n\t" + "str r5, [%[r], #76]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #64]\n\t" + "str r4, [%[r], #72]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr 
r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r6, [%[r], #68]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #56]\n\t" + "str r5, [%[r], #64]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r6, [%[r], #56]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #44]\n\t" + "str r5, [%[r], #52]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r6, [%[r], #44]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - 
"lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #32]\n\t" + "str r5, [%[r], #40]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r6, [%[r], #32]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #20]\n\t" + "str r5, [%[r], #28]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r6, [%[r], #20]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "str 
r2, [%[r]]\n\t" - "str r3, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #8]\n\t" + "str r5, [%[r], #16]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a]]\n\t" + "str r6, [%[r], #8]\n\t" + "lsr r3, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12", "cc" ); } @@ -29504,12 +61161,14 @@ static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[385]; @@ -29525,11 +61184,17 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -29558,6 +61223,10 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 32 - c; @@ -29604,7 +61273,7 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, sp_4096_cond_sub_128(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -29775,57 +61444,2545 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. 
*/ -static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x40\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_256_mul_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_256_mul_8_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, 
#16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #32\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_256_mul_8_inner_done_%=\n\t" + "blt L_sp_256_mul_8_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_256_mul_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - 
"cmp r5, #56\n\t" - "ble 1b\n\t" + "cmp r5, #52\n\t" + "ble L_sp_256_mul_8_outer_%=\n\t" + "ldr lr, [%[a], #28]\n\t" + "ldr r11, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_256_mul_8_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_256_mul_8_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + ); +} + +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr r9, [%[b], #4]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[0] */ + "ldr r8, [%[a], #4]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [sp, #4]\n\t" + /* A[2] * B[0] */ + "ldr r8, [%[a], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[1] */ + "ldr r11, [%[a], #4]\n\t" + "ldr r12, [%[b], #4]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #8]\n\t" + /* A[0] * B[3] */ + "ldr r9, [%[b], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[2] */ + "ldr r9, [%[b], #8]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" 
+ "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[1] */ + "ldr r8, [%[a], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[0] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [sp, #12]\n\t" + /* A[4] * B[0] */ + "ldr r8, [%[a], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[1] */ + "ldr r8, [%[a], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[2] */ + "ldr r11, [%[a], #8]\n\t" + "ldr r12, [%[b], #8]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[3] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, 
r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [sp, #16]\n\t" + /* A[0] * B[5] */ + "ldr r9, [%[b], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + /* A[1] * B[4] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[3] */ + "ldr r9, [%[b], #12]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[2] */ + "ldr r8, [%[a], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, 
r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[1] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[0] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [sp, #20]\n\t" + /* A[6] * B[0] */ + "ldr r8, [%[a], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[1] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[2] */ + "ldr r8, [%[a], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[3] */ + "ldr r11, [%[a], #12]\n\t" + "ldr r12, [%[b], #12]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[2] * B[4] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[1] * B[5] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [sp, #24]\n\t" + /* A[0] * B[7] */ + "ldr r9, [%[b], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[1] * B[6] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[2] * B[5] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[4] */ + "ldr r9, [%[b], #16]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[3] */ + "ldr r8, [%[a], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, 
#0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[2] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[1] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * 
B[0] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b]]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [sp, #28]\n\t" + /* A[7] * B[1] */ + "ldr r9, [%[b], #4]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[2] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[3] */ + "ldr r8, [%[a], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[4] * B[4] */ + "ldr r11, [%[a], #16]\n\t" + "ldr r12, [%[b], #16]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[3] * B[5] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[2] * B[6] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * B[7] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #32]\n\t" + /* A[2] * B[7] */ + "ldr r8, [%[a], #8]\n\t" + "lsl r6, r8, 
#16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[3] * B[6] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[4] * B[5] */ + "ldr r9, [%[b], #20]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[4] */ + "ldr r8, [%[a], #20]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[3] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[7] * B[2] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #36]\n\t" + /* A[7] * B[3] */ + "ldr r9, [%[b], #12]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[6] * B[4] */ + "ldr r8, [%[a], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * B[5] */ + "ldr r11, [%[a], #20]\n\t" + "ldr r12, [%[b], #20]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[4] * B[6] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[3] * B[7] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #40]\n\t" + /* A[4] * B[7] */ + "ldr r8, [%[a], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[5] * B[6] */ + "ldr r9, [%[b], #24]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * B[5] */ + "ldr r8, [%[a], #24]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + /* A[7] * B[4] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" + /* A[7] * B[5] */ + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[6] * B[6] */ + "ldr r11, [%[a], #24]\n\t" + "ldr r12, [%[b], #24]\n\t" + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + /* A[5] * B[7] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" + /* A[6] * B[7] */ + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + /* A[7] * B[6] */ + "ldr r8, [%[a], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl 
r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" + /* A[7] * B[7] */ + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + "ldm sp!, {r3, r4, r5, r6}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm sp!, {r3, r4, r5, r6}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + ); +} + +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #36\n\t" + "str %[r], [sp, #32]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * B[0] */ + "ldr lr, [%[b]]\n\t" + "umull r3, r4, r12, lr\n\t" + /* A[0] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "umull r5, r6, r12, lr\n\t" + /* A[0] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "umull r7, r8, r12, lr\n\t" + /* A[0] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "umull r9, r10, r12, lr\n\t" + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "mov r11, %[r]\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[0] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adcs r6, r6, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[0] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adcs r8, r8, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[0] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adcs r10, r10, #0\n\t" + "adc r3, %[r], #0\n\t" + "umlal r10, r3, r12, lr\n\t" + /* A[1] * B[0] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "str r4, [sp, #4]\n\t" + "adds r5, r5, r11\n\t" + /* A[1] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[1] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[1] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * B[5] */ + 
"ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[1] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* A[2] * B[0] */ + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "str r5, [sp, #8]\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[2] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[2] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * B[0] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[3] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[3] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[3] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[3] * B[4] */ + "ldr lr, 
[%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * B[0] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[4] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[4] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[4] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[4] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[4] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * B[0] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" + /* A[5] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[5] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[5] * B[3] */ + "ldr lr, [%[b], 
#12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[5] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[5] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + /* A[6] * B[0] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[6] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[6] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[6] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[6] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[6] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[6] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r9, %[r], #0\n\t" + "umlal r8, r9, r12, lr\n\t" + /* A[7] * B[0] */ + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[7] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[7] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + 
"adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[7] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[7] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[7] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[7] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[7] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r10, %[r], #0\n\t" + "umlal r9, r10, r12, lr\n\t" + "ldr %[r], [sp, #32]\n\t" + "add %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #36\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" ); } @@ -29836,450 +63993,131 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #32\n\t" - "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" - "umull r3, r4, r11, r12\n\t" + "sub sp, sp, #44\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[r], [sp, #36]\n\t" + "str %[a], [sp, #40]\n\t" +#else + "strd %[r], %[a], [sp, #36]\n\t" +#endif + "mov lr, %[b]\n\t" + "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm lr!, {r4, r5, r6}\n\t" + "umull r10, r11, %[r], r4\n\t" + "umull r12, r7, %[a], r4\n\t" + "umaal r11, r12, %[r], r5\n\t" + "umull r8, r9, %[b], r4\n\t" + "umaal r12, r8, %[a], r5\n\t" + "umaal r12, r7, %[r], r6\n\t" + "umaal r8, r9, r3, r4\n\t" + "stm sp, {r10, r11, r12}\n\t" + "umaal r7, r8, %[b], r5\n\t" + "ldm lr!, {r4}\n\t" + "umull r10, r11, %[a], r6\n\t" + "umaal r8, r9, %[b], r6\n\t" + "umaal r7, r10, %[r], r4\n\t" + "umaal r8, r11, r3, r5\n\t" + "str r7, [sp, #12]\n\t" + "umaal r8, r10, %[a], r4\n\t" + "umaal r9, r11, r3, r6\n\t" + "umaal r9, r10, %[b], r4\n\t" + "umaal r10, r11, r3, r4\n\t" + "ldm lr, {r4, r5, r6, r7}\n\t" + "mov r12, #0\n\t" + "umlal r8, r12, %[r], r4\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r10, r12, %[b], r4\n\t" + "umaal r11, r12, r3, r4\n\t" + "mov r4, #0\n\t" + "umlal r9, r4, %[r], r5\n\t" + "umaal r10, r4, %[a], r5\n\t" + "umaal r11, r4, %[b], r5\n\t" + "umaal r12, r4, r3, r5\n\t" "mov r5, #0\n\t" - "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, 
r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" - "ldr r11, [%[a], #4]\n\t" - "ldr r12, [%[b], #4]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" - "ldr r11, [%[a], #8]\n\t" - "ldr r12, [%[b], #8]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, 
[%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" - "ldr r11, [%[a], #12]\n\t" - "ldr r12, [%[b], #12]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, 
r10\n\t" - "# A[1] * B[5]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #28]\n\t" - "# A[7] * B[1]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[6] * B[2]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull 
r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" - "ldr r11, [%[a], #16]\n\t" - "ldr r12, [%[b], #16]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[2] * B[7]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[3] * B[6]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], 
#36]\n\t" - "# A[7] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[6] * B[4]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" - "ldr r11, [%[a], #20]\n\t" - "ldr r12, [%[b], #20]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[4] * B[7]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[5] * B[6]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[7] * B[5]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[6] * B[6]\n\t" - "ldr r11, [%[a], #24]\n\t" - "ldr r12, [%[b], #24]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, 
r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[6] * B[7]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[7] * B[6]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[7] * B[7]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" - "str r5, [%[r], #56]\n\t" - "str r3, [%[r], #60]\n\t" - "ldm sp!, {r3, r4, r5, r6}\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm sp!, {r3, r4, r5, r6}\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #32\n\t" + "umlal r10, r5, %[r], r6\n\t" + "umaal r11, r5, %[a], r6\n\t" + "umaal r12, r5, %[b], r6\n\t" + "umaal r4, r5, r3, r6\n\t" + "mov r6, #0\n\t" + "umlal r11, r6, %[r], r7\n\t" + "ldr %[r], [sp, #40]\n\t" + "umaal r12, r6, %[a], r7\n\t" + "add %[r], %[r], #16\n\t" + "umaal r4, r6, %[b], r7\n\t" + "sub lr, lr, #16\n\t" + "umaal r5, r6, r3, r7\n\t" + "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "str r6, [sp, #32]\n\t" + "ldm lr!, {r6}\n\t" + "mov r7, #0\n\t" + "umlal r8, r7, %[r], r6\n\t" + "umaal r9, r7, %[a], r6\n\t" + "str r8, [sp, #16]\n\t" + "umaal r10, r7, %[b], r6\n\t" + "umaal r11, r7, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r8, #0\n\t" + "umlal r9, r8, %[r], r6\n\t" + "umaal r10, r8, %[a], r6\n\t" + "str r9, [sp, #20]\n\t" + "umaal r11, r8, %[b], r6\n\t" + "umaal r12, r8, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r9, #0\n\t" + "umlal r10, r9, %[r], r6\n\t" + "umaal r11, r9, %[a], r6\n\t" + "str r10, [sp, #24]\n\t" + "umaal r12, r9, %[b], r6\n\t" + "umaal r4, r9, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r10, #0\n\t" + "umlal r11, r10, %[r], r6\n\t" + "umaal r12, r10, %[a], r6\n\t" + "str r11, [sp, #28]\n\t" + "umaal r4, r10, %[b], r6\n\t" + "umaal r5, r10, r3, r6\n\t" + "ldm lr!, {r11}\n\t" + "umaal r12, r7, %[r], r11\n\t" + "umaal r4, r7, %[a], r11\n\t" + "ldr r6, [sp, #32]\n\t" + 
"umaal r5, r7, %[b], r11\n\t" + "umaal r6, r7, r3, r11\n\t" + "ldm lr!, {r11}\n\t" + "umaal r4, r8, %[r], r11\n\t" + "umaal r5, r8, %[a], r11\n\t" + "umaal r6, r8, %[b], r11\n\t" + "umaal r7, r8, r3, r11\n\t" + "ldm lr, {r11, lr}\n\t" + "umaal r5, r9, %[r], r11\n\t" + "umaal r6, r10, %[r], lr\n\t" + "umaal r6, r9, %[a], r11\n\t" + "umaal r7, r10, %[a], lr\n\t" + "umaal r7, r9, %[b], r11\n\t" + "umaal r8, r10, %[b], lr\n\t" + "umaal r8, r9, r3, r11\n\t" + "umaal r9, r10, r3, lr\n\t" + "mov r3, r12\n\t" + "ldr lr, [sp, #36]\n\t" + "add lr, lr, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub lr, lr, #32\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #44\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" ); } +#endif #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Square a and put result in r. (r = a * a) @@ -30287,188 +64125,576 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x40\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_256_sqr_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_256_sqr_8_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, 
r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #32\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_256_sqr_8_inner_done_%=\n\t" + "blt L_sp_256_sqr_8_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_256_sqr_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #56\n\t" - "ble 1b\n\t" + "cmp r5, #52\n\t" + "ble L_sp_256_sqr_8_outer_%=\n\t" + "ldr lr, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, 
[sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_256_sqr_8_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_256_sqr_8_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } #else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ -static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" - "umull r8, r3, r10, r10\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, 
#0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + /* 
A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, 
r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + 
"adc r2, r2, #0\n\t" + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + 
"lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -30476,66 +64702,241 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl 
r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, 
r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -30543,59 +64944,210 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[6]\n\t" + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" "adds r4, r4, r5\n\t" "adcs r2, 
r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #32]\n\t" - "# A[2] * A[7]\n\t" + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[6]\n\t" + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr 
r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -30603,100 +65155,699 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #36]\n\t" - "# A[3] * A[7]\n\t" + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" "str r3, [%[r], #40]\n\t" - "# A[4] * A[7]\n\t" + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[r], #44]\n\t" - "# A[5] * A[7]\n\t" + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" + "lsl r8, r10, 
#16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" "str r2, [%[r], #48]\n\t" - "# A[6] * A[7]\n\t" + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, 
#0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" "str r3, [%[r], #52]\n\t" - "# A[7] * A[7]\n\t" + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "umull r8, r9, r10, r10\n\t" + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" "adds r4, r4, r8\n\t" "adc r2, r2, r9\n\t" "str r4, [%[r], #56]\n\t" "str r2, [%[r], #60]\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #32\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" ); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. 
+ * a A single precision integer. + */ +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[r], [sp, #64]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * A[1] */ + "ldr lr, [%[a], #4]\n\t" + "umull r4, r5, r12, lr\n\t" + /* A[0] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "umull r6, r7, r12, lr\n\t" + /* A[0] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "umull r8, r9, r12, lr\n\t" + /* A[0] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "umull r10, r3, r12, lr\n\t" + /* A[0] * A[2] */ + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[0] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[0] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + "adcs r3, r3, #0\n\t" + "str r4, [sp, #4]\n\t" + "str r5, [sp, #8]\n\t" + /* A[1] * A[2] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[1] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[1] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* 
A[2] * A[3] */ + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * A[4] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[a], #16]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * A[5] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[a], #20]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * A[6] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[a], #24]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + /* A[6] * A[7] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" + "mov r9, #0\n\t" + "umlal r8, r9, r12, 
lr\n\t" + "add lr, sp, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "stm lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "adcs r3, r3, r3\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, %[r], #0\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "mov lr, sp\n\t" + /* A[0] * A[0] */ + "ldr r12, [%[a]]\n\t" + "umull r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[1] * A[1] */ + "ldr r12, [%[a], #4]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * A[2] */ + "ldr r12, [%[a], #8]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* A[3] * A[3] */ + "ldr r12, [%[a], #12]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, r12\n\t" + "adds r10, r10, r11\n\t" + "stm lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* A[4] * A[4] */ + "ldr r12, [%[a], #16]\n\t" + "adcs r3, r3, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * A[5] */ + "ldr r12, [%[a], #20]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * A[6] */ + "ldr r12, [%[a], #24]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* A[7] * A[7] */ + "ldr r12, [%[a], #28]\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r10, #0\n\t" + "umlal r9, r10, r12, 
r12\n\t" + "ldr %[r], [sp, #64]\n\t" + "add %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "sub %[r], %[r], #32\n\t" + "stm %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #32\n\t" + "str %[r], [sp, #28]\n\t" + "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "umull r9, r10, %[r], %[r]\n\t" + "umull r11, r12, %[r], %[a]\n\t" + "adds r11, r11, r11\n\t" + "mov lr, #0\n\t" + "umaal r10, r11, lr, lr\n\t" + "stm sp, {r9, r10}\n\t" + "mov r8, lr\n\t" + "umaal r8, r12, %[r], r2\n\t" + "adcs r8, r8, r8\n\t" + "umaal r8, r11, %[a], %[a]\n\t" + "umull r9, r10, %[r], r3\n\t" + "umaal r9, r12, %[a], r2\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #8]\n\t" + "str r9, [sp, #12]\n\t" +#else + "strd r8, r9, [sp, #8]\n\t" +#endif + "mov r9, lr\n\t" + "umaal r9, r10, %[r], r4\n\t" + "umaal r9, r12, %[a], r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r2, r2\n\t" + "str r9, [sp, #16]\n\t" + "umull r9, r8, %[r], r5\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r9, r10, r2, r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" + "str r9, [sp, #20]\n\t" + "mov r9, lr\n\t" + "umaal r9, r8, %[r], r6\n\t" + "umaal r9, r12, %[a], r5\n\t" + "umaal r9, r10, r2, r4\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r3, r3\n\t" + "str r9, [sp, #24]\n\t" + "umull %[r], r9, %[r], r7\n\t" + "umaal %[r], r8, %[a], r6\n\t" + 
"umaal %[r], r12, r2, r5\n\t" + "umaal %[r], r10, r3, r4\n\t" + "adcs %[r], %[r], %[r]\n\t" + "umaal %[r], r11, lr, lr\n\t" + /* R[7] = r0 */ + "umaal r9, r8, %[a], r7\n\t" + "umaal r9, r10, r2, r6\n\t" + "umaal r12, r9, r3, r5\n\t" + "adcs r12, r12, r12\n\t" + "umaal r12, r11, r4, r4\n\t" + /* R[8] = r12 */ + "umaal r9, r8, r2, r7\n\t" + "umaal r10, r9, r3, r6\n\t" + "mov r2, lr\n\t" + "umaal r10, r2, r4, r5\n\t" + "adcs r10, r10, r10\n\t" + "umaal r11, r10, lr, lr\n\t" + /* R[9] = r11 */ + "umaal r2, r8, r3, r7\n\t" + "umaal r2, r9, r4, r6\n\t" + "adcs r3, r2, r2\n\t" + "umaal r10, r3, r5, r5\n\t" + /* R[10] = r10 */ + "mov %[a], lr\n\t" + "umaal %[a], r8, r4, r7\n\t" + "umaal %[a], r9, r5, r6\n\t" + "adcs r4, %[a], %[a]\n\t" + "umaal r3, r4, lr, lr\n\t" + /* R[11] = r3 */ + "umaal r8, r9, r5, r7\n\t" + "adcs r8, r8, r8\n\t" + "umaal r4, r8, r6, r6\n\t" + /* R[12] = r4 */ + "mov r5, lr\n\t" + "umaal r5, r9, r6, r7\n\t" + "adcs r5, r5, r5\n\t" + "umaal r8, r5, lr, lr\n\t" + /* R[13] = r8 */ + "adcs r9, r9, r9\n\t" + "umaal r9, r5, r7, r7\n\t" + "adcs r7, r5, lr\n\t" + /* R[14] = r9 */ + /* R[15] = r7 */ + "ldr lr, [sp, #28]\n\t" + "add lr, lr, #28\n\t" + "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r11}\n\t" + "stm lr!, {r10}\n\t" + "stm lr!, {r3, r4, r8, r9}\n\t" + "stm lr!, {r7}\n\t" + "sub lr, lr, #0x40\n\t" + "ldm sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "stm lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "add sp, sp, #32\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); +} + +#endif #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) @@ -30705,41 +65856,35 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( + "mov r3, #0\n\t" "add r12, %[a], #32\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "\n" + "L_sp_256_add_8_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_256_add_8_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -30749,150 +65894,34 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" - ); - - return c; -} - -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add r12, %[a], #32\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -30902,233 +65931,234 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, * a The number to convert. * m The modulus (prime). 
*/ -static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; __asm__ __volatile__ ( "sub sp, sp, #24\n\t" - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #20]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[a], #28]\n\t" - "# Clear overflow and underflow\n\t" - "mov r14, #0\n\t" - "mov r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + /* Clear overflow and underflow */ + "mov lr, #0\n\t" + "mov r10, #0\n\t" "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t" - "adds r10, r2, r3\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r5\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r8\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[0]\n\t" - "str r10, [sp, #0]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" + "adds r12, r2, r3\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r5\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r8\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[0] */ + "str r12, [sp]\n\t" + "neg r10, r10\n\t" + "mov r12, #0\n\t" "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" - "adds r14, r14, r3\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r4\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r8\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[1]\n\t" - "str r14, [sp, #4]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds 
lr, lr, r3\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r4\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r8\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[1] */ + "str lr, [sp, #4]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r5\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r5\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r8\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[2] */ + "str r12, [sp, #8]\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r8\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[2]\n\t" - "str r10, [sp, #8]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" - "adds r14, r14, r5\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r5\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r6\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r6\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r2\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r3\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[3]\n\t" - "str r14, [sp, #12]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds lr, lr, r5\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r5\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r6\n\t" + "adc r12, r12, #0\n\t" + "adds lr, 
lr, r6\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r7\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r2\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r3\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[3] */ + "str lr, [sp, #12]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" - "adds r10, r10, r6\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r6\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r3\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r4\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[4] */ + "str r12, [sp, #16]\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r3\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r4\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[4]\n\t" - "str r10, [sp, #16]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r8\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r8\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r4\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r5\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[5]\n\t" - "str r14, [sp, #20]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds lr, lr, r7\n\t" + "adc r12, r12, #0\n\t" + "adds lr, 
lr, r7\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r8\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r8\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r4\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r5\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[5] */ + "str lr, [sp, #20]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[6] = -1 -1 0 0 0 1 3 2\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r9\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r9\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r9\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r2\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r3\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[6] */ + "mov r8, r12\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r2\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r3\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[6]\n\t" - "mov r8, r10\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" - "adds r14, r14, r2\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r4\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r5\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, 
r14, r7\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[7]\n\t" - "# Load intermediate\n\t" - "ldr r2, [sp, #0]\n\t" - "ldr r3, [sp, #4]\n\t" - "ldr r4, [sp, #8]\n\t" - "ldr r5, [sp, #12]\n\t" - "ldr r6, [sp, #16]\n\t" - "ldr r7, [sp, #20]\n\t" - "neg r12, r12\n\t" - "# Add overflow\n\t" - "# Subtract underflow - add neg underflow\n\t" - "adds r2, r2, r10\n\t" + "adds lr, lr, r2\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r4\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r5\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r7\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[7] */ + /* Load intermediate */ + "ldm sp, {r2, r3, r4, r5, r6, r7}\n\t" + "neg r10, r10\n\t" + /* Add overflow */ + /* Subtract underflow - add neg underflow */ + "adds r2, r2, r12\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adds r5, r5, r12\n\t" + "adcs r5, r5, r10\n\t" "adcs r6, r6, #0\n\t" "adcs r7, r7, #0\n\t" - "adcs r8, r8, r12\n\t" - "adc r14, r14, r10\n\t" - "# Subtract overflow\n\t" - "# Add underflow - subtract neg underflow\n\t" - "subs r2, r2, r12\n\t" + "adcs r8, r8, r10\n\t" + "adcs lr, lr, r12\n\t" + "mov r9, #0\n\t" + "adc r9, r9, #0\n\t" + /* Subtract overflow */ + /* Add underflow - subtract neg underflow */ + "subs r2, r2, r10\n\t" "sbcs r3, r3, #0\n\t" "sbcs r4, r4, #0\n\t" - "subs r5, r5, r10\n\t" + "sbcs r5, r5, r12\n\t" "sbcs r6, r6, #0\n\t" "sbcs r7, r7, #0\n\t" - "sbcs r8, r8, r10\n\t" - "sbc r14, r14, r12\n\t" - "# Store result\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "str r6, [%[r], #16]\n\t" - "str r7, [%[r], #20]\n\t" - "str r8, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" + "sbcs r8, r8, r12\n\t" + "sbcs lr, lr, r10\n\t" + "mov r10, #0\n\t" + "sbc r10, 
r10, #0\n\t" + "neg r10, r10\n\t" + /* Add overflow */ + /* Subtract underflow - add neg underflow */ + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, r10\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, r10\n\t" + "adc lr, lr, r9\n\t" + /* Subtract overflow */ + /* Add underflow - subtract neg underflow */ + "subs r2, r2, r10\n\t" + "sbcs r3, r3, #0\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, r9\n\t" + "sbc lr, lr, r10\n\t" + /* Store result */ + "stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t" + "mov %[r], #0\n\t" "add sp, sp, #24\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "cc" ); - - return MP_OKAY; + (void)m_p; + return (uint32_t)(size_t)r; } /* Convert an mp_int to an array of sp_digit. 
@@ -31140,20 +66170,23 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -31183,12 +66216,12 @@ static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -31323,6 +66356,7 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) return err; } +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -31330,884 +66364,3721 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) { - (void)mp; - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r8, r9, r6, r7\n\t" - "str r8, [sp, #0]\n\t" - "# A[0] * B[1]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * B[0] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r8, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r3, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r9, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "mul r4, r3, r4\n\t" + "add r9, r9, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" + "str r8, [sp]\n\t" + /* A[0] * B[1] */ "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adc r10, r4, #0\n\t" - "# A[1] * B[0]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov r10, #0\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl 
r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" "str r9, [sp, #4]\n\t" - "# A[0] * B[2]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, r14\n\t" - "# A[1] * B[1]\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, 
#16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[2] * B[0]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b], #8]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" "str r10, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * B[3] */ "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "# A[1] * B[2]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, 
r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[2] * B[1]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[3] * B[0]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, 
r4\n\t" + "adc r9, r9, #0\n\t" + /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #12]\n\t" - "# A[0] * B[4]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #12]\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, #0\n\t" - "# A[1] * B[3]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #4]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, 
r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #8]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, r10\n\t" - "# A[2] * B[2]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[3] * B[1]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, 
r10\n\t" - "# A[4] * B[0]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" "str r8, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * B[5] */ "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[1] * B[4]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, 
r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[2] * B[3]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[3] * B[2]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs 
r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * B[1]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[5] * B[0]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r3, r6, 
#16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" "str r9, [sp, #20]\n\t" - "# A[0] * B[6]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[1] * B[5]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[2] * B[4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[3] * B[3]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[4] * B[2]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[5] * B[1]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr 
r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[6] * B[0]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #8]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, 
r4\n\t" + "adc r8, r8, #0\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #12]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[2] * B[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #20]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, 
#16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" "str r10, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * B[7] */ "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "# A[1] * B[6]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - 
"adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[2] * B[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[3] * B[4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[4] * B[3]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + 
"lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[5] * B[2]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[6] * B[1]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" 
+ "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[7] * B[0]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #28]\n\t" - "# A[1] * B[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #28]\n\t" + /* A[7] * B[1] */ + "ldr r7, [%[b], #4]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, 
r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #8]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #12]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, 
r4\n\t" + "adc r10, r10, #0\n\t" + /* A[4] * B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[3] * B[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #20]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, 
r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[1] * B[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, #0\n\t" - "# A[2] * B[6]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[3] * B[5]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[4] * B[4]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[5] * B[3]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[6] * B[2]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[7] * B[1]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, 
#16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" "str r8, [sp, #32]\n\t" - "# A[2] * B[7]\n\t" + /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[3] * B[6]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * B[5]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[4] * B[5] */ "ldr r6, [%[a], 
#16]\n\t" "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[5] * B[4]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[6] * B[3]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[7] * B[2]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + 
"mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" "str r9, [sp, #36]\n\t" - "# A[3] * B[7]\n\t" + /* A[7] * B[3] */ + "ldr r7, [%[b], #12]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl 
r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #20]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr 
r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, #0\n\t" - "# A[4] * B[6]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[5] * B[5]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[6] * B[4]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, r8\n\t" - "# A[7] * B[3]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" "str r10, [sp, #40]\n\t" - 
"# A[4] * B[7]\n\t" + /* A[4] * B[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "# A[5] * B[6]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[6] * B[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, 
r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[7] * B[4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #44]\n\t" - "# A[5] * B[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #44]\n\t" + /* A[7] * B[5] */ + "ldr r7, [%[b], #20]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul 
r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, #0\n\t" - "# A[6] * B[6]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, r10\n\t" - "# A[7] * B[5]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[6] * B[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, 
r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[7] * B[6]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[7] * B[7]\n\t" - "ldr r6, [%[a], #28]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[7] * B[7] */ "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, r14\n\t" + "lsl r3, r6, 
#16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" "str r8, [sp, #48]\n\t" "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "str r14, [sp, #60]\n\t" - "# Start Reduction\n\t" - "ldr r4, [sp, #0]\n\t" - "ldr r5, [sp, #4]\n\t" - "ldr r6, [sp, #8]\n\t" - "ldr r7, [sp, #12]\n\t" - "ldr r8, [sp, #16]\n\t" - "ldr r9, [sp, #20]\n\t" - "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t" - "# - a[0] << 224\n\t" - "# + (a[0]-a[1] * 2) << (6 * 32)\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "# - a[0] << (7 * 32)\n\t" - "sub r14, r14, r4\n\t" - "# + a[0]-a[4] << (3 * 32)\n\t" - "mov %[a], r7\n\t" - "mov %[b], r8\n\t" - "adds r7, r7, r4\n\t" - "adcs r8, r8, r5\n\t" + "str lr, [sp, #60]\n\t" + "str %[r], [sp, #64]\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" "adcs r9, r9, r6\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r14, r14, %[b]\n\t" - "str r4, [sp, #0]\n\t" - "str r5, [sp, #4]\n\t" - 
"str r6, [sp, #8]\n\t" - "str r7, [sp, #12]\n\t" - "str r8, [sp, #16]\n\t" - "str r9, [sp, #20]\n\t" - "# a += mu * m\n\t" - "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" - "mov %[a], #0\n\t" - "# a[6] += t[0] + t[3]\n\t" - "ldr r3, [sp, #24]\n\t" - "adds r3, r3, r4\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adc %[b], %[b], #0\n\t" - "str r10, [sp, #24]\n\t" - "# a[7] += t[1] + t[4]\n\t" - "ldr r3, [sp, #28]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r5\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r8\n\t" - "adc %[b], %[b], #0\n\t" - "str r14, [sp, #28]\n\t" - "str r3, [sp, #64]\n\t" - "# a[8] += t[0] + t[2] + t[5]\n\t" - "ldr r3, [sp, #32]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r4\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r6\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r9\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #32]\n\t" - "# a[9] += t[1] + t[3] + t[6]\n\t" - "# a[10] += t[2] + t[4] + t[7]\n\t" - "ldr r3, [sp, #36]\n\t" - "ldr r4, [sp, #40]\n\t" - "adds r3, r3, %[b]\n\t" - "adcs r4, r4, #0\n\t" - "adc %[b], %[a], #0\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r10\n\t" - "adcs r4, r4, r14\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #36]\n\t" - "str r4, [sp, #40]\n\t" - 
"# a[11] += t[3] + t[5]\n\t" - "# a[12] += t[4] + t[6]\n\t" - "# a[13] += t[5] + t[7]\n\t" - "# a[14] += t[6]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "adds r3, r3, %[b]\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r9\n\t" - "adcs r4, r4, r10\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, #0\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "# a[15] += t[7]\n\t" - "ldr r3, [sp, #60]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r14\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #60]\n\t" - "ldr r3, [sp, #64]\n\t" - "ldr r4, [sp, #32]\n\t" - "ldr r5, [sp, #36]\n\t" - "ldr r6, [sp, #40]\n\t" - "ldr r8, [sp, #0]\n\t" - "ldr r9, [sp, #4]\n\t" - "ldr r10, [sp, #8]\n\t" - "ldr r14, [sp, #12]\n\t" - "subs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "str r4, [sp, #32]\n\t" - "str r5, [sp, #36]\n\t" - "str r6, [sp, #40]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "ldr r7, [sp, #60]\n\t" - "ldr r8, [sp, #16]\n\t" - "ldr r9, [sp, #20]\n\t" - "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds 
r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" "sbcs r4, r4, r9\n\t" "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "sbc r7, r7, #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "str r7, [sp, #60]\n\t" - "# mask m and sub from result if overflow\n\t" - "sub %[b], %[a], %[b]\n\t" - "and %[a], %[b], #1\n\t" - "ldr r3, [sp, #32]\n\t" - "ldr r4, [sp, #36]\n\t" - "ldr r5, [sp, #40]\n\t" - "ldr r6, [sp, #44]\n\t" - "ldr r7, [sp, #48]\n\t" - "ldr r8, [sp, #52]\n\t" - "ldr r9, [sp, #56]\n\t" - "ldr r10, [sp, #60]\n\t" - "subs r3, r3, %[b]\n\t" - "sbcs r4, r4, %[b]\n\t" - "sbcs r5, r5, %[b]\n\t" - "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, #0\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" "sbcs r8, r8, #0\n\t" - "sbcs r9, r9, %[a]\n\t" - "sbc r10, r10, %[b]\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], 
#12]\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r9, [%[r], #24]\n\t" - "str r10, [%[r], #28]\n\t" - "add sp, sp, #68\n\t" - : [a] "+r" (a), [b] "+r" (b) - : [r] "r" (r) - : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[r], [sp, #64]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r12", "cc" ); + (void)m_p; + (void)mp_p; } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. 
+ */ +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[r], [sp, #64]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * B[0] */ + "ldr lr, [%[b]]\n\t" + "umull r3, r4, r12, lr\n\t" + /* A[0] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "umull r5, r6, r12, lr\n\t" + /* A[0] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "umull r7, r8, r12, lr\n\t" + /* A[0] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "umull r9, r10, r12, lr\n\t" + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "mov r11, %[r]\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[0] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adcs r6, r6, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[0] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adcs r8, r8, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[0] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adcs r10, r10, #0\n\t" + "adc r3, %[r], #0\n\t" + "umlal r10, r3, r12, lr\n\t" + /* A[1] * B[0] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "str r4, [sp, #4]\n\t" + "adds r5, r5, r11\n\t" + /* A[1] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[1] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[1] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, 
lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[1] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* A[2] * B[0] */ + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "str r5, [sp, #8]\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[2] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[2] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * B[0] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[3] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[3] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[3] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds 
r10, r10, r11\n\t" + /* A[3] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * B[0] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[4] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[4] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[4] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[4] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[4] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * B[0] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" + /* A[5] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[5] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, 
r11\n\t" + /* A[5] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[5] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[5] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + /* A[6] * B[0] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[6] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[6] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[6] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[6] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[6] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[6] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r9, %[r], #0\n\t" + "umlal r8, r9, r12, lr\n\t" + /* A[7] * B[0] */ + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b]]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[7] * B[1] */ + "ldr lr, [%[b], #4]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* 
A[7] * B[2] */ + "ldr lr, [%[b], #8]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[7] * B[3] */ + "ldr lr, [%[b], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[7] * B[4] */ + "ldr lr, [%[b], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[7] * B[5] */ + "ldr lr, [%[b], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[7] * B[6] */ + "ldr lr, [%[b], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[7] * B[7] */ + "ldr lr, [%[b], #28]\n\t" + "adc r10, %[r], #0\n\t" + "umlal r9, r10, r12, lr\n\t" + "add lr, sp, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, 
[sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, #0\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + 
"sbc r8, r8, lr\n\t" + "ldr %[r], [sp, #64]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); + (void)m_p; + (void)mp_p; +} + +#else +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x4c\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str %[r], [sp, #68]\n\t" + "str %[a], [sp, #72]\n\t" +#else + "strd %[r], %[a], [sp, #68]\n\t" +#endif + "mov lr, %[b]\n\t" + "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm lr!, {r4, r5, r6}\n\t" + "umull r10, r11, %[r], r4\n\t" + "umull r12, r7, %[a], r4\n\t" + "umaal r11, r12, %[r], r5\n\t" + "umull r8, r9, %[b], r4\n\t" + "umaal r12, r8, %[a], r5\n\t" + "umaal r12, r7, %[r], r6\n\t" + "umaal r8, r9, r3, r4\n\t" + "stm sp, {r10, r11, r12}\n\t" + "umaal r7, r8, %[b], r5\n\t" + "ldm lr!, {r4}\n\t" + "umull r10, r11, %[a], r6\n\t" + "umaal r8, r9, %[b], r6\n\t" + "umaal r7, r10, %[r], r4\n\t" + "umaal r8, r11, r3, r5\n\t" + "str r7, [sp, #12]\n\t" + "umaal r8, r10, %[a], r4\n\t" + "umaal r9, r11, r3, r6\n\t" + "umaal r9, r10, %[b], r4\n\t" + "umaal r10, r11, r3, r4\n\t" + "ldm lr, {r4, r5, r6, r7}\n\t" + "mov r12, #0\n\t" + "umlal r8, r12, %[r], r4\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r10, r12, %[b], r4\n\t" + "umaal r11, r12, r3, r4\n\t" + "mov r4, 
#0\n\t" + "umlal r9, r4, %[r], r5\n\t" + "umaal r10, r4, %[a], r5\n\t" + "umaal r11, r4, %[b], r5\n\t" + "umaal r12, r4, r3, r5\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, %[r], r6\n\t" + "umaal r11, r5, %[a], r6\n\t" + "umaal r12, r5, %[b], r6\n\t" + "umaal r4, r5, r3, r6\n\t" + "mov r6, #0\n\t" + "umlal r11, r6, %[r], r7\n\t" + "ldr %[r], [sp, #72]\n\t" + "umaal r12, r6, %[a], r7\n\t" + "add %[r], %[r], #16\n\t" + "umaal r4, r6, %[b], r7\n\t" + "sub lr, lr, #16\n\t" + "umaal r5, r6, r3, r7\n\t" + "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "str r6, [sp, #64]\n\t" + "ldm lr!, {r6}\n\t" + "mov r7, #0\n\t" + "umlal r8, r7, %[r], r6\n\t" + "umaal r9, r7, %[a], r6\n\t" + "str r8, [sp, #16]\n\t" + "umaal r10, r7, %[b], r6\n\t" + "umaal r11, r7, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r8, #0\n\t" + "umlal r9, r8, %[r], r6\n\t" + "umaal r10, r8, %[a], r6\n\t" + "str r9, [sp, #20]\n\t" + "umaal r11, r8, %[b], r6\n\t" + "umaal r12, r8, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r9, #0\n\t" + "umlal r10, r9, %[r], r6\n\t" + "umaal r11, r9, %[a], r6\n\t" + "str r10, [sp, #24]\n\t" + "umaal r12, r9, %[b], r6\n\t" + "umaal r4, r9, r3, r6\n\t" + "ldm lr!, {r6}\n\t" + "mov r10, #0\n\t" + "umlal r11, r10, %[r], r6\n\t" + "umaal r12, r10, %[a], r6\n\t" + "str r11, [sp, #28]\n\t" + "umaal r4, r10, %[b], r6\n\t" + "umaal r5, r10, r3, r6\n\t" + "ldm lr!, {r11}\n\t" + "umaal r12, r7, %[r], r11\n\t" + "umaal r4, r7, %[a], r11\n\t" + "ldr r6, [sp, #64]\n\t" + "umaal r5, r7, %[b], r11\n\t" + "umaal r6, r7, r3, r11\n\t" + "ldm lr!, {r11}\n\t" + "umaal r4, r8, %[r], r11\n\t" + "umaal r5, r8, %[a], r11\n\t" + "umaal r6, r8, %[b], r11\n\t" + "umaal r7, r8, r3, r11\n\t" + "ldm lr, {r11, lr}\n\t" + "umaal r5, r9, %[r], r11\n\t" + "umaal r6, r10, %[r], lr\n\t" + "umaal r6, r9, %[a], r11\n\t" + "umaal r7, r10, %[a], lr\n\t" + "umaal r7, r9, %[b], r11\n\t" + "umaal r8, r10, %[b], lr\n\t" + "umaal r8, r9, r3, r11\n\t" + "umaal r9, r10, r3, lr\n\t" + "mov r3, r12\n\t" + "add lr, sp, #32\n\t" + "stm lr, 
{r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, 
r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, #0\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[r], [sp, #68]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x4c\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" + ); + (void)m_p; + (void)mp_p; +} + +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - (void)mp; - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" + "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" - "# A[0] * A[1]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[1] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[a], #4]\n\t" - "umull r9, r10, r6, r7\n\t" + "lsl r3, r6, #16\n\t" + "lsl r9, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r3, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r10, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" "str r9, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[2] */ "ldr r7, [%[a], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, #0\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov lr, #0\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" "str r10, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" - "ldr r6, [%[a], #0]\n\t" + 
/* A[0] * A[3] */ "ldr r7, [%[a], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adc r8, r4, #0\n\t" - "# A[1] * A[2]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov r8, #0\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adc r8, r8, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r8, r8, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adc r8, r8, r4\n\t" + /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "str r14, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adc r9, r4, r9\n\t" - "# A[1] * A[3]\n\t" - "ldr r6, [%[a], #4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #12]\n\t" + /* A[1] * A[3] */ "ldr r7, [%[a], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr 
r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r9, r9, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" + /* A[0] * A[4] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[a], #16]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" "str r8, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[5] */ "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adc r10, r4, r10\n\t" - "# A[1] * A[4]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" 
+ "adc r10, r10, r4\n\t" + /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[2] * A[3]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" "str r9, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + /* A[2] * A[4] */ + "ldr r7, [%[a], #16]\n\t" + "lsl r3, r6, #16\n\t" + 
"lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[1] * A[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[2] * A[4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[a], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + 
"adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" "str r10, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[7] */ "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "# A[1] * A[6]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[2] * A[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, 
#16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "# A[3] * A[4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #28]\n\t" - "# A[1] * A[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #28]\n\t" + /* A[3] * A[5] */ + "ldr r7, [%[a], #20]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + 
"adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, #0\n\t" - "# A[2] * A[6]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" - "adc r10, r5, r10\n\t" - "# A[3] * A[5]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, r10\n\t" + "lsr 
r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" "str r8, [sp, #32]\n\t" - "# A[2] * A[7]\n\t" + /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[3] * A[6]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * A[5]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + 
"mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" "str r9, [sp, #36]\n\t" - "# A[3] * A[7]\n\t" + /* A[4] * A[6] */ + "ldr r7, [%[a], #24]\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" + "adc r8, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, 
r10\n\t" - "adcs r14, r4, r14\n\t" - "adc r8, r5, #0\n\t" - "# A[4] * A[6]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" "str r10, [sp, #40]\n\t" - "# A[4] * A[7]\n\t" + /* A[4] * A[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, #0\n\t" - "# A[5] * A[6]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" - "adcs r8, r4, r8\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr 
r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #44]\n\t" - "# A[5] * A[7]\n\t" - "ldr r6, [%[a], #20]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "str lr, [sp, #44]\n\t" + /* A[5] * A[7] */ "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" - "adcs r9, r4, r9\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" "str r8, [sp, #48]\n\t" - "# A[6] * A[7]\n\t" + /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" - "adc r10, r4, r10\n\t" + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, 
r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "# Double\n\t" + /* Double */ "ldr r4, [sp, #4]\n\t" "ldr r6, [sp, #8]\n\t" "ldr r7, [sp, #12]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" + "ldr lr, [sp, #28]\n\t" "ldr r12, [sp, #32]\n\t" "ldr r3, [sp, #36]\n\t" "adds r4, r4, r4\n\t" @@ -32216,7 +70087,7 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" + "adcs lr, lr, lr\n\t" "adcs r12, r12, r12\n\t" "adcs r3, r3, r3\n\t" "str r4, [sp, #4]\n\t" @@ -32225,7 +70096,7 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" "str r10, [sp, #24]\n\t" - "str r14, [sp, #28]\n\t" + "str lr, [sp, #28]\n\t" "str r12, [sp, #32]\n\t" "str r3, [sp, #36]\n\t" "ldr r4, [sp, #40]\n\t" @@ -32248,256 +70119,900 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "ldr r4, [sp, #4]\n\t" "ldr r5, [sp, #8]\n\t" "ldr r12, [sp, #12]\n\t" - "# A[0] * A[0]\n\t" - "ldr r6, [%[a], #0]\n\t" - "umull r8, r9, r6, r6\n\t" - "# A[1] * A[1]\n\t" + /* A[0] * A[0] */ + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" + /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" - "umull r10, r14, r6, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + 
"lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" "adds r9, r9, r4\n\t" "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" - "str r8, [sp, #0]\n\t" + "adcs lr, lr, r12\n\t" + "str r8, [sp]\n\t" "str r9, [sp, #4]\n\t" "str r10, [sp, #8]\n\t" - "str r14, [sp, #12]\n\t" + "str lr, [sp, #12]\n\t" "ldr r3, [sp, #16]\n\t" "ldr r4, [sp, #20]\n\t" "ldr r5, [sp, #24]\n\t" "ldr r12, [sp, #28]\n\t" - "# A[2] * A[2]\n\t" + /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" - "umull r8, r9, r6, r6\n\t" - "# A[3] * A[3]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" + /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" - "umull r10, r14, r6, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" + "adcs lr, lr, r12\n\t" "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" "str r10, [sp, #24]\n\t" - "str r14, [sp, #28]\n\t" + "str lr, [sp, #28]\n\t" "ldr r3, [sp, #32]\n\t" "ldr r4, [sp, #36]\n\t" "ldr r5, [sp, #40]\n\t" "ldr r12, [sp, #44]\n\t" - "# A[4] * A[4]\n\t" + /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" - "umull r8, r9, r6, r6\n\t" - "# A[5] * A[5]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" + /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" - "umull r10, r14, r6, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" + "adcs lr, lr, r12\n\t" "str r8, [sp, #32]\n\t" "str r9, [sp, #36]\n\t" "str r10, [sp, #40]\n\t" - "str r14, [sp, #44]\n\t" + "str lr, [sp, #44]\n\t" "ldr r3, [sp, #48]\n\t" "ldr r4, [sp, #52]\n\t" "ldr r5, [sp, #56]\n\t" "ldr r12, [sp, #60]\n\t" - "# A[6] * A[6]\n\t" + /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" - "umull r8, r9, r6, r6\n\t" - "# A[7] * A[7]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" + /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" - "umull r10, r14, r6, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adc r14, r14, r12\n\t" + "adc lr, lr, r12\n\t" "str r8, [sp, #48]\n\t" "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "str r14, [sp, #60]\n\t" - "# Start Reduction\n\t" - "ldr r4, [sp, #0]\n\t" - "ldr r5, [sp, #4]\n\t" - "ldr r6, [sp, #8]\n\t" - "ldr r7, [sp, #12]\n\t" - "ldr r8, [sp, #16]\n\t" - "ldr r9, [sp, #20]\n\t" - "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 
192\n\t" - "# - a[0] << 224\n\t" - "# + (a[0]-a[1] * 2) << (6 * 32)\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "# - a[0] << (7 * 32)\n\t" - "sub r14, r14, r4\n\t" - "# + a[0]-a[4] << (3 * 32)\n\t" - "mov %[a], r7\n\t" - "mov r12, r8\n\t" - "adds r7, r7, r4\n\t" - "adcs r8, r8, r5\n\t" + "str lr, [sp, #60]\n\t" + "str %[r], [sp, #64]\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" "adcs r9, r9, r6\n\t" - "adcs r10, r10, %[a]\n\t" - "adc r14, r14, r12\n\t" - "str r4, [sp, #0]\n\t" - "str r5, [sp, #4]\n\t" - "str r6, [sp, #8]\n\t" - "str r7, [sp, #12]\n\t" - "str r8, [sp, #16]\n\t" - "str r9, [sp, #20]\n\t" - "# a += mu * m\n\t" - "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" - "mov %[a], #0\n\t" - "# a[6] += t[0] + t[3]\n\t" - "ldr r3, [sp, #24]\n\t" - "adds r3, r3, r4\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adc r12, r12, #0\n\t" - "str r10, [sp, #24]\n\t" - "# a[7] += t[1] + t[4]\n\t" - "ldr r3, [sp, #28]\n\t" - "adds r3, r3, r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r5\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r8\n\t" - "adc r12, r12, #0\n\t" - "str r14, [sp, #28]\n\t" - "str r3, [sp, #64]\n\t" - "# a[8] += t[0] + t[2] + t[5]\n\t" - "ldr r3, [sp, #32]\n\t" - "adds r3, r3, r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r4\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r6\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r9\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #32]\n\t" - "# a[9] 
+= t[1] + t[3] + t[6]\n\t" - "# a[10] += t[2] + t[4] + t[7]\n\t" - "ldr r3, [sp, #36]\n\t" - "ldr r4, [sp, #40]\n\t" - "adds r3, r3, r12\n\t" - "adcs r4, r4, #0\n\t" - "adc r12, %[a], #0\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r10\n\t" - "adcs r4, r4, r14\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #36]\n\t" - "str r4, [sp, #40]\n\t" - "# a[11] += t[3] + t[5]\n\t" - "# a[12] += t[4] + t[6]\n\t" - "# a[13] += t[5] + t[7]\n\t" - "# a[14] += t[6]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "adds r3, r3, r12\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r9\n\t" - "adcs r4, r4, r10\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, #0\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "# a[15] += t[7]\n\t" - 
"ldr r3, [sp, #60]\n\t" - "adds r3, r3, r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r14\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #60]\n\t" - "ldr r3, [sp, #64]\n\t" - "ldr r4, [sp, #32]\n\t" - "ldr r5, [sp, #36]\n\t" - "ldr r6, [sp, #40]\n\t" - "ldr r8, [sp, #0]\n\t" - "ldr r9, [sp, #4]\n\t" - "ldr r10, [sp, #8]\n\t" - "ldr r14, [sp, #12]\n\t" - "subs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "str r4, [sp, #32]\n\t" - "str r5, [sp, #36]\n\t" - "str r6, [sp, #40]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "ldr r7, [sp, #60]\n\t" - "ldr r8, [sp, #16]\n\t" - "ldr r9, [sp, #20]\n\t" - "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + 
"sbcs r2, r2, r7\n\t" "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" "sbcs r4, r4, r9\n\t" "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "sbc r7, r7, #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "str r7, [sp, #60]\n\t" - "# mask m and sub from result if overflow\n\t" - "sub r12, %[a], r12\n\t" - "and %[a], r12, #1\n\t" - "ldr r3, [sp, #32]\n\t" - "ldr r4, [sp, #36]\n\t" - "ldr r5, [sp, #40]\n\t" - "ldr r6, [sp, #44]\n\t" - "ldr r7, [sp, #48]\n\t" - "ldr r8, [sp, #52]\n\t" - "ldr r9, [sp, #56]\n\t" - "ldr r10, [sp, #60]\n\t" - "subs r3, r3, r12\n\t" - "sbcs r4, r4, r12\n\t" - "sbcs r5, r5, r12\n\t" - "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, #0\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" "sbcs r8, r8, #0\n\t" - "sbcs r9, r9, %[a]\n\t" - "sbc r10, r10, r12\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r9, [%[r], #24]\n\t" - "str r10, [%[r], #28]\n\t" - "add sp, sp, #68\n\t" - : [a] "+r" (a) - : [r] "r" (r) - : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[r], [sp, #64]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r12", "r8", "r9", "r10", "lr", "cc" ); + (void)m_p; + (void)mp_p; } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. 
+ * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[r], [sp, #64]\n\t" + "mov %[r], #0\n\t" + "ldr r12, [%[a]]\n\t" + /* A[0] * A[1] */ + "ldr lr, [%[a], #4]\n\t" + "umull r4, r5, r12, lr\n\t" + /* A[0] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "umull r6, r7, r12, lr\n\t" + /* A[0] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "umull r8, r9, r12, lr\n\t" + /* A[0] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "umull r10, r3, r12, lr\n\t" + /* A[0] * A[2] */ + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[0] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "adds r8, r8, r11\n\t" + /* A[0] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + "adcs r3, r3, #0\n\t" + "str r4, [sp, #4]\n\t" + "str r5, [sp, #8]\n\t" + /* A[1] * A[2] */ + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "str r6, [sp, #12]\n\t" + "adds r7, r7, r11\n\t" + /* A[1] * A[3] */ + "ldr lr, [%[a], #12]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, lr\n\t" + "str r7, [sp, #16]\n\t" + "adds r8, r8, r11\n\t" + /* A[1] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "adds r9, r9, r11\n\t" + /* A[1] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "adds r10, r10, r11\n\t" + /* A[1] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, 
r11\n\t" + /* A[1] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r4, %[r], #0\n\t" + "umlal r3, r4, r12, lr\n\t" + /* A[2] * A[3] */ + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" + "mov r11, #0\n\t" + "umlal r8, r11, r12, lr\n\t" + "str r8, [sp, #20]\n\t" + "adds r9, r9, r11\n\t" + /* A[2] * A[4] */ + "ldr lr, [%[a], #16]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, lr\n\t" + "str r9, [sp, #24]\n\t" + "adds r10, r10, r11\n\t" + /* A[2] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "adds r3, r3, r11\n\t" + /* A[2] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[2] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r5, %[r], #0\n\t" + "umlal r4, r5, r12, lr\n\t" + /* A[3] * A[4] */ + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[a], #16]\n\t" + "mov r11, #0\n\t" + "umlal r10, r11, r12, lr\n\t" + "str r10, [sp, #28]\n\t" + "adds r3, r3, r11\n\t" + /* A[3] * A[5] */ + "ldr lr, [%[a], #20]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, lr\n\t" + "adds r4, r4, r11\n\t" + /* A[3] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[3] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r6, %[r], #0\n\t" + "umlal r5, r6, r12, lr\n\t" + /* A[4] * A[5] */ + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[a], #20]\n\t" + "mov r11, #0\n\t" + "umlal r4, r11, r12, lr\n\t" + "adds r5, r5, r11\n\t" + /* A[4] * A[6] */ + "ldr lr, [%[a], #24]\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, lr\n\t" + "adds r6, r6, r11\n\t" + /* A[4] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r7, %[r], #0\n\t" + "umlal r6, r7, r12, lr\n\t" + /* A[5] * A[6] */ + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[a], #24]\n\t" + "mov r11, #0\n\t" + "umlal r6, r11, r12, lr\n\t" + "adds r7, r7, r11\n\t" + /* A[5] * A[7] */ + "ldr lr, [%[a], #28]\n\t" + "adc r8, %[r], #0\n\t" + "umlal r7, r8, r12, lr\n\t" + 
/* A[6] * A[7] */ + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" + "mov r9, #0\n\t" + "umlal r8, r9, r12, lr\n\t" + "add lr, sp, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "stm lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "adcs r3, r3, r3\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, %[r], #0\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "add lr, sp, #4\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "mov lr, sp\n\t" + /* A[0] * A[0] */ + "ldr r12, [%[a]]\n\t" + "umull r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[1] * A[1] */ + "ldr r12, [%[a], #4]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[2] * A[2] */ + "ldr r12, [%[a], #8]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* A[3] * A[3] */ + "ldr r12, [%[a], #12]\n\t" + "adcs r9, r9, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r9, r11, r12, r12\n\t" + "adds r10, r10, r11\n\t" + "stm lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* A[4] * A[4] */ + "ldr r12, [%[a], #16]\n\t" + "adcs r3, r3, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r3, r11, r12, r12\n\t" + "adds r4, r4, r11\n\t" + /* A[5] * A[5] */ + "ldr r12, [%[a], #20]\n\t" + "adcs r5, r5, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r5, r11, r12, r12\n\t" + "adds r6, r6, r11\n\t" + /* A[6] * A[6] */ + "ldr r12, [%[a], #24]\n\t" + "adcs r7, r7, #0\n\t" + "adc r11, %[r], #0\n\t" + "umlal r7, r11, r12, r12\n\t" + "adds r8, r8, r11\n\t" + /* 
A[7] * A[7] */ + "ldr r12, [%[a], #28]\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r10, #0\n\t" + "umlal r9, r10, r12, r12\n\t" + "add lr, sp, #32\n\t" + "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += 
t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, #0\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[r], [sp, #64]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); + (void)m_p; + (void)mp_p; +} + +#else +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. 
+ */ +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[r], [sp, #64]\n\t" + "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "umull r9, r10, %[r], %[r]\n\t" + "umull r11, r12, %[r], %[a]\n\t" + "adds r11, r11, r11\n\t" + "mov lr, #0\n\t" + "umaal r10, r11, lr, lr\n\t" + "stm sp, {r9, r10}\n\t" + "mov r8, lr\n\t" + "umaal r8, r12, %[r], r2\n\t" + "adcs r8, r8, r8\n\t" + "umaal r8, r11, %[a], %[a]\n\t" + "umull r9, r10, %[r], r3\n\t" + "umaal r9, r12, %[a], r2\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #8]\n\t" + "str r9, [sp, #12]\n\t" +#else + "strd r8, r9, [sp, #8]\n\t" +#endif + "mov r9, lr\n\t" + "umaal r9, r10, %[r], r4\n\t" + "umaal r9, r12, %[a], r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r2, r2\n\t" + "str r9, [sp, #16]\n\t" + "umull r9, r8, %[r], r5\n\t" + "umaal r9, r12, %[a], r4\n\t" + "umaal r9, r10, r2, r3\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, lr, lr\n\t" + "str r9, [sp, #20]\n\t" + "mov r9, lr\n\t" + "umaal r9, r8, %[r], r6\n\t" + "umaal r9, r12, %[a], r5\n\t" + "umaal r9, r10, r2, r4\n\t" + "adcs r9, r9, r9\n\t" + "umaal r9, r11, r3, r3\n\t" + "str r9, [sp, #24]\n\t" + "umull %[r], r9, %[r], r7\n\t" + "umaal %[r], r8, %[a], r6\n\t" + "umaal %[r], r12, r2, r5\n\t" + "umaal %[r], r10, r3, r4\n\t" + "adcs %[r], %[r], %[r]\n\t" + "umaal %[r], r11, lr, lr\n\t" + /* R[7] = r0 */ + "umaal r9, r8, %[a], r7\n\t" + "umaal r9, r10, r2, r6\n\t" + "umaal r12, r9, r3, r5\n\t" + "adcs r12, r12, r12\n\t" + "umaal r12, r11, r4, r4\n\t" + /* R[8] = r12 */ + "umaal r9, r8, r2, r7\n\t" + "umaal r10, r9, r3, r6\n\t" + "mov r2, lr\n\t" + "umaal r10, r2, r4, r5\n\t" + "adcs r10, r10, r10\n\t" + "umaal r11, r10, lr, lr\n\t" 
+ /* R[9] = r11 */ + "umaal r2, r8, r3, r7\n\t" + "umaal r2, r9, r4, r6\n\t" + "adcs r3, r2, r2\n\t" + "umaal r10, r3, r5, r5\n\t" + /* R[10] = r10 */ + "mov %[a], lr\n\t" + "umaal %[a], r8, r4, r7\n\t" + "umaal %[a], r9, r5, r6\n\t" + "adcs r4, %[a], %[a]\n\t" + "umaal r3, r4, lr, lr\n\t" + /* R[11] = r3 */ + "umaal r8, r9, r5, r7\n\t" + "adcs r8, r8, r8\n\t" + "umaal r4, r8, r6, r6\n\t" + /* R[12] = r4 */ + "mov r5, lr\n\t" + "umaal r5, r9, r6, r7\n\t" + "adcs r5, r5, r5\n\t" + "umaal r8, r5, lr, lr\n\t" + /* R[13] = r8 */ + "adcs r9, r9, r9\n\t" + "umaal r9, r5, r7, r7\n\t" + "adcs r7, r5, lr\n\t" + /* R[14] = r9 */ + /* R[15] = r7 */ + "mov lr, sp\n\t" + "add lr, lr, #28\n\t" + "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r11}\n\t" + "stm lr!, {r10}\n\t" + "stm lr!, {r3, r4, r8, r9}\n\t" + "stm lr!, {r7}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, 
#28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, #0\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc 
r8, r8, lr\n\t" + "ldr %[r], [sp, #64]\n\t" + "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); + (void)m_p; + (void)mp_p; +} + +#endif #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) /* Square the Montgomery form number a number of times. (r = a ^ n mod m) * @@ -32505,10 +71020,10 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_8(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_8(r, a, m, mp); for (; n > 1; n--) { @@ -32516,7 +71031,7 @@ static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P256 curve. */ static const uint32_t p256_mod_minus_2[8] = { @@ -32601,136 +71116,131 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ -static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #28\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #28\n\t" + "\n" + "L_sp_256_cmp_8_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_256_cmp_8_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + 
"subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, 
r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -32739,6 +71249,7 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) */ #define sp_256_norm_8(a) +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -32747,192 +71258,1193 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. 
*/ -static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_256_cond_sub_8_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #32\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #32\n\t" + "blt L_sp_256_cond_sub_8_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], 
#20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_SP_SMALL #define sp_256_mont_reduce_order_8 sp_256_mont_reduce_8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 256 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr 
r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - 
"str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #28]\n\t" +#else "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #28]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" - "ldr r9, [%[a], #32]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], 
#32]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #32\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_8_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, 
r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_8_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_256_mont_reduce_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, 
[%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #32]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #28]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #32]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #32\n\t" + "blt L_sp_256_mont_reduce_8_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#endif +#else +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + "str %[a], [sp, #64]\n\t" + "mov lr, sp\n\t" + "ldm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "stm lr!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "stm lr, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" 
+ "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + "add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, #0\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[a], [sp, #64]\n\t" + "stm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + ); + (void)m_p; + (void)mp_p; +} + +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) +/* Reduce the number back 
to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + 
"mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #28]\n\t" +#else + "ldr r7, [%[m], #28]\n\t" +#endif + "ldr r10, [%[a], #28]\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_order_8_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc 
r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_order_8_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += 
m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #32]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #28]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #32]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #32\n\t" + "blt L_sp_256_mont_reduce_order_8_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#endif +#endif /* WOLFSSL_SP_SMALL */ /* Map the Montgomery form projective coordinate point to an affine point. * * r Resulting affine coordinate point. @@ -32953,27 +72465,24 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); - sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->x, r->x, p256_mod, ~(n >> 31)); sp_256_norm_8(r->x); /* y /= z^3 */ sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); - sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->y, r->y, p256_mod, ~(n >> 31)); sp_256_norm_8(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -32983,68 +72492,53 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[b],#0]\n\t" - "ldr r9, [%[b],#4]\n\t" - "ldr r10, [%[b],#8]\n\t" - "ldr r14, [%[b],#12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" - "ldr r8, [%[b],#16]\n\t" - "ldr r9, [%[b],#20]\n\t" - "ldr r10, [%[b],#24]\n\t" - "ldr r14, [%[b],#28]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, 
r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "ldr r8, [%[r],#0]\n\t" - "ldr r9, [%[r],#4]\n\t" - "ldr r10, [%[r],#8]\n\t" - "ldr r14, [%[r],#12]\n\t" - "subs r8, r8, r3\n\t" - "sbcs r9, r9, r3\n\t" - "sbcs r10, r10, r3\n\t" - "sbcs r14, r14, #0\n\t" - "sbcs r4, r4, #0\n\t" - "sbcs r5, r5, #0\n\t" - "sbcs r6, r6, r12\n\t" - "sbc r7, r7, r3\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" - "str r6, [%[r],#24]\n\t" - "str r7, [%[r],#28]\n\t" + "mov lr, #0\n\t" + "ldm %[a], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "adds r5, r5, r3\n\t" + "adcs r6, r6, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "adcs r7, r7, r3\n\t" + "adcs r8, r8, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "adcs r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "adcs r11, r11, r3\n\t" + "adcs r12, r12, r4\n\t" + "adc lr, lr, #0\n\t" + "rsb lr, lr, #0\n\t" + "subs r5, r5, lr\n\t" + "sbcs r6, r6, lr\n\t" + "sbcs r7, r7, lr\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, lr, lsr #31\n\t" + "sbcs r12, r12, lr\n\t" + "sbc %[b], %[b], %[b]\n\t" + "sub lr, lr, %[b]\n\t" + "subs r5, r5, lr\n\t" + "sbcs r6, r6, lr\n\t" + "sbcs r7, r7, lr\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, lr, lsr #31\n\t" + "sbc r12, r12, lr\n\t" + "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" ); + (void)m_p; } /* Double a Montgomery form number (r = a + a % m). 
@@ -33053,20 +72547,14 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" + "mov r2, #0\n\t" + "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -33074,30 +72562,33 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "adcs r11, r11, r11\n\t" + "adc r2, r2, #0\n\t" + "rsb r2, r2, #0\n\t" + "subs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r2\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - "sbc r14, r14, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" + "sbcs r10, r10, r2, lsr #31\n\t" + "sbcs r11, r11, r2\n\t" + "sbc %[a], %[a], %[a]\n\t" + "sub r2, r2, %[a]\n\t" + "subs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r2\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, r2, lsr #31\n\t" + "sbc r11, 
r11, r2\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "cc" ); + (void)m_p; } /* Triple a Montgomery form number (r = a + a + a % m). @@ -33106,20 +72597,14 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; __asm__ __volatile__ ( "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" + "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -33127,74 +72612,65 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "adcs r11, r11, r11\n\t" + "adc r12, r12, #0\n\t" + "rsb r12, r12, #0\n\t" + "subs r4, r4, r12\n\t" + "sbcs r5, r5, r12\n\t" + "sbcs r6, r6, r12\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - "sbc r14, r14, r3\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" - "mov r12, #0\n\t" - "ldr r8, [%[a],#0]\n\t" - "ldr r9, [%[a],#4]\n\t" - 
"ldr r10, [%[a],#8]\n\t" - "ldr r14, [%[a],#12]\n\t" - "adds r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" - "ldr r4, [%[r],#16]\n\t" - "ldr r5, [%[r],#20]\n\t" - "ldr r6, [%[r],#24]\n\t" - "ldr r7, [%[r],#28]\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "ldr r4, [%[r],#0]\n\t" - "ldr r5, [%[r],#4]\n\t" - "ldr r6, [%[r],#8]\n\t" - "ldr r7, [%[r],#12]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" + "sbcs r11, r11, r12\n\t" + "sbc r2, r2, r2\n\t" + "sub r12, r12, r2\n\t" + "subs r4, r4, r12\n\t" + "sbcs r5, r5, r12\n\t" + "sbcs r6, r6, r12\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - "sbc r14, r14, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" + "sbc r11, r11, r12\n\t" + "ldm %[a]!, {r2, r3}\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, r3\n\t" + "ldm %[a]!, {r2, r3}\n\t" + "adcs r6, r6, r2\n\t" + "adcs r7, r7, r3\n\t" + "ldm %[a]!, {r2, r3}\n\t" + "adcs r8, r8, r2\n\t" + "adcs r9, r9, r3\n\t" + "ldm %[a]!, {r2, r3}\n\t" + "adcs r10, r10, r2\n\t" + "adcs r11, r11, r3\n\t" + "adc r12, r12, #0\n\t" + "rsb r12, r12, #0\n\t" + "subs r4, r4, r12\n\t" + "sbcs r5, r5, r12\n\t" + "sbcs r6, r6, r12\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" + "sbcs r11, r11, r12\n\t" + "sbc r2, r2, r2\n\t" 
+ "sub r12, r12, r2\n\t" + "subs r4, r4, r12\n\t" + "sbcs r5, r5, r12\n\t" + "sbcs r6, r6, r12\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" + "sbc r11, r11, r12\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12", "cc" ); + (void)m_p; } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -33204,67 +72680,51 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { - (void)m; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[b],#0]\n\t" - "ldr r9, [%[b],#4]\n\t" - "ldr r10, [%[b],#8]\n\t" - "ldr r14, [%[b],#12]\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" - "ldr r8, [%[b],#16]\n\t" - "ldr r9, [%[b],#20]\n\t" - "ldr r10, [%[b],#24]\n\t" - "ldr r14, [%[b],#28]\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "sbc r3, r12, #0\n\t" - "and r12, r3, #1\n\t" - 
"ldr r8, [%[r],#0]\n\t" - "ldr r9, [%[r],#4]\n\t" - "ldr r10, [%[r],#8]\n\t" - "ldr r14, [%[r],#12]\n\t" - "adds r8, r8, r3\n\t" - "adcs r9, r9, r3\n\t" - "adcs r10, r10, r3\n\t" - "adcs r14, r14, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r7, r7, r3\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" - "str r6, [%[r],#24]\n\t" - "str r7, [%[r],#28]\n\t" + "mov lr, #0\n\t" + "ldm %[a], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "subs r5, r5, r3\n\t" + "sbcs r6, r6, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "sbcs r7, r7, r3\n\t" + "sbcs r8, r8, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "sbcs r9, r9, r3\n\t" + "sbcs r10, r10, r4\n\t" + "ldm %[b]!, {r3, r4}\n\t" + "sbcs r11, r11, r3\n\t" + "sbcs r12, r12, r4\n\t" + "sbc lr, lr, #0\n\t" + "adds r5, r5, lr\n\t" + "adcs r6, r6, lr\n\t" + "adcs r7, r7, lr\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, lr, lsr #31\n\t" + "adcs r12, r12, lr\n\t" + "adc lr, lr, #0\n\t" + "adds r5, r5, lr\n\t" + "adcs r6, r6, lr\n\t" + "adcs r7, r7, lr\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, lr, lsr #31\n\t" + "adc r12, r12, lr\n\t" + "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" ); + (void)m_p; } /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -33273,69 +72733,74 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to divide. * m Modulus (prime). 
*/ -static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "and r9, r3, #1\n\t" - "sub r7, r10, r9\n\t" - "and r8, r7, #1\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r7\n\t" - "adcs r5, r5, r7\n\t" - "adcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "adcs r3, r3, r10\n\t" - "adcs r4, r4, r10\n\t" - "adcs r5, r5, r8\n\t" - "adcs r6, r6, r7\n\t" - "adc r9, r10, r10\n\t" - "lsr r7, r3, #1\n\t" - "and r3, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "mov r9, r3\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r10, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" - "ldr r5, [%[r], #8]\n\t" - "ldr r6, [%[r], #12]\n\t" - "lsr r7, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "str r7, [%[r], #0]\n\t" - "str r8, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r14, [%[r], #12]\n\t" - : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9" - ); + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + __asm__ __volatile__ ( + "ldm 
%[a], {r4, r5, r6, r7}\n\t" + "and r3, r4, #1\n\t" + "rsb r8, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r8\n\t" + "adcs r6, r6, r8\n\t" + "adcs r7, r7, #0\n\t" + "stm %[r], {r4, r5, r6, r7}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" +#else + "ldrd r4, r5, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#else + "ldrd r6, r7, [%[a], #24]\n\t" +#endif + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, r8, lsr #31\n\t" + "adcs r7, r7, r8\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "mov r3, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else + "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else + "strd r10, r11, [%[r], #24]\n\t" +#endif + "ldm %[r], {r4, r5, r6, r7}\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "stm %[r], {r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "cc" + ); } /* Double the Montgomery form projective point p. @@ -33344,6 +72809,61 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. 
*/ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_mont_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_8_ctx { int state; @@ -33354,7 +72874,14 @@ typedef struct sp_256_proj_point_dbl_8_ctx { sp_digit* z; } sp_256_proj_point_dbl_8_ctx; -static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_8_ctx* ctx = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data; @@ -33428,7 +72955,7 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_8(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_8(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -33478,61 +73005,6 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_8(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_8(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_8(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_8(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_8(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_8(y, y, x, p256_mod); - /* Y = Y * T1 
*/ - sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_8(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -33547,6 +73019,18 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_8(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -33554,6 +73038,84 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*8; + sp_digit* t2 = t + 4*8; + sp_digit* t3 = t + 6*8; + sp_digit* t4 = t + 8*8; + sp_digit* t5 = t + 10*8; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(t2, t1) & + sp_256_cmp_equal_8(t4, t3)) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + 
sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_8_ctx { @@ -33566,11 +73128,19 @@ typedef struct sp_256_proj_point_add_8_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_256_proj_point_add_8_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -33589,266 +73159,173 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*8; - ctx->t3 = t + 4*8; - ctx->t4 = t + 6*8; - ctx->t5 = t + 8*8; + ctx->t6 = t; + ctx->t1 = t + 2*8; + ctx->t2 = t + 4*8; + ctx->t3 = t + 6*8; + ctx->t4 = t + 8*8; + ctx->t5 = t + 10*8; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); - sp_256_norm_8(ctx->t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<8; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<8; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<8; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; + break; + case 2: + sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; + break; + 
case 3: + sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_8(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_8(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(ctx->t2, ctx->t1) & + sp_256_cmp_equal_8(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_8(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_8(ctx->t4, 
ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_8(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_dbl_8(ctx->t1, ctx->y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t1, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_8(ctx->y, ctx->y, ctx->x, p256_mod); + sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); + { + { + int i; + 
sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_8(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* t3 = t + 4*8; - sp_digit* t4 = t + 6*8; - sp_digit* t5 = t + 8*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_256* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<8; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<8; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<8; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_8(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_8(t4, t4, t3, p256_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_dbl_8(t1, y, 
p256_mod); - sp_256_mont_sub_8(x, x, t1, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_256_get_point_16_8(sp_point_256* r, const sp_point_256* table, @@ -33930,16 +73407,16 @@ static void sp_256_get_point_16_8(sp_point_256* r, const sp_point_256* table, static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; sp_digit* tmp = NULL; #else sp_point_256 t[16 + 1]; - sp_digit tmp[2 * 8 * 5]; + sp_digit tmp[2 * 8 * 6]; #endif sp_point_256* rt = NULL; #ifndef WC_NO_CACHE_RESISTANT -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* p = NULL; #else sp_point_256 p[1]; @@ -33955,7 +73432,7 @@ static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, cons (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * (16 + 1), heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -33969,7 +73446,7 @@ static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, cons } #endif if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, heap, DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; @@ -34066,32 +73543,32 @@ static int 
sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, cons } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) #endif { - ForceZero(tmp, sizeof(sp_digit) * 2 * 8 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + ForceZero(tmp, sizeof(sp_digit) * 2 * 8 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(tmp, heap, DYNAMIC_TYPE_ECC); #endif } #ifndef WC_NO_CACHE_RESISTANT - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK if (p != NULL) #endif { ForceZero(p, sizeof(sp_point_256)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(p, heap, DYNAMIC_TYPE_ECC); #endif } #endif /* !WC_NO_CACHE_RESISTANT */ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) #endif { ForceZero(t, sizeof(sp_point_256) * 17); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif } @@ -34107,7 +73584,7 @@ static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, cons * n Number of times to double * t Temporary ordinate data. 
*/ -static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, +static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_digit* t) { sp_digit* w = t; @@ -34118,6 +73595,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -34128,7 +73606,6 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, /* W = Z^4 */ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -34146,9 +73623,12 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_8(t2, b, x, p256_mod); + sp_256_mont_dbl_8(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -34158,9 +73638,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_8(y, b, x, p256_mod); - sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_mul_8(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_8(y, y, t1, p256_mod); } #ifndef WOLFSSL_SP_SMALL @@ -34175,18 +73653,19 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_8(t2, b, x, p256_mod); + sp_256_mont_dbl_8(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_8(y, 
b, x, p256_mod); - sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(y, y, p256_mod); + sp_256_mont_mul_8(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_8(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_8(y, y, p256_mod); + sp_256_mont_div2_8(y, y, p256_mod); } /* Convert the projective point to affine. @@ -34229,76 +73708,75 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* t3 = t + 4*8; - sp_digit* t4 = t + 6*8; - sp_digit* t5 = t + 8*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*8; + sp_digit* t6 = t + 4*8; + sp_digit* t1 = t + 6*8; + sp_digit* t4 = t + 8*8; + sp_digit* t5 = t + 10*8; - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(p->x, t2) & + sp_256_cmp_equal_8(p->y, t4)) { sp_256_proj_point_dbl_8(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<8; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<8; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<8; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ - sp_256_mont_sub_8(t2, t2, x, p256_mod); + sp_256_mont_sub_8(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ - sp_256_mont_sub_8(t4, t4, y, p256_mod); + sp_256_mont_sub_8(t4, t4, p->y, p256_mod); /* Z3 = H*Z1 */ - sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, t1, t5, p256_mod); - sp_256_mont_dbl_8(t1, t3, p256_mod); - sp_256_mont_sub_8(x, x, t1, p256_mod); + 
sp_256_mont_sqr_8(t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, p->x, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(t2, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + sp_256_mont_dbl_8(t5, t3, p256_mod); + sp_256_mont_sub_8(x, t2, t5, p256_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_256_mont_sub_8(t3, t3, x, p256_mod); sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, t3, t5, p256_mod); + sp_256_mont_mul_8(t1, t1, p->y, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, t3, t1, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -34318,7 +73796,7 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, static int sp_256_gen_stripe_table_8(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -34331,7 +73809,7 @@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -34386,7 +73864,7 @@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, } } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -34399,7 +73877,7 @@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_256_get_entry_16_8(sp_point_256* r, @@ -34465,12 +73943,12 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else sp_point_256 rt[2]; - sp_digit t[2 * 8 * 5]; + sp_digit t[2 * 8 * 6]; #endif sp_point_256* p = NULL; int i; @@ -34485,13 +73963,13 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -34551,7 +74029,7 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -34594,7 +74072,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. 
*/ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -34665,23 +74143,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 8 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 8 * 6]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -34702,6 +74193,9 @@ static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -34722,7 +74216,7 @@ static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_ static int sp_256_gen_stripe_table_8(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -34735,7 +74229,7 
@@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -34790,7 +74284,7 @@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -34803,7 +74297,7 @@ static int sp_256_gen_stripe_table_8(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_256_get_entry_256_8(sp_point_256* r, @@ -34869,12 +74363,12 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else sp_point_256 rt[2]; - sp_digit t[2 * 8 * 5]; + sp_digit t[2 * 8 * 6]; #endif sp_point_256* p = NULL; int i; @@ -34889,13 +74383,13 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -34955,7 +74449,7 @@ static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -34998,7 +74492,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -35069,23 +74563,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 8 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 8 * 6]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -35106,6 +74613,9 @@ static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -35124,7 +74634,7 @@ static int 
sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -35133,7 +74643,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -35156,7 +74666,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_256_point_to_ecc_point_8(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -35171,7 +74681,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -35181,25 +74691,25 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[8 + 8 * 2 * 5]; + sp_digit k[8 + 8 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (8 + 8 * 2 * 5), heap, + sizeof(sp_digit) * (8 + 8 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -35236,7 +74746,7 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, err = sp_256_point_to_ecc_point_8(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -36675,7 +76185,7 @@ static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k, */ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -36684,7 +76194,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -36706,7 +76216,7 @@ int sp_ecc_mulmod_base_256(const 
mp_int* km, ecc_point* r, int map, void* heap) err = sp_256_point_to_ecc_point_8(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -36720,7 +76230,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -36730,25 +76240,25 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[8 + 8 * 2 * 5]; + sp_digit k[8 + 8 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (8 + 8 * 2 * 5), + sizeof(sp_digit) * (8 + 8 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -36784,7 +76294,7 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, err = sp_256_point_to_ecc_point_8(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -36796,52 +76306,31 
@@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_8(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * * a A single precision integer. */ -static void sp_256_add_one_8(sp_digit* a) +static void sp_256_add_one_8(sp_digit* a_p) { + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + __asm__ __volatile__ ( - "ldr r1, [%[a], #0]\n\t" - "ldr r2, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" "adds r1, r1, #1\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "str r1, [%[a], #0]\n\t" - "str r2, [%[a], #4]\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r1, [%[a], #16]\n\t" - "ldr r2, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "str r1, [%[a], #16]\n\t" - "str r2, [%[a], #20]\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + : [a] "+r" (a) : - : [a] "r" (a) - : "memory", "r1", "r2", "r3", "r4" + : "memory", "r1", "r2", "r3", "r4", "cc" ); } @@ -36855,27 +76344,30 @@ static void sp_256_add_one_8(sp_digit* a) static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - 
break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; } - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } @@ -36896,7 +76388,7 @@ static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_256_from_bin(k, 8, buf, (int)sizeof(buf)); - if (sp_256_cmp_8(k, p256_order2) < 0) { + if (sp_256_cmp_8(k, p256_order2) <= 0) { sp_256_add_one_8(k); break; } @@ -36918,7 +76410,7 @@ static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -36933,15 +76425,15 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_256* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -36982,7 +76474,7 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_256_point_to_ecc_point_8(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -36994,6 +76486,84 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_256_ctx { + int state; + sp_256_ecc_mulmod_8_ctx mulmod_ctx; + sp_digit k[8]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 point[2]; +#else + sp_point_256 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_256_ctx; + +int sp_ecc_make_key_256_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_256_ctx* ctx = (sp_ecc_key_gen_256_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_256_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_256_ecc_gen_k_8(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_256_ecc_mulmod_base_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p256_order, 1, 1); + if (err == MP_OKAY) { + if (sp_256_iszero_8(ctx->point->x) || + sp_256_iszero_8(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_256_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_8(ctx->point, pub); + } + break; + } + + if (err != 
FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. * Fixed length number of bytes written: 32 @@ -37004,34 +76574,13 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) static void sp_256_to_bin_8(sp_digit* r, byte* a) { int i; - int j; - int s = 0; - int b; + int j = 0; - j = 256 / 8 - 1; - a[j] = 0; - for (i=0; i<8 && j>=0; i++) { - b = 0; - /* lint allow cast of mismatch sp_digit and int */ - a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ - b += 8 - s; - if (j < 0) { - break; - } - while (b < 32) { - a[j--] = (byte)(r[i] >> b); - b += 8; - if (j < 0) { - break; - } - } - s = 8 - (b - 32); - if (j >= 0) { - a[j] = 0; - } - if (s != 0) { - j++; - } + for (i = 7; i >= 0; i--) { + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; } } @@ -37050,7 +76599,7 @@ static void sp_256_to_bin_8(sp_digit* r, byte* a) int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -37063,7 +76612,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); @@ -37088,7 +76637,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 32; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -37097,8 +76646,60 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, return 
err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_256_ctx { + int state; + union { + sp_256_ecc_mulmod_8_ctx mulmod_ctx; + }; + sp_digit k[8]; + sp_point_256 point; +} sp_ecc_sec_gen_256_ctx; + +int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_256_ctx* ctx = (sp_ecc_sec_gen_256_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_256_from_mp(ctx->k, 8, priv); + sp_256_point_from_ecc_point_8(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_256_to_bin_8(ctx->point.x, out); + *outLen = 32; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -37108,40 +76709,33 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #32\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #32\n\t" + "\n" + "L_sp_256_sub_in_pkace_8_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_256_sub_in_pkace_8_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #else @@ -37150,216 +76744,605 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. 
*/ -static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_256_mul_d_8_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" "cmp r9, #32\n\t" - "blt 1b\n\t" + "blt L_sp_256_mul_d_8_word_%=\n\t" "str r3, [%[r], #32]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" - "str r4, [%[r], #28]\n\t" - "str r5, [%[r], #32]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", 
"r7", "r8", "r9", "r10" - ); -#endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* 
A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + 
"add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ -static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr 
r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_256_word_8_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_256_word_8_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" 
+ "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" 
(d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. @@ -37395,8 +77378,8 @@ static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[16], t2[9]; sp_digit div, r1; @@ -37404,12 +77387,15 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit (void)m; - div = d[7]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 8); - for (i=7; i>=0; i--) { - sp_digit hi = t1[8 + i] - (t1[8 + i] == div); + r1 = sp_256_cmp_8(&t1[8], d) >= 0; + sp_256_cond_sub_8(&t1[8], &t1[8], d, (sp_digit)0 - r1); + for (i = 7; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[8 + i] == div); + sp_digit hi = t1[8 + i] + mask; r1 = div_256_word_8(hi, t1[8 + i - 1], div); + r1 |= mask; sp_256_mul_d_8(t2, d, r1); t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2); @@ -37433,13 +77419,27 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_256_div_8(a, m, NULL, r); } #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. 
+ */ +static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_8(r, a, b); + sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P256 curve. */ static const uint32_t p256_order_minus_2[8] = { @@ -37453,18 +77453,6 @@ static const sp_int_digit p256_order_low[4] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P256 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_256_mul_8(r, a, b); - sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order); -} - /* Square number mod the order of P256 curve. (r = a * a mod order) * * r Result of the squaring. @@ -37575,7 +77563,7 @@ static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a, sp_256_mont_sqr_n_order_8(t2, t3, 4); /* t = a^ff = t2 * t3 */ sp_256_mont_mul_order_8(t, t2, t3); - /* t3= a^ff00 = t ^ 2 ^ 8 */ + /* t2= a^ff00 = t ^ 2 ^ 8 */ sp_256_mont_sqr_n_order_8(t2, t, 8); /* t = a^ffff = t2 * t */ sp_256_mont_mul_order_8(t, t2, t); @@ -37592,7 +77580,11 @@ static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a, /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */ sp_256_mont_mul_order_8(t2, t2, t); /* t2= a^ffffffff00000000ffffffffffffffffbce6 */ - for (i=127; i>=112; i--) { + sp_256_mont_sqr_order_8(t2, t2); + sp_256_mont_mul_order_8(t2, t2, a); + sp_256_mont_sqr_n_order_8(t2, t2, 5); + sp_256_mont_mul_order_8(t2, t2, t3); + for (i=121; i>=112; i--) { sp_256_mont_sqr_order_8(t2, t2); if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { sp_256_mont_mul_order_8(t2, t2, a); @@ -37635,6 +77627,7 @@ static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a, 
#endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -37709,6 +77702,128 @@ static int sp_256_calc_s_8(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. */ +int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_256* point = NULL; +#else + sp_digit e[7 * 2 * 8]; + sp_point_256 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 8; + k = e + 4 * 8; + r = e + 6 * 8; + tmp = e + 8 * 8; + s = e; + + if (hashLen > 32U) { + hashLen = 32U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_256_ecc_gen_k_8(rng, k); + } + else { + sp_256_from_mp(k, 8, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 8U); + sp_256_norm_8(r); + c = sp_256_cmp_8(r, p256_order); + sp_256_cond_sub_8(r, r, p256_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_256_norm_8(r); + + if (!sp_256_iszero_8(r)) { + /* x is modified in calculation of s. 
*/ + sp_256_from_mp(x, 8, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_256_from_bin(e, 8, hash, (int)hashLen); + + err = sp_256_calc_s_8(s, r, k, x, e, tmp); + + /* Check that signature is usable. */ + if ((err == MP_OKAY) && (!sp_256_iszero_8(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 8); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_256)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_256_ctx { int state; @@ -37736,15 +77851,10 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 32U) { - hashLen = 32U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -37779,6 +77889,9 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_8(ctx->r); + if (hashLen > 32U) { + hashLen = 32U; + } sp_256_from_mp(ctx->x, 8, priv); sp_256_from_bin(ctx->e, 8, hash, (int)hashLen); ctx->state = 4; @@ -37873,171 +77986,159 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_256* point = NULL; -#else - sp_digit e[7 * 2 * 8]; - sp_point_256 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int32 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 8; - k = e + 4 * 8; - r = e + 6 * 8; - tmp = e + 8 * 8; - s = e; - - if (hashLen > 32U) { - hashLen = 32U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. 
*/ - if (km == NULL || mp_iszero(km)) { - err = sp_256_ecc_gen_k_8(rng, k); - } - else { - sp_256_from_mp(k, 8, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 8U); - sp_256_norm_8(r); - c = sp_256_cmp_8(r, p256_order); - sp_256_cond_sub_8(r, r, p256_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_256_norm_8(r); - - sp_256_from_mp(x, 8, priv); - sp_256_from_bin(e, 8, hash, (int)hashLen); - - err = sp_256_calc_s_8(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_8(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_256_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_256_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 8); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_256)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL -static void sp_256_rshift1_8(sp_digit* r, sp_digit* a) +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "mov r9, #0\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "lsr r7, r3, #1\n\t" - "and r3, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "mov r9, r3\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r10, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" - "ldr r5, [%[r], #8]\n\t" - "ldr r6, [%[r], #12]\n\t" - "lsr r7, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "str r7, [%[r], #0]\n\t" - "str r8, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r14, [%[r], #12]\n\t" - : - : [r] "r" (r), [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9" - ); + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #32\n\t" + "\n" + "L_sp_256_sub_8_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_256_sub_8_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)r; +} + 
+#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" +#else + "ldrd r2, r3, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" +#else + "ldrd r4, r5, [%[a], #24]\n\t" +#endif + "lsr r6, r2, #1\n\t" + "lsr r7, r3, #1\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "orr r6, r6, r3, lsl #31\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r12, lsl #31\n\t" + "mov r12, r2\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" +#else + "strd 
r6, r7, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" +#else + "strd r8, r9, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else + "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#else + "ldrd r4, r5, [%[a], #8]\n\t" +#endif + "lsr r6, r2, #1\n\t" + "lsr r7, r3, #1\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "orr r6, r6, r3, lsl #31\n\t" + "orr r7, r7, r4, lsl #31\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r12, lsl #31\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[r]]\n\t" + "str r7, [%[r], #4]\n\t" +#else + "strd r6, r7, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else + "strd r8, r9, [%[r], #8]\n\t" +#endif + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "cc" + ); } /* Divide the number by 2 mod the modulus. (r = a / 2 % m) @@ -38046,159 +78147,534 @@ static void sp_256_rshift1_8(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus. 
*/ -static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "ldr r3, [%[a], #0]\n\t" - "ands r9, r3, #1\n\t" - "beq 1f\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[m], #0]\n\t" - "ldr r8, [%[m], #4]\n\t" - "ldr r10, [%[m], #8]\n\t" - "ldr r14, [%[m], #12]\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r10\n\t" - "adcs r6, r6, r14\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r8, [%[m], #20]\n\t" - "ldr r10, [%[m], #24]\n\t" - "ldr r14, [%[m], #28]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r10\n\t" - "adcs r6, r6, r14\n\t" - "adc r9, r10, r10\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "\n2:\n\t" - "lsr r7, r3, #1\n\t" - "and r3, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "mov r9, r3\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r10, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" - "ldr r5, [%[r], #8]\n\t" - "ldr r6, [%[r], #12]\n\t" - "lsr r7, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, 
r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "str r7, [%[r], #0]\n\t" - "str r8, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r14, [%[r], #12]\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4}\n\t" + "ands r3, r4, #1\n\t" + "beq L_sp_256_div2_mod_8_even_%=\n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "adc r3, r12, r12\n\t" + "b L_sp_256_div2_mod_8_div2_%=\n\t" + "\n" + "L_sp_256_div2_mod_8_even_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[a], #16]\n\t" +#else + "ldrd r4, r5, [%[a], #12]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" +#else + "ldrd r6, r7, [%[a], #20]\n\t" +#endif + "\n" + "L_sp_256_div2_mod_8_div2_%=: \n\t" + "lsr r8, r4, #1\n\t" + "and r4, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "mov r3, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else + "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else + "strd r10, r11, [%[r], #24]\n\t" +#endif + "ldm %[r], {r4, r5, r6, r7}\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl 
#31\n\t" + "stm %[r], {r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); } -static int sp_256_num_bits_8(sp_digit* a) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +static const unsigned char L_sp_256_num_bits_8_table[] = { + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 
0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_256_num_bits_8(const sp_digit* a_p) { - int r = 0; + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register unsigned char* L_sp_256_num_bits_8_table_c asm ("r1") = (unsigned char*)&L_sp_256_num_bits_8_table; __asm__ __volatile__ ( - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "mov r3, #256\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "mov r3, #224\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "mov r3, #192\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "mov r3, #160\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "mov r3, #128\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "mov r3, #96\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "mov r3, #64\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "mov r3, #32\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "\n9:\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "r2", "r3" + "mov lr, %[L_sp_256_num_bits_8_table]\n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp 
r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_3_%=: \n\t" + "lsr r3, r1, 
#16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + 
"L_sp_256_num_bits_8_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_256_num_bits_8_9_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [L_sp_256_num_bits_8_table] "+r" (L_sp_256_num_bits_8_table_c) + : + : "memory", "r2", "r3", "r12", "lr", "cc" ); - - return r; + return (uint32_t)(size_t)a; } +#else +static int sp_256_num_bits_8(const sp_digit* a_p) +{ + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "ldr r1, 
[%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_7_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_6_%=\n\t" + "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_256_num_bits_8_9_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)a; +} + +#endif /* 
WOLFSSL_ARM_ARCH && (WOLFSSL_ARM_ARCH < 7) */ /* Non-constant time modular inversion. * * @param [out] r Resulting number. @@ -38215,6 +78691,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut, vt; sp_digit o; + XMEMCPY(u, m, sizeof(u)); XMEMCPY(v, a, sizeof(v)); @@ -38241,7 +78718,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_256_cmp_8(u, v) >= 0))) { sp_256_sub_8(u, u, v); o = sp_256_sub_8(b, b, d); if (o != 0) @@ -38276,6 +78753,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(d)); + return MP_OKAY; } @@ -38317,7 +78795,7 @@ static void sp_256_add_points_8(sp_point_256* p1, const sp_point_256* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -38389,6 +78867,106 @@ static int sp_256_calc_vfy_point_8(sp_point_256* p1, sp_point_256* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_256* p1 = NULL; +#else + sp_digit u1[18 * 8]; + sp_point_256 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 8, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 8; + s = u1 + 4 * 8; + tmp = u1 + 6 * 8; + p2 = p1 + 1; + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 8, hash, (int)hashLen); + sp_256_from_mp(u2, 8, rm); + sp_256_from_mp(s, 8, sm); + sp_256_from_mp(p2->x, 8, pX); + sp_256_from_mp(p2->y, 8, pY); + sp_256_from_mp(p2->z, 8, pZ); + + err = sp_256_calc_vfy_point_8(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_256_from_mp(u2, 8, rm); + err = sp_256_mod_mul_norm_8(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_8(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 8, rm); + carry = sp_256_add_8(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_8(u2); + + /* Compare with mod and if greater or equal then not valid. 
*/ + c = sp_256_cmp_8(u2, p256_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_256_mod_mul_norm_8(u2, u2, p256_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod); + } + *res = (sp_256_cmp_8(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_256_ctx { int state; @@ -38401,7 +78979,7 @@ typedef struct sp_ecc_verify_256_ctx { sp_digit u1[2*8]; sp_digit u2[2*8]; sp_digit s[2*8]; - sp_digit tmp[2*8 * 5]; + sp_digit tmp[2*8 * 6]; sp_point_256 p1; sp_point_256 p2; } sp_ecc_verify_256_ctx; @@ -38538,109 +79116,10 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_256* p1 = NULL; -#else - sp_digit u1[16 * 8]; - sp_point_256 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = NULL; - sp_point_256* p2 = NULL; - sp_digit carry; - sp_int32 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 8; - s = u1 + 4 * 8; - tmp = u1 + 6 * 8; - p2 = p1 + 1; - - if (hashLen > 32U) { - hashLen = 32U; - } - - 
sp_256_from_bin(u1, 8, hash, (int)hashLen); - sp_256_from_mp(u2, 8, rm); - sp_256_from_mp(s, 8, sm); - sp_256_from_mp(p2->x, 8, pX); - sp_256_from_mp(p2->y, 8, pY); - sp_256_from_mp(p2->z, 8, pZ); - - err = sp_256_calc_vfy_point_8(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_256_from_mp(u2, 8, rm); - err = sp_256_mod_mul_norm_8(u2, u2, p256_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod); - *res = (int)(sp_256_cmp_8(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_256_from_mp(u2, 8, rm); - carry = sp_256_add_8(u2, u2, p256_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_256_norm_8(u2); - - /* Compare with mod and if greater or equal then not valid. */ - c = sp_256_cmp_8(u2, p256_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_256_mod_mul_norm_8(u2, u2, p256_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, - p256_mp_mod); - *res = (sp_256_cmp_8(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -38650,7 +79129,7 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_256_ecc_is_point_8(const sp_point_256* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[8 * 4]; @@ -38658,7 +79137,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -38668,25 +79147,27 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 8; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_8(t1, point->y); (void)sp_256_mod_8(t1, t1, p256_mod); sp_256_sqr_8(t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); sp_256_mul_8(t2, t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); - (void)sp_256_sub_8(t2, p256_mod, t2); - sp_256_mont_add_8(t1, t1, t2, p256_mod); + sp_256_mont_sub_8(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); + if (sp_256_cmp_8(t1, p256_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -38694,7 +79175,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -38703,7 +79184,7 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, */ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* pub = NULL; #else sp_point_256 pub[1]; @@ -38711,7 +79192,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -38726,7 +79207,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) err = sp_256_ecc_is_point_8(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -38748,7 +79229,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_256* pub = NULL; #else @@ -38769,7 +79250,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); @@ -38835,7 +79316,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -38864,17 +79345,17 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else - sp_digit tmp[2 * 8 * 5]; + sp_digit tmp[2 * 8 * 6]; sp_point_256 p[2]; #endif sp_point_256* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -38882,7 +79363,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -38917,7 +79398,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -38941,7 +79422,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -38950,7 +79431,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -38985,7 +79466,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if 
(p != NULL) @@ -39005,7 +79486,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -39015,7 +79496,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -39049,7 +79530,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_256_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -39067,7 +79548,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_256_mont_sqrt_8(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 8]; @@ -39075,7 +79556,7 @@ static int sp_256_mont_sqrt_8(sp_digit* y) sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) { err = MEMORY_E; @@ -39118,7 +79599,7 @@ static int sp_256_mont_sqrt_8(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -39136,7 +79617,7 @@ static int sp_256_mont_sqrt_8(sp_digit* y) */ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 8]; @@ -39144,7 +79625,7 @@ 
int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -39184,7 +79665,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) err = sp_256_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -39286,57 +79767,196 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. */ -static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #96\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x60\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_384_mul_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_384_mul_12_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, 
r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #48\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_384_mul_12_inner_done_%=\n\t" + "blt L_sp_384_mul_12_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr 
r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_384_mul_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #88\n\t" - "ble 1b\n\t" + "cmp r5, #0x54\n\t" + "ble L_sp_384_mul_12_outer_%=\n\t" + "ldr lr, [%[a], #44]\n\t" + "ldr r11, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" 
+ "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_384_mul_12_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_384_mul_12_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -39347,985 +79967,5482 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" - "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov 
r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl 
r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, 
r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], 
#20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, 
#0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" 
+ "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, 
r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, 
#0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, 
r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, 
r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, 
r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" 
+ "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, 
r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[11] * B[1]\n\t" + /* A[11] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull 
r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, 
#0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[2] * B[11]\n\t" + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs 
r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, 
r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[11] * B[3]\n\t" + /* A[11] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" 
+ "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" 
"adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * 
B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #56]\n\t" - "# A[4] * B[11]\n\t" + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #60]\n\t" - "# A[11] * B[5]\n\t" + /* A[11] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, 
r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[10] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc 
r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #64]\n\t" - "# A[6] * B[11]\n\t" + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, 
#0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) 
+ "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[11] * B[7]\n\t" + /* A[11] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, [%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, 
r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #72]\n\t" - "# A[8] * B[11]\n\t" + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #76]\n\t" - "# A[11] * B[9]\n\t" + /* A[11] * B[9] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[10] * B[11]\n\t" + /* A[10] * B[11] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[11] * B[11]\n\t" - "umull r6, r7, r8, r9\n\t" + /* A[11] * B[11] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, r8, r9\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -40334,10 +85451,9 @@ static void 
sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" ); } @@ -40348,73 +85464,155 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #96\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x60\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_384_sqr_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_384_sqr_12_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, 
r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #48\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_384_sqr_12_inner_done_%=\n\t" + "blt L_sp_384_sqr_12_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_384_sqr_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #88\n\t" - "ble 1b\n\t" + "cmp r5, #0x54\n\t" + "ble L_sp_384_sqr_12_outer_%=\n\t" + "ldr lr, [%[a], 
#44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_384_sqr_12_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_384_sqr_12_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -40424,112 +85622,521 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, 
r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc 
r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" 
+ "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40537,66 +86144,294 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, 
r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr 
r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + 
"lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + 
"adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40604,80 +86439,370 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr 
r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs 
r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl 
r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + 
"adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, 
r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40685,94 +86810,446 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, 
r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, 
r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40780,87 +87257,408 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, 
r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[10]\n\t" + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, 
r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * 
A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #48]\n\t" - "# A[2] * A[11]\n\t" + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[10]\n\t" + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40868,73 +87666,332 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #52]\n\t" - "# A[3] * A[11]\n\t" + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[10]\n\t" + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, 
r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #56]\n\t" - "# A[4] * A[11]\n\t" + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[10]\n\t" + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, 
r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -40942,59 +87999,256 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #60]\n\t" - "# A[5] * A[11]\n\t" + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add 
r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[10]\n\t" + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + 
"adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #64]\n\t" - "# A[6] * A[11]\n\t" + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc 
r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[10]\n\t" + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -41002,99 +88256,426 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[7] * A[11]\n\t" + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #72]\n\t" - "# A[8] * A[11]\n\t" + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - 
"umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" 
"adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #76]\n\t" - "# A[9] * A[11]\n\t" + /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, 
r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #80]\n\t" - "# A[10] * A[11]\n\t" + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, 
r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #84]\n\t" - "# A[11] * A[11]\n\t" + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adc r4, r4, r9\n\t" +#endif "str r3, [%[r], #88]\n\t" "str r4, [%[r], #92]\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #48\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" ); } @@ -41106,41 +88687,35 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( + "mov r3, #0\n\t" "add r12, %[a], #48\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "\n" + "L_sp_384_add_12_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_384_add_12_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -41150,182 +88725,41 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] 
"r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" - ); - - return c; -} - -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add r12, %[a], #48\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r7, [%[b], #32]\n\t" - "ldr r8, [%[b], #36]\n\t" - "ldr r9, [%[b], #40]\n\t" - "ldr r10, [%[b], #44]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #32]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -41338,7 +88772,7 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, */ static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[12]; @@ -41348,7 +88782,7 @@ 
static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) { err = MEMORY_E; @@ -41423,7 +88857,7 @@ static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit r[11] = t[11]; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -41440,20 +88874,23 @@ static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -41483,12 +88920,12 @@ static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -41623,6 +89060,7 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) return err; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. 
* @@ -41631,248 +89069,749 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_384_cond_sub_12_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #48\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #48\n\t" + "blt L_sp_384_cond_sub_12_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" 
- "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ #define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 384 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. 
* mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_384_mont_reduce_12_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, 
r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" 
- "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds 
r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * 
mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #44]\n\t" +#else "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #44]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" - "ldr r9, [%[a], #48]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #48]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + 
"lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "ldr r10, [%[a], #48]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #48\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #48\n\t" + "blt L_sp_384_mont_reduce_12_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_384_mont_reduce_12_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc 
r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "ldr r10, [%[a], #48]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #48\n\t" + "blt L_sp_384_mont_reduce_12_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_384_mont_reduce_12_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, 
[%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #48]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #44]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #48]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #48\n\t" + "blt L_sp_384_mont_reduce_12_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -41880,9 +89819,9 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_384_mul_12(r, a, b); @@ -41894,9 +89833,9 @@ static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_384_sqr_12(r, a); @@ -41910,10 +89849,10 @@ static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). 
- * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_12(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_12(r, a, m, mp); for (; n > 1; n--) { @@ -41921,7 +89860,7 @@ static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P384 curve. */ static const uint32_t p384_mod_minus_2[12] = { @@ -42022,180 +89961,175 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #44\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #44\n\t" + "\n" + "L_sp_384_cmp_12_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" 
- ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_384_cmp_12_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + 
"movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, 
r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. 
@@ -42224,27 +90158,24 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); - sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->x, r->x, p384_mod, ~(n >> 31)); sp_384_norm_12(r->x); /* y /= z^3 */ sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); - sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->y, r->y, p384_mod, ~(n >> 31)); sp_384_norm_12(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -42254,9 +90185,13 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + sp_digit o; o = sp_384_add_12(r, a, b); @@ -42269,8 +90204,12 @@ static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b * a Number to double in Montgomery form. * m Modulus (prime). 
*/ -static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + sp_digit o; o = sp_384_add_12(r, a, a); @@ -42283,8 +90222,12 @@ static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + sp_digit o; o = sp_384_add_12(r, a, a); @@ -42293,6 +90236,88 @@ static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m sp_384_cond_sub_12(r, r, m, 0 - o); } +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #48\n\t" + "\n" + "L_sp_384_sub_12_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_384_sub_12_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -42301,104 +90326,106 @@ static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov lr, #0\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_384_cond_add_12_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #48\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #48\n\t" + "blt L_sp_384_cond_add_12_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, 
[%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "adc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov r8, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -42406,25 +90433,34 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + sp_digit o; o = sp_384_sub_12(r, a, b); sp_384_cond_add_12(r, r, m, o); } -static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) +#ifdef WOLFSSL_SP_SMALL +#else +#endif /* WOLFSSL_SP_SMALL */ +static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" + "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" "ldr r2, [%[a], #12]\n\t" @@ -42465,9 +90501,9 @@ static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) "lsr r4, r4, #1\n\t" "str r3, [%[r], #40]\n\t" "str r4, [%[r], #44]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4" + : "memory", "r2", "r3", "r4", "cc" ); } @@ -42477,7 +90513,7 @@ static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -42492,6 +90528,61 @@ static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. 
*/ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_mont_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_12_ctx { int state; @@ -42502,7 +90593,14 @@ typedef struct sp_384_proj_point_dbl_12_ctx { sp_digit* z; } sp_384_proj_point_dbl_12_ctx; -static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_12_ctx* ctx = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data; @@ -42576,7 +90674,7 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_12(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_12(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -42626,61 +90724,6 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_12(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_12(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_12(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_12(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_12(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_12(y, y, x, 
p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_12(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -42696,6 +90739,19 @@ static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_12(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -42703,6 +90759,84 @@ static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*12; + sp_digit* t2 = t + 4*12; + sp_digit* t3 = t + 6*12; + sp_digit* t4 = t + 8*12; + sp_digit* t5 = t + 10*12; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(t2, t1) & + sp_384_cmp_equal_12(t4, t3)) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; 
+ + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_12_ctx { @@ -42715,11 +90849,19 @@ typedef struct sp_384_proj_point_add_12_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_384_proj_point_add_12_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -42738,266 +90880,173 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*12; - ctx->t3 = t + 4*12; - ctx->t4 = t + 6*12; - ctx->t5 = t + 8*12; + ctx->t6 = t; + ctx->t1 = t + 2*12; + ctx->t2 = t + 4*12; + ctx->t3 = t + 6*12; + ctx->t4 = t + 8*12; + ctx->t5 = t + 10*12; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); - sp_384_norm_12(ctx->t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_384)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<12; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<12; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<12; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; + break; + case 2: + sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + 
ctx->state = 3; + break; + case 3: + sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_12(ctx->t1, ctx->t1, ctx->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_12(ctx->t4, ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(ctx->t2, ctx->t1) & + sp_384_cmp_equal_12(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_12(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(ctx->t3, ctx->t3, ctx->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - 
/* R = S2 - S1 */ - sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_12(ctx->z, ctx->z, ctx->t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_dbl_12(ctx->t1, ctx->y, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_12(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t1, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_12(ctx->y, ctx->y, ctx->x, p384_mod); + sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - 
sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_12(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* t3 = t + 4*12; - sp_digit* t4 = t + 6*12; - sp_digit* t5 = t + 8*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_384* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<12; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<12; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<12; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_12(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_12(t4, t4, t3, p384_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - 
sp_384_mont_dbl_12(t1, y, p384_mod); - sp_384_mont_sub_12(x, x, t1, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_384_get_point_16_12(sp_point_384* r, const sp_point_384* table, @@ -43103,7 +91152,7 @@ static void sp_384_get_point_16_12(sp_point_384* r, const sp_point_384* table, static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; sp_digit* tmp = NULL; #else @@ -43112,7 +91161,7 @@ static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, con #endif sp_point_384* rt = NULL; #ifndef WC_NO_CACHE_RESISTANT -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* p = NULL; #else sp_point_384 p[1]; @@ -43128,7 +91177,7 @@ static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, con (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * (16 + 1), heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -43239,32 +91288,32 @@ static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, con } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) #endif { ForceZero(tmp, sizeof(sp_digit) * 2 * 12 * 6); - #if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(tmp, heap, DYNAMIC_TYPE_ECC); #endif } #ifndef WC_NO_CACHE_RESISTANT - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK if (p != NULL) #endif { ForceZero(p, sizeof(sp_point_384)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(p, heap, DYNAMIC_TYPE_ECC); #endif } #endif /* !WC_NO_CACHE_RESISTANT */ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) #endif { ForceZero(t, sizeof(sp_point_384) * 17); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif } @@ -43280,7 +91329,7 @@ static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, con * n Number of times to double * t Temporary ordinate data. */ -static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, +static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_digit* t) { sp_digit* w = t; @@ -43291,6 +91340,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -43301,7 +91351,6 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, /* W = Z^4 */ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -43319,9 +91368,12 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_12(t2, b, x, p384_mod); + sp_384_mont_dbl_12(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); 
#ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -43331,9 +91383,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_12(y, b, x, p384_mod); - sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_mul_12(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_12(y, y, t1, p384_mod); } #ifndef WOLFSSL_SP_SMALL @@ -43348,18 +91398,19 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_12(t2, b, x, p384_mod); + sp_384_mont_dbl_12(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_12(y, b, x, p384_mod); - sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(y, y, p384_mod); + sp_384_mont_mul_12(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_12(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_12(y, y, p384_mod); + sp_384_mont_div2_12(y, y, p384_mod); } /* Convert the projective point to affine. @@ -43402,76 +91453,75 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* t3 = t + 4*12; - sp_digit* t4 = t + 6*12; - sp_digit* t5 = t + 8*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*12; + sp_digit* t6 = t + 4*12; + sp_digit* t1 = t + 6*12; + sp_digit* t4 = t + 8*12; + sp_digit* t5 = t + 10*12; - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(p->x, t2) & + sp_384_cmp_equal_12(p->y, t4)) { sp_384_proj_point_dbl_12(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<12; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<12; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<12; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - 
sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ - sp_384_mont_sub_12(t2, t2, x, p384_mod); + sp_384_mont_sub_12(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ - sp_384_mont_sub_12(t4, t4, y, p384_mod); + sp_384_mont_sub_12(t4, t4, p->y, p384_mod); /* Z3 = H*Z1 */ - sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, t1, t5, p384_mod); - sp_384_mont_dbl_12(t1, t3, p384_mod); - sp_384_mont_sub_12(x, x, t1, p384_mod); + sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, p->x, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(t2, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + sp_384_mont_dbl_12(t5, t3, p384_mod); + sp_384_mont_sub_12(x, t2, t5, p384_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_384_mont_sub_12(t3, t3, x, p384_mod); sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, t3, t5, p384_mod); + sp_384_mont_mul_12(t1, t1, p->y, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, t3, t1, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + 
r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -43491,7 +91541,7 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, static int sp_384_gen_stripe_table_12(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -43504,7 +91554,7 @@ static int sp_384_gen_stripe_table_12(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -43559,7 +91609,7 @@ static int sp_384_gen_stripe_table_12(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -43572,7 +91622,7 @@ static int sp_384_gen_stripe_table_12(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_entry_16_12(sp_point_384* r, @@ -43654,7 +91704,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -43674,7 +91724,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -43740,7 +91790,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -43783,7 +91833,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -43854,23 +91904,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 12 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -43891,6 +91954,9 @@ static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -43911,7 +91977,7 @@ static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp static int sp_384_gen_stripe_table_12(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -43924,7 +91990,7 @@ static int sp_384_gen_stripe_table_12(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -43979,7 +92045,7 @@ static int 
sp_384_gen_stripe_table_12(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -43992,7 +92058,7 @@ static int sp_384_gen_stripe_table_12(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_384_get_entry_256_12(sp_point_384* r, @@ -44074,7 +92140,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -44094,7 +92160,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -44160,7 +92226,7 @@ static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -44203,7 +92269,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -44274,23 +92340,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 12 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -44311,6 +92390,9 @@ static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -44329,7 +92411,7 @@ static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -44338,7 +92420,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -44361,7 +92443,7 @@ int 
sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_384_point_to_ecc_point_12(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -44376,7 +92458,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -44386,8 +92468,8 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* point = NULL; sp_digit* k = NULL; #else sp_point_384 point[2]; @@ -44397,7 +92479,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -44441,7 +92523,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, err = sp_384_point_to_ecc_point_12(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -45880,7 +93962,7 @@ static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k, */ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -45889,7 +93971,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -45911,7 +93993,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_384_point_to_ecc_point_12(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -45925,7 +94007,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -45935,7 +94017,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -45946,8 +94028,8 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; @@ -45989,7 +94071,7 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, err = sp_384_point_to_ecc_point_12(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -46001,65 +94083,37 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_12(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * * a A single precision integer. 
*/ -static void sp_384_add_one_12(sp_digit* a) +static void sp_384_add_one_12(sp_digit* a_p) { + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + __asm__ __volatile__ ( - "ldr r1, [%[a], #0]\n\t" - "ldr r2, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" "adds r1, r1, #1\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "str r1, [%[a], #0]\n\t" - "str r2, [%[a], #4]\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r1, [%[a], #16]\n\t" - "ldr r2, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "str r1, [%[a], #16]\n\t" - "str r2, [%[a], #20]\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r1, [%[a], #32]\n\t" - "ldr r2, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "str r1, [%[a], #32]\n\t" - "str r2, [%[a], #36]\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + : [a] "+r" (a) : - : [a] "r" (a) - : "memory", "r1", "r2", "r3", "r4" + : "memory", "r1", "r2", "r3", "r4", "cc" ); } @@ -46073,27 +94127,30 @@ static void sp_384_add_one_12(sp_digit* a) static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + 
((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; } - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } @@ -46114,7 +94171,7 @@ static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_384_from_bin(k, 12, buf, (int)sizeof(buf)); - if (sp_384_cmp_12(k, p384_order2) < 0) { + if (sp_384_cmp_12(k, p384_order2) <= 0) { sp_384_add_one_12(k); break; } @@ -46136,7 +94193,7 @@ static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -46151,15 +94208,15 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_384* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -46200,7 +94257,7 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_384_point_to_ecc_point_12(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -46212,6 +94269,84 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, 
void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_384_ctx { + int state; + sp_384_ecc_mulmod_12_ctx mulmod_ctx; + sp_digit k[12]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384 point[2]; +#else + sp_point_384 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_384_ctx; + +int sp_ecc_make_key_384_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_384_ctx* ctx = (sp_ecc_key_gen_384_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_384_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_384_ecc_gen_k_12(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_384_ecc_mulmod_base_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p384_order, 1, 1); + if (err == MP_OKAY) { + if (sp_384_iszero_12(ctx->point->x) || + sp_384_iszero_12(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_384_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_12(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 48 @@ -46222,34 +94357,13 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) static void sp_384_to_bin_12(sp_digit* r, byte* a) { int i; - int j; - int s = 0; - int b; + int j = 0; - j = 384 / 8 - 1; - a[j] = 0; - for (i=0; i<12 && j>=0; i++) { - b = 0; - /* lint allow cast of mismatch sp_digit and int */ - a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ - b += 8 - s; - if (j < 0) { - break; - } - while (b < 32) { - a[j--] = (byte)(r[i] >> b); - b += 8; - if (j < 0) { - break; - } - } - s = 8 - (b - 32); - if (j >= 0) { - a[j] = 0; - } - if (s != 0) { - j++; - } + for (i = 11; i >= 0; i--) { + a[j++] = r[i] >> 24; + a[j++] = r[i] >> 16; + a[j++] = r[i] >> 8; + a[j++] = r[i] >> 0; } } @@ -46268,7 +94382,7 @@ static void sp_384_to_bin_12(sp_digit* r, byte* a) int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -46281,7 +94395,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); @@ -46306,7 +94420,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 48; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -46315,8 +94429,60 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_384_ctx { + int state; + union { + sp_384_ecc_mulmod_12_ctx mulmod_ctx; + }; + sp_digit k[12]; + sp_point_384 point; 
+} sp_ecc_sec_gen_384_ctx; + +int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_384_ctx* ctx = (sp_ecc_sec_gen_384_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_384_from_mp(ctx->k, 12, priv); + sp_384_point_from_ecc_point_12(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_384_to_bin_12(ctx->point.x, out); + *outLen = 48; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -46326,40 +94492,33 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #48\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #48\n\t" + "\n" + "L_sp_384_sub_in_pkace_12_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], #4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_384_sub_in_pkace_12_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #else @@ -46368,260 +94527,740 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. 
*/ -static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, [%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm 
%[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ -static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_384_mul_d_12_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" "cmp r9, #48\n\t" - "blt 1b\n\t" + "blt L_sp_384_mul_d_12_word_%=\n\t" "str r3, [%[r], #48]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - 
"umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" - "str r5, [%[r], #44]\n\t" - "str r3, [%[r], #48]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, 
#0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, 
r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ -static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr 
r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_384_word_12_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_384_word_12_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, 
r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] 
"+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. @@ -46661,8 +95300,8 @@ static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[24], t2[13]; sp_digit div, r1; @@ -46670,12 +95309,15 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi (void)m; - div = d[11]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 12); - for (i=11; i>=0; i--) { - sp_digit hi = t1[12 + i] - (t1[12 + i] == div); + r1 = sp_384_cmp_12(&t1[12], d) >= 0; + sp_384_cond_sub_12(&t1[12], &t1[12], d, (sp_digit)0 - r1); + for (i = 11; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[12 + i] == div); + sp_digit hi = t1[12 + i] + mask; r1 = div_384_word_12(hi, t1[12 + i - 1], div); + r1 |= mask; sp_384_mul_d_12(t2, d, r1); t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2); @@ -46699,13 +95341,27 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_384_div_12(a, m, NULL, r); } #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P384 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. 
+ */ +static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_384_mul_12(r, a, b); + sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P384 curve. */ static const uint32_t p384_order_minus_2[12] = { @@ -46719,18 +95375,6 @@ static const uint32_t p384_order_low[6] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P384 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_384_mul_12(r, a, b); - sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order); -} - /* Square number mod the order of P384 curve. (r = a * a mod order) * * r Result of the squaring. @@ -46872,6 +95516,7 @@ static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -46946,6 +95591,128 @@ static int sp_384_calc_s_12(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. 
*/ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_384* point = NULL; +#else + sp_digit e[7 * 2 * 12]; + sp_point_384 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 12; + k = e + 4 * 12; + r = e + 6 * 12; + tmp = e + 8 * 12; + s = e; + + if (hashLen > 48U) { + hashLen = 48U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_12(rng, k); + } + else { + sp_384_from_mp(k, 12, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 12U); + sp_384_norm_12(r); + c = sp_384_cmp_12(r, p384_order); + sp_384_cond_sub_12(r, r, p384_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_384_norm_12(r); + + if (!sp_384_iszero_12(r)) { + /* x is modified in calculation of s. */ + sp_384_from_mp(x, 12, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_384_from_bin(e, 12, hash, (int)hashLen); + + err = sp_384_calc_s_12(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_384_iszero_12(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 12); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_384)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_384_ctx { int state; @@ -46973,15 +95740,10 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 48U) { - hashLen = 48U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -47016,6 +95778,9 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_12(ctx->r); + if (hashLen > 48U) { + hashLen = 48U; + } sp_384_from_mp(ctx->x, 12, priv); sp_384_from_bin(ctx->e, 12, hash, (int)hashLen); ctx->state = 4; @@ -47110,124 +95875,6 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_384* point = NULL; -#else - sp_digit e[7 * 2 * 12]; - sp_point_384 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* 
k = NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int32 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 12; - k = e + 4 * 12; - r = e + 6 * 12; - tmp = e + 8 * 12; - s = e; - - if (hashLen > 48U) { - hashLen = 48U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. */ - if (km == NULL || mp_iszero(km)) { - err = sp_384_ecc_gen_k_12(rng, k); - } - else { - sp_384_from_mp(k, 12, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 12U); - sp_384_norm_12(r); - c = sp_384_cmp_12(r, p384_order); - sp_384_cond_sub_12(r, r, p384_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_384_norm_12(r); - - sp_384_from_mp(x, 12, priv); - sp_384_from_bin(e, 12, hash, (int)hashLen); - - err = sp_384_calc_s_12(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. 
*/ - if ((err == MP_OKAY) && (sp_384_iszero_12(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_384_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_384_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 12); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_384)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -47237,248 +95884,869 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, * a Number to divide. * m Modulus. 
*/ -static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) +static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "ldr r4, [%[a]]\n\t" - "ands r8, r4, #1\n\t" - "beq 1f\n\t" - "mov r12, #0\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[m], #0]\n\t" - "ldr r9, [%[m], #4]\n\t" - "ldr r10, [%[m], #8]\n\t" - "ldr r14, [%[m], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[m], #16]\n\t" - "ldr r9, [%[m], #20]\n\t" - "ldr r10, [%[m], #24]\n\t" - "ldr r14, [%[m], #28]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[m], #32]\n\t" - "ldr r9, [%[m], #36]\n\t" - "ldr r10, [%[m], #40]\n\t" - "ldr r14, [%[m], #44]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "adc r8, r12, r12\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r5, [%[a], #2]\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #2]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #6]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #6]\n\t" - "ldr r4, [%[a], 
#8]\n\t" - "ldr r5, [%[a], #10]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #10]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[a], #14]\n\t" - "str r4, [%[r], #12]\n\t" - "str r5, [%[r], #14]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #18]\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #18]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #22]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #22]\n\t" - "\n2:\n\t" - "ldr r3, [%[r]]\n\t" - "ldr r4, [%[r], #4]\n\t" - "lsr r3, r3, #1\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #8]\n\t" - "str r3, [%[r], #0]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r4, [%[r], #4]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r5, [%[r], #8]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #20]\n\t" - "str r3, [%[r], #12]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r4, [%[r], #16]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r5, [%[r], #20]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #32]\n\t" - "str r3, [%[r], #24]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r4, [%[r], #28]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r5, [%[r], #32]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #44]\n\t" - "str r3, [%[r], #36]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "orr r5, r5, r8, lsl #31\n\t" - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" + "ldm %[a]!, {r4}\n\t" + "ands r3, r4, #1\n\t" + "beq L_sp_384_div2_mod_12_even_%=\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs 
r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "adc r3, r12, r12\n\t" + "b L_sp_384_div2_mod_12_div2_%=\n\t" + "\n" + "L_sp_384_div2_mod_12_even_%=: \n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "\n" + "L_sp_384_div2_mod_12_div2_%=: \n\t" + "sub %[r], %[r], #48\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[r]]\n\t" + "ldr r9, [%[r], #4]\n\t" +#else + "ldrd r8, r9, [%[r]]\n\t" +#endif + "lsr r8, r8, #1\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #8]\n\t" + "str r8, [%[r]]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #12]\n\t" + "str r9, [%[r], #4]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #16]\n\t" + "str r10, [%[r], #8]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #20]\n\t" + "str r8, [%[r], #12]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #24]\n\t" + "str r9, [%[r], #16]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #28]\n\t" + "str r10, [%[r], #20]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #32]\n\t" + "str r8, [%[r], #24]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #36]\n\t" + "str r9, [%[r], #28]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + 
"lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #40]\n\t" + "str r10, [%[r], #32]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #44]\n\t" + "str r8, [%[r], #36]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "orr r10, r10, r3, lsl #31\n\t" + "str r9, [%[r], #40]\n\t" + "str r10, [%[r], #44]\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); } -static int sp_384_num_bits_12(sp_digit* a) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +static const unsigned char L_sp_384_num_bits_12_table[] = { + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_384_num_bits_12(const sp_digit* a_p) { - int r = 0; + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register unsigned char* L_sp_384_num_bits_12_table_c asm ("r1") = (unsigned char*)&L_sp_384_num_bits_12_table; __asm__ __volatile__ ( - "ldr r2, [%[a], #44]\n\t" - "cmp r2, #0\n\t" - "beq 11f\n\t" - "mov r3, #384\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n11:\n\t" - "ldr r2, [%[a], #40]\n\t" - "cmp r2, #0\n\t" - "beq 10f\n\t" - "mov r3, #352\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n10:\n\t" - "ldr r2, [%[a], #36]\n\t" - "cmp r2, #0\n\t" - "beq 9f\n\t" - "mov r3, #320\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n9:\n\t" - "ldr r2, [%[a], #32]\n\t" - "cmp r2, #0\n\t" - "beq 8f\n\t" - "mov r3, #288\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n8:\n\t" - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "mov r3, #256\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "mov r3, #224\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "mov r3, #192\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "mov r3, #160\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "mov r3, #128\n\t" - "clz %[r], r2\n\t" - "sub 
%[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "mov r3, #96\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "mov r3, #64\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "mov r3, #32\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "\n13:\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "r2", "r3" + "mov lr, %[L_sp_384_num_bits_12_table]\n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_11_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x78\n\t" +#else + "mov r2, #0x178\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x70\n\t" +#else + "mov r2, #0x170\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x68\n\t" +#else + "mov r2, #0x168\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, 
#0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_10_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x58\n\t" +#else + "mov r2, #0x158\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x50\n\t" +#else + "mov r2, #0x150\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x48\n\t" +#else + "mov r2, #0x148\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_9_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x38\n\t" +#else + "mov r2, #0x138\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x30\n\t" +#else + "mov r2, #0x130\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x28\n\t" +#else + "mov r2, #0x128\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_8_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" +#else + "mov r2, #0x118\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_2_%=\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, #0x110\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x108\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, 
#0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + 
"L_sp_384_num_bits_12_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, 
r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_384_num_bits_12_13_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [L_sp_384_num_bits_12_table] "+r" (L_sp_384_num_bits_12_table_c) + : + : "memory", "r2", "r3", "r12", "lr", "cc" ); - - return r; + return (uint32_t)(size_t)a; } +#else +static int sp_384_num_bits_12(const sp_digit* a_p) +{ + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_11_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_10_%=\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_9_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_8_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_7_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_6_%=\n\t" + "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, 
r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_384_num_bits_12_13_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)a; +} + +#endif /* WOLFSSL_ARM_ARCH && (WOLFSSL_ARM_ARCH < 7) */ /* Non-constant time modular inversion. * * @param [out] r Resulting number. @@ -47495,6 +96763,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut, vt; sp_digit o; + XMEMCPY(u, m, sizeof(u)); XMEMCPY(v, a, sizeof(v)); @@ -47521,7 +96790,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_12(u, v) >= 0))) { sp_384_sub_12(u, u, v); o = sp_384_sub_12(b, b, d); if (o != 0) @@ -47556,6 +96825,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(d)); + return MP_OKAY; } @@ -47601,7 +96871,7 @@ static void sp_384_add_points_12(sp_point_384* p1, const sp_point_384* p2, * p2 Public point and temporary. 
* s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -47673,6 +96943,106 @@ static int sp_384_calc_vfy_point_12(sp_point_384* p1, sp_point_384* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_384* p1 = NULL; +#else + sp_digit u1[18 * 12]; + sp_point_384 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_384* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 12, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 12; + s = u1 + 4 * 12; + tmp = u1 + 6 * 12; + p2 = p1 + 1; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(u1, 12, hash, (int)hashLen); + sp_384_from_mp(u2, 12, rm); + sp_384_from_mp(s, 12, sm); + sp_384_from_mp(p2->x, 12, pX); + sp_384_from_mp(p2->y, 12, pY); + sp_384_from_mp(p2->z, 12, pZ); + + err = sp_384_calc_vfy_point_12(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. 
*/ + sp_384_from_mp(u2, 12, rm); + err = sp_384_mod_mul_norm_12(u2, u2, p384_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod); + *res = (int)(sp_384_cmp_12(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_384_from_mp(u2, 12, rm); + carry = sp_384_add_12(u2, u2, p384_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_384_norm_12(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_384_cmp_12(u2, p384_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_384_mod_mul_norm_12(u2, u2, p384_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod); + } + *res = (sp_384_cmp_12(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_384_ctx { int state; @@ -47685,7 +97055,7 @@ typedef struct sp_ecc_verify_384_ctx { sp_digit u1[2*12]; sp_digit u2[2*12]; sp_digit s[2*12]; - sp_digit tmp[2*12 * 5]; + sp_digit tmp[2*12 * 6]; sp_point_384 p1; sp_point_384 p2; } sp_ecc_verify_384_ctx; @@ -47822,109 +97192,10 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_384* p1 = NULL; -#else - sp_digit u1[16 * 12]; - sp_point_384 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = 
NULL; - sp_point_384* p2 = NULL; - sp_digit carry; - sp_int32 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 12; - s = u1 + 4 * 12; - tmp = u1 + 6 * 12; - p2 = p1 + 1; - - if (hashLen > 48U) { - hashLen = 48U; - } - - sp_384_from_bin(u1, 12, hash, (int)hashLen); - sp_384_from_mp(u2, 12, rm); - sp_384_from_mp(s, 12, sm); - sp_384_from_mp(p2->x, 12, pX); - sp_384_from_mp(p2->y, 12, pY); - sp_384_from_mp(p2->z, 12, pZ); - - err = sp_384_calc_vfy_point_12(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_384_from_mp(u2, 12, rm); - err = sp_384_mod_mul_norm_12(u2, u2, p384_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod); - *res = (int)(sp_384_cmp_12(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_384_from_mp(u2, 12, rm); - carry = sp_384_add_12(u2, u2, p384_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_384_norm_12(u2); - - /* Compare with mod and if greater or equal then not valid. 
*/ - c = sp_384_cmp_12(u2, p384_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_384_mod_mul_norm_12(u2, u2, p384_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, - p384_mp_mod); - *res = (sp_384_cmp_12(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -47934,7 +97205,7 @@ int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_384_ecc_is_point_12(const sp_point_384* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[12 * 4]; @@ -47942,7 +97213,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -47952,25 +97223,27 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 12; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_12(t1, point->y); (void)sp_384_mod_12(t1, t1, p384_mod); sp_384_sqr_12(t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); sp_384_mul_12(t2, t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); - (void)sp_384_sub_12(t2, p384_mod, t2); - sp_384_mont_add_12(t1, t1, t2, p384_mod); + sp_384_mont_sub_12(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ 
sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); + if (sp_384_cmp_12(t1, p384_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -47978,7 +97251,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -47987,7 +97260,7 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, */ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* pub = NULL; #else sp_point_384 pub[1]; @@ -47995,7 +97268,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -48010,7 +97283,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) err = sp_384_ecc_is_point_12(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -48032,7 +97305,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_384* pub = NULL; #else @@ -48053,7 +97326,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, err = 
ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); @@ -48119,7 +97392,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -48148,17 +97421,17 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else - sp_digit tmp[2 * 12 * 5]; + sp_digit tmp[2 * 12 * 6]; sp_point_384 p[2]; #endif sp_point_384* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -48166,7 +97439,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -48201,7 +97474,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -48225,7 +97498,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -48234,7 +97507,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -48269,7 +97542,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -48289,7 +97562,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -48299,7 +97572,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -48333,7 +97606,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_384_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -48351,7 +97624,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_384_mont_sqrt_12(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[5 * 2 * 12]; @@ -48362,7 +97635,7 @@ static int sp_384_mont_sqrt_12(sp_digit* y) sp_digit* t5 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -48432,7 +97705,7 @@ static int sp_384_mont_sqrt_12(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -48450,7 +97723,7 @@ static int sp_384_mont_sqrt_12(sp_digit* y) */ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 12]; @@ -48458,7 +97731,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -48498,7 +97771,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) err = sp_384_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -48507,6 +97780,28911 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) } #endif #endif /* WOLFSSL_SP_384 */ +#ifdef WOLFSSL_SP_521 + +/* Point structure to use. */ +typedef struct sp_point_521 { + /* X ordinate of point. */ + sp_digit x[2 * 17]; + /* Y ordinate of point. */ + sp_digit y[2 * 17]; + /* Z ordinate of point. */ + sp_digit z[2 * 17]; + /* Indicates point is at infinity. */ + int infinity; +} sp_point_521; + +/* The modulus (prime) of the curve P521. 
*/ +static const sp_digit p521_mod[17] = { + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x000001ff +}; +/* The Montgomery normalizer for modulus of the curve P521. */ +static const sp_digit p521_norm_mod[17] = { + 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000 +}; +/* The Montgomery multiplier for modulus of the curve P521. */ +static sp_digit p521_mp_mod = 0x00000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P521. */ +static const sp_digit p521_order[17] = { + 0x91386409,0xbb6fb71e,0x899c47ae,0x3bb5c9b8,0xf709a5d0,0x7fcc0148, + 0xbf2f966b,0x51868783,0xfffffffa,0xffffffff,0xffffffff,0xffffffff, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x000001ff +}; +#endif +/* The order of the curve P521 minus 2. */ +static const sp_digit p521_order2[17] = { + 0x91386407,0xbb6fb71e,0x899c47ae,0x3bb5c9b8,0xf709a5d0,0x7fcc0148, + 0xbf2f966b,0x51868783,0xfffffffa,0xffffffff,0xffffffff,0xffffffff, + 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x000001ff +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery normalizer for order of the curve P521. */ +static const sp_digit p521_norm_order[17] = { + 0x6ec79bf7,0x449048e1,0x7663b851,0xc44a3647,0x08f65a2f,0x8033feb7, + 0x40d06994,0xae79787c,0x00000005,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000 +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery multiplier for order of the curve P521. */ +static sp_digit p521_mp_order = 0x79a995c7; +#endif +/* The base point of curve P521. 
*/ +static const sp_point_521 p521_base = { + /* X ordinate */ + { + 0xc2e5bd66,0xf97e7e31,0x856a429b,0x3348b3c1,0xa2ffa8de,0xfe1dc127, + 0xefe75928,0xa14b5e77,0x6b4d3dba,0xf828af60,0x053fb521,0x9c648139, + 0x2395b442,0x9e3ecb66,0x0404e9cd,0x858e06b7,0x000000c6, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0 + }, + /* Y ordinate */ + { + 0x9fd16650,0x88be9476,0xa272c240,0x353c7086,0x3fad0761,0xc550b901, + 0x5ef42640,0x97ee7299,0x273e662c,0x17afbd17,0x579b4468,0x98f54449, + 0x2c7d1bd9,0x5c8a5fb4,0x9a3bc004,0x39296a78,0x00000118, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0 + }, + /* Z ordinate */ + { + 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0 + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p521_b[17] = { + 0x6b503f00,0xef451fd4,0x3d2c34f1,0x3573df88,0x3bb1bf07,0x1652c0bd, + 0xec7e937b,0x56193951,0x8ef109e1,0xb8b48991,0x99b315f3,0xa2da725b, + 0xb68540ee,0x929a21a0,0x8e1c9a1f,0x953eb961,0x00000051 +}; +#endif + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x88\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_521_mul_17_outer_%=: \n\t" + "subs r3, r5, #0x40\n\t" + "it cc\n\t" + "movcc r3, #0\n\t" + "sub r4, r5, r3\n\t" + "\n" + "L_sp_521_mul_17_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" 
+ "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_521_mul_17_inner_done_%=\n\t" + "blt L_sp_521_mul_17_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_521_mul_17_inner_done_%=: \n\t" + "str r6, [sp, r5]\n\t" + "mov r6, r7\n\t" + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #0x7c\n\t" + "ble L_sp_521_mul_17_outer_%=\n\t" + "ldr lr, [%[a], #64]\n\t" + "ldr r11, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + 
"adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif + "str r6, [sp, r5]\n\t" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "ldm sp!, {r6, r7}\n\t" + "stm %[r]!, {r6, r7}\n\t" + "sub r5, r5, #8\n\t" + "\n" + "L_sp_521_mul_17_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_521_mul_17_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + ); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "sub sp, sp, #0x44\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else + "umull r3, r4, r11, r12\n\t" + "mov r5, #0\n\t" +#endif + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] 
*/ + "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #4]\n\t" + /* A[2] * B[0] */ + "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ + "ldr r11, [%[a], #4]\n\t" + "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #8]\n\t" + /* A[0] * B[3] */ + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ + "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #12]\n\t" + /* A[4] * B[0] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ + "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ + "ldr r11, [%[a], #8]\n\t" + "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #16]\n\t" + /* A[0] * B[5] */ + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + 
"lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" 
+ "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ + "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #20]\n\t" + /* A[6] * B[0] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ + "ldr r11, [%[a], #12]\n\t" + "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #24]\n\t" + /* A[0] * B[7] */ + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, 
r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #28]\n\t" + /* A[8] * B[0] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ + "ldr 
r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ + "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ + "ldr r11, [%[a], #16]\n\t" + "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #32]\n\t" + /* A[0] * B[9] */ + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ + "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #36]\n\t" + /* A[10] * B[0] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" 
+ "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ + "ldr r11, [%[a], #20]\n\t" + "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) 
+ "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc 
r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #40]\n\t" + /* A[0] * B[11] */ + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ + "ldr r8, [%[a], #24]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" 
+ "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #44]\n\t" + /* A[12] * B[0] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[1] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* 
A[7] * B[5] */ + "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ + "ldr r11, [%[a], #24]\n\t" + "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[12] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #48]\n\t" + /* A[0] * B[13] */ + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[12] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[11] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ + "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, 
r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[1] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[0] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" 
+ "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #52]\n\t" + /* A[14] * B[0] */ + "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[1] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[2] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[3] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, 
r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ + "ldr r11, [%[a], #28]\n\t" + "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl 
r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[12] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[13] */ + "ldr r8, [%[a], #4]\n\t" + 
"ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[14] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #56]\n\t" + /* A[0] * B[15] */ + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[14] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[13] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[12] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[11] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" 
+ "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * 
B[6] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[3] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[2] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, 
r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[1] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[0] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #60]\n\t" + /* A[16] * B[0] */ + "ldr r8, [%[a], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[1] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[2] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[3] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[4] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[5] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ + "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ + "ldr r11, [%[a], #32]\n\t" + "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" 
+ "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[12] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[13] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[14] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[15] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[16] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #64]\n\t" + /* A[1] * B[16] */ + "ldr 
r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[15] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[14] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[13] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[12] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[11] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ + "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * 
B[5] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[4] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[3] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[2] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[16] * B[1] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, 
r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #68]\n\t" + /* A[16] * B[2] */ + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[3] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[4] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[5] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[6] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[7] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ + "ldr r11, [%[a], #36]\n\t" + "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[12] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs 
r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[13] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[14] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[15] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[16] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #72]\n\t" + /* A[3] * B[16] */ + "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[15] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[14] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[13] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[12] */ + "ldr 
r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[11] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[7] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[6] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[5] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[4] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" 
+ "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[3] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #76]\n\t" + /* A[16] * B[4] */ + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + 
"adc r4, r4, #0\n\t" +#endif + /* A[15] * B[5] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[6] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[7] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, 
r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[8] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[9] */ + "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ + "ldr r11, [%[a], #40]\n\t" + "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[12] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[13] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[14] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[15] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[16] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #80]\n\t" + /* A[5] * B[16] */ + "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[15] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, 
[%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[14] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[13] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[12] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[11] */ + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, 
r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ + "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[9] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[8] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[7] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[6] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[16] * B[5] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #84]\n\t" + /* A[16] * B[6] */ + 
"ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[7] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[8] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[9] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[10] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[11] */ + "ldr r11, [%[a], #44]\n\t" + "ldr r12, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[12] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[13] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[14] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[15] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[16] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #88]\n\t" + /* A[7] * B[16] */ + "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[15] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[14] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[13] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[12] */ + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[11] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[10] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[9] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[8] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[16] * B[7] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #92]\n\t" + /* A[16] * B[8] */ + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[9] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[10] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[11] */ + "ldr r8, 
[%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[12] */ + "ldr r11, [%[a], #48]\n\t" + "ldr r12, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[13] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[14] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[15] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[16] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #96]\n\t" + /* A[9] * B[16] */ + "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[15] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[14] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[13] */ + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[12] */ + "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+#endif + /* A[14] * B[11] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[10] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[9] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #100]\n\t" + /* A[16] * B[10] */ + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[11] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, 
#0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[12] */ + "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[13] */ + "ldr r11, [%[a], #52]\n\t" + "ldr r12, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[14] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[15] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[16] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #104]\n\t" + /* A[11] * B[16] */ + "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[15] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[14] */ + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[13] */ + "ldr r8, [%[a], #56]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[12] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[16] * B[11] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #108]\n\t" + /* A[16] * B[12] */ + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[13] */ + "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[14] */ + "ldr r11, [%[a], #56]\n\t" + "ldr r12, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[15] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" 
+ "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[16] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #112]\n\t" + /* A[13] * B[16] */ + "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[15] */ + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[14] */ + "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+#endif + /* A[16] * B[13] */ + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #116]\n\t" + /* A[16] * B[14] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[15] */ + "ldr r11, [%[a], #60]\n\t" + "ldr r12, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, 
r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[16] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #120]\n\t" + /* A[15] * B[16] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[15] */ + "ldr r8, [%[a], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #124]\n\t" + /* A[16] * B[16] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + 
"add r3, r3, r7\n\t"
+ "lsl r7, r9, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7, r6\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r7\n\t"
+#else
+ "umlal r5, r3, r8, r9\n\t"
+#endif
+ "str r5, [%[r], #128]\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "ldm sp!, {r3, r4, r5, r6}\n\t"
+ "stm %[r]!, {r3, r4, r5, r6}\n\t"
+ "ldm sp!, {r3, r4, r5, r6}\n\t"
+ "stm %[r]!, {r3, r4, r5, r6}\n\t"
+ "ldm sp!, {r3, r4, r5, r6}\n\t"
+ "stm %[r]!, {r3, r4, r5, r6}\n\t"
+ "ldm sp!, {r3, r4, r5, r6}\n\t"
+ "stm %[r]!, {r3, r4, r5, r6}\n\t"
+ "ldm sp!, {r3}\n\t"
+ "stm %[r]!, {r3}\n\t"
+ : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Small-code-size variant: a loop computes each 32-bit word of the
+ * 34-word product in turn (column by column). For column k every cross
+ * product a[i] * a[j] with i < j and i + j == k is added twice, and the
+ * diagonal term a[i] * a[i] (when k is even) once, into a three-word
+ * accumulator (r6, r7, r8). The result is assembled on the stack and
+ * only copied out to r at the end, so r is not written while a is
+ * still being read.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p)
+{
+ register sp_digit* r asm ("r0") = (sp_digit*)r_p;
+ register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
+
+ __asm__ __volatile__ (
+ /* Reserve 0x88 = 136 bytes = 34 words of temporary result on the stack. */
+ "sub sp, sp, #0x88\n\t"
+ /* Result word 0: A[0] * A[0]. */
+ "ldr lr, [%[a]]\n\t"
+ "umull r8, r6, lr, lr\n\t"
+ "str r8, [sp]\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ /* Outer loop: r5 is the byte offset of the result word (column) k,
+  * running from 4 to 0x7c (words 1..31). */
+ "mov r5, #4\n\t"
+ "\n"
+ "L_sp_521_sqr_17_outer_%=: \n\t"
+ /* i (r3) = max(0, k - 0x40) so that j stays within the 17-word input;
+  * j (r4) = k - i. Both are byte offsets. */
+ "subs r3, r5, #0x40\n\t"
+ "it cc\n\t"
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n"
+ /* Inner loop: accumulate a[i] * a[j] twice into (r6, r7, r8). */
+ "L_sp_521_sqr_17_inner_%=: \n\t"
+ "ldr lr, [%[a], r3]\n\t"
+ "ldr r11, [%[a], r4]\n\t"
+ /* WOLFSSL_ARM_ARCH < 4: no umull; build the 64-bit product from
+  * 16x16->32 mul instructions on the operand halves. */
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
+ "lsl r9, lr, #16\n\t"
+ "lsl r10, r11, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "lsr r10, r10, #16\n\t"
+ "mul r10, r9, r10\n\t"
+ "adds r6, r6, r10\n\t"
+ "adcs r7, r7, #0\n\t"
+ "adc r8, r8, #0\n\t"
+ "adds r6, r6, r10\n\t"
+ "adcs r7, r7, #0\n\t"
+ "adc r8, r8, #0\n\t"
+ "lsr r10, r11, #16\n\t"
+ "mul r9, r10, r9\n\t"
+ "lsr r10, r9, #16\n\t"
+ "lsl r9, r9, #16\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "lsr r9, lr, #16\n\t"
+ "lsr r10, r11, #16\n\t"
+ "mul r10, r9, r10\n\t"
+ "adds r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "adds r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "lsl r10, r11, #16\n\t"
+ "lsr r10, r10, #16\n\t"
+ "mul r9, r10, r9\n\t"
+ "lsr r10, r9, #16\n\t"
+ "lsl r9, r9, #16\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+#else
+ "umull r9, r10, lr, r11\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+#endif
+ /* Step i up and j down; stop when they cross, fall through when i == j. */
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, r4\n\t"
+ "bgt L_sp_521_sqr_17_inner_done_%=\n\t"
+ "blt L_sp_521_sqr_17_inner_%=\n\t"
+ /* i == j: add the diagonal term a[i] * a[i] once. */
+ "ldr lr, [%[a], r3]\n\t"
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
+ "lsl r9, lr, #16\n\t"
+ "lsr r10, lr, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "mov r11, r9\n\t"
+ "mul r9, r11, r9\n\t"
+ "mov r11, r10\n\t"
+ "mul r10, r11, r10\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "lsr r10, lr, #16\n\t"
+ "lsl r9, lr, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "mul r9, r10, r9\n\t"
+ "lsr r10, r9, #15\n\t"
+ "lsl r9, r9, #17\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+#else
+ "umull r9, r10, lr, lr\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+#endif
+ "\n"
+ /* Column k done: store its word and shift the accumulator down one word. */
+ "L_sp_521_sqr_17_inner_done_%=: \n\t"
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #0x7c\n\t"
+ "ble L_sp_521_sqr_17_outer_%=\n\t"
+ /* Last diagonal: a[16] * a[16] produces result words 32 and 33. */
+ "ldr lr, [%[a], #64]\n\t"
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
+ "lsl r9, lr, #16\n\t"
+ "lsr r10, lr, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "mov r11, r9\n\t"
+ "mul r9, r11, r9\n\t"
+ "mov r11, r10\n\t"
+ "mul r10, r11, r10\n\t"
+ "adds r6, r6, r9\n\t"
+ "adc r7, r7, r10\n\t"
+ "lsr r10, lr, #16\n\t"
+ "lsl r9, lr, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "mul r9, r10, r9\n\t"
+ "lsr r10, r9, #15\n\t"
+ "lsl r9, r9, #17\n\t"
+ "adds r6, r6, r9\n\t"
+ "adc r7, r7, r10\n\t"
+#else
+ "umull r9, r10, lr, lr\n\t"
+ "adds r6, r6, r9\n\t"
+ "adc r7, r7, r10\n\t"
+#endif
+ "str r6, [sp, r5]\n\t"
+ "add r5, r5, #4\n\t"
+ "str r7, [sp, r5]\n\t"
+ /* Copy the 34-word result from the stack out to r (2 + 4*8 words). */
+ "ldm sp!, {r6, r7}\n\t"
+ "stm %[r]!, {r6, r7}\n\t"
+ "sub r5, r5, #8\n\t"
+ "\n"
+ "L_sp_521_sqr_17_store_%=: \n\t"
+ "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t"
+ "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t"
+ "subs r5, r5, #32\n\t"
+ "bgt L_sp_521_sqr_17_store_%=\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc"
+ );
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p)
+{
+ register sp_digit* r asm ("r0") = (sp_digit*)r_p;
+ register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
+
+ __asm__ __volatile__ (
+ "sub sp, sp, #0x44\n\t"
+ /* A[0] * A[0] */
+ "ldr r10, [%[a]]\n\t"
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
+ "lsr r9, r10, #16\n\t"
+ "lsl r2, r10, #16\n\t"
+ "lsr r2, r2, #16\n\t"
+ "mul r8, r2, r2\n\t"
+ "mul r3, r9, r9\n\t"
+ "mul r2, r9, r2\n\t"
+ "lsr r9, r2, #15\n\t"
+ "lsl r2, r2, #17\n\t"
+ "adds r8, r8, r2\n\t"
+ "adc r3, r3, r9\n\t"
+#else
+ "umull r8, r3, r10, r10\n\t"
+#endif
+ "mov r4, #0\n\t"
+ "str r8, [sp]\n\t"
+ /* A[0] * A[1] */
+ "ldr r10, [%[a], #4]\n\t"
+ "ldr r12, [%[a]]\n\t"
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
+ "lsl r8, r10, #16\n\t"
+ "lsl r9, r12, #16\n\t"
+ "lsr r8, r8, #16\n\t"
+ "lsr r9, r9, #16\n\t"
+ "mul r9, r8, r9\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, #0\n\t"
+ "mov r2, #0\n\t"
+ "adc r2, r2, #0\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adc r2, r2, 
#0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [sp, #4]\n\t" + /* A[0] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, 
#0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ + "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #8]\n\t" + /* A[0] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, 
r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ + "ldr r10, [%[a], #8]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r2, [sp, #12]\n\t" + /* A[0] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" 
+ "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + 
"adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ + "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [sp, #16]\n\t" + /* A[0] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, 
#16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ + "ldr r10, [%[a], #12]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + /* A[0] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr 
r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + /* A[0] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc 
r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ + "ldr r10, [%[a], #16]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + /* A[0] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" 
+#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + /* A[0] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" 
+ "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, 
r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ + "ldr r10, [%[a], #20]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + /* A[0] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" 
+#endif + /* A[5] * A[5] */ + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + /* A[0] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, 
r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" 
+ "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ + "ldr r10, [%[a], #24]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, 
r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + /* A[0] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, 
r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #48]\n\t" + /* A[0] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" 
+#endif + /* A[6] * A[7] */ + "ldr r10, [%[a], #28]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #52]\n\t" + /* A[0] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" 
+ "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull 
r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #56]\n\t" + /* A[0] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ + "ldr r10, [%[a], #32]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #60]\n\t" + /* A[0] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + 
"adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[1] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #64]\n\t" + /* A[1] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[2] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* 
A[8] * A[9] */ + "ldr r10, [%[a], #36]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #68]\n\t" + /* A[2] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[3] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + 
"lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[9] */ + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + 
"umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #72]\n\t" + /* A[3] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[4] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], 
#20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, 
r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[10] */ + "ldr r10, [%[a], #40]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #76]\n\t" + /* A[4] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[5] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul 
r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[10] */ + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #80]\n\t" + /* A[5] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[6] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, 
r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[11] */ + "ldr r10, [%[a], #44]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" 
+ "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #84]\n\t" + /* A[6] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[7] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* 
A[10] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[11] */ + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #88]\n\t" + /* A[7] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + 
"lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[8] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, 
#16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[12] */ + "ldr r10, [%[a], #48]\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #92]\n\t" + /* A[8] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[9] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, 
r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[12] */ + "ldr r10, [%[a], #48]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #96]\n\t" + /* A[9] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r2, #0\n\t" + "mov r7, #0\n\t" + /* A[10] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[13] */ + "ldr r10, [%[a], #52]\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #100]\n\t" + /* A[10] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r3, #0\n\t" + "mov r7, #0\n\t" + /* A[11] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[13] * A[13] */ + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + 
"str r4, [%[r], #104]\n\t" + /* A[11] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif + "mov r4, #0\n\t" + "mov r7, #0\n\t" + /* A[12] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[13] * A[14] */ + "ldr r10, [%[a], #56]\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #108]\n\t" + /* A[12] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, 
r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[13] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[14] * A[14] */ + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, 
#0\n\t" +#endif + "str r3, [%[r], #112]\n\t" + /* A[13] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * A[15] */ + "ldr r10, [%[a], #60]\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #116]\n\t" + /* A[14] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * A[15] */ + "ldr r10, [%[a], 
#60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r2, [%[r], #120]\n\t" + /* A[15] * A[16] */ + "ldr r10, [%[a], #64]\n\t" + "ldr r12, [%[a], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + "str r3, [%[r], 
#124]\n\t" + /* A[16] * A[16] */ + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" +#else + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" +#endif + "str r4, [%[r], #128]\n\t" + "str r2, [%[r], #132]\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2}\n\t" + "stm %[r]!, {r2}\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
 */
static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
{
    /* Pin arguments to the registers the asm template expects (AAPCS r0-r2). */
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;

    __asm__ __volatile__ (
        /* r3 holds the carry (0 or 1) saved across loop iterations. */
        "mov r3, #0\n\t"
        /* r12 = a + 0x40: end of the first 16 of the 17 words. */
        "add r12, %[a], #0x40\n\t"
        "\n"
        "L_sp_521_add_17_word_%=: \n\t"
        /* r3 + (-1) sets the carry flag iff r3 == 1, restoring the saved carry. */
        "adds r3, r3, #-1\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r11}\n\t"
        /* Add four words of b into four words of a with carry propagation. */
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "adcs r7, r7, r11\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        /* Save the carry flag back into r3 as 0/1 before the flag-clobbering cmp. */
        "mov r4, #0\n\t"
        "adc r3, r4, #0\n\t"
        "cmp %[a], r12\n\t"
        "bne L_sp_521_add_17_word_%=\n\t"
        /* Final (17th) word: restore carry, add, and return the out-carry in r. */
        "adds r3, r3, #-1\n\t"
        "ldm %[a], {r4}\n\t"
        "ldm %[b], {r8}\n\t"
        "adcs r4, r4, r8\n\t"
        "stm %[r]!, {r4}\n\t"
        "mov r4, #0\n\t"
        "adc %[r], r4, #0\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc"
    );
    /* r was overwritten with the carry-out (0 or 1) by the asm above. */
    return (uint32_t)(size_t)r;
}

#else
/* Add b to a into r. (r = a + b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
{
    /* Pin arguments to the registers the asm template expects (AAPCS r0-r2). */
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;

    __asm__ __volatile__ (
        /* Words 0-3: 'adds' starts the carry chain; subsequent groups use
         * only flag-preserving loads/stores so the carry survives between them. */
        "ldm %[a]!, {r3, r4, r5, r6}\n\t"
        "ldm %[b]!, {r7, r8, r9, r10}\n\t"
        "adds r3, r3, r7\n\t"
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "stm %[r]!, {r3, r4, r5, r6}\n\t"
        /* Words 4-7. */
        "ldm %[a]!, {r3, r4, r5, r6}\n\t"
        "ldm %[b]!, {r7, r8, r9, r10}\n\t"
        "adcs r3, r3, r7\n\t"
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "stm %[r]!, {r3, r4, r5, r6}\n\t"
        /* Words 8-11. */
        "ldm %[a]!, {r3, r4, r5, r6}\n\t"
        "ldm %[b]!, {r7, r8, r9, r10}\n\t"
        "adcs r3, r3, r7\n\t"
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "stm %[r]!, {r3, r4, r5, r6}\n\t"
        /* Words 12-15. */
        "ldm %[a]!, {r3, r4, r5, r6}\n\t"
        "ldm %[b]!, {r7, r8, r9, r10}\n\t"
        "adcs r3, r3, r7\n\t"
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "stm %[r]!, {r3, r4, r5, r6}\n\t"
        /* Word 16 (final), then return the carry-out (0 or 1) in r. */
        "ldm %[a]!, {r3}\n\t"
        "ldm %[b]!, {r7}\n\t"
        "adcs r3, r3, r7\n\t"
        "stm %[r]!, {r3}\n\t"
        "mov %[r], #0\n\t"
        "adc %[r], %[r], #0\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
    /* r was overwritten with the carry-out by the asm above. */
    return (uint32_t)(size_t)r;
}

#endif /* WOLFSSL_SP_SMALL */
/* Multiply a number by Montgomery normalizer mod modulus (prime).
 *
 * r The resulting Montgomery form number.
 * a The number to convert.
 * m The modulus (prime).
 * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
 */
static int sp_521_mod_mul_norm_17(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    /* For this curve the conversion reduces to a plain copy (no arithmetic),
     * so the modulus is unused and the function cannot fail. */
    (void)m;

    if (r != a) {
        XMEMCPY(r, a, 17 * sizeof(sp_digit));
    }

    return MP_OKAY;
}

/* Convert an mp_int to an array of sp_digit.
 *
 * r A single precision integer.
 * size Maximum number of bytes to convert
 * a A multi-precision integer.
 */
static void sp_521_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* mp_int digits are the same width as sp_digit: copy with a mask that
     * zeroes everything at and beyond a->used, in constant time. */
    int i;
    sp_digit j = (sp_digit)0 - (sp_digit)a->used;
    int o = 0;

    for (i = 0; i < size; i++) {
        /* mask is all-ones while j is still negative, i.e. while i < a->used. */
        sp_digit mask = (sp_digit)0 - (j >> 31);
        r[i] = a->dp[o] & mask;
        j++;
        /* Only advance the source index while inside a->dp (j still negative). */
        o += (int)(j >> 31);
    }
#elif DIGIT_BIT > 32
    /* mp_int digits are wider than 32 bits: split each across several r words. */
    unsigned int i;
    int j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = (sp_digit)0;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    /* Zero the remaining destination words. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* mp_int digits are narrower than 32 bits: pack several into each r word. */
    unsigned int i;
    int j = 0;
    int s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    /* Zero the remaining destination words. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Convert a point of type ecc_point to type sp_point_521.
 *
 * p Point of type sp_point_521 (result).
 * pm Point of type ecc_point.
 */
static void sp_521_point_from_ecc_point_17(sp_point_521* p,
    const ecc_point* pm)
{
    /* Clear first so words beyond the converted value are zero. */
    XMEMSET(p->x, 0, sizeof(p->x));
    XMEMSET(p->y, 0, sizeof(p->y));
    XMEMSET(p->z, 0, sizeof(p->z));
    sp_521_from_mp(p->x, 17, pm->x);
    sp_521_from_mp(p->y, 17, pm->y);
    sp_521_from_mp(p->z, 17, pm->z);
    p->infinity = 0;
}

/* Convert an array of sp_digit to an mp_int.
 *
 * a A single precision integer.
 * r A multi-precision integer.
 */
static int sp_521_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    /* Ensure the destination has enough digits for 521 bits. */
    err = mp_grow(r, (521 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        /* Same digit width: straight copy, then trim leading zero digits. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 17);
        r->used = 17;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        /* mp_int digits narrower than 32 bits: split each a[i] across
         * several mp digits, masking to DIGIT_BIT bits. */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 17; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 32 - s;
        }
        r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* mp_int digits wider than 32 bits: pack several a[i] into each. */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 17; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}

/* Convert a point of type sp_point_521 to type ecc_point.
 *
 * p Point of type sp_point_521.
 * pm Point of type ecc_point (result).
 * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
 * MP_OKAY.
 */
static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm)
{
    int err;

    /* Convert each coordinate; stop at the first failure. */
    err = sp_521_to_mp(p->x, pm->x);
    if (err == MP_OKAY) {
        err = sp_521_to_mp(p->y, pm->y);
    }
    if (err == MP_OKAY) {
        err = sp_521_to_mp(p->z, pm->z);
    }

    return err;
}

#ifdef WOLFSSL_SP_SMALL
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not copying.
 *
 * r A single precision number representing condition subtract result.
 * a A single precision number to subtract from.
 * b A single precision number to subtract.
 * m Mask value to apply.
 */
static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
{
    /* Pin arguments to the registers the asm template expects (AAPCS r0-r3). */
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
    register sp_digit m asm ("r3") = (sp_digit)m_p;

    __asm__ __volatile__ (
        /* r6 is a constant zero; r12 holds the saved borrow (0 or 1);
         * lr is the byte offset into the arrays. */
        "mov r6, #0\n\t"
        "mov r12, #0\n\t"
        "mov lr, #0\n\t"
        "\n"
        "L_sp_521_cond_sub_17_words_%=: \n\t"
        /* 0 - r12 restores the borrow into the carry flag. */
        "subs r12, r6, r12\n\t"
        "ldr r4, [%[a], lr]\n\t"
        "ldr r5, [%[b], lr]\n\t"
        /* Mask b's word: subtracts 0 when m == 0, b when m == -1. */
        "and r5, r5, %[m]\n\t"
        "sbcs r4, r4, r5\n\t"
        /* Save the borrow back into r12 (0 - 0 with borrow -> 0 or 1). */
        "sbc r12, r6, r6\n\t"
        "str r4, [%[r], lr]\n\t"
        "add lr, lr, #4\n\t"
        /* 17 words * 4 bytes = 0x44. */
        "cmp lr, #0x44\n\t"
        "blt L_sp_521_cond_sub_17_words_%=\n\t"
        "mov %[r], r12\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
        :
        : "memory", "r12", "lr", "r4", "r5", "r6", "cc"
    );
    /* r was overwritten with the final borrow by the asm above. */
    return (uint32_t)(size_t)r;
}

#else
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not copying.
 *
 * r A single precision number representing condition subtract result.
 * a A single precision number to subtract from.
 * b A single precision number to subtract.
 * m Mask value to apply.
 */
static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
{
    /* Pin arguments to the registers the asm template expects (AAPCS r0-r3). */
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
    register sp_digit m asm ("r3") = (sp_digit)m_p;

    __asm__ __volatile__ (
        "mov lr, #0\n\t"
        /* Words 0-1: 'subs' starts the borrow chain; each b word is ANDed
         * with the mask so the subtraction is a no-op when m == 0. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "subs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 2-3. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 4-5. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 6-7. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 8-9. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 10-11. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 12-13. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 14-15. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r7}\n\t"
        "and r6, r6, %[m]\n\t"
        "and r7, r7, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "sbcs r5, r5, r7\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Word 16 (final), then return the borrow (0 or -1) in r. */
        "ldr r4, [%[a]]\n\t"
        "ldr r6, [%[b]]\n\t"
        "and r6, r6, %[m]\n\t"
        "sbcs r4, r4, r6\n\t"
        "str r4, [%[r]]\n\t"
        "sbc %[r], lr, lr\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
        :
        : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc"
    );
    /* r was overwritten with the final borrow mask by the asm above. */
    return (uint32_t)(size_t)r;
}

#endif /* WOLFSSL_SP_SMALL */
/* Reduce the number back to 521 bits using Montgomery reduction.
 *
 * a A single precision number to reduce in place.
 * m The single precision number representing the modulus.
 * mp The digit representing the negative inverse of m mod 2^n.
 */
static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
{
    register sp_digit* a asm ("r0") = (sp_digit*)a_p;

    /* m_p and mp_p are unused: the reduction below works purely by shifting
     * the top 521+ bits down by 9 and adding them back (see (void) casts). */
    __asm__ __volatile__ (
        /* Scratch area of 17 words (0x44 bytes) on the stack. */
        "sub sp, sp, #0x44\n\t"
        "mov r12, sp\n\t"
        /* Shift top down by 9 bits */
        "add lr, %[a], #0x40\n\t"
        /* 0-7 */
        "ldm lr!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "lsr r1, r1, #9\n\t"
        "orr r1, r1, r2, lsl #23\n\t"
        "lsr r2, r2, #9\n\t"
        "orr r2, r2, r3, lsl #23\n\t"
        "lsr r3, r3, #9\n\t"
        "orr r3, r3, r4, lsl #23\n\t"
        "lsr r4, r4, #9\n\t"
        "orr r4, r4, r5, lsl #23\n\t"
        "lsr r5, r5, #9\n\t"
        "orr r5, r5, r6, lsl #23\n\t"
        "lsr r6, r6, #9\n\t"
        "orr r6, r6, r7, lsl #23\n\t"
        "lsr r7, r7, #9\n\t"
        "orr r7, r7, r8, lsl #23\n\t"
        "lsr r8, r8, #9\n\t"
        "orr r8, r8, r9, lsl #23\n\t"
        "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        "mov r1, r9\n\t"
        /* 8-16 */
        "ldm lr!, {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "lsr r1, r1, #9\n\t"
        "orr r1, r1, r2, lsl #23\n\t"
        "lsr r2, r2, #9\n\t"
        "orr r2, r2, r3, lsl #23\n\t"
        "lsr r3, r3, #9\n\t"
        "orr r3, r3, r4, lsl #23\n\t"
        "lsr r4, r4, #9\n\t"
        "orr r4, r4, r5, lsl #23\n\t"
        "lsr r5, r5, #9\n\t"
        "orr r5, r5, r6, lsl #23\n\t"
        "lsr r6, r6, #9\n\t"
        "orr r6, r6, r7, lsl #23\n\t"
        "lsr r7, r7, #9\n\t"
        "orr r7, r7, r8, lsl #23\n\t"
        "lsr r8, r8, #9\n\t"
        "orr r8, r8, r9, lsl #23\n\t"
        "lsr r9, r9, #9\n\t"
        "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* Add top to bottom */
        /* 0-5 */
        "ldm %[a], {r1, r2, r3, r4, r5, r6}\n\t"
        "ldm sp!, {r7, r8, r9, r10, r11, r12}\n\t"
        "adds r1, r1, r7\n\t"
        "adcs r2, r2, r8\n\t"
        "adcs r3, r3, r9\n\t"
        "adcs r4, r4, r10\n\t"
        "adcs r5, r5, r11\n\t"
        "adcs r6, r6, r12\n\t"
        "stm %[a]!, {r1, r2, r3, r4, r5, r6}\n\t"
        /* 6-11 */
        "ldm %[a], {r1, r2, r3, r4, r5, r6}\n\t"
        "ldm sp!, {r7, r8, r9, r10, r11, r12}\n\t"
        "adcs r1, r1, r7\n\t"
        "adcs r2, r2, r8\n\t"
        "adcs r3, r3, r9\n\t"
        "adcs r4, r4, r10\n\t"
        "adcs r5, r5, r11\n\t"
        "adcs r6, r6, r12\n\t"
        "stm %[a]!, {r1, r2, r3, r4, r5, r6}\n\t"
        /* 12-16 */
        "ldm %[a], {r1, r2, r3, r4, r5}\n\t"
        "ldm sp!, {r7, r8, r9, r10, r11}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        /* Build the 9-bit mask 0x1ff without a wide immediate (pre-ARMv7). */
        "mov lr, #0x1\n\t"
        "lsl lr, lr, #8\n\t"
        "add lr, lr, #0xff\n\t"
#else
        "mov lr, #0x1ff\n\t"
#endif
        /* Keep only the low 9 bits of the top word (bits 512..520). */
        "and r5, r5, lr\n\t"
        "adcs r1, r1, r7\n\t"
        "adcs r2, r2, r8\n\t"
        "adcs r3, r3, r9\n\t"
        "adcs r4, r4, r10\n\t"
        "adcs r5, r5, r11\n\t"
        /* r12 = overflow beyond 521 bits, to be folded back in below. */
        "lsr r12, r5, #9\n\t"
        "and r5, r5, lr\n\t"
        "stm %[a]!, {r1, r2, r3, r4, r5}\n\t"
        "sub %[a], %[a], #0x44\n\t"
        /* Add overflow */
        /* 0-8 */
        "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "adds r1, r1, r12\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "adcs r5, r5, #0\n\t"
        "adcs r6, r6, #0\n\t"
        "adcs r7, r7, #0\n\t"
        "adcs r8, r8, #0\n\t"
        "adcs r9, r9, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* 9-16 */
        "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        "adcs r1, r1, #0\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "adcs r5, r5, #0\n\t"
        "adcs r6, r6, #0\n\t"
        "adcs r7, r7, #0\n\t"
        "adcs r8, r8, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        : [a] "+r" (a)
        :
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
    (void)m_p;
    (void)mp_p;
}

#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
/* Reduce the number back to 521 bits using Montgomery reduction.
+ * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + "cmp r9, #0x40\n\t" + "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r7, #0x1\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0xff\n\t" +#else + "mov r7, #0x1ff\n\t" +#endif + "and r8, r8, r7\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "str r12, [%[a]]\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, 
r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], 
#16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #64]\n\t" +#else + "ldr r7, [%[m], #64]\n\t" +#endif + "ldr r10, [%[a], #64]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "ldr r10, [%[a], #68]\n\t" + "adcs r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x44\n\t" + "blt L_sp_521_mont_reduce_order_17_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + 
"sub %[a], %[a], #4\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #64]\n\t" + "lsr r4, r4, #9\n\t" + "str r4, [%[a], #68]\n\t" + "lsr r3, r4, #9\n\t" + "add %[a], %[a], #4\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : 
"memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); +} + +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 521 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + "cmp r9, #0x40\n\t" + "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r7, #0x1\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0xff\n\t" +#else + "mov r7, #0x1ff\n\t" +#endif + "and r8, r8, r7\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + "str r12, [%[a]]\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * 
mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "mov r4, #0\n\t" + 
"umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "ldr r10, [%[a], #68]\n\t" + "adcs r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x44\n\t" + "blt L_sp_521_mont_reduce_order_17_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "sub %[a], %[a], #4\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, 
r4, lsl #23\n\t" + "str r5, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r4, lsl #23\n\t" + "str r5, [%[a], #64]\n\t" + "lsr r4, r4, #9\n\t" + "str r4, [%[a], #68]\n\t" + "lsr r3, r4, #9\n\t" + "add %[a], %[a], #4\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); +} + +#else +/* Reduce the number back to 521 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + "cmp r12, #0x40\n\t" + "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0x1\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0x1ff\n\t" +#endif + "and r11, r11, r10\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + "str r4, [%[a]]\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + 
"str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, [%[a], #68]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #64]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #68]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x44\n\t" + "blt L_sp_521_mont_reduce_order_17_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "sub %[a], %[a], #4\n\t" + "ldr r10, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #4]\n\t" + "ldr r10, [%[a], #8]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #8]\n\t" + "ldr r3, [%[a], #12]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl 
#23\n\t" + "str r10, [%[a], #12]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #20]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #24]\n\t" + "ldr r3, [%[a], #28]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #36]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #40]\n\t" + "ldr r3, [%[a], #44]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #44]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #52]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #56]\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r10, r10, #9\n\t" + "orr r10, r10, r3, lsl #23\n\t" + "str r10, [%[a], #60]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r10, lsl #23\n\t" + "str r3, [%[a], #64]\n\t" + "lsr r10, r10, #9\n\t" + "str r10, [%[a], #68]\n\t" + "lsr lr, r10, #9\n\t" + "add %[a], %[a], #4\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); +} + +#endif +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. 
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+SP_NOINLINE static void sp_521_mont_mul_17(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit* m, sp_digit mp)
+{
+    /* Full 17x17 word product followed by one Montgomery reduction. */
+    sp_521_mul_17(r, a, b);
+    sp_521_mont_reduce_17(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+SP_NOINLINE static void sp_521_mont_sqr_17(sp_digit* r, const sp_digit* a,
+    const sp_digit* m, sp_digit mp)
+{
+    /* Dedicated squaring is cheaper than sp_521_mul_17(r, a, a). */
+    sp_521_sqr_17(r, a);
+    sp_521_mont_reduce_17(r, m, mp);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+SP_NOINLINE static void sp_521_mont_sqr_n_17(sp_digit* r,
+        const sp_digit* a, int n, const sp_digit* m, sp_digit mp)
+{
+    sp_521_mont_sqr_17(r, a, m, mp);
+    /* First squaring moved a into r; remaining n-1 square in place. */
+    for (; n > 1; n--) {
+        sp_521_mont_sqr_17(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* p521_mod - 2: Fermat exponent used by sp_521_mont_inv_17
+ * (a^(p-2) = 1/a mod p). */
+static const uint32_t p521_mod_minus_2[17] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0x000001ffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P521 curve. (r = 1 / a mod m)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */ +static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 17); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_17(t, t, p521_mod, p521_mp_mod); + if (p521_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_521_mont_mul_17(t, t, a, p521_mod, p521_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 17); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 17; + sp_digit* t3 = td + 4 * 17; + + /* 0x2 */ + sp_521_mont_sqr_17(t1, a, p521_mod, p521_mp_mod); + /* 0x3 */ + sp_521_mont_mul_17(t2, t1, a, p521_mod, p521_mp_mod); + /* 0x6 */ + sp_521_mont_sqr_17(t1, t2, p521_mod, p521_mp_mod); + /* 0x7 */ + sp_521_mont_mul_17(t3, t1, a, p521_mod, p521_mp_mod); + /* 0xc */ + sp_521_mont_sqr_n_17(t1, t2, 2, p521_mod, p521_mp_mod); + /* 0xf */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0x78 */ + sp_521_mont_sqr_n_17(t1, t2, 3, p521_mod, p521_mp_mod); + /* 0x7f */ + sp_521_mont_mul_17(t3, t3, t1, p521_mod, p521_mp_mod); + /* 0xf0 */ + sp_521_mont_sqr_n_17(t1, t2, 4, p521_mod, p521_mp_mod); + /* 0xff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xff00 */ + sp_521_mont_sqr_n_17(t1, t2, 8, p521_mod, p521_mp_mod); + /* 0xffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffff0000 */ + sp_521_mont_sqr_n_17(t1, t2, 16, p521_mod, p521_mp_mod); + /* 0xffffffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffff00000000 */ + sp_521_mont_sqr_n_17(t1, t2, 32, p521_mod, p521_mp_mod); + /* 0xffffffffffffffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffff0000000000000000 */ + sp_521_mont_sqr_n_17(t1, t2, 64, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffff00000000000000000000000000000000 */ + sp_521_mont_sqr_n_17(t1, t2, 128, p521_mod, p521_mp_mod); 
+ /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000 */ + sp_521_mont_sqr_n_17(t1, t2, 256, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_17(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80 */ + sp_521_mont_sqr_n_17(t1, t2, 7, p521_mod, p521_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_17(t2, t3, t1, p521_mod, p521_mp_mod); + /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc */ + sp_521_mont_sqr_n_17(t1, t2, 2, p521_mod, p521_mp_mod); + /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ + sp_521_mont_mul_17(r, t1, a, p521_mod, p521_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
+ */ +static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) +{ + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" +#ifdef WOLFSSL_SP_SMALL + "mov r4, #0x40\n\t" + "\n" + "L_sp_521_cmp_17_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_521_cmp_17_words_%=\n\t" + "eor r2, r2, r3\n\t" +#else + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and 
lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" 
+ "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + ); + return (uint32_t)(size_t)a; +} + +/* Normalize the values in each word to 32. + * + * a Array of sp_digit to normalize. + */ +#define sp_521_norm_17(a) + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
+ */ +static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_int32 n; + + sp_521_mont_inv_17(t1, p->z, t + 2*17); + + sp_521_mont_sqr_17(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t2, t1, p521_mod, p521_mp_mod); + + /* x /= z^2 */ + sp_521_mont_mul_17(r->x, p->x, t2, p521_mod, p521_mp_mod); + XMEMSET(r->x + 17, 0, sizeof(sp_digit) * 17U); + sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); + /* Reduce x to less than modulus */ + n = sp_521_cmp_17(r->x, p521_mod); + sp_521_cond_sub_17(r->x, r->x, p521_mod, ~(n >> 31)); + sp_521_norm_17(r->x); + + /* y /= z^3 */ + sp_521_mont_mul_17(r->y, p->y, t1, p521_mod, p521_mp_mod); + XMEMSET(r->y + 17, 0, sizeof(sp_digit) * 17U); + sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); + /* Reduce y to less than modulus */ + n = sp_521_cmp_17(r->y, p521_mod); + sp_521_cond_sub_17(r->y, r->y, p521_mod, ~(n >> 31)); + sp_521_norm_17(r->y); + + XMEMSET(r->z, 0, sizeof(r->z) / 2); + r->z[0] = 1; +} + +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montgomery form. + * b Second number to add in Montgomery form. + * m Modulus (prime). 
+ */ +static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8}\n\t" + "ldm %[b]!, {r4}\n\t" + "adcs r8, r8, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0x1\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0x1ff\n\t" +#endif + "lsr r3, r8, #9\n\t" + "and r8, r8, r12\n\t" + "stm %[r]!, {r8}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, 
r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + ); + (void)m_p; +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "mov r2, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4}\n\t" + "adcs r4, r4, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r3, #0x1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #0xff\n\t" +#else + "mov r3, #0x1ff\n\t" +#endif + "lsr r2, r4, #9\n\t" + "and r4, r4, r3\n\t" + "stm %[r]!, {r4}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, 
r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "cc" + ); + (void)m_p; +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. + * a Number to triple in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "mov r2, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4}\n\t" + "adcs r4, r4, r4\n\t" + "stm %[r]!, {r4}\n\t" + "sub %[r], %[r], #0x44\n\t" + "sub %[a], %[a], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, 
r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4}\n\t" + "ldm %[a]!, {r8}\n\t" + "adcs r4, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r3, #0x1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #0xff\n\t" +#else + "mov r3, #0x1ff\n\t" +#endif + "lsr r2, r4, #9\n\t" + "and r4, r4, r3\n\t" + "stm %[r]!, {r4}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r2\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "cc" + ); + (void)m_p; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montgomery form. + * b Number to subtract with in Montgomery form. + * m Modulus (prime). 
+ */ +static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "subs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8}\n\t" + "ldm %[b]!, {r4}\n\t" + "sbcs r8, r8, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0x1\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0x1ff\n\t" +#endif + "asr r3, r8, #9\n\t" + "and r8, r8, r12\n\t" + "neg r3, r3\n\t" + "stm %[r]!, {r8}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, 
#0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "sbcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + ); + (void)m_p; +} + +static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "ldm %[a], {r2, r3}\n\t" + "lsr r2, r2, #1\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r2, [%[r]]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r3, [%[r], #4]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r4, [%[r], #8]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r2, [%[r], #12]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r3, [%[r], #16]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r4, [%[r], #20]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r2, [%[r], #24]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r3, [%[r], #28]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r4, [%[r], #32]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r2, [%[r], #36]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r3, [%[r], #40]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r4, [%[r], #44]\n\t" + "orr r2, r2, r3, lsl 
#31\n\t" + "lsr r3, r3, #1\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r2, [%[r], #48]\n\t" + "orr r3, r3, r4, lsl #31\n\t" + "lsr r4, r4, #1\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r3, [%[r], #52]\n\t" + "orr r4, r4, r2, lsl #31\n\t" + "lsr r2, r2, #1\n\t" + "ldr r3, [%[a], #64]\n\t" + "str r4, [%[r], #56]\n\t" + "orr r2, r2, r3, lsl #31\n\t" + "lsr r3, r3, #1\n\t" + "str r2, [%[r], #60]\n\t" + "str r3, [%[r], #64]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "cc" + ); +} + +/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus (prime). + */ +static void sp_521_mont_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit o = a[0] & 1; + + (void)m; + + sp_521_rshift1_17(r, r); + r[16] |= o << 8; +} + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_17(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_17(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_17(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_17(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_17(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_mont_div2_17(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_17(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_17(y, y, t2, p521_mod); +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_proj_point_dbl_17_ctx { + int state; + sp_digit* t1; + sp_digit* t2; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_521_proj_point_dbl_17_ctx; + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_521_proj_point_dbl_17_ctx* ctx = (sp_521_proj_point_dbl_17_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_521_proj_point_dbl_17_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + ctx->t1 = t; + ctx->t2 = t + 2*17; + ctx->x = r->x; + ctx->y = r->y; + ctx->z = r->z; + + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + ctx->state = 1; + break; + case 1: + /* T1 = Z * Z */ + sp_521_mont_sqr_17(ctx->t1, p->z, p521_mod, p521_mp_mod); + ctx->state = 2; + break; + case 2: + /* Z = Y * Z */ + sp_521_mont_mul_17(ctx->z, p->y, p->z, p521_mod, p521_mp_mod); + ctx->state = 3; + break; + case 3: + /* Z = 2Z */ + sp_521_mont_dbl_17(ctx->z, ctx->z, p521_mod); + ctx->state = 4; + break; + case 4: + /* T2 = X - T1 */ + sp_521_mont_sub_17(ctx->t2, p->x, ctx->t1, p521_mod); + ctx->state = 5; + break; + case 5: + /* T1 = X + T1 */ + sp_521_mont_add_17(ctx->t1, p->x, ctx->t1, p521_mod); + ctx->state = 6; + break; + case 6: + /* T2 = T1 * T2 */ + sp_521_mont_mul_17(ctx->t2, ctx->t1, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 7; + break; + case 7: + /* T1 = 3T2 */ + sp_521_mont_tpl_17(ctx->t1, ctx->t2, p521_mod); + ctx->state = 8; + break; + case 8: + /* Y = 2Y */ + sp_521_mont_dbl_17(ctx->y, p->y, p521_mod); + ctx->state = 9; + break; + case 9: + /* Y = Y * Y */ + sp_521_mont_sqr_17(ctx->y, ctx->y, p521_mod, p521_mp_mod); + ctx->state = 10; + break; + case 10: + /* T2 = Y * Y */ + sp_521_mont_sqr_17(ctx->t2, ctx->y, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: + /* T2 = T2/2 */ + sp_521_mont_div2_17(ctx->t2, ctx->t2, p521_mod); + ctx->state = 12; + break; + case 12: + /* Y = Y * X */ + sp_521_mont_mul_17(ctx->y, ctx->y, p->x, p521_mod, p521_mp_mod); + ctx->state = 13; + break; + case 13: + /* X = T1 * T1 */ + sp_521_mont_sqr_17(ctx->x, ctx->t1, p521_mod, p521_mp_mod); + ctx->state = 14; + break; + case 14: + /* X = X - Y */ + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->y, p521_mod); + ctx->state = 15; + break; + case 15: + /* X = X - Y */ + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->y, p521_mod); + ctx->state = 16; + break; + case 16: + 
/* Y = Y - X */
+        sp_521_mont_sub_17(ctx->y, ctx->y, ctx->x, p521_mod);
+        ctx->state = 17;
+        break;
+    case 17:
+        /* Y = Y * T1 */
+        sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t1, p521_mod, p521_mp_mod);
+        ctx->state = 18;
+        break;
+    case 18:
+        /* Y = Y - T2 */
+        sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t2, p521_mod);
+        ctx->state = 19;
+        /* fall-through */
+    case 19:
+        err = MP_OKAY;
+        break;
+    }
+
+    if (err == MP_OKAY && ctx->state != 19) {
+        err = FP_WOULDBLOCK;
+    }
+
+    return err;
+}
+#endif /* WOLFSSL_SP_NONBLOCK */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_521_cmp_equal_17(const sp_digit* a, const sp_digit* b)
+{
+    /* Branch-free OR-fold of per-word XORs: the execution time does not
+     * depend on where (or whether) the operands differ. */
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
+            (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
+            (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8]) |
+            (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) |
+            (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14]) |
+            (a[15] ^ b[15]) | (a[16] ^ b[16])) == 0;
+}
+
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_521_iszero_17(const sp_digit* a)
+{
+    /* Branch-free OR-fold of all 17 words. */
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] |
+            a[16]) == 0;
+}
+
+
+/* Add two Montgomery form projective points.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */ +static void sp_521_proj_point_add_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*17; + sp_digit* t2 = t + 4*17; + sp_digit* t3 = t + 6*17; + sp_digit* t4 = t + 8*17; + sp_digit* t5 = t + 10*17; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(t2, t1) & + sp_521_cmp_equal_17(t4, t3)) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_521_mont_sub_17(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & 
(!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_proj_point_add_17_ctx { + int state; + sp_521_proj_point_dbl_17_ctx dbl_ctx; + const sp_point_521* ap[2]; + sp_point_521* rp[2]; + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + sp_digit* t6; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_521_proj_point_add_17_ctx; + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_521_proj_point_add_17_ctx* ctx = (sp_521_proj_point_add_17_ctx*)sp_ctx->data; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_521* a = p; + p = q; + q = a; + } + + typedef char ctx_size_test[sizeof(sp_521_proj_point_add_17_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->t6 = t; + ctx->t1 = t + 2*17; + ctx->t2 = t + 4*17; + ctx->t3 = t + 6*17; + ctx->t4 = t + 8*17; + ctx->t5 = t + 10*17; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; + + ctx->state = 1; + break; + case 1: + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; + break; + case 2: + sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; + break; + case 3: + sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 5; + break; + case 5: + sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 6; + break; + case 6: + sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); + ctx->state = 7; + break; + case 7: + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + ctx->state = 8; + break; + case 8: + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + ctx->state = 9; + break; + case 9: + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(ctx->t2, ctx->t1) & + sp_521_cmp_equal_17(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_17(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } + break; + case 10: + /* H = U2 - U1 */ + sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); + ctx->state = 11; + break; + case 11: + /* R = S2 - S1 */ + sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); + ctx->state = 12; + break; + case 12: + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 13; + break; + case 13: + sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + ctx->state = 14; + 
break; + case 14: + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 15; + break; + case 15: + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 16; + break; + case 16: + sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + ctx->state = 17; + break; + case 17: + sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 18; + break; + case 18: + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); + ctx->state = 19; + break; + case 19: + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + ctx->state = 20; + break; + case 20: + sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); + ctx->state = 21; + break; + case 21: + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_17(ctx->y, ctx->y, ctx->x, p521_mod); + ctx->state = 22; + break; + case 22: + sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 23; + break; + case 23: + sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); + ctx->state = 24; + break; + case 24: + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + ctx->state = 25; + break; + } + case 25: + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 25) { + err = FP_WOULDBLOCK; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each 
possible point that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_point_16_17(sp_point_521* r, const sp_point_521* table, + int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->x[9] = 0; + r->x[10] = 0; + r->x[11] = 0; + r->x[12] = 0; + r->x[13] = 0; + r->x[14] = 0; + r->x[15] = 0; + r->x[16] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->y[9] = 0; + r->y[10] = 0; + r->y[11] = 0; + r->y[12] = 0; + r->y[13] = 0; + r->y[14] = 0; + r->y[15] = 0; + r->y[16] = 0; + r->z[0] = 0; + r->z[1] = 0; + r->z[2] = 0; + r->z[3] = 0; + r->z[4] = 0; + r->z[5] = 0; + r->z[6] = 0; + r->z[7] = 0; + r->z[8] = 0; + r->z[9] = 0; + r->z[10] = 0; + r->z[11] = 0; + r->z[12] = 0; + r->z[13] = 0; + r->z[14] = 0; + r->z[15] = 0; + r->z[16] = 0; + for (i = 1; i < 16; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->x[9] |= mask & table[i].x[9]; + r->x[10] |= mask & table[i].x[10]; + r->x[11] |= mask & table[i].x[11]; + r->x[12] |= mask & table[i].x[12]; + r->x[13] |= mask & table[i].x[13]; + r->x[14] |= mask & table[i].x[14]; + r->x[15] |= mask & table[i].x[15]; + r->x[16] |= mask & table[i].x[16]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & 
table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + r->y[9] |= mask & table[i].y[9]; + r->y[10] |= mask & table[i].y[10]; + r->y[11] |= mask & table[i].y[11]; + r->y[12] |= mask & table[i].y[12]; + r->y[13] |= mask & table[i].y[13]; + r->y[14] |= mask & table[i].y[14]; + r->y[15] |= mask & table[i].y[15]; + r->y[16] |= mask & table[i].y[16]; + r->z[0] |= mask & table[i].z[0]; + r->z[1] |= mask & table[i].z[1]; + r->z[2] |= mask & table[i].z[2]; + r->z[3] |= mask & table[i].z[3]; + r->z[4] |= mask & table[i].z[4]; + r->z[5] |= mask & table[i].z[5]; + r->z[6] |= mask & table[i].z[6]; + r->z[7] |= mask & table[i].z[7]; + r->z[8] |= mask & table[i].z[8]; + r->z[9] |= mask & table[i].z[9]; + r->z[10] |= mask & table[i].z[10]; + r->z[11] |= mask & table[i].z[11]; + r->z[12] |= mask & table[i].z[12]; + r->z[13] |= mask & table[i].z[13]; + r->z[14] |= mask & table[i].z[14]; + r->z[15] |= mask & table[i].z[15]; + r->z[16] |= mask & table[i].z[16]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Fast implementation that generates a pre-computation table. + * 4 bits of window (no sliding!). + * Uses add and double for calculating table. + * 521 doubles. + * 143 adds. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_fast_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, + int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 t[16 + 1]; + sp_digit tmp[2 * 17 * 6]; +#endif + sp_point_521* rt = NULL; +#ifndef WC_NO_CACHE_RESISTANT +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* p = NULL; +#else + sp_point_521 p[1]; +#endif +#endif /* !WC_NO_CACHE_RESISTANT */ + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Constant time used for cache attack resistance implementation. */ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * (16 + 1), + heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + #ifndef WC_NO_CACHE_RESISTANT + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), + heap, DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + #endif + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + rt = t + 16; + + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + (void)sp_521_mod_mul_norm_17(t[1].x, g->x, p521_mod); + (void)sp_521_mod_mul_norm_17(t[1].y, g->y, p521_mod); + (void)sp_521_mod_mul_norm_17(t[1].z, g->z, p521_mod); + t[1].infinity = 0; + sp_521_proj_point_dbl_17(&t[ 2], &t[ 1], tmp); + t[ 2].infinity = 0; + sp_521_proj_point_add_17(&t[ 3], &t[ 2], &t[ 1], tmp); + t[ 3].infinity = 0; + sp_521_proj_point_dbl_17(&t[ 4], &t[ 2], tmp); + t[ 4].infinity = 0; + sp_521_proj_point_add_17(&t[ 5], &t[ 3], &t[ 2], tmp); + t[ 5].infinity = 0; + sp_521_proj_point_dbl_17(&t[ 6], &t[ 3], tmp); + t[ 6].infinity = 0; + sp_521_proj_point_add_17(&t[ 7], &t[ 4], &t[ 3], tmp); + t[ 7].infinity = 0; + sp_521_proj_point_dbl_17(&t[ 8], &t[ 4], tmp); + t[ 
8].infinity = 0; + sp_521_proj_point_add_17(&t[ 9], &t[ 5], &t[ 4], tmp); + t[ 9].infinity = 0; + sp_521_proj_point_dbl_17(&t[10], &t[ 5], tmp); + t[10].infinity = 0; + sp_521_proj_point_add_17(&t[11], &t[ 6], &t[ 5], tmp); + t[11].infinity = 0; + sp_521_proj_point_dbl_17(&t[12], &t[ 6], tmp); + t[12].infinity = 0; + sp_521_proj_point_add_17(&t[13], &t[ 7], &t[ 6], tmp); + t[13].infinity = 0; + sp_521_proj_point_dbl_17(&t[14], &t[ 7], tmp); + t[14].infinity = 0; + sp_521_proj_point_add_17(&t[15], &t[ 8], &t[ 7], tmp); + t[15].infinity = 0; + + i = 15; + n = k[i+1] << 0; + c = 5; + y = (int)(n >> 5); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_16_17(rt, t, y); + rt->infinity = !y; + } + else + #endif + { + XMEMCPY(rt, &t[y], sizeof(sp_point_521)); + } + n <<= 27; + for (; i>=0 || c>=4; ) { + if (c < 4) { + n = (k[i+1] << 31) | (k[i] >> 1); + i--; + c += 32; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_521_proj_point_dbl_17(rt, rt, tmp); + sp_521_proj_point_dbl_17(rt, rt, tmp); + sp_521_proj_point_dbl_17(rt, rt, tmp); + sp_521_proj_point_dbl_17(rt, rt, tmp); + + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_16_17(p, t, y); + p->infinity = !y; + sp_521_proj_point_add_17(rt, rt, p, tmp); + } + else + #endif + { + sp_521_proj_point_add_17(rt, rt, &t[y], tmp); + } + } + y = k[0] & 0x1; + sp_521_proj_point_dbl_17(rt, rt, tmp); + sp_521_proj_point_add_17(rt, rt, &t[y], tmp); + + if (map != 0) { + sp_521_map_17(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 17 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifndef WC_NO_CACHE_RESISTANT + #ifdef WOLFSSL_SP_SMALL_STACK + if (p != NULL) + #endif + { + ForceZero(p, sizeof(sp_point_521)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(p, heap, DYNAMIC_TYPE_ECC); + #endif + } +#endif /* !WC_NO_CACHE_RESISTANT */ +#ifdef 
WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_521) * 17); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef FP_ECC +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, + sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*17; + sp_digit* b = t + 4*17; + sp_digit* t1 = t + 6*17; + sp_digit* t2 = t + 8*17; + sp_digit* x; + sp_digit* y; + sp_digit* z; + volatile int n = i; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_521_mont_dbl_17(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_17(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(w, w, p521_mod, p521_mp_mod); +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_17(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(t1, t1, w, p521_mod); + sp_521_mont_tpl_17(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_17(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t2, b, p521_mod); + sp_521_mont_sub_17(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_17(t2, b, x, p521_mod); + sp_521_mont_dbl_17(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_17(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_17(t1, t1, p521_mod, p521_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_521_mont_mul_17(w, w, t1, p521_mod, p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_17(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t1, p521_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_17(t1, 
x, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(t1, t1, w, p521_mod); + sp_521_mont_tpl_17(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_17(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t2, b, p521_mod); + sp_521_mont_sub_17(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_17(t2, b, x, p521_mod); + sp_521_mont_dbl_17(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_17(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_17(t1, t1, p521_mod, p521_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_17(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t1, p521_mod); +#endif /* WOLFSSL_SP_SMALL */ + /* Y = Y/2 */ + sp_521_mont_div2_17(y, y, p521_mod); +} + +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_521_proj_to_affine_17(sp_point_521* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 17; + sp_digit* tmp = t + 4 * 17; + + sp_521_mont_inv_17(t1, a->z, tmp); + + sp_521_mont_sqr_17(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t2, t1, p521_mod, p521_mp_mod); + + sp_521_mont_mul_17(a->x, a->x, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(a->y, a->y, t1, p521_mod, p521_mp_mod); + XMEMCPY(a->z, p521_norm_mod, sizeof(p521_norm_mod)); +} + +#endif /* FP_ECC */ +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_521 { + sp_digit x[17]; + sp_digit y[17]; +} sp_table_entry_521; + +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ +static void sp_521_proj_point_add_qz1_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t2 = t; + sp_digit* t3 = t + 2*17; + sp_digit* t6 = t + 4*17; + sp_digit* t1 = t + 6*17; + sp_digit* t4 = t + 8*17; + sp_digit* t5 = t + 10*17; + + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(p->x, t2) & + sp_521_cmp_equal_17(p->y, t4)) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; + + /* H = U2 - X1 */ + sp_521_mont_sub_17(t2, t2, p->x, p521_mod); + /* R = S2 - Y1 */ + sp_521_mont_sub_17(t4, t4, p->y, p521_mod); + /* Z3 = H*Z1 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_521_mont_sqr_17(t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t3, p->x, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(t2, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(t2, t2, t1, p521_mod); + sp_521_mont_dbl_17(t5, t3, p521_mod); + sp_521_mont_sub_17(x, t2, t5, p521_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_521_mont_sub_17(t3, t3, x, p521_mod); + sp_521_mont_mul_17(t3, t3, t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, p->y, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, t3, t1, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for 
(i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef WOLFSSL_SP_SMALL +#ifdef FP_ECC +/* Generate the pre-computed table of points for the base point. + * + * width = 4 + * 16 entries + * 130 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. + */ +static int sp_521_gen_stripe_table_17(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_17(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_17(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_17(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_17(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] = Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<4; i++) { + sp_521_proj_point_dbl_n_17(t, 131, tmp); + sp_521_proj_to_affine_17(t, tmp); + XMEMCPY(table[1<x, sizeof(table->x)); + XMEMCPY(table[1<y, sizeof(table->y)); + } + + for (i=1; 
i<4; i++) { + XMEMCPY(s1->x, table[1<x)); + XMEMCPY(s1->y, table[1<y)); + for (j=(1<x, table[j-(1<x)); + XMEMCPY(s2->y, table[j-(1<y)); + sp_521_proj_point_add_qz1_17(t, s1, s2, tmp); + sp_521_proj_to_affine_17(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_entry_16_17(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->x[9] = 0; + r->x[10] = 0; + r->x[11] = 0; + r->x[12] = 0; + r->x[13] = 0; + r->x[14] = 0; + r->x[15] = 0; + r->x[16] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->y[9] = 0; + r->y[10] = 0; + r->y[11] = 0; + r->y[12] = 0; + r->y[13] = 0; + r->y[14] = 0; + r->y[15] = 0; + r->y[16] = 0; + for (i = 1; i < 16; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->x[9] |= mask & table[i].x[9]; + r->x[10] |= mask & table[i].x[10]; + r->x[11] |= mask & table[i].x[11]; + r->x[12] |= mask & table[i].x[12]; + r->x[13] |= mask & table[i].x[13]; + r->x[14] |= mask & table[i].x[14]; + r->x[15] |= mask & table[i].x[15]; + r->x[16] |= mask & table[i].x[16]; + r->y[0] |= 
mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + r->y[9] |= mask & table[i].y[9]; + r->y[10] |= mask & table[i].y[10]; + r->y[11] |= mask & table[i].y[11]; + r->y[12] |= mask & table[i].y[12]; + r->y[13] |= mask & table[i].y[13]; + r->y[14] |= mask & table[i].y[14]; + r->y[15] |= mask & table[i].y[15]; + r->y[16] |= mask & table[i].y[16]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^130, ... + * Pre-generated: products of all combinations of above. + * 4 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_stripe_17(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 17 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. 
*/ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 130; + for (j=0; j<4 && x<521; j++) { + y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j); + x += 131; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_16_17(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=129; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<4 && x<521; j++) { + y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j); + x += 131; + } + + sp_521_proj_point_dbl_17(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_16_17(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_17(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_17(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[17]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[17]; + /* Precomputation table for point. 
*/ + sp_table_entry_521 table[16]; + /* Count of entries in table. */ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_17(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 17 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_17(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); + } + else { + err = sp_521_ecc_mulmod_stripe_17(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + return err; +#endif +} + +#else +#ifdef FP_ECC +/* Generate the pre-computed table of points for the base point. + * + * width = 8 + * 256 entries + * 65 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. 
+ */ +static int sp_521_gen_stripe_table_17(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_17(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_17(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_17(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_17(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] = Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<8; i++) { + sp_521_proj_point_dbl_n_17(t, 66, tmp); + sp_521_proj_to_affine_17(t, tmp); + XMEMCPY(table[1<x, sizeof(table->x)); + XMEMCPY(table[1<y, sizeof(table->y)); + } + + for (i=1; i<8; i++) { + XMEMCPY(s1->x, table[1<x)); + XMEMCPY(s1->y, table[1<y)); + for (j=(1<x, table[j-(1<x)); + XMEMCPY(s2->y, table[j-(1<y)); + sp_521_proj_point_add_qz1_17(t, s1, s2, tmp); + sp_521_proj_to_affine_17(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. 
+ * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_entry_256_17(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->x[9] = 0; + r->x[10] = 0; + r->x[11] = 0; + r->x[12] = 0; + r->x[13] = 0; + r->x[14] = 0; + r->x[15] = 0; + r->x[16] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->y[9] = 0; + r->y[10] = 0; + r->y[11] = 0; + r->y[12] = 0; + r->y[13] = 0; + r->y[14] = 0; + r->y[15] = 0; + r->y[16] = 0; + for (i = 1; i < 256; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->x[9] |= mask & table[i].x[9]; + r->x[10] |= mask & table[i].x[10]; + r->x[11] |= mask & table[i].x[11]; + r->x[12] |= mask & table[i].x[12]; + r->x[13] |= mask & table[i].x[13]; + r->x[14] |= mask & table[i].x[14]; + r->x[15] |= mask & table[i].x[15]; + r->x[16] |= mask & table[i].x[16]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + r->y[9] |= mask & table[i].y[9]; + r->y[10] |= mask & table[i].y[10]; + r->y[11] |= mask & table[i].y[11]; + r->y[12] |= mask & table[i].y[12]; + r->y[13] |= mask & table[i].y[13]; + r->y[14] |= mask & table[i].y[14]; + r->y[15] |= mask & 
table[i].y[15]; + r->y[16] |= mask & table[i].y[16]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_stripe_17(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 17 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. 
*/ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 65; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j); + x += 66; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_17(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=64; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j); + x += 66; + } + + sp_521_proj_point_dbl_17(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_17(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_17(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_17(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[17]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[17]; + /* Precomputation table for point. 
*/ + sp_table_entry_521 table[256]; + /* Count of entries in table. */ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_17(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 17 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_17(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); + } + else { + err = sp_521_ecc_mulmod_stripe_17(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + return err; +#endif +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_521(const mp_int* km, const ecc_point* gm, ecc_point* r, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[17]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 17, km); + sp_521_point_from_ecc_point_17(point, gm); + + err = sp_521_ecc_mulmod_17(point, point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_17(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the point by the scalar, add point a and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, + const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[17 + 17 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (17 + 17 * 2 * 6), heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 17; + + sp_521_from_mp(k, 17, km); + sp_521_point_from_ecc_point_17(point, gm); + sp_521_point_from_ecc_point_17(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_17(point, point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_17(point, point, addP, tmp); + + if (map) { + sp_521_map_17(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_17(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Striping precomputation table. + * 4 points combined into a table of 16 points. + * Distance of 131 between points. 
+ */ +static const sp_table_entry_521 p521_table[16] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0xc2e5bd66,0xf97e7e31,0x856a429b,0x3348b3c1,0xa2ffa8de,0xfe1dc127, + 0xefe75928,0xa14b5e77,0x6b4d3dba,0xf828af60,0x053fb521,0x9c648139, + 0x2395b442,0x9e3ecb66,0x0404e9cd,0x858e06b7,0x000000c6 }, + { 0x9fd16650,0x88be9476,0xa272c240,0x353c7086,0x3fad0761,0xc550b901, + 0x5ef42640,0x97ee7299,0x273e662c,0x17afbd17,0x579b4468,0x98f54449, + 0x2c7d1bd9,0x5c8a5fb4,0x9a3bc004,0x39296a78,0x00000118 } }, + /* 2 */ + { { 0x66fd07ca,0x1036eb9b,0x6b7fb490,0x6ca52cc1,0xd3e0c270,0x512e973e, + 0x73d92d11,0x889980bf,0xa4005eea,0x38b4cfe4,0x8ceb4313,0xb6f992cc, + 0x6daf7c23,0xd0ac2f8d,0xe32a93cb,0x1ccfbf17,0x000000c2 }, + { 0x2f508cca,0x7bd9d6f1,0x595a72af,0xe82d7171,0x97512873,0x25d02976, + 0x8cf39fbc,0xefc1de8b,0x9a1237f4,0x25e6b77f,0xd4d98b5d,0x9f3b73e7, + 0xeccb07fe,0xe1fda62b,0x625350cf,0xdb813b03,0x00000014 } }, + /* 3 */ + { { 0x9b27bd61,0x415a1c9b,0x606854d6,0x74522753,0x92e73538,0x9e331ef4, + 0x817e7a6d,0x0b3dba85,0x49ac273b,0x55c4bd53,0xfcb5417f,0xad42c78d, + 0x92e08d38,0x528998b9,0xcc1914cc,0x14c2fff6,0x000000c1 }, + { 0x767e9645,0x35b26fb0,0xc5e5a659,0x162b512f,0xcc47fbb8,0xa6e03696, + 0x0a29a69b,0x732db065,0xd56bdf5d,0x058a74ed,0x25c858d9,0x4b7b60a0, + 0xbd43373d,0x17f8a6d4,0xedf610b4,0x7b968f51,0x0000011f } }, + /* 4 */ + { { 0x1bc0fa77,0x5f56b5a4,0x64fd36f5,0x6cdd6bb5,0x8a5b7c7f,0xd0ac68b5, + 0x09919ef9,0x4a92d9bf,0x71c3c520,0xc305e12b,0xdb699aee,0x554a9d1c, + 0x61f54643,0x7fde0077,0x479115ce,0x99c13124,0x00000039 }, + { 0xc271ac2d,0x25f890e1,0x94b370ac,0x1353ccd3,0x744d4011,0xc7b5adf6, + 0xbe378127,0x9ccd7687,0x06c4e3cd,0xa8489b5c,0x305505f9,0x1945580a, + 0x4ab3b12b,0x07190a20,0x1534ea4d,0x0ff53eb1,0x00000159 } }, + /* 5 */ + { { 
0x91798548,0x877d4edd,0x031d657a,0xc43c7b25,0xfab18a04,0x47603671, + 0xf670b476,0x7e39e7f2,0xb02fcc03,0xf7b76431,0x877f46f5,0x7c5662f3, + 0x1c8b0c61,0x5bf8327e,0x4a8be322,0xe9cdb353,0x000001ae }, + { 0x9d264420,0xa2d7092e,0x533ff3db,0x1f970352,0x99b5b52e,0x31dd232b, + 0x850f45e9,0x8a9ce16b,0xc3011849,0x01c99023,0xc8e9301e,0x4bc30989, + 0xcd95f64c,0x77a4de70,0x1026f289,0xbc8797bb,0x000000d7 } }, + /* 6 */ + { { 0x2be9edf8,0x98ea0934,0xfcb98199,0x6c2f3132,0xfaf83aeb,0xf579893d, + 0xc73fda0f,0x858e87bb,0x7a0b9d1c,0xd3c0b3fb,0x71ee68b1,0x21fe6305, + 0x66aa6f16,0x5bf8f01f,0xbca825ed,0x30934c99,0x000000d1 }, + { 0x913022f2,0xe4309850,0xde5b80ce,0xfdc336c9,0x8b6130ef,0xb716d689, + 0xa758d2f4,0x8a58b405,0xaa5cbc1c,0x98879df8,0xc12ce0bb,0x847cfd06, + 0x8c02ff3c,0xa1006360,0x3438695b,0x836e906a,0x00000136 } }, + /* 7 */ + { { 0x259ce02d,0xac8fe351,0xdae5e0f7,0xa506da0c,0xf043421d,0x77b56e98, + 0xa1647490,0xe0d041c7,0x9cb90101,0xe41f0789,0xda3e72e6,0x29bbf572, + 0x04a14df0,0x6b635c47,0xe81ef5d3,0x56873f58,0x000001dd }, + { 0x5cf9e33f,0x77abe79e,0x0a1117fd,0x91aab581,0xcbac2fe1,0x11edf3b1, + 0xd72113b7,0xef43e017,0x06b74002,0xf9ad685c,0x8fbd3b1a,0x7e6370ce, + 0x42f73a82,0x550dd50b,0xc5e64a9b,0x8f2146be,0x000001f2 } }, + /* 8 */ + { { 0x2934ed82,0x05a704cc,0x989edd8c,0x647089fb,0x0ce7c62d,0xe0b239d4, + 0x105a5eff,0x4c892ea6,0xd5ed6b04,0xa519395f,0x509ed794,0x806c7003, + 0xe70ce5c4,0x882e9886,0xff01f6a9,0x50730ca1,0x00000088 }, + { 0xdbcc5484,0x90a78a16,0xfd454b50,0xc1ab078c,0xcb09e525,0x6f488252, + 0xe19b2ed7,0xdd663f53,0xa67bf59c,0x16b10da1,0x36bb770a,0xb47f6b95, + 0x777b2bce,0x6bdc8428,0x561553f8,0xcd02ae3d,0x00000017 } }, + /* 9 */ + { { 0x1579d15a,0x1e3633a0,0x3e98cd1f,0x574f0c23,0xc60f4f99,0x45969dca, + 0x49fb9f24,0x10062c93,0xd378f640,0xd29a29d7,0xd7d48c2f,0xec941760, + 0x31fbea5c,0xf0591c59,0xb40f9ebf,0xd6173e6b,0x00000063 }, + { 0x5a984a72,0x220f4f39,0x32510f26,0x9a3f82ce,0x8c069a1d,0xf3d04c76, + 0x69a21e57,0xf1d6d891,0xdc4db601,0x6b96b30b,0x64dcf3e0,0x71eeb728, + 
0xc7caaff3,0x6f80c483,0x571b66e4,0x45533092,0x000000b0 } }, + /* 10 */ + { { 0x87140dad,0x49ae4521,0x57e2803e,0xda73032b,0x026ea20a,0x13f5e5eb, + 0x6e00afb9,0x2d54c4b0,0x7a150474,0x4393b92b,0x13f1a7da,0xb5b41bf8, + 0x02b5867a,0x6d786907,0xaf2ea4d1,0x5193a9ac,0x000001b3 }, + { 0xa6b186cb,0x2a1563f7,0xe28e57b6,0x73a70a44,0x78fc8a1d,0xd7c4fc6d, + 0xdf3d6d99,0x4c9b4581,0x1e373aab,0x544f5249,0xe913498e,0xe99434a2, + 0xc4700f4c,0x30159749,0xe5142766,0xb8ef02cc,0x000001d0 } }, + /* 11 */ + { { 0xb9e6ffc9,0xe99805a6,0xf74d977b,0x1a357f05,0x5c9941bc,0xc8ddef31, + 0xcbe842e7,0x4b6d66ca,0xa20dc12d,0x84e1f75f,0x5f0c02fc,0x8b1b2c50, + 0x037b493d,0x3fa1889e,0x95705046,0x720bd9e0,0x000001c2 }, + { 0x93ab9309,0x1a1f3378,0x226a8f94,0xe05a30a2,0x4045f1bd,0x2c01a52d, + 0xab5f5115,0xf42e8fd5,0x0c05fecf,0x954d1d09,0x8d0650d3,0x47e964d1, + 0x3c860801,0x6866fa5d,0x5abbb4af,0xac2fecbf,0x0000012c } }, + /* 12 */ + { { 0xe5537747,0x846dc3d2,0x1f5f9f46,0xe28e00df,0x3f31e42d,0x041af624, + 0x256af225,0x4948947f,0xff4f9550,0x3896c61a,0x34bb5a3e,0xcb40c773, + 0xeceafacc,0xb9becb07,0x4d45e83e,0xfe29f049,0x000001aa }, + { 0x6b5578db,0x83fb71b3,0x0a710526,0x3017f115,0x5f220d77,0x189ec946, + 0x48465e68,0xba87ae07,0x70e0cbea,0x1da474d5,0x2b2ba7c5,0xb92cb0a6, + 0x8b1fb7e2,0x35cb356d,0x2cc8cb18,0x1155296a,0x0000000f } }, + /* 13 */ + { { 0x6ed0f604,0x7f9c9d9b,0xcb49c6d7,0x765e43e9,0xae9be5ca,0x03c4dd67, + 0x405aed36,0x5480888b,0x920ccddb,0x3a69ebb2,0x03f0c7cc,0x44ec0573, + 0xce89b026,0x158e2437,0x4f179a17,0x86795029,0x0000003a }, + { 0x9f193dd9,0xf7854032,0xdcc158a9,0x531e4068,0x3642b1a5,0x774171bf, + 0xc1e53aa3,0x12b4920f,0xfd87478d,0xd1c5fb53,0xa7cba7ca,0x48958c58, + 0x3f66f2c7,0x375b2cb2,0x598899bd,0x1b510d0f,0x000001b8 } }, + /* 14 */ + { { 0x52007e41,0xfe96299d,0xcd708dcd,0x997140b5,0xf655f6fa,0xe9294eed, + 0xd58b839d,0x7701d45d,0xb6f77cdb,0x5dbdf5ad,0x95a572f0,0x265189f4, + 0xb3515e7b,0xc162794e,0x72655e0b,0xbfb571e0,0x00000168 }, + { 
0xbda82a6b,0xf0d2b863,0x390a9cc7,0x3df5b283,0x700fcd7b,0xbab9995a, + 0xfa4e6c06,0xc01ef0af,0x76a392d4,0x10a98513,0x955392f0,0xa7e3fc72, + 0x1d7a8550,0x8e3c0128,0x361898a8,0xcbca551a,0x0000010f } }, + /* 15 */ + { { 0x3ab71115,0xc8a4cd40,0xbcb9b55b,0xb783170d,0xabd9b426,0x1be20f6a, + 0x5377b714,0x32d2ea64,0x6b358bbf,0xda342480,0x6e202211,0x782bc800, + 0xaa27c499,0xf80974c4,0x50341cde,0xc2e66fa9,0x0000004e }, + { 0x24ae60c3,0x082cb95b,0x83ad7484,0xd4b80af4,0x6205256b,0x84b739ce, + 0xae1fe063,0x616f505e,0x342f218f,0xef14ea68,0x64a01186,0x2b17d66c, + 0x50858bce,0x60e889ce,0xd5881005,0xdb046c59,0x000001e1 } }, +}; + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^130, ... + * Pre-generated: products of all combinations of above. + * 4 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_17(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + return sp_521_ecc_mulmod_stripe_17(r, &p521_base, p521_table, + k, map, ct, heap); +} + +#else +/* Striping precomputation table. + * 8 points combined into a table of 256 points. + * Distance of 66 between points. 
+ */ +static const sp_table_entry_521 p521_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0xc2e5bd66,0xf97e7e31,0x856a429b,0x3348b3c1,0xa2ffa8de,0xfe1dc127, + 0xefe75928,0xa14b5e77,0x6b4d3dba,0xf828af60,0x053fb521,0x9c648139, + 0x2395b442,0x9e3ecb66,0x0404e9cd,0x858e06b7,0x000000c6 }, + { 0x9fd16650,0x88be9476,0xa272c240,0x353c7086,0x3fad0761,0xc550b901, + 0x5ef42640,0x97ee7299,0x273e662c,0x17afbd17,0x579b4468,0x98f54449, + 0x2c7d1bd9,0x5c8a5fb4,0x9a3bc004,0x39296a78,0x00000118 } }, + /* 2 */ + { { 0x0f0ccb51,0x80398667,0x3654974a,0xb87e1d01,0xb2b29ed9,0x7f58cf21, + 0xa3add337,0x06c0e9aa,0xe9d08ffb,0xf13b35d0,0x96761627,0xdd8bf44c, + 0x758a3ef4,0xa4a18c14,0xa0043adb,0x96a576dd,0x0000013e }, + { 0x632d95a3,0x2bde24f8,0x4c524829,0x79f15ef1,0x9bdaba19,0xaadd863e, + 0xa962b707,0xdde053f4,0x14258d98,0xc598a2de,0x061c235c,0x9fa5a19d, + 0xe8ffd32c,0x0ed46510,0xef78ceac,0x2aea9dd1,0x00000185 } }, + /* 3 */ + { { 0xeaaf1fe3,0xd0a91dd8,0x4400b52b,0x0db38662,0x21abf0d2,0xff6a06a9, + 0xa768c940,0x9412879a,0x9a1eec37,0xf3791abc,0x2738343c,0xc913fbe6, + 0xe222abc1,0x728b42ab,0x2b9ef313,0x874c0a86,0x00000157 }, + { 0xe6f03d49,0x0ac8f184,0x1e48be03,0xa9c357e4,0x815cbdef,0x02ce5ef3, + 0x5fd8dc3c,0x7a41c7ab,0xfaeb109d,0x4bef67c9,0xa84f4d38,0x2f98cca1, + 0x672f0aae,0x7e03d47d,0x1d58968b,0x24b1ab58,0x00000007 } }, + /* 4 */ + { { 0xdf9314e0,0x904f2d4b,0xe7a00aac,0xdaae850d,0x582efb03,0x79231083, + 0xec7fe6d2,0x80f1c283,0x199d74a8,0x2d5b3996,0x395007e7,0x5f120b9b, + 0x4773f03e,0x30d23773,0x3b78b686,0xf4c19273,0x00000121 }, + { 0xfa8b51f0,0xf103ff6d,0x40e2bdf0,0xae7afb51,0x83254171,0x1130380e, + 0xcda10d95,0xe83501b8,0x4f3a8c01,0x1057771e,0xac807069,0x8f52196a, + 0xa5623821,0x3609b0aa,0x94a0a7f1,0x8c257906,0x000001db } }, + /* 5 */ + { { 
0xb2c0958d,0x300370cc,0x69a7b387,0x89aef166,0x480c9b38,0x2792f3cf, + 0xfab3e149,0x0b2984f2,0x50748967,0x9751e436,0xad33db2a,0x9cab99d5, + 0xb44a4daa,0x4d945d32,0x16c77325,0xa26cca52,0x0000000a }, + { 0xf9e66d18,0xcdbe1d41,0xaa117e7a,0x80aeef96,0xddb0d24b,0x053214a2, + 0x5c98b7bf,0x6dcfb227,0xdfd3c848,0x613e7436,0x3ca4d52c,0x6e703fa1, + 0x18551e64,0x0c8e2977,0xbfa8527d,0xf5e90eac,0x000001c6 } }, + /* 6 */ + { { 0x4ab2d58f,0xa2c2f1e7,0x2a097802,0xc1bbf82c,0x770bb76a,0x6583eb24, + 0x5667f7bd,0x8e4ed9ed,0xfd96897e,0xd8c01d86,0x3fbe0f15,0x66395a13, + 0xd99cdcb1,0x51e4f39d,0x720deb25,0xde08424a,0x00000082 }, + { 0x60ea91af,0x97aa53b2,0x7a31dfdd,0xa4384af7,0x5cd09bbe,0xcd82f239, + 0xf30058e1,0x997c19da,0xe5c78e97,0x443b60c6,0x575b1845,0xfaae9b5f, + 0x08c2ce16,0x5ce86f33,0x4f63fa86,0x983ce58f,0x00000073 } }, + /* 7 */ + { { 0x8217609d,0xaee93131,0x2412fc00,0x7f8a9dd4,0x286c6329,0xe117e64c, + 0x7bf1c65e,0xcc3782d6,0x8d03eee5,0xe8c144db,0x9ab93799,0x01acacb2, + 0xb07784c7,0x215eb1b5,0x1affcd87,0x2c409fa8,0x000000f8 }, + { 0x378139a4,0x007d3766,0xb55bea93,0xc6d969eb,0x68c8bc9d,0xc7c60d6f, + 0x5f93f242,0x844e8461,0x741717d9,0x8461ca2a,0xf0bf120e,0x8e930e79, + 0x6b5699d7,0xe1554a02,0x6a4fb6de,0xe69c7702,0x0000007d } }, + /* 8 */ + { { 0x4bee80d7,0x61b51bb0,0x7692de69,0x0e1f6a1f,0xa0ebc3bd,0x8379e46c, + 0x930644f0,0x1c0bffa7,0x390db077,0x97c67b87,0xfada1ce9,0x095c33e1, + 0xac54b512,0x3c500add,0xd3118656,0xc231d360,0x000000b0 }, + { 0x39bcab2f,0x06289298,0x64dd220a,0xc0c06780,0x763dc2a0,0x062f6084, + 0x1938c3e3,0x88e9da73,0x52e46eb9,0x69be8f2d,0x6a5de0fd,0xe55c8d2d, + 0xdb2c0e26,0xf3a3fd63,0x1e4bff57,0x899c6d9f,0x0000014a } }, + /* 9 */ + { { 0xec05ce88,0x9ff6e3a1,0xb6afd202,0xf8fc2496,0x6fbeb007,0x0b9d2077, + 0xeebded40,0xb50ec0bd,0x693700f7,0xaef97742,0x3f7b030e,0x806e37a1, + 0x1b901f77,0x5cf17d17,0xca95ae0f,0x9036e5df,0x00000159 }, + { 0x000e8e0c,0x00af64b5,0x06fb4df9,0xd3f2ae04,0x449f23ba,0x5f61da67, + 0x255b25a9,0x0ca91842,0x8e33c650,0xfa6af3e6,0xc2c027c1,0x14373c00, + 
0x972840a5,0x99f3cda1,0xd0e84240,0x98c62b79,0x000000e7 } }, + /* 10 */ + { { 0xae4d0f28,0xe8c7c4a8,0x566d006e,0x3a8a55ef,0x066e4023,0x37985f65, + 0x5d321b76,0x8deccab5,0xb8351b07,0x38b966d6,0x57d548ab,0x2e889e53, + 0xe631ab0b,0x7a9e8e2f,0xe75c537b,0x45c60f95,0x00000059 }, + { 0x7867d79c,0xbca27d34,0x81c81980,0x7f460b15,0x976b8c51,0x7ec2d9ab, + 0x61b91ed9,0xfcd04486,0xd9c1d15f,0x730a7a25,0xf94c9db9,0x8a2cf259, + 0x5dec5a3b,0x8e784b87,0x3e5131ee,0x06252607,0x00000004 } }, + /* 11 */ + { { 0xf1631bba,0xdee04e5c,0x156f4524,0x40e6c1df,0xe4c30990,0x06603f30, + 0x6b6abec7,0xdb649a43,0xf6b94f6e,0x354f509c,0x36b7e0b5,0x7fecf469, + 0xba1e6dd2,0xa7a7107e,0x689450ca,0x889edac5,0x00000022 }, + { 0xd05596f2,0x9012916e,0xb023cb8b,0xe3901dac,0xe7d4abe1,0x2501d3ec, + 0xa9c90313,0xb2815040,0xc6d146d0,0x9dbcd3f1,0x74ee1896,0x6fa1d5b1, + 0xa91226fb,0x49aea161,0xb8a80984,0x754ceedf,0x00000154 } }, + /* 12 */ + { { 0x4270b2f0,0xb64e27b0,0xbf4d74d7,0x84b34e48,0x0c2722ba,0xb186be8b, + 0x9ff9b71c,0xf54a589d,0x34fd6bc4,0x9887e4df,0x7412f49d,0xb7c669fd, + 0x77f89d16,0x4008d9bb,0xc902e074,0xafb9426b,0x000001cf }, + { 0x662935ca,0xcca4f2d1,0x997dcc46,0x2847c703,0x353c79f8,0xc089e9e5, + 0x5215f0f4,0x9ed8d989,0x80911b9d,0x59cf08bc,0x6de27aa3,0x4b03540e, + 0xf69e320d,0x52f4d63e,0x94ef193b,0xa0217fd6,0x000000e6 } }, + /* 13 */ + { { 0x74214780,0xb77de627,0x207459ea,0xca066817,0xe9c7fb01,0xf78579b7, + 0xd6d4b7c7,0xe55548c1,0xa66caa39,0x45756190,0x98505a4f,0xf8141b03, + 0x4c8864eb,0xa5ca0d7c,0x9e129d3f,0xbf8af950,0x00000053 }, + { 0x85285092,0xbc9b29d8,0x8eed5e5f,0x82f31daa,0xf618aab9,0x9c33690e, + 0xd2626ed1,0x0eee14f4,0x07ed8e09,0x4229570b,0x8736d040,0x1977920e, + 0xede7d01d,0x47ee25ff,0xbc7ab73b,0x3c921c3a,0x000001b9 } }, + /* 14 */ + { { 0xa08b2b14,0x0b6a07cc,0xbf174c7f,0xaa978deb,0xc40cb2a4,0x291cb828, + 0x90adc838,0x95c78272,0x8c1edde6,0x08da8b2a,0x90fbd220,0x741ceb2f, + 0x322db94e,0x5f89c9e5,0xb73c548e,0x18266085,0x0000007d }, + { 
0x2defd012,0x69ebf82a,0x5a1537ef,0x01ecb094,0x3ef0811d,0x3c557535, + 0xb2bd4dea,0x59c882a7,0x7bf969c8,0x00a1f972,0x0b25ad1b,0x063adf5e, + 0xf2536005,0x4c1ff306,0x4112fe18,0x8e515bec,0x00000117 } }, + /* 15 */ + { { 0xefe3d3d5,0x9314787f,0x9d897227,0x29e76f65,0xe0b6acf5,0x15c77ed1, + 0x1c5e8dd9,0x9c2b7b20,0x5f5667af,0x788038f1,0xf3576ef4,0xf38c766f, + 0x0040154a,0x9f0623c8,0xde883b53,0x47d3c44b,0x00000096 }, + { 0xde1b21a4,0x32075638,0x571081c1,0xbb6399c1,0x75c03599,0x322e6067, + 0xade60cf5,0x5c7fde7f,0xefc19059,0x1b195440,0xdd7b3960,0x7e70ac8c, + 0x6a6fa73e,0x4aa5a83d,0x63080764,0x34f8cfac,0x00000042 } }, + /* 16 */ + { { 0x286492ad,0xee31e71a,0x65f86ac4,0x08f3de44,0xda713cb4,0xe89700d4, + 0xa86b7104,0x7ad0f5e9,0x2572c161,0xd9a62e4f,0x25cc1c99,0x77d223ef, + 0x3b962e0c,0xedff6961,0x81d8b205,0x818d28f3,0x0000008e }, + { 0x8cdf1f60,0x721231cf,0x6717760f,0x8b640f2b,0xe045a403,0xbe726f8c, + 0x0370689f,0x422285dc,0x72ea0dcb,0x7196bf8f,0xc8086623,0xa16f7855, + 0xc326fe48,0xd4e19fc7,0x8f68bf44,0xfdbc856e,0x0000013e } }, + /* 17 */ + { { 0xe6a3ace5,0xde34d04f,0x896191c1,0x0dbb603e,0xf75ed0f4,0xb4dc0007, + 0x95b259b5,0x15e0e6bc,0x2615f020,0xdfbcba66,0xd31ea3f8,0xb2ec5433, + 0x103ff824,0x42b0b0e4,0xc480332e,0x19315060,0x00000111 }, + { 0x045452f1,0x9997ea28,0x71f3f73b,0x80b678cf,0x41e9328e,0x4a52bddc, + 0xe6af1c23,0xb7f2656e,0xb44215e7,0xc43805b9,0xf0a4028b,0x3aa734f2, + 0x422476e2,0xe3c72479,0x68c60cf7,0x6dc2e8b0,0x000001f1 } }, + /* 18 */ + { { 0xfffc0de5,0xbcdfae6f,0xab4a5f24,0xa801814f,0xea2aa8dd,0x19013658, + 0xda4f0441,0xf3b1caf5,0x34100611,0xf24b9cdb,0x96e0cf88,0x48c324ed, + 0x23055c82,0x4b7ea334,0x89092e29,0x6e835b64,0x000001d3 }, + { 0x07372f27,0x7eb77ae7,0x83bae19a,0x4779b4fa,0x65429ebb,0xa175dae1, + 0xfc03ef3f,0x942ec266,0x6991c7c4,0x0e5fc6a9,0x56253d3c,0xa0f61e4f, + 0xde74e738,0x7a11ff58,0x624de919,0x60524cd4,0x00000002 } }, + /* 19 */ + { { 0x01342e08,0x45b5d0ca,0xb749f0af,0x509ed4f0,0x6529d804,0xeb5502d9, + 
0x6d80359c,0x5eb087db,0x4c384800,0xeaa66a87,0xc75a8784,0xe972c7a0, + 0x6874317e,0x8c169e21,0xe5c9fbf4,0x81c556e0,0x0000014f }, + { 0xe120674d,0x26b0b12b,0x219f00ac,0xc6bf09b9,0xd658caa6,0x1e1e732d, + 0x8292d99e,0xc771c5af,0x25fdbf80,0x5d813529,0x3666c37d,0xe61bd798, + 0x1d0df680,0x8dac946a,0xc39f0983,0x58dcf684,0x0000009f } }, + /* 20 */ + { { 0x7b7dc837,0x14169102,0xb50eb1c4,0x2d719754,0xd7e6741b,0x04f4092a, + 0xbc824a38,0x1d0a7f1d,0xc8e20bcf,0x570b2056,0xda181db0,0x6732e3b9, + 0x0a7b508a,0x7880636e,0xc9f70492,0x11af502c,0x00000045 }, + { 0xc56f4ffa,0x0b820d94,0xc4f0c0fa,0x1c6205a2,0xa1a0606a,0x99f33d4e, + 0x79b316fb,0x1bab6466,0xe4f240fc,0x05aa0852,0x92d7dc43,0x22539b78, + 0x06e3c073,0x03657f12,0xcedb6633,0x28405280,0x00000059 } }, + /* 21 */ + { { 0x4397760c,0x90d08711,0x1c9fcd06,0xb9020b76,0x987e24f7,0xc7fec7fa, + 0x522335a0,0x0e33b8a0,0xae21ca10,0x73dbeafd,0x3b032220,0x458c060a, + 0xee145da6,0x9b9c73b8,0x27ff62ef,0x31c661e5,0x000000aa }, + { 0x81430b5e,0xaf518eb0,0x50ee0d69,0xb32f9cea,0xaa6ebe8b,0x0ecdb0b5, + 0x9fe1d689,0x1f15f7f2,0x1a59cc9a,0xce5d68f3,0x08ab2a63,0xf4d67994, + 0x4347ce54,0xe85b1cef,0x286d0776,0x8ff423c0,0x00000176 } }, + /* 22 */ + { { 0x33dcec23,0x8564104c,0xcdd07519,0xbaf0d61b,0x4c4f309a,0x486daf51, + 0xde488715,0xf01bc8f5,0xd3539ba3,0xddd6baf1,0x3a3be8ec,0xbb7e665d, + 0xcb5d865f,0xf919dac3,0xf12149a0,0xfe203da3,0x00000173 }, + { 0x78d4a3d1,0x043ae9a1,0x865316d8,0xa4d5cf58,0x41176463,0xeaf026c0, + 0xf84afa44,0x316c638f,0xffea422d,0x512f2397,0x6622b613,0x691eaa04, + 0x97e7068d,0x48856ea3,0xf4a1b33c,0x42d1b2e3,0x000001b5 } }, + /* 23 */ + { { 0x1f487402,0xf51b2d5e,0x7aaf1dd5,0xe36016e6,0x6da9c20a,0x1eb3f1f5, + 0xece45bfd,0x25b7d361,0x027a9e18,0x42db0633,0xe8411649,0xbf228777, + 0x458773d0,0xf5fce0c4,0x2dd7a5f0,0xb2b3151d,0x0000001f }, + { 0xfbaa096a,0x102773e8,0xe093a878,0x152726eb,0x2c7f1781,0x5c53cd07, + 0xab5dca76,0x38d3dfd0,0x87ef2d4a,0xbb4a7d85,0xb7eb11c2,0x5c9c2013, + 0x0b6da22f,0x5e353c34,0xa325ecad,0x846d50a5,0x00000039 } }, + /* 24 */ 
+ { { 0x1677df58,0x76da7736,0x1cb50d6c,0x364bd567,0x0a080ff2,0x0443c7d7, + 0x86532430,0xa0a85429,0xc35101e7,0x82002dd2,0x48c5cd76,0xbebc6143, + 0xca6cf13f,0xff1591ae,0x98bf8dc0,0x91c7c2e6,0x000000fb }, + { 0x12de14d5,0x6a7c5cad,0x6561c822,0xbc448c5f,0x7cdbb3da,0x9f8de430, + 0xc76811d7,0x9c58f011,0x75462049,0x1e89806e,0xc9a74e49,0xe52ad0a2, + 0xb2be37c3,0x2034685c,0x0a0bc72d,0x7a863245,0x000000ec } }, + /* 25 */ + { { 0x8a86786e,0x33818c21,0x2137e2c8,0xed537f74,0xa7e6eb20,0x5d9690d1, + 0x5cdc4803,0x9790ec70,0x24f7bd75,0x469162c8,0x4e1f0f14,0x09e7ef9d, + 0xce9915ca,0xd30c128b,0x6c71226f,0x810145f6,0x0000002d }, + { 0xb71d87e5,0x312749f5,0x7b02ceda,0x25f3b141,0xe0baff16,0x02456d2e, + 0xfcae6627,0x97f7b3a9,0x37bd985f,0x0d6ebf8f,0x7fa6d0c1,0x20aa81b9, + 0x21f2f137,0xb29f1a01,0x5cc0ddb1,0xe326a2f8,0x0000003d } }, + /* 26 */ + { { 0x38c2ee78,0x26f3398b,0xa75a0bee,0x40c3d101,0x565a7f8e,0x35a31706, + 0x04019e5d,0xd12985e3,0xb8174b6e,0x21e2a642,0xaf80a52a,0x25a15ee8, + 0x8518d80e,0x5d1e0fe6,0x04f6ea9a,0x8cbbc138,0x00000084 }, + { 0xdfd45169,0x76828690,0x59d3e8d0,0x38d7e098,0xcdb8bfc2,0x23758811, + 0x162cf648,0x8499547a,0xb4d15b8c,0x494bab3b,0xc60499a6,0x822cbc57, + 0xa8a1cfed,0xac43224e,0x57c6598b,0x43563469,0x000000d9 } }, + /* 27 */ + { { 0x68271323,0x2b069253,0x49cd04d7,0x24d9e0a8,0x2b31cc7d,0xaae35fbf, + 0x57a3e361,0x44f64b4f,0x0294e856,0x14904686,0x43ced4ae,0xddc82ee7, + 0x7e2cda47,0xcb92a6a5,0xbfc1f968,0x989c42ef,0x0000013f }, + { 0xb8651600,0xbed98bdf,0x7a3cfaee,0x8c363434,0x35b1a226,0x93a12543, + 0xd5825507,0x558da7dd,0x852eb1e9,0xa5173b23,0x2295f545,0xdf5ae585, + 0x6646d101,0xe546e2ef,0x5d89f862,0xf7e16a2c,0x000001fa } }, + /* 28 */ + { { 0xc7ec136d,0x0d746c8e,0xcd11351b,0xf8e1d827,0xf187a116,0x764a3ad3, + 0x136e8465,0x2f1b968f,0x850983c2,0xd41aa294,0xbe717259,0x2123ecc4, + 0x763c149c,0xdcdcab52,0x1022b82d,0xa7f50b18,0x0000016d }, + { 0x0ca5e258,0xf99e532d,0x97b62a7b,0xa148ad17,0xc77fddef,0x8d0a242e, + 
0x74f9b6c4,0x58518bcd,0x7fd122d4,0xc53b30b8,0xfb50b2d7,0xbb8cd193, + 0xbc01aae9,0x1a169aee,0x1de26e09,0x7e49b10a,0x000001c5 } }, + /* 29 */ + { { 0x21210716,0x2cabe675,0x07e02400,0x81a296a3,0x8c83795b,0x94afc11d, + 0xdd9efa6a,0x68f20334,0x677d686f,0x5be2f9eb,0xbf5ce275,0x6a13f277, + 0xb9757c5c,0xf7d92241,0xc74f4b8c,0x70c3d2f4,0x00000132 }, + { 0x8d209aa4,0xf9c8609c,0xdb2b5436,0x46f413a2,0x2992345d,0x96b72d1a, + 0x9487c34f,0x186f2aeb,0xb440a375,0x4fa72176,0x7da5358e,0x3a420936, + 0xff25b310,0xf11eade3,0x505d60b8,0x9a570153,0x000001a9 } }, + /* 30 */ + { { 0x6e7495bb,0xae151393,0x490879d1,0xebd2fd28,0x29fd76fc,0x9c232b0b, + 0xc60e721c,0xa1a0d49b,0x517a09e2,0x9f582b83,0x9d8badf8,0xac37809e, + 0x0ad48bb4,0x4aa4de9e,0xcb6cc487,0xfd041312,0x00000027 }, + { 0xead4fb6d,0xc05502ee,0x0a602cbe,0x760c25ed,0xbd7f4a07,0x58ba6841, + 0x54edce14,0xc28b6032,0x0397614c,0xb9d41e39,0x181eed93,0x4221b71d, + 0x332d4b0b,0xd010e3c2,0xdab0e419,0xdfe58a27,0x00000096 } }, + /* 31 */ + { { 0x7debd24e,0x4cd6fcd6,0x9ae2b075,0xbe3fca60,0xf217c26c,0xa7d8c22e, + 0xb9620e3f,0xd42d03e0,0xc7f9f87d,0x634bf216,0x8972ffee,0x22b1ec53, + 0xd60d3e77,0x83a957c1,0x0f6a537e,0xedfe5f86,0x00000162 }, + { 0xf0ea20b8,0x40a05400,0x1d796900,0x2872ac7e,0x0edb0cac,0x7765a5c9, + 0xb62939a7,0x9df5b930,0xaf2cb708,0xf78a676e,0x52febc12,0x030732bf, + 0xba190ad3,0x3a6640de,0x93e7e341,0x36eae15f,0x000000d5 } }, + /* 32 */ + { { 0xa1c88f3c,0x6c6119f6,0x2ec6944a,0x924e5fec,0x5742ff2a,0x4c8aac60, + 0xddb22c7c,0x60adde1e,0xfa5d25bb,0x9728938c,0xec117de0,0xfa5ac4f7, + 0x482929c1,0x41f35ab7,0x0afd95f5,0xd1c4e8f9,0x00000180 }, + { 0xa7cd8358,0x2fc4e73d,0xf2a1c920,0x39361a57,0xad94d288,0xf6f2f130, + 0x2b6a78e2,0xe37e2466,0x79c262cd,0x0babff8b,0x61b597b9,0x6cae01ef, + 0xa60d4e64,0x9c1e33f0,0xdd01f845,0x52a42280,0x0000000e } }, + /* 33 */ + { { 0x0f013755,0x72d640a4,0xfb8380e9,0x0b6dce77,0x7eb64b31,0x2789ce79, + 0x93ca5a36,0x8e704b0b,0x58bdffc9,0x18c360ff,0xb230c372,0x53b1f323, + 0x5a7385d1,0xd6b39088,0x56b93bf7,0x071130f5,0x0000004a 
}, + { 0xfeef3f88,0x29a2096b,0xb82b3945,0x22eba869,0x872664a7,0x7fe2184a, + 0x858ff942,0xa0dc0ba1,0x7490c9da,0x33799eb5,0x81588ce8,0x1d356f62, + 0xa7b2cee2,0x7dd9bc7f,0xa3cfaee9,0x1e61a4e8,0x000000d2 } }, + /* 34 */ + { { 0xe9068656,0xec5db629,0x9fede4df,0x623bd70c,0xfcd45546,0xc78ad5bd, + 0x6291a741,0xf7981dd2,0x761e688e,0x3ac53d92,0x55b9272f,0x6a96892a, + 0x06546fec,0x4217e7b8,0xab9e2f56,0x793c03cb,0x0000015e }, + { 0x6eff39be,0x08fd9543,0xdbff4f68,0x5a1af07e,0xb0241616,0x83d47abd, + 0xd4798029,0x37c5d2fd,0x60b2e6fb,0x9d86d978,0xce8db998,0xe3e3284e, + 0xd868b9bb,0x9f049eb5,0x9dad18b3,0x3b3e8a78,0x0000018e } }, + /* 35 */ + { { 0xe51e61f0,0x57026c56,0x307f2757,0xdddbcaa3,0xb1aeaf41,0x92a026eb, + 0xe2d7f5ba,0xa33e937c,0xbc5ead91,0x1f7cc01e,0x2e46807d,0x90ab665d, + 0x53419519,0xc2a44f55,0x79664049,0x099c1ca6,0x000000aa }, + { 0x8f97e387,0xb561a909,0x45e1dd69,0xf6051778,0x7ff1d6ab,0x1ffa512b, + 0xd09a9c89,0x42da55a4,0xd2282e2b,0x5e5a7c71,0xe74185ad,0xdfa5a203, + 0xea0baeff,0x19b1369d,0x1ecc0a16,0xa5eef914,0x000001a3 } }, + /* 36 */ + { { 0x7a573b81,0x2af20d0a,0x66194cef,0x7eac1ca8,0x0b711c34,0xef0d2d8d, + 0xba099d42,0x6aea016c,0x5067a8ca,0xa6609d28,0x7a1351ef,0x6a52c600, + 0xb11c2634,0xdab85818,0xbb1c033c,0xf17fa45d,0x00000121 }, + { 0xfc3279d6,0x9fb8b87a,0xc201f1e1,0xe30e76ab,0x806c21dd,0x02af6a83, + 0xc63f824f,0xeafd7e2b,0x46bd1a53,0x7b074e26,0xa2139164,0xcd6f4931, + 0xc172d9bf,0xab2cfd39,0x4db59cf1,0x62f3eb4b,0x0000010a } }, + /* 37 */ + { { 0xe0689a1b,0xe402de36,0x7dcafe72,0x9dccc9fd,0x255d0bfb,0xe4dead7e, + 0x4ada04d9,0xd7ee87ee,0xbfd2e774,0x5a85039e,0x770b2b9b,0x282c6657, + 0xba103bba,0xa7aca826,0xc7cd5071,0xac7028ba,0x0000011a }, + { 0x680c8f04,0x2e61d39c,0xb48b3b5e,0x2f09c4cc,0x95744f3c,0x131609bd, + 0xaaccb593,0x6d72e4b4,0x5adfb209,0xdb7060ca,0x1fd3eccf,0xc67d9e43, + 0xe1752a73,0x1487a26f,0x64d0857c,0x3d953663,0x000001e3 } }, + /* 38 */ + { { 0x4cec9e7f,0xe664506b,0x30aab98f,0xa44564b4,0x173fa284,0x5e1b501f, + 
0x15c97472,0xe7b7bd7e,0x82dec033,0xd6cc67a8,0x0a63b762,0x1fe2e934, + 0x3f8e2fcd,0x3a084e1b,0x9ae6e752,0xccce4da8,0x000000fd }, + { 0xc12fd820,0x0797f8ee,0x96da4733,0x325f892a,0x55997bf4,0x597d241d, + 0x02b753cf,0x3aef35ac,0xf677ceba,0x8a73f95d,0xd1bbac6c,0x5b2892b7, + 0xcc5278b0,0x90751583,0xa47f45f6,0x2f5ed53f,0x0000001c } }, + /* 39 */ + { { 0xab40b79c,0x3914165e,0x25b489a8,0xbfb6eed8,0x8a6c107f,0xda136b7d, + 0x8e01f28b,0xd431db8b,0xa4d79907,0x84e5d0dd,0xa471e685,0x69a91472, + 0x98376ff8,0x58d06969,0xc46311fd,0xce369b74,0x00000006 }, + { 0x1add1452,0x6c0773d1,0xed8e9a2a,0x2e4e9c95,0xca15a40c,0xe8ff8e32, + 0xaf62f18f,0x3fcb7d36,0xeec9484b,0x2ca336ee,0x3b20405b,0xa4d6e7a9, + 0x956d8352,0x6d90d031,0xd9ca03e7,0xdd375603,0x000000e5 } }, + /* 40 */ + { { 0x8b481bf7,0xcc5f297d,0x2a13383c,0x06a2a3e4,0xdc40b96c,0x9e14528c, + 0x1189da3c,0x9a2bf35f,0x6cd57fa7,0xb8adb989,0x9357d32b,0xc1a4935c, + 0xc2d76fad,0x51fb2580,0x24f23de1,0x98721eb4,0x000001ba }, + { 0x52a4b397,0x8c02daaf,0x0d0b4e54,0xc3c5f4cc,0x7b7e79cd,0x29be4db3, + 0xb33970b6,0xf34336ec,0x92808c7f,0xed3dcb7c,0x02288db1,0xec290eff, + 0xe96ed59a,0x2a479d51,0x76d8fa5f,0x9d7ed870,0x00000092 } }, + /* 41 */ + { { 0xe660043c,0xd8edaf0b,0x016e074d,0x84aa2ccb,0xe2cc3b3d,0x9d2368e7, + 0x5c269fc4,0x47b50130,0x3de33e36,0xd0194ee1,0x789ca504,0xdb3361b9, + 0x984db11d,0x8cd51833,0xc8ec92f0,0xd5b801ec,0x000000c6 }, + { 0x47ab9887,0x33f91c15,0x6b5ab011,0x2f285e2a,0x133fc818,0x9b734e5a, + 0x38d8692c,0x5c435a74,0x43282e81,0x3c92b47c,0x9c7bcdaa,0x191231f5, + 0x4d158c86,0x3ae425c3,0xc5a23cca,0x7f568feb,0x00000011 } }, + /* 42 */ + { { 0xbf5caa87,0x8ccbd9d5,0x68dd8c9d,0x17bfc60f,0xc7d4dede,0x63eb4dbb, + 0x8270b5bf,0xbf6e5945,0xcc098fe7,0x887137a5,0x05d7b8f5,0xca5eb687, + 0x4b25a533,0x4b7deeee,0x4a700a6c,0x8e045c32,0x000000ef }, + { 0x70cf52bc,0x160c1c92,0x90cc6298,0x4bf3f63a,0xbf3028fb,0x5fff421c, + 0x523beff1,0x0a8102d7,0x8b9ce105,0xff3309a3,0x06621b1e,0x8e9da4d0, + 0xcc0a7807,0x9775f89f,0x00178612,0x59044865,0x000000eb } }, + /* 43 */ 
+ { { 0xebbd33ec,0x8a6664fd,0xce5ad579,0x0cf9a660,0x50fb56ed,0xecd06c05, + 0x1d5aaa6e,0xb4ca5fad,0x948a7f07,0x36daee5b,0xefe1c11a,0xd2e37887, + 0x91d2544b,0x41f61ac4,0x2bffd8ea,0x49df7071,0x000000be }, + { 0x65acdb56,0x60e2f1f5,0x5e5e5bde,0xf2f13c84,0xe17a0412,0xb97fd354, + 0xd9c93bef,0x8a2867cf,0x25a957e4,0x9ca9d16b,0x4a18635f,0x1f55c19b, + 0x8d26ae71,0x9b3868f5,0x4c94541d,0xac448041,0x00000000 } }, + /* 44 */ + { { 0xd4ad38db,0x6c1bcf89,0x3d714511,0x1180f381,0xcb70243a,0x5b4c2759, + 0x163a716c,0x5dd64d63,0x13648bdb,0xbbd2efea,0xe4de9969,0xa47187f9, + 0xe2de8c45,0x65de6912,0x4bdad0a7,0xe075f29c,0x00000048 }, + { 0x5e4dd88d,0x00335474,0x80577afc,0x18283638,0x227288f7,0xe4b35c01, + 0xe68989de,0xd008fd91,0xcd3f71ba,0x42142315,0x3e4da1e2,0x5cb023ff, + 0xb5662bb1,0x7e6b9c35,0x7fb04fe5,0x143f4165,0x00000072 } }, + /* 45 */ + { { 0x26f40f2c,0xb06b046c,0x6cd7c31d,0xbd5d246c,0x1953a9b7,0xaaa56270, + 0x8f00436f,0x5ac929b8,0x21d0660d,0x1937392c,0x9bd6dbe6,0xd279ed15, + 0xd17c43f9,0x377c4d5a,0xb8fcd025,0x800eda50,0x00000179 }, + { 0x36132f31,0xb88ddc0b,0x2ade73a3,0x6f8f4f01,0x203de2b9,0x38859ec3, + 0x231b6533,0xedb03814,0xa14093ca,0xad08cd20,0x5c2be2f9,0xb9f86d44, + 0xf6ebc09f,0xfd3d9532,0x1aef478d,0x757b5899,0x0000013d } }, + /* 46 */ + { { 0x580f894b,0x7d9ad100,0xd925e46f,0xb612488a,0x2e5a6865,0x45497e14, + 0x17f9a813,0xc86e1053,0xf8a33541,0xd8aa820a,0x7a66d578,0xa6790660, + 0x5f758e23,0x47df60ae,0xa7f8ab5c,0xcadd4c90,0x00000107 }, + { 0x6764ad0e,0x356b044f,0x250189b3,0xf69fe0e1,0x5f14db6a,0x2deaca62, + 0x1bd77d54,0xe9f2779f,0x5cfa895c,0x979911f2,0xb6f19ac3,0xd4e94ced, + 0x01af44b1,0xc3533417,0x50c727f5,0xcac43fff,0x0000003b } }, + /* 47 */ + { { 0x83c1d4cf,0x1742951c,0xb245c34f,0xe03791d0,0x9c2dcc71,0xea8f8ef6, + 0x2a310767,0x2ea57a29,0xb12948bd,0x255b46bb,0x0feaeb83,0x2adc1e09, + 0x449abf59,0xa0d2d18c,0xc4a8a689,0x9e8c9ff5,0x00000019 }, + { 0xeb28171a,0xc9f7b9cd,0xd576987b,0xefd78403,0x22ff824c,0x58b4f3bf, + 
0xbf333cc5,0xee09b393,0xb01ceb72,0xebff83a2,0x220299cd,0x5bb34c45, + 0x66ebf751,0xa3c3e8a0,0x49d05cf3,0x5dee07bb,0x000001a6 } }, + /* 48 */ + { { 0xb114257b,0x09a958d6,0xd4975e30,0x729afd41,0x3aae7b11,0x072879b5, + 0xedd1ac83,0x0791b093,0x1eb67975,0xcfefc7d1,0xe2675b4a,0x0e54bd37, + 0x8d69517f,0x89a62d7e,0x202109a3,0x96f805d8,0x0000006b }, + { 0x57b5f9f4,0x4815d517,0x405b44d1,0xe5c9e436,0xe4870160,0x3442dde0, + 0x1ef6b3f8,0x953fef95,0xf7497faf,0x919e4cf5,0x016ef0b7,0x24e3cc4d, + 0x2512eeed,0xfc5caa87,0xa3bd1703,0xf1ba4029,0x000001b6 } }, + /* 49 */ + { { 0x529252ac,0x2a668435,0x74e7b0d8,0x3da626c0,0xe0be86ab,0x55080cc1, + 0x4ed5dc53,0x534a53f7,0x0cd41fd0,0xa9eff140,0x5674891c,0x0e7c945c, + 0xec53b5ad,0xdea4b895,0x15150988,0xefc67bef,0x000001ff }, + { 0x306033fd,0x988dc109,0xf36875d9,0x1b287979,0xe3c335c5,0x4d39af26, + 0x124e29d6,0xa47259fd,0xc41dbdfc,0x5d60c570,0x0cc0d895,0x06224b61, + 0xeea8ff86,0xa041d4e5,0xae4d8707,0x2920e15c,0x000001fd } }, + /* 50 */ + { { 0xcd67da85,0x66d15f0c,0x5ac54a15,0xae98b6f4,0xf1ac71c3,0x2f05e021, + 0x47559224,0x1feb2226,0x66e856dc,0x2a2f1561,0x6fb4ba47,0x65eb1456, + 0xa29d920b,0x34688bd2,0xf9d4cb9b,0x943ce86e,0x00000061 }, + { 0xaac91174,0xb4696218,0x41dd9234,0x85b519ec,0x9f0763a4,0xb7efadf2, + 0x712c8b33,0x98517f27,0xb0538630,0xa02e7ec3,0x1ff3e3e4,0x46bc45bb, + 0x29496486,0x46ae896f,0xebd2b93f,0x2aeb1649,0x00000146 } }, + /* 51 */ + { { 0xe8e4d3c3,0x1f34f41f,0x5bb7e9db,0xc80d87ff,0xd910b579,0xf0216c0a, + 0xb87349ae,0x2a24b761,0x2b0a6cc0,0x054bc528,0xaf2d1957,0x3b4c7029, + 0xadbe6cdd,0x0e4b90e2,0x26060a34,0x8e774f81,0x000000cf }, + { 0x2e229950,0x3c7f9dbc,0xd9f82b70,0xab11f846,0xf10c05f3,0x2b7ad9a3, + 0x0f1820ca,0x203ead4f,0xccbfb332,0x51dbcbc8,0x066706f1,0x3bd9caf0, + 0x06059d5e,0x5a39be25,0xdcafe64e,0x984387c8,0x0000014c } }, + /* 52 */ + { { 0x8e011531,0x708a757f,0xc3dcd57c,0x7f45b172,0xc2d99e29,0xa8eac9fd, + 0xb93b6415,0x9d4ee81f,0xa5488e86,0xa5833b54,0x0bb7ab70,0xddd561c3, + 0xb3bdf3a9,0xb5bda384,0x1ddf332b,0xf909f8e0,0x00000124 
}, + { 0xab41e782,0xc5b8aa84,0x851ddb87,0x1de20126,0x99482bd2,0xf49baa7d, + 0xf4b6413b,0x05963deb,0x7cd1e224,0xed369fbb,0x1bad60ee,0xdcf495dd, + 0x892e30ed,0xeb475693,0xaf0a212d,0xaaf11bd8,0x0000010b } }, + /* 53 */ + { { 0x16ec64e2,0x71460174,0x7d7c6ebe,0xbfd14acf,0x668b7176,0x1e3504a3, + 0x741b041c,0x72e3f3f3,0x2d3b67b0,0x651fa54a,0xe57d928d,0x623edca3, + 0x72c8f419,0x29b74e8b,0x327abaef,0x3d99cb47,0x00000038 }, + { 0xda342a3f,0x808dd0b3,0xdef4a954,0x12002462,0xeab5a860,0x1b1c642e, + 0x06e54b6d,0x5e1e2a05,0x10c6cf1a,0x9ba1710f,0x0f903cd0,0x334fc366, + 0x134166f5,0x969e0001,0x155c4353,0xfaa26074,0x000000fa } }, + /* 54 */ + { { 0x712de285,0xc85cd0e6,0x869f5dc5,0xcd2ff8b0,0xdf4ed389,0x372a2b92, + 0x55b99c84,0x63524d30,0xe07a0033,0x46fef5a2,0xd6e09493,0x0a2c82da, + 0x72a8952b,0xb3626621,0xaf217eb6,0x9afcb188,0x0000002c }, + { 0x9a64c5b5,0xd3b9d476,0x44c4cfe1,0xa0d8d5de,0x11c6dbff,0x560858ef, + 0x41c14aed,0xce1d978f,0x35efe854,0x251f9e72,0x0474575d,0xf9d0c14c, + 0xbda89c03,0x0d2c838e,0x36cc9dc0,0xa25f040b,0x0000016f } }, + /* 55 */ + { { 0x9cad682d,0xb23d9dea,0x46369391,0x87acb1b3,0x5c0f24d7,0x9f5c1988, + 0xd41883ce,0xdff62fc7,0x53555e46,0xd1ab29df,0x891cda05,0x569b1cb2, + 0x52c633ed,0xdb14dbc4,0x2a345428,0x1acbb86c,0x00000194 }, + { 0x24db8127,0xd86a70c8,0x41b7cf5b,0x84a6563f,0xb908d9b4,0x8d84dabe, + 0x899c260a,0xaaeaae63,0x44436957,0x13ed6b2b,0xd0a92c8d,0x3bc94f99, + 0xd04bcb97,0x978f2e2b,0x716a565f,0x56a388ef,0x00000074 } }, + /* 56 */ + { { 0x96fc1f77,0x6082dfe4,0x1347ad6a,0xb04c435f,0x25ebe457,0xf42694dc, + 0xb6f764aa,0x64a17069,0x04d83da1,0xe03873d5,0xe0c82330,0xb0b9db52, + 0xd4239b3e,0x9886b34e,0x598814da,0x76587f2a,0x0000016a }, + { 0xebc71a5d,0x6918f8e8,0x85405233,0x49141a42,0xc182cbcc,0xd63f09cc, + 0xe09057a7,0x4afe59d3,0xe239d8eb,0xe633db0d,0xfd9494b2,0xbac8582d, + 0x4704fd61,0x8b915a41,0xfceaefd9,0xe0866a9d,0x0000010e } }, + /* 57 */ + { { 0x52e07a4d,0x2b50c470,0xe5d745d0,0x7f6d38b8,0xe1af1226,0xb414c47c, + 
0x39c505f7,0x03e4b44b,0x86f739be,0x59f3d795,0xe7c2f1bc,0xca19bca7, + 0xc063fad4,0x1c51c01e,0x7f428afb,0xda3937a5,0x00000080 }, + { 0x102369fa,0xe9d8ca9d,0x706c0e35,0xe009bffb,0x96b55d80,0x2e0a19a7, + 0xac0d094c,0xda0e42de,0x787c187a,0x6c1be2c5,0x9cfa04b6,0x6d4ae2cc, + 0x76577340,0x5b0cea60,0xc7c96285,0x2d525245,0x000000d8 } }, + /* 58 */ + { { 0xae93de69,0x6dcb238c,0x3bfdae9b,0x4963c833,0xe8b79836,0x33c81f4d, + 0xae8bf8ae,0xe13a2244,0x4c3ebacc,0x0bc6e786,0x555a5ad6,0xa837a53c, + 0xbc7e9459,0x875d8d35,0xf9f46fcd,0xb3705534,0x0000001f }, + { 0x7fb974a1,0x78e9270c,0xe9ed2481,0x23448fa0,0x64bffbd4,0x14166c3d, + 0xd79f4b3c,0xa05aa443,0x3b9f32a0,0xd855a4f1,0xac90235e,0x4bebcf8d, + 0x8db52b48,0x65849987,0xe48d09d1,0xaa4d59f1,0x00000183 } }, + /* 59 */ + { { 0xdbffad9f,0xee585d75,0xf419d8fc,0x64df6174,0xe6c69345,0x6f73bf59, + 0x83d59b0c,0xb80793d1,0x929c8950,0x6baf4fc3,0x29962bab,0xbd445a95, + 0xeaa91273,0x52b61945,0x3d1c785b,0x4fccdfff,0x000001be }, + { 0x7cb2857f,0x05c384d9,0x06b7abf4,0x4cf83058,0x43ace6b2,0xf528dd17, + 0xbc43d6b6,0x2c7b8fa2,0x14e564b9,0x8f0e28bf,0xd2b9f01a,0x1b69bc73, + 0x3dd383e6,0xab8beb40,0x9791946b,0xaccea0c5,0x000000ae } }, + /* 60 */ + { { 0x0163c2de,0x9a68baee,0xeb2768a4,0xc42d0b2b,0xffdae767,0x5686f124, + 0x0aaca943,0x926da5d5,0xe01091cf,0x699c34ce,0x5324becd,0x3d254540, + 0x4193a0a9,0x1b6b58f1,0xd611cc9d,0xf144925e,0x0000014f }, + { 0xc1ed9259,0x7f61a60c,0x2f1d5a7f,0x1be37aa3,0x07aef431,0x0384713d, + 0x4e6fa7ba,0x99f33d49,0x8bd3730c,0x43928c16,0x5b9557dc,0x73cf8ccf, + 0xd1a2bee5,0x0bc6d460,0x83b15610,0x27cd1943,0x00000145 } }, + /* 61 */ + { { 0x3427af4e,0x4be65135,0x310d937d,0x2e6c0bb1,0xcaa671c3,0xbd8ea76a, + 0xd3a9c376,0x9d7b3fd4,0x471709aa,0x124ce863,0x018051c0,0x225ce41d, + 0xf9e8ee1c,0x5489284f,0x535c4ec8,0x22d829c9,0x0000013d }, + { 0xa1b15e02,0x6b01ed9d,0x301e5868,0x1d092bac,0x5764135b,0xbfa7a183, + 0x6f7159a4,0xc0ee59b7,0x18090d0d,0x9171a051,0xb8052196,0x5c1531bb, + 0x20927904,0x740930fc,0x76337685,0x963b48cc,0x00000008 } }, + /* 62 */ 
+ { { 0xf4aaaed5,0x0fe8b620,0xfe871ee8,0x1068de7d,0xfebfcb4b,0x2b22030f, + 0xc3a2155b,0xd4dfbee7,0x2769b805,0xa7a26a8c,0x6d39eaf0,0x377de770, + 0xf615f032,0xf1a92447,0x42d9b731,0xa1b81a84,0x0000012a }, + { 0xb1152e8f,0x299e67d0,0x92b5e14c,0x2e773d97,0xf1cb57a2,0xe0d81073, + 0xbf1da4a2,0x03af0a9c,0xc22b449a,0x169b160e,0xdd2d7d1d,0xb82c1ac8, + 0xbfc98ee4,0x7508aca6,0xe3cbea15,0x54992440,0x00000150 } }, + /* 63 */ + { { 0xa13a4602,0x70004a0a,0xd0d2c60e,0x505c71a3,0xa6d79bc5,0xa4fe2463, + 0xd54d9df4,0xe878eb3a,0x73d3c7b8,0x7ecca907,0x244ecfa5,0x5b3bb278, + 0xb124d179,0x8a30f61f,0x4f632af0,0x5b7e5001,0x00000115 }, + { 0x9ef0021a,0x62c42ecc,0xf856c9d4,0x58017fd7,0x2e6478bc,0x10e243b8, + 0x1505a4db,0xaf074669,0x4cd7eea5,0xd9bb0a1c,0xd52aed0a,0xe8ba39a2, + 0xb549f09d,0x0747449a,0x9e57fa64,0xd5c8f7bd,0x0000013f } }, + /* 64 */ + { { 0x5a53c22b,0x1bd8ce7b,0x7cab446a,0x78733fcd,0x48acb394,0xc44ca4e2, + 0xa38c790f,0xa9888b1e,0x15c34237,0x36afb6eb,0xfb702063,0xb913b8a8, + 0x917508fa,0x34b77cc5,0xf9e4732b,0xa931d7a7,0x00000050 }, + { 0x56d21d18,0xa90a4290,0x55b410a1,0x82666307,0x894a6b05,0xb4684a8b, + 0x828cf75c,0x8a1ade63,0x127702a3,0x4fb2f85a,0xadf7b709,0x83ff7d05, + 0xa68d1db6,0x1d3f5a92,0xc093cd5c,0x243ce1db,0x000000f5 } }, + /* 65 */ + { { 0xd37d7891,0x8fc183c3,0xfd865eca,0x17b50149,0x8f218441,0x0f6e43d6, + 0x5a07f658,0xaf51ec25,0xad303202,0x8fe5a6cb,0x10676ef5,0x95de68f3, + 0xca4e000c,0x7508e31f,0x77735254,0x783e5a95,0x00000159 }, + { 0x2e537ad9,0xbc1db571,0x35be9cf7,0x5e87112d,0xd57f9bcb,0xbb522b48, + 0xa8b3cbc7,0x1eff7890,0xe5ecdb5c,0x4f306e11,0x3387e7ed,0x30da8392, + 0x72321e3d,0x4d91fcf4,0xe412a67c,0x8487bb62,0x0000009f } }, + /* 66 */ + { { 0x8cb8e08e,0x86f5f80f,0x2496fed6,0x7cfd2c41,0x60b7dcdf,0x0061b743, + 0x57f4d05f,0x4dbaffdf,0x458061f2,0xb1993c2a,0x9de994c4,0x6c6ca8d0, + 0x2747e062,0xef70d24d,0xb9995cbc,0xd4e5d4e3,0x000000ff }, + { 0xc6f40077,0x3171e245,0x0723e506,0x1592e045,0x6a6bfd88,0x35c86f7e, + 
0x6d9d9ce0,0xba0959d1,0x3eb5770c,0x2e7f8fe8,0xc40d63dd,0x58eb0881, + 0xeb9e4419,0x56333bda,0x3afd1f4d,0xfb0397df,0x00000034 } }, + /* 67 */ + { { 0xb358815c,0x7b84e05e,0xe41087d9,0x3abcb2d4,0x07f05d7a,0x87a75889, + 0x7a9d481c,0x350778d5,0x42d64cbd,0x9d34cff8,0xccf289fe,0x0859cd5a, + 0xdd2b2c6e,0x8372d591,0x18b40b62,0xc06d482e,0x0000006b }, + { 0xda4ed375,0xd10695a0,0x298daaea,0x51baf588,0xf4b7092c,0xb028a1b4, + 0x7a335b35,0x8ab87dae,0x0567efd8,0xa7359362,0x3320c374,0x7a49fc10, + 0xa3558b30,0x737acac4,0x4c0fce9b,0xd30696a3,0x0000001e } }, + /* 68 */ + { { 0xbd3902fe,0xd9550ab0,0x86a9d3b3,0x9bba4b4b,0x975cac37,0x3a59e0a9, + 0x333605dc,0x045e8731,0x1afc2c58,0xf2c598c2,0xeef9cbf1,0x81ff8d6f, + 0x9bf83c42,0x82bed5d0,0x528131d5,0x9d1d9d5b,0x00000157 }, + { 0x5519258e,0x687da305,0x027de2a8,0x73f539f9,0xd6a230d6,0x69fa9747, + 0x5f5d1684,0xab1aeb23,0x5f7e41f5,0x5bbfe947,0x16a7feb3,0xbd546abb, + 0xe16d5187,0x2afbd4e8,0xbcc953dd,0x7437be13,0x00000160 } }, + /* 69 */ + { { 0xee9755a3,0x55f165a9,0xb82c9ab1,0x0c8d5a1a,0xab6b97e6,0x65a1e45a, + 0xab05e271,0x3004cdb0,0x6db0830f,0x9e0c3b52,0x75acbdeb,0xaae1ec1a, + 0x761e8498,0x413d4484,0xb1b9c62e,0x589e09bb,0x000001e9 }, + { 0x9c72258d,0x67512081,0x5c1593d4,0x61dcd734,0x91c11fdb,0x6c627a7b, + 0x8857908e,0xd1d3e9bf,0x530bc68e,0x9aac06fe,0x6b5b44ff,0x125c16bb, + 0xdb90edd5,0x38860bb6,0xfbbedb5c,0x96fe8b08,0x000001aa } }, + /* 70 */ + { { 0xf257c0f8,0x323a5dd8,0xdd3a10d9,0x4884dc92,0xbbb8ce03,0x03f379ce, + 0xa47262a9,0x6217ad53,0x52e06c6d,0xa1df2017,0xc32428cd,0xf5b723e0, + 0x2c30c62c,0x1e5d3889,0x477f82cc,0xd9a90f1f,0x000001fd }, + { 0x1763ab59,0x830d27ba,0x723783e9,0xcf27d93e,0x945968aa,0x81558264, + 0x1700d5d5,0x63251a32,0x03146d9f,0xcf6bbe73,0xe65bf0f2,0x6cdcf455, + 0x632323fb,0x80aa00ce,0xd96a4744,0x6e49e62c,0x00000149 } }, + /* 71 */ + { { 0x40574c09,0xbeff0b7e,0x3fe80e96,0xb76f2643,0xeb237d91,0x0b3bd352, + 0x7edc3102,0x3c0c62b7,0x424a36dc,0xf989394b,0x7c6c435e,0xe9ea64c2, + 0xe388d076,0x2dfc21c4,0xa4e69e4b,0xcc3852f6,0x00000139 
}, + { 0xbb096b91,0x5238a3ff,0x73d8d43e,0xee72c9e5,0x8c577558,0xc116db11, + 0xdc47d4b4,0x54ec89d2,0x42e1955f,0x2006dd35,0x7437475c,0x004aed6a, + 0x2bee9041,0xc1ddc32a,0xed9332c9,0x597417a2,0x000001fb } }, + /* 72 */ + { { 0x859bae66,0x3c0f1981,0x845d7c1b,0xab48e9b1,0x452a3c1e,0xc6ce9c03, + 0xff810339,0x2384a00c,0x5f98d6fe,0xcd7ede11,0x38a0dd5b,0xf7a00e3d, + 0x3c7e1c06,0x56dd948a,0x8e53a61f,0x9d21a7d1,0x000000d0 }, + { 0x880eb3fb,0xf9cfdbaf,0x5e83f7c9,0x64cfd297,0xa28a74b4,0x61ba7d6f, + 0xdfb13e03,0xb8200d5f,0x232a6128,0x03bc8f4b,0x81a8d86e,0xd1fb92c2, + 0x706d6ea7,0x68675fae,0xefab18c2,0x9b08608a,0x0000011d } }, + /* 73 */ + { { 0xbbd2f539,0x17cf6146,0x76e26ba2,0x96052fc0,0xd4be4a67,0x36821d18, + 0x9f3f39a8,0x8f823422,0x433f873a,0x68b846b9,0x716f4568,0x7a1d3f36, + 0x2fd47750,0xdf603e28,0x6975e226,0x77cb02c5,0x00000003 }, + { 0x8c01dd59,0xf275add3,0xb9c1a37a,0x9c213a9e,0x4dfc5403,0x690ad104, + 0x07ee0d86,0x202ee206,0x661fc40e,0x896ede95,0xd0b02f56,0x6b4d7398, + 0xe5af1a24,0xccb96991,0xc13f7125,0xd5c281af,0x0000009f } }, + /* 74 */ + { { 0xd7073a5a,0xc858c54b,0x861eac7d,0x87c81a5c,0xe720201a,0x51f84a39, + 0x40e003ce,0x952a9f8e,0x58f199de,0x76bdc4ab,0xd56cc02b,0x1cf12322, + 0x83f162f3,0xb6634e63,0x8f969e11,0x84c017ee,0x00000169 }, + { 0x5c89f1fa,0xf1f43362,0xb697b078,0x4a02a630,0x4b05b7f4,0x33311e5c, + 0x4fede4cc,0xa7ccae51,0x4b025aa4,0x0d26e874,0xf84db7ad,0x7d5b77bb, + 0xf571c1fe,0x39ef1aa8,0x418ccd20,0x65eba928,0x0000018d } }, + /* 75 */ + { { 0x8abb2537,0xa37866ab,0x65b3096f,0x14ac4cbb,0x2a428ad3,0x827fa7ed, + 0x10e9e196,0x95d19f62,0x89801b4e,0x31eb97a0,0xaae8b823,0xaae77a62, + 0x5f5c9642,0x9693d62a,0x3e368b84,0xff5bfe97,0x000000ad }, + { 0x492b0dee,0xa3efae21,0x9602c2ce,0x2143e9ee,0x6f3b99e5,0x21367c99, + 0xe93b8f59,0xdd78b2b0,0x1064c13e,0x8d541c38,0xf5738e7a,0xe6b970da, + 0x8373b1a4,0xaf6ecc16,0x74ae208f,0xdbfa3f4f,0x00000180 } }, + /* 76 */ + { { 0x907a6aa0,0xb024621a,0x407879f6,0xef56cb68,0x8168a934,0x44c38b68, + 
0x9b9a9048,0x70d638d3,0x82541f20,0x6968caa0,0x1fc88b50,0x0c597053, + 0xaf635784,0x5564ded5,0xc4d494cf,0xe7e898c7,0x00000097 }, + { 0x6b6ebb2f,0xe1dc98d9,0x7aa9e126,0x292a17fc,0xfa2a2c68,0xb60f0fdb, + 0xb2e1851b,0x9c63270c,0x81ca4cfe,0x898db265,0xb11959d5,0x94082638, + 0xa54b8d19,0xe44f308e,0x44e63094,0x96399eb8,0x000000d6 } }, + /* 77 */ + { { 0xb83769ee,0xfa00f362,0x3efc4cb3,0x72d040ac,0x57abd687,0xc3933889, + 0x940a7128,0x62264425,0xec242a31,0x909c4c8f,0x65a1a551,0xd1e48f1e, + 0x049c2172,0x68bd70f1,0x709b7fd4,0xc8692d2b,0x00000041 }, + { 0xdf816784,0x4e388aa1,0x01be75ce,0x4a58c8a5,0x02a67812,0x9b49dffb, + 0xeda721e0,0xa73299e0,0xe67a65ec,0x8a0bd1f5,0x856c71b6,0xd81e91e8, + 0xc005aa30,0x37aee2f4,0x0595bbf2,0xd9400750,0x00000073 } }, + /* 78 */ + { { 0x010c0ef3,0xa912ac4a,0x4e81b1a0,0x0e654bd8,0x4f353509,0x8f0563dc, + 0xb47d189a,0x10dc41f3,0xf238c09c,0x122edd06,0xc41acf67,0x224c16af, + 0x83758520,0x1ccb9334,0x2275ae6f,0x1a4b5f29,0x00000127 }, + { 0x3ce688b5,0x792fd473,0xdca9c68b,0x14566d37,0x541711d0,0xfce9326e, + 0x3cc341a8,0xe3ba14ee,0x2122c11f,0x6b8ab4cc,0xf5d379b5,0xc0fa763b, + 0xf1522f91,0x95e2d2ae,0x31cf95a5,0xd4e21b3d,0x000000ac } }, + /* 79 */ + { { 0x1d8e061a,0x4013a779,0xacc84a30,0x62707e70,0xeb2f636a,0x6ac08266, + 0x77b25c9d,0xe917ea21,0x70ff35cf,0xddb78bbd,0x041898be,0x5008db2b, + 0xce0ae445,0x0f58a4fc,0x2257d0e7,0xed092397,0x00000043 }, + { 0xe2e129e6,0x2cad77b3,0x0f1be4d7,0xfb8c4a87,0x20056333,0xaee50dff, + 0x2a691543,0xbc2658c1,0xb8fe2640,0x95dc0cca,0x1965a0af,0x694eb584, + 0xedd1d99e,0x7d3baa53,0x8a1edc87,0x2df13b20,0x00000083 } }, + /* 80 */ + { { 0xd181c3f2,0xfead2247,0xf337b23f,0x915d35be,0x74890672,0xdb4cfcba, + 0xfda7a3a1,0xe4f70d8f,0x79275686,0x226b6419,0x6ff1f79e,0xe8040863, + 0xcf5fa4e8,0x98e84b39,0xd8a09f60,0x57aa0be9,0x000000da }, + { 0x4efcea66,0xd40cecf5,0xafc76fae,0x98df2aec,0xc91585a8,0x63f19a48, + 0x13f00aa5,0xb111bda7,0x44b5cb9f,0x6687afab,0x652620d1,0xc6d5fb12, + 0xbacb35ab,0xaf953f1b,0xff94c4d2,0x99709370,0x000000ed } }, + /* 81 */ 
+ { { 0x68b54c89,0xac9f56e0,0xce737c22,0x08ecc17d,0xab089b53,0x208ee83f, + 0x543fbd1b,0xb0f3a129,0x844dd706,0x1b204cf8,0xdec2e40d,0x80975c89, + 0x9399914a,0x08b011ae,0x74674df7,0x6b4ba170,0x00000017 }, + { 0x8fdfc175,0x71216ea9,0x7e0f5b0c,0x77b7fc63,0xceb33a34,0x88d0285f, + 0x0223eab7,0xb679814f,0x51c6d922,0x9078720b,0x9c13f51d,0x5859d5a4, + 0xfaed60b5,0xe69f850b,0x6d0ccab2,0x2499a844,0x0000005c } }, + /* 82 */ + { { 0x73e7bcf1,0x41d581fb,0xdd3c17be,0x16dde61c,0xfa199fd9,0xc62997ec, + 0xc159db97,0x1a758873,0x64132830,0x4ed77896,0x2942a918,0x9672ce89, + 0x816ba4bb,0xf3ee4587,0xce54dd7f,0x4fb7a148,0x00000123 }, + { 0xf009be8c,0xf05d80af,0x78df1ba1,0x62e938d7,0x312de620,0xa7e22e84, + 0x6070c4b9,0x48d29e7f,0xa1b5da37,0x5cd9c3eb,0xa4717453,0x1e51bd2f, + 0x56ab9e67,0x94098ab0,0x49f7c6a1,0xbb584abc,0x00000049 } }, + /* 83 */ + { { 0x1ea470f7,0xa9f25530,0xe9254e30,0xa01bf808,0x71a0038d,0x098569ea, + 0x5913ca87,0x0d2b2ee1,0xb8281fdb,0xae17004b,0x118e5c2a,0xdb5c6eb0, + 0x1fa943ab,0xa56ac64c,0x1a92d501,0x1aaf6477,0x00000053 }, + { 0x06345730,0x9679ef49,0x846f37c2,0x946aaa4e,0x1a7c3aab,0xf81726b0, + 0x8166df4e,0xcb808da2,0x4e04dc3e,0xe9fb3fc2,0x76ec19b4,0x9e0b61db, + 0xeed6d13e,0x6e7f665e,0x86a75384,0x70ed8c07,0x000000e5 } }, + /* 84 */ + { { 0x108ce13f,0x66456e58,0x0e397813,0xb5bfc58d,0xea3949e9,0x04b6a84b, + 0x75af667d,0xea9b66bc,0xa891566b,0x7cb4d6dc,0xbf61595a,0x1b3cecf0, + 0x002e2520,0x4312c73d,0x6135a5fa,0x81d76898,0x0000014b }, + { 0x841078ec,0x4047bc25,0x179c454d,0x75aa9c96,0x4851f8fc,0x6a160609, + 0xce34091f,0x998d4e3e,0x88e54102,0x9a9f6704,0x5da8ac5e,0xbf280f88, + 0x8fec230c,0xc64caca0,0x5094b775,0x0ac864b0,0x0000002b } }, + /* 85 */ + { { 0x8f5daf7f,0x6b606e39,0x10927506,0x48385489,0x08c58a72,0xa2255c5c, + 0xc90f3ee3,0x2f362fd0,0x08795f02,0xc9633af4,0x0425f5aa,0x71710bd1, + 0xec06dbfb,0xc2017e05,0xc1b8bbcd,0xd9c7dc82,0x000001c8 }, + { 0x18b8bed9,0x7db41fdf,0xe3a23125,0xe9483308,0x7291c4bb,0xbcf91de7, + 
0x41448aaf,0x9b0b972b,0xc44da462,0x95dfc633,0x01bf50a2,0x90b9c463, + 0x869e3131,0x18b66f77,0x121baad9,0xa8a4e2fa,0x000000f5 } }, + /* 86 */ + { { 0xca0251ea,0x8ca55109,0x27a6c9b0,0xf2aeed8b,0x5620f528,0x901a8beb, + 0xae13fc56,0x9a8421e8,0x85993c07,0x1349f1c4,0x0d1ab0d7,0x29e08359, + 0xaeb5d909,0x96e2929b,0xf599a66f,0x96c2f1f8,0x000000ce }, + { 0x12be8bd7,0xe4bc4b51,0x3c67e99b,0xf4846a0f,0x4d3a3864,0xd89cc7d3, + 0x73f43981,0x1f647112,0x26dce567,0xc32bc324,0xf02b096b,0xf7134ebf, + 0x0d0682b7,0x5604f00b,0xe3ce8b59,0xfd23d7ea,0x0000011c } }, + /* 87 */ + { { 0xa27689a6,0xf89646cc,0x5564172b,0xd6a7dc43,0xb57cbfcc,0x30bda48e, + 0x5b1adfe5,0x9b11fffb,0x711d8bf4,0x9f2d80db,0xb70e5a5b,0xe879fdf0, + 0x6bd18a1d,0x97534183,0x8cbfd504,0xc8c526bd,0x00000114 }, + { 0xef7388bd,0xd5fe725b,0xe7ffaea7,0xf1c3dbdf,0x7e6de2ac,0x78395b89, + 0x9ebf1bfb,0x81a72c9a,0x69785146,0x65265707,0xf52670af,0x3925ecd9, + 0x83d57d48,0x437bcdd2,0xc80ecb02,0xb5d732a7,0x000001ce } }, + /* 88 */ + { { 0xcfd376d7,0xa7f9fcce,0xa66b084d,0x6b4eab3e,0xd5b91bd8,0x6ac90d08, + 0x8aa304d8,0xaa3d5b7e,0x7f866a4f,0x27f3d42b,0xbb813ae1,0x95d19fa8, + 0xe34a9206,0xd38798d7,0xa32c1cdd,0xdf7c0a69,0x00000073 }, + { 0x38315b16,0xbe2c01bb,0x9e18c8f9,0x1daa7c89,0x08b6b853,0xa3d43fb4, + 0x68092a81,0xb159e48c,0x836faad4,0x77e93d9e,0xa4699730,0xd4ed6361, + 0x6297e476,0x569cb3f6,0xe7811fa6,0xb69d8183,0x00000185 } }, + /* 89 */ + { { 0xab9cb764,0x18f27eb3,0x8ebc1d6d,0xbbbefc21,0x0479aa79,0x47760ddb, + 0x09e542f5,0xb4d16d24,0xbc699b96,0xe35c38d1,0x8c8d8c8a,0x13b2ae25, + 0x67a3a45d,0x8579c152,0x6c554c04,0x773b7357,0x000000d9 }, + { 0x0218c299,0x9620a473,0x99f78a33,0x69be29b3,0x484f414f,0x4684a009, + 0x9a2ca4d4,0xb2c74937,0x68db7ab3,0x09c0773e,0x935c357f,0x6181f059, + 0x8b7de3f2,0x0931303d,0xe0fb6e08,0xf3effcd0,0x00000060 } }, + /* 90 */ + { { 0xb25d6530,0x723c14be,0x9a97d40f,0x5e015b39,0xfbf7f622,0x209c3c4b, + 0x14b4f0f1,0x83d8c59c,0x3f7e8ecf,0xcf002fde,0x1eb1ef0f,0x35d353c9, + 0x201f0c60,0x394c42a5,0x7be8ee34,0x787128ab,0x000001b5 
}, + { 0xb70110cd,0xa0937d3a,0x477911b5,0xe0fa4efc,0xc53a4c19,0xc6acaf5b, + 0x38d509f2,0xbd3010f3,0xe54ac1c6,0x3ee2a82b,0xe4f2a3bf,0x31ea67c3, + 0xf089c7b9,0x7a4ca66e,0x34a2362f,0x5bda2c4f,0x000000b0 } }, + /* 91 */ + { { 0xd1f575cd,0xb424a071,0xa5237182,0x15693b01,0x9a2c9d40,0x14133602, + 0x9c914a60,0x50c4348b,0x095b31c1,0x9024573d,0x22fd4962,0x6f975fd2, + 0xe210b277,0xa1704886,0x6dba937b,0xac29b813,0x000001f6 }, + { 0x775da491,0x09edef55,0x2b6aad82,0x25953f9e,0x1bb40d5b,0x6696a106, + 0x4d5127d8,0xcfc45311,0x81ead062,0x2f21dca9,0xaf3b7123,0x3f3e4f07, + 0x9646f20d,0x12cd06b8,0x6910f5bb,0x24136369,0x0000015e } }, + /* 92 */ + { { 0x3ecfc44e,0x0c844fd0,0x5043b3d5,0x4095f2c8,0xc9bd059a,0x9a5fe7db, + 0xf65becdf,0x239328fa,0xa67961cd,0xe3102471,0xbbb5dfdd,0xea9e39bf, + 0x133dc5ba,0x8022b6d0,0x5f12c379,0xbed7aa9b,0x00000141 }, + { 0xfd94d941,0x096f0059,0x7d4ff018,0xfc6e9f00,0x779f05e3,0xe63af598, + 0x00483c99,0x4c40f0b3,0x72a19870,0x04d2feef,0x464a4a71,0xdb773b5b, + 0x49367f1e,0x00b6770f,0x2a9fbd2a,0x4f7e0301,0x00000169 } }, + /* 93 */ + { { 0x8a9095fd,0x0df5dd73,0xd3ce857a,0xc4b7a021,0xe5edc767,0x90aa796b, + 0x180a0808,0x56497eff,0x66f10aab,0xb9856e1f,0x39879766,0x31298824, + 0x3ba80601,0x61748cf7,0x555da929,0x07d9076c,0x00000012 }, + { 0x1c44394d,0x0b049a01,0x0ce49e45,0xf5f25ef7,0xb1694265,0x1e3a09f0, + 0x109b33f8,0x2c5bd9fe,0xa30932e4,0x07f2a43f,0xc6cf8af2,0x736abfca, + 0xf3366722,0xadf7fa04,0xfa9d26b0,0x2f1e92fb,0x000000e0 } }, + /* 94 */ + { { 0x63be4d4a,0x9524e4a6,0x66f3cc91,0x1fa57bed,0x7e7a7ccd,0xdd7c93fa, + 0x88c5d1d3,0x70e8cf6a,0x3f251f1e,0xb257997a,0xe3554cf5,0x0a5ec58e, + 0x065a7109,0x68d268d7,0x085089ea,0x7c23d4d2,0x0000004c }, + { 0xbd52d132,0x63ae575b,0x38c81cc5,0x0fb8daa7,0xe4e63b99,0x096a6e51, + 0xb239d387,0x51d6b366,0xa5d49fed,0xed5f8874,0x43a8c07a,0x025091d9, + 0xe4686ae2,0x100f845a,0x7eb4ef5a,0x1af59d74,0x000001c2 } }, + /* 95 */ + { { 0xdd441308,0x5f7bc01e,0x86308890,0x0dc34944,0x759611cd,0x2af38a74, + 
0x4c23ce66,0x11a71261,0xf8bafed2,0x37f317b5,0x4c93e079,0x4efbb9ff, + 0x8ecc52cf,0x880f0edd,0xddc9d82a,0x480cdd2c,0x00000028 }, + { 0xc3f807ac,0xe8f1ca0d,0xbd070549,0x6a3e4fc2,0x91f8bb6c,0xad3d0a14, + 0x3d6dfacd,0xe3ee1cfd,0x5fb46ffb,0xee46b1b9,0x7dd5cfbc,0x5207b3ac, + 0xb1b8e8b7,0xd580c0d9,0xc7bdd11a,0x52c669f4,0x00000084 } }, + /* 96 */ + { { 0xc0ace6d5,0xa42b4747,0xbe7287ad,0xd5acb64b,0x89bc2614,0xf3304899, + 0xff05c71e,0x817fe836,0xd35ac450,0x772eb246,0x375a9c3c,0x7f5fc216, + 0xcbc0d6fd,0xfb6f9e1a,0x720e9733,0x7643c315,0x0000009a }, + { 0xf3845ccf,0x4b2216b4,0x90bc05bd,0x9c174e80,0xd6049037,0x7a550c74, + 0x6358c806,0xbd7220a1,0xaa677b6d,0x838f9c41,0x66e2e08e,0x37332c19, + 0x496f6da5,0xb032875e,0x9c30630d,0x52b274cf,0x0000000c } }, + /* 97 */ + { { 0x8ea58beb,0x6ec2e782,0x3665fa48,0x2b404c1d,0x20b40ff0,0x546d5fad, + 0x29d3e6a5,0xfb5df7b6,0x66c81991,0xf186846d,0x6e2cfe3e,0xbe690bde, + 0x1410d16b,0x97aeb9a0,0xbacc8e92,0x59d81548,0x000000cb }, + { 0xbaf66a23,0xd905d3ad,0x40dfb081,0xc3337387,0x4b00f432,0x6d5535de, + 0x07d3a03e,0xe17fe8e8,0x066bca80,0x29544ff7,0xbadffa55,0x60c2b96c, + 0x45a26ea4,0x9f018d94,0x24a34ffc,0xd5438167,0x0000011e } }, + /* 98 */ + { { 0xbd7f8a61,0x62a873fb,0xbbe580bb,0x5e18cd71,0x667f6980,0xfd5c9eb3, + 0x571d3dc0,0xab8d4f61,0x783f9bc8,0xe2e45215,0x24398b14,0x36c3774b, + 0x74d811b5,0x2db4a363,0x2debe3c3,0x9f7f1297,0x00000138 }, + { 0x798fefb2,0xbb97f21c,0x107baa72,0x9c76fcb5,0xfadbb568,0x12fbf760, + 0xd33ea6c5,0x1a648be7,0x236134a5,0x412a2993,0x8985893b,0x4a3d8169, + 0x3e66ada4,0x6144958f,0x7687b457,0xb4dfc79b,0x00000140 } }, + /* 99 */ + { { 0x7abe5bb9,0x83b14570,0xe51d81be,0xae0cbfd8,0xc9827aff,0x20dadf49, + 0xa687b554,0xc3a72548,0xeeb41733,0x080263fb,0xd3827c63,0x7014fdc3, + 0xb5e3b70e,0x7d018f84,0xfbcf7168,0x1d483e00,0x00000015 }, + { 0x6b578aa3,0x154e3c7c,0xd3043dae,0x511ce9b5,0xb6008101,0x55f89e9b, + 0xf405ac6f,0x4ec31112,0x2008ac7b,0x7e66a4d8,0x25c52fa6,0x73c00d39, + 0x8acac2eb,0xee1b9998,0x60b57453,0xdfa31d95,0x0000008f } }, + /* 100 
*/ + { { 0x251cf8d8,0xcc74a0e0,0x041f2bd2,0xd4d8949d,0x33ebce52,0x0b734a49, + 0x5c5bcdae,0xe1ac5f51,0x16200b93,0xd3ecdfcc,0xa793736e,0x2506a266, + 0xea6e6940,0x585a1c8b,0x9190f935,0x081cdd53,0x0000000e }, + { 0x53e28412,0x055f9956,0xdb27164b,0x0d1526f2,0x1df3adc7,0xcd5625eb, + 0xdd35dedd,0xd2c453ca,0xa838ffe2,0xed442849,0x5c0ce589,0xad20c137, + 0xbd99b609,0x2d5fba81,0x622efb07,0x5be41dcc,0x000001ad } }, + /* 101 */ + { { 0x8f850756,0x563af667,0x52f3b597,0x86d37aae,0x796842f5,0x10d38a53, + 0xf743f997,0xcdaaf99f,0x93f1a8ba,0x2fa755e5,0x409f7cd9,0x1af04e15, + 0xd6d0650b,0x63bf9a0a,0x55abfd9a,0x67b1cead,0x0000000e }, + { 0xb5f43178,0x3660a8e0,0x9cc35b33,0x56bd412d,0x880f6808,0x3d7bfa63, + 0x2e622c71,0x7f372d66,0x6ff82445,0xad7b7be7,0x8db04e51,0x0f2bde80, + 0x4bd15c8d,0xe1e781fe,0xb8e502f2,0x1f475bfb,0x00000194 } }, + /* 102 */ + { { 0xd63543ec,0x79482bf9,0xa117ef3e,0x985cb67c,0x160ccc63,0x8ac50638, + 0x729bdc1e,0x556cbed5,0xa22686df,0xd62ed97d,0xc81eb77c,0xb124cb5f, + 0x72fa2ed9,0x4d7b4f66,0x78335b96,0x60b29aa7,0x00000172 }, + { 0xa43df7c6,0x21bfc7b6,0xbc20706c,0x85acac23,0x345d9580,0xeb6f37bc, + 0xa32a08bc,0x9d8f20d2,0xd1953c5e,0xf08924f6,0xc4f680d0,0x7d25d7c6, + 0x2de9912c,0x64e6a237,0x52ce644c,0xda1c06c4,0x000000eb } }, + /* 103 */ + { { 0x411dd110,0x26677c5c,0x2c991c4a,0x0d6787aa,0xa45666d6,0x53be6a41, + 0xc15f9f15,0x73e716aa,0x0e0cc7b2,0xa93b863f,0x2a624ab0,0xa4057117, + 0x1a39c260,0xe5e7656e,0x2ef6f130,0xaf8d78b5,0x00000046 }, + { 0x70f38dff,0x796214b1,0x123a1105,0x3e35d828,0x957ed812,0x046a44d4, + 0x0da60161,0x618fa9ba,0x54f84413,0xe7cdd2a5,0x19ea95ab,0xf1c2563e, + 0xcb2a30b4,0xc4459e14,0x61ff9aa9,0xc748add6,0x00000183 } }, + /* 104 */ + { { 0x9de58caf,0x32981f39,0x8753ea64,0x05bb80fd,0x2d119486,0xc83f9f24, + 0x03eeb00a,0xf490cf06,0x7c73d79c,0x4037f251,0x724d461b,0x844209fd, + 0x272420cf,0x6b03f6d2,0xb3438fa2,0x6f4bd29e,0x00000152 }, + { 0xc389e51c,0x964d034a,0x6db7d98e,0xacda55e9,0xe913c583,0xb2ae97de, + 
0xfeb03440,0x0793077b,0x9d461e29,0xaa16e378,0x043bf8be,0xb0a67533, + 0xba7d8c3f,0x9d749a42,0x6bb925dc,0x7c41e6d6,0x000000ec } }, + /* 105 */ + { { 0xc5da8398,0x2e9b345d,0xbb38c430,0xbc66841f,0x7c3bb47a,0xce3ac562, + 0x738d2cdd,0x8fbeb12b,0x68731185,0xd4bc2ad7,0xbbd4f4f4,0x9521db1c, + 0xfe4e1b0e,0x2a690cae,0x7bfebe3e,0x375215eb,0x00000194 }, + { 0x2edfd661,0x4cb234f1,0xed52c1f4,0x0149984e,0xd8f8f98c,0x32d27260, + 0x7be38590,0xfe76e4e4,0x95e8b672,0x5435873d,0xf2b00e82,0x916c397f, + 0xbad61eb8,0x3b9bf705,0xae131bbe,0x7ee90182,0x00000000 } }, + /* 106 */ + { { 0x93fbcb5c,0xd36fea9e,0x9fa8529b,0x382be583,0xfd611ba0,0x0b243125, + 0xcd8a2637,0xa59ae37f,0x3d8d4704,0xab78c60e,0x44c41b79,0x1bac243d, + 0xeda49cc5,0xc4001fea,0x83dc7e9f,0x988ea44a,0x000000f6 }, + { 0xf077f79e,0x4d90caa4,0xd9e2590d,0xf4d17601,0xd21b4b77,0x11debbb3, + 0x9037e1b6,0x031b3f60,0x135becf0,0xf113ed82,0xf2903dda,0xf6c01379, + 0xa6f19296,0x36bde7ca,0x9dbbad85,0x57d3b684,0x0000006c } }, + /* 107 */ + { { 0x9abfccb0,0x963fee38,0xb9676e63,0x6c6e2a24,0x84ba6d27,0xf8768f02, + 0x465853d1,0xc38ba3ba,0x1b8ab9b6,0x6e3ab36d,0x47a07331,0x01fc9742, + 0x25233f32,0xfdd41718,0xac61de7a,0x4dacfa81,0x00000021 }, + { 0xeaa3198c,0x365a9f37,0xfc8b99d5,0xcbe8a345,0xd4f5ecbc,0xa427f12a, + 0x0c237514,0xe841ff60,0x28a27b05,0x5d9e8c5a,0x62859ff3,0x2d377444, + 0xea8bde37,0x1c0460ff,0x29cf5bf8,0x0a0e49a1,0x00000181 } }, + /* 108 */ + { { 0x45843c3e,0x688203af,0xaabebae7,0x4601e303,0x624df62b,0x397b08f3, + 0xd21e5aa8,0x5687348a,0x9a242b0e,0x2cf12c73,0x32a76c6d,0xc848ed01, + 0xf52751a2,0xb72aa1c2,0x92c02d05,0xb63296c3,0x000000f3 }, + { 0xc6f3d1f0,0xce4b42ad,0x2f532b94,0x2f0dcc53,0x83443d9c,0x57813335, + 0xdc8dd9cb,0xb50118ee,0xee87192f,0x3039e1a5,0x557419c2,0x9977267d, + 0x30f96b0c,0x462efa4c,0x3cd3c35a,0x454fb796,0x000001f7 } }, + /* 109 */ + { { 0x9d153926,0x10f28194,0x82b57548,0x42e28c91,0x509e94c9,0x4b423b30, + 0xde9d6b57,0xc5acc52a,0x8b3ca314,0xaa746c39,0xc63d5bc5,0x0f4ea307, + 
0xe1ccc989,0x425553a2,0xf76d9194,0x271198bf,0x0000008e }, + { 0x3c8e672b,0xc7900e46,0x3f2dfc27,0x703675cd,0xaf2163c9,0x704951f7, + 0x7aceaab0,0x74d69908,0x7e8d2369,0x482f21a9,0x813dc115,0xdcfbc1dc, + 0x04f6cd13,0x0ce2bc80,0x82bfaff2,0x2a54662c,0x0000003f } }, + /* 110 */ + { { 0x1588a8bc,0x0dcf41e6,0x210c52cb,0x6f48cd0e,0x758e7a45,0x338562bd, + 0x48b9b957,0x1600d54b,0xa6b89b9e,0x461df80b,0x098cc82f,0xf7fd4f17, + 0x14977147,0x167f01cd,0x6116c5f9,0xb1338511,0x00000048 }, + { 0x5d2617f0,0xdeb76333,0x6ecb8606,0x3f9a5772,0x1b91fce9,0xa93c032d, + 0x6c84b997,0xf7a4388b,0x823ca5be,0xbfe80225,0x35a32f6b,0x6f19c028, + 0xe3cb5c58,0xf26cd5ad,0x6d0c1dd9,0x7f5ddc77,0x000001e7 } }, + /* 111 */ + { { 0x6ee764c9,0x3c9feec8,0xb07c82cc,0xd1bec836,0xa005b142,0x6bf1b2e6, + 0x29e8a5ea,0x70ef51a3,0x3ffe241c,0x517d298e,0x72966c28,0xbb389e28, + 0x2c7acc76,0x3a2da8a9,0x732a21b5,0x902c9126,0x0000004a }, + { 0x8f7ce110,0x96c51b9c,0xaeb036f1,0xdcc33a87,0x0a6a59e2,0x82695098, + 0xe78db500,0xceaf26a7,0xc95bb030,0x82f3c384,0x24c42f42,0x6dd6e9f7, + 0x70ac4a0a,0x768dde29,0x03d22efc,0x4aedce4b,0x0000016f } }, + /* 112 */ + { { 0xeded03c0,0x077f032a,0x588ddd4d,0x2684a052,0x9a85be0f,0x6d09bc4f, + 0xe0b9b6bb,0xbdda0c7f,0xf2fb5887,0x19689c7e,0xec3cce7e,0xf8a96960, + 0x768d2ae5,0xb043d9d5,0xdb21219a,0x29c8081b,0x00000068 }, + { 0xde59f006,0x6bf872fa,0xcb97ef5a,0xc2b9ffc6,0x58ae7ef8,0x371915db, + 0xf4ccaa1f,0xc2e23ca1,0x89c27cc4,0x1af8c60e,0xc86bdcc6,0xeee5d7e7, + 0x9bd8de43,0x9225b47f,0x4b24f08b,0x53e7f463,0x000000b4 } }, + /* 113 */ + { { 0xe3048bda,0x54c496d0,0x43c3de4e,0xe2b67499,0x4c2d509e,0xac2049f7, + 0x543c5089,0xb01f691e,0x105a365b,0xcd9960a3,0x78b17049,0x34d93ffe, + 0xf82c9467,0x029f99b3,0x0161a755,0x785c5ea2,0x00000091 }, + { 0x953dbdb6,0xb455f978,0x97eca19f,0xea9e84d9,0x36d4d75a,0x473bd029, + 0xc15276fa,0xa9c17ca8,0x47c76356,0x9cf66133,0x039738d2,0x4a68360b, + 0x69733609,0xd3e430a8,0xe2b27f21,0x0ae532de,0x000001b4 } }, + /* 114 */ + { { 
0x5164cb8b,0x68110e82,0x2552a67d,0x6979af4f,0x8d185527,0xe10d6d0e, + 0xfb64eac4,0xcf6c5787,0xac424592,0x8408163b,0xfce0d810,0x5d8fff37, + 0xda84c15c,0x8b284e49,0x32663ec9,0xed805567,0x00000010 }, + { 0x51f3ee9e,0x106f4030,0xb38adf1e,0x2e8e3ee9,0xa13d6449,0xd3c87a6e, + 0x80e1abb1,0x27b49f45,0x0bfd7298,0xc283d179,0xafc7a35f,0x8fe50fa5, + 0xade3ad4f,0x773da545,0xd9a21df2,0x78bfaae4,0x000001f8 } }, + /* 115 */ + { { 0xabad5678,0xae60d8e8,0xe600c25b,0x0afa72ce,0x4c288e21,0xb9d4e0b4, + 0xd254cf9f,0x64447f76,0x959e2ba5,0x1fb36bc4,0x2961132c,0x393c44d7, + 0xfc140f19,0xd7a8881f,0x8d096648,0x27a86128,0x00000091 }, + { 0x8a9e690c,0xb536c021,0xeab4fa15,0x85dcc521,0xb00ee54c,0x09af4423, + 0xaf3a8e48,0xb3793525,0xb7731d85,0xe1f36308,0x141cfb55,0xb5361d78, + 0xeffc4529,0xea41f29e,0x9f7d2634,0xcf5755b1,0x000000e8 } }, + /* 116 */ + { { 0xd212b398,0x01edb80d,0xd53dd373,0xd0396181,0x8a52fa95,0x0e086047, + 0xa7825e6d,0xad1e6432,0x330ece4f,0xe0185bc5,0xb078936f,0x508f7313, + 0x9e7f6ea3,0x1dc982fd,0xd5556b60,0xdbf3a602,0x000000e8 }, + { 0x279e05bc,0xc3763234,0xf44453d3,0x7f5f40ec,0x7fa30793,0x310c5f4d, + 0x108d7e22,0x5cffad36,0xc2a98bbc,0xf2f01ef3,0xd7d47f80,0x30ab1719, + 0xa9b22e1c,0x7bc9f918,0xe834df94,0xf53dc52a,0x000001f9 } }, + /* 117 */ + { { 0xc183f89b,0xf266b49e,0x5f5806d4,0xd3fb5f02,0x94ec3080,0xd30a42b5, + 0x371cd917,0x4b6b1940,0xb7f7e26d,0xf7541aab,0x2d5b7b64,0xe55269eb, + 0x7f8036c5,0x0e1a85c1,0xda5f2675,0xa0ff0f22,0x000001ce }, + { 0x3a8e11f8,0x602bd56a,0xf5f9ab54,0x29864021,0x0ccc92d7,0xc6742c5a, + 0x523f650b,0xd64569e6,0xf7fabfb4,0xc8e4681b,0xc3c9e6cb,0xb4275947, + 0x38f5ff20,0x2b3952d5,0x1f04aea2,0x818f8e38,0x000001b0 } }, + /* 118 */ + { { 0xe50d90f0,0x3be5bffa,0xf5011cdc,0x4cb3b11b,0xa691dfac,0xe10ca711, + 0x4ea1a773,0x62ec211d,0xe586eeb6,0x5a979ebb,0xa0c2f1fd,0x4df16ab1, + 0xc57bbfea,0xfe9e3f7e,0x5ae526f6,0x1b05960e,0x0000015e }, + { 0x8630e62e,0x1c8e04a5,0x6447e1b7,0x3d00310e,0x43b4447a,0xcf1e6b61, + 0x7462e7a3,0x92abb851,0x0002724d,0x8309ea08,0xe45296df,0x1d805d70, 
+ 0x3d4ed812,0x0f3849b3,0x6834d44e,0x2d6bffbc,0x00000096 } }, + /* 119 */ + { { 0x48e07711,0xd13fe58d,0xd270a3b2,0x70f83648,0x8cdff04c,0x1517892d, + 0x51411f14,0x15bb6578,0x3e4f8a55,0x6c31cd90,0x0413362f,0x73f87152, + 0xeca06d4d,0x2fe025ee,0x954e317f,0x32a6e417,0x000000ad }, + { 0x69d147df,0x7e38c63f,0x710bf37b,0xb69bb06e,0x28d514de,0xb94debef, + 0x8d11c3d9,0x4b2307fb,0x0385c604,0x3b369df9,0xe7800e83,0x68ea2f49, + 0x7d501c1c,0xf028b258,0x5cef7818,0x97078221,0x00000055 } }, + /* 120 */ + { { 0x54c1d751,0x10c351db,0xba0f9512,0x81445301,0xbfdc8bed,0xa77eb34f, + 0xcf23680a,0x498d8138,0xe04f2860,0x928c14a4,0x16a5b6da,0x96192dba, + 0x5f9a9103,0x49dea95b,0x01724102,0x80dd4578,0x00000085 }, + { 0x0e09221c,0xe9072500,0xf21de056,0x62e05b21,0xe0e60950,0x448cafa1, + 0x6f775129,0x657fb97b,0xf1f34aca,0x5d2991bd,0x49ff15d6,0xa66cd5ac, + 0xd049ec79,0xdc1d6897,0xe72baea8,0x388fca84,0x00000067 } }, + /* 121 */ + { { 0xa6ef1dd3,0x6520b49d,0x3ba6cd76,0x391a045e,0xf33d5f48,0x9c84980a, + 0xef07474a,0xe53cf5b2,0x78bfb1ea,0xa35b2e9a,0xeda906fa,0xeca97fd6, + 0x1b9f2cf4,0xf1a93789,0x3ab28589,0x66753369,0x0000010d }, + { 0x73691faf,0x5b510496,0xd57ec618,0xdc73d3a9,0x930a8525,0x7e2921bb, + 0x40b05b69,0x094f571e,0x413bedca,0x5e96a017,0x8d1a6b98,0x9e7d4f72, + 0x3eade8b7,0x55143fda,0xd16e454d,0x859b8444,0x000000fb } }, + /* 122 */ + { { 0x7c667aaf,0x7c22083e,0x4a91ccba,0x33545cb9,0x8ca0e94a,0xca1e9931, + 0xe4eaa0c7,0xc3afff23,0x42f56844,0xa21ac436,0x60d52d0b,0xfcc68a8b, + 0x6a9301d4,0x401a585b,0x907abce1,0x547f762c,0x000000a3 }, + { 0xfbe260ce,0x63dd3ed3,0x80dc01fa,0x2717752d,0x6f1da3e4,0xd5fab75d, + 0x5261f10e,0x5f16864a,0xd20cd6bb,0xbe7b1f63,0x221ac656,0x9d638c10, + 0x673b918e,0x3137b8f6,0x4ada2fb8,0x23eb4438,0x00000174 } }, + /* 123 */ + { { 0x2a1fbcf4,0x194e27c4,0x5facd5ee,0x4c0d285b,0x915e6607,0x75c2ebdd, + 0xef0a6a9a,0x1e696510,0x067cf458,0x13c5afa1,0x7bee1fba,0x2be013c1, + 0xdad279e7,0x85a406d6,0x5142cf59,0x0042951d,0x00000031 }, + { 
0xa22bbc45,0x6a735ec1,0x7f56f4d8,0x4ee5391a,0x236001de,0x305af9d0, + 0xaa2f8d25,0xa8b21851,0x187db78a,0x0e2c36d8,0xa1a888c3,0xcfcc083f, + 0xbd3e7d5b,0xb91dab7f,0xf4fdd023,0x62d85460,0x000000f4 } }, + /* 124 */ + { { 0x4972d703,0xf568ba02,0x39098a03,0xfc44ca1d,0xae28c855,0xe9b8e542, + 0x5b1b4536,0x4fd4f360,0x4c7f7e48,0x2e08b07b,0x2230823d,0x042f3b98, + 0x1889fd13,0xc9ffd313,0xc6c68359,0x56af0652,0x000001bb }, + { 0x06e0f16a,0xedbf05e2,0xd74644a5,0xfc1ac2fa,0x0f92c71a,0xe59a0a98, + 0x36c800a1,0x13ae37d7,0x236178dc,0x5f20efc6,0x2b46ef10,0x443a58b8, + 0x442509e4,0xc9517dcf,0x640ed9b0,0x7d0bb415,0x00000166 } }, + /* 125 */ + { { 0x3d22842d,0x3aa30a61,0xb3c4ece0,0x8c6e00f5,0x6df82b79,0x8764cf87, + 0x78d208c5,0xda92d86d,0xe788854a,0x0a52d391,0xa59b0994,0x499b26fb, + 0x04c5fc9a,0x5dc133ad,0x34e3f134,0xa5c09269,0x000001dd }, + { 0xfad6d673,0x6f0dcac2,0x00f3b3fe,0x6d8fdf05,0x631756e9,0xece71941, + 0x0a4d80e3,0x3990f493,0x31d13001,0xf2aca936,0x75581638,0xee91966c, + 0xe6dd5679,0x6df0f574,0xccd71cda,0xbe124868,0x00000111 } }, + /* 126 */ + { { 0x475cc1b4,0xf644c726,0x2b73978c,0x915fc2f9,0x0e3d7eb7,0x65a7e6d1, + 0xf40c38e0,0xbb44e21a,0xe1ad24fc,0x988662b9,0xc35606e5,0x270ba4dd, + 0x1a4f93f7,0xc3834a2c,0x3362a4d7,0x93d0c9a2,0x00000021 }, + { 0xf769fd7f,0xe2cb7b8c,0x89a213b9,0x1815da97,0x6b910fef,0x7b4f8c56, + 0x26931438,0x2088b309,0x925b37c0,0x477b71bd,0x26a640e5,0xa049a921, + 0xfd21c6ef,0xd3ddf1bd,0x232a56b2,0x9b5f9d7d,0x00000064 } }, + /* 127 */ + { { 0x679a9c35,0xd640adf8,0xcb74d796,0xcdad98e3,0x5f8e9daf,0x464b8ebb, + 0xad4a073c,0x4738614e,0x2edde557,0xbd86c0ee,0x576ce0b9,0x77331738, + 0x4095fb96,0x9b5d3327,0xee09aead,0x72f0aeb3,0x00000136 }, + { 0x64e54ba5,0xa388c76d,0xdc474d21,0x63fe7af1,0xb2a77081,0x7fa3e9d1, + 0xde1240ad,0x0447b49e,0xc720303a,0xd9f64b66,0xe6bd0213,0xb1c78029, + 0x0aa03ea5,0x1caf1c70,0x3bb85d2b,0x179180eb,0x00000103 } }, + /* 128 */ + { { 0xaf2ed12f,0xadbf4f9f,0xf380fd8a,0xce1d19e4,0xa39e81ae,0x0957bdb5, + 
0x626ef6bc,0xf9833321,0x0cf5b28d,0x110ae5ea,0x20392cd4,0xab159450, + 0x6bc67855,0x67c49887,0xa3fd61c6,0xce7e5938,0x0000004a }, + { 0x28c7dea9,0x59c5b9ef,0x0a6a7184,0xd02f95ba,0x8202769c,0x034dc257, + 0x94dd6896,0x213b0b08,0xb5dea95a,0x03730b7f,0x617ca889,0xfe243ed0, + 0xfb1ba052,0x16cf4d17,0x226f96da,0xd8691d6b,0x000001c0 } }, + /* 129 */ + { { 0xbf8015c2,0xaa2edf3f,0xc49502d8,0xe7f8236d,0xa6a43157,0xe890f6e0, + 0xa2d04b0c,0x318ef325,0xa809dbab,0x9cc0668d,0xda67ca21,0xdd26937a, + 0x83febc49,0x8f27c12c,0x3c9b9844,0x87b3db2f,0x00000029 }, + { 0xfd2e3dc7,0x37e7aed0,0x7415fd55,0x498e8bdb,0x58a45f25,0xfc0d6c9a, + 0x209c85d0,0x83d5baba,0xd579e1ee,0x31ec8dc6,0xa502bfed,0x1f4cad0b, + 0x1f41bef1,0xc432e6ce,0xbbffca65,0x3b10afaa,0x00000191 } }, + /* 130 */ + { { 0x53053af7,0xbd9f7df0,0xb28a1cf4,0x60304765,0x7ce90438,0x441778fc, + 0xac8c5ddd,0x8fbed36e,0xfb59ec61,0x27b1313b,0xa1b1becf,0x9d2656ff, + 0x945973a9,0x334e1345,0xc362b595,0x3261888c,0x0000018c }, + { 0xaa7f6ff8,0xf413a414,0x3fab7c7a,0x092aeb88,0x7cc307ba,0xfa1d886b, + 0x2346100e,0xdc81c125,0x02140c93,0x93d4d273,0xe6104835,0xa1ed7e3c, + 0xdf1795f3,0xe2b91ecf,0x369ed416,0x160dc11a,0x00000191 } }, + /* 131 */ + { { 0x8b57d7cc,0x9a72f46e,0x4bf02386,0x3140b0e5,0x05b3a91d,0x886c396e, + 0xa4ec26e0,0x1b9ab3a9,0xc50f58e9,0x742feaeb,0x55e26af0,0x1592c608, + 0xbb1cd9f7,0x943cd476,0xc7f02c89,0x3ed97fd4,0x0000017c }, + { 0xe6d54964,0x53b02503,0xc6a318c0,0xd9bd1162,0x9cc28c22,0x18ff6cf4, + 0x03534640,0xa45c7840,0xb4cc0668,0x8ea3335e,0xf42dbe03,0x7ad727f8, + 0xfdf6c3cd,0xb157e911,0xec992d76,0xa7f894c9,0x000001b3 } }, + /* 132 */ + { { 0xaf09ea77,0x91e6e397,0x75dc25c5,0x26a760b9,0xb94a197b,0x8c040c08, + 0xb68ce619,0x041baca8,0x5bd23564,0xa19a0d15,0xd977b33f,0x86ca5b94, + 0xe5fbd029,0xf31f87f8,0xb1901f99,0xf76c55a6,0x000000b8 }, + { 0x3846ec9f,0x175bf8c3,0x9deaca46,0xf462205c,0xa3108df0,0x92cb5ec0, + 0xcfaed928,0x879db283,0x65049fb2,0x477dc004,0x96ee5031,0x48d24bac, + 0x56adce45,0xa7db6b16,0xab1c684f,0x0110cdab,0x000000fc } }, + /* 
133 */ + { { 0x4d308bf2,0x151b66d8,0xd6638004,0x99013c9f,0xfd383bf9,0x6892df92, + 0x3ffc8efc,0xa10efd84,0x313ea287,0x527e316c,0x3a0df740,0x8ef6e3cd, + 0xf6ebd2a1,0xcb96e430,0xa70ee4ce,0xc1ebecf2,0x0000018c }, + { 0x1a70404c,0x80d14ad7,0xf9ce2a30,0x6ad21dd0,0x3aa3e072,0xb94cbcde, + 0x6363a690,0x0ab59611,0xc6b1e2b4,0xe70bff45,0x66ceec5b,0x1296dd0b, + 0x747757c0,0xd4cb2a74,0x3d7d91e8,0x08988ca6,0x000000aa } }, + /* 134 */ + { { 0xf8db0396,0xaa2dcfca,0xb422da76,0xe8ae8f37,0x96485724,0x652f8349, + 0x7bf1493f,0xf647c3c4,0xb0247a4e,0x8b600b46,0x7aebda8e,0xabf3e439, + 0xa7958df0,0x2e1d231f,0xf881bab2,0x38e692b1,0x000000ef }, + { 0x26cf3047,0x1f3c1689,0x59539858,0xdad14f94,0x293f20b6,0xfde85d1c, + 0xf57abb17,0x2ea5436e,0x1794de38,0x0d1a8ffc,0x2bfecd2f,0x9ba508e2, + 0xdb786042,0x110f0a7f,0x7cde31f8,0x2ade6f64,0x00000196 } }, + /* 135 */ + { { 0xfec78898,0xc996a537,0xde0fa77f,0x0b39de72,0xd34cb08f,0xf6d076ac, + 0xda78d353,0xacd8bb82,0xa0392cc1,0x5fe804d3,0xe581549d,0xab7adede, + 0xc067c6d9,0x883901a0,0x4ed93f37,0x5855ffa2,0x00000191 }, + { 0xbf9ebef3,0x29570e36,0xdf4b3177,0xe21046a5,0xa6816b5c,0xf9b89a95, + 0x288d0e11,0xadf39281,0x3979159a,0xd6baabe5,0x5c8fabb2,0x411afee0, + 0xe5c7af10,0xf192c3af,0xd7dce37b,0xaa72e81c,0x000000f7 } }, + /* 136 */ + { { 0x16c386ee,0x20fa3c0f,0xd4c09839,0xb33b0469,0x876a3136,0x79e0d722, + 0x3c406c06,0x343c0a92,0x4debe27d,0xef220e3e,0x196f00ea,0x09d7b1e1, + 0x24a9dcff,0x4a0f5dd8,0x99c1d085,0x53582ec5,0x000001e2 }, + { 0x5138c7ed,0xcc8ef262,0x6547f88d,0xdec43194,0xdd0a9488,0x2b6e53ad, + 0x8257ebdc,0xeb9f1efa,0x1f08c989,0xc583c6eb,0x40163768,0xf1736911, + 0xdbc20e3d,0x6282ff8b,0x9cbd514e,0x26b81005,0x000000d5 } }, + /* 137 */ + { { 0xa0025949,0x2449522f,0x0bbd8945,0xb26d888f,0xe637216f,0x33442f5f, + 0x472827f6,0xd8ec3b64,0x99fc2681,0x91d8a1a3,0x68c7710d,0x6d232ead, + 0xe51b2762,0x8e5bfe2f,0xfd109fa7,0x0f9f4fed,0x00000004 }, + { 0x6b4a05e0,0x1952ea51,0xf21c78eb,0xcb0d48ee,0x1997dfdb,0x64d36619, + 
0x8b4c21fd,0x0d11b204,0xbe92303a,0xa6f569b6,0x78c5e809,0x2b8f6096, + 0x36805d8e,0x7226b5ab,0xdb349ca2,0xd6cff180,0x000001bd } }, + /* 138 */ + { { 0x943cc612,0xa49f8576,0x832b31c7,0xc914319e,0xcccadebd,0x9225e297, + 0xb0619821,0x4918fb42,0x25b1cc7c,0xaccb3084,0xa646e5f0,0x751d3347, + 0x590e3e22,0xeafb4aae,0x2c4a0008,0x82146038,0x00000151 }, + { 0xbf96a461,0x3c2481db,0xb52a3ba4,0x51c122e9,0x464db08b,0x21c2858e, + 0x6d6a081d,0xb1014b78,0xf533cef7,0x167d3ed4,0x81545f7c,0x6cfb3294, + 0x449b7b9f,0xea46d31c,0x9621c299,0xcfad7613,0x00000081 } }, + /* 139 */ + { { 0x478a7f0e,0xef796327,0xde17705d,0x914183e2,0x572117e8,0xd24a26df, + 0xb7cd52cf,0x3cdb1b09,0xad83c160,0x9e42b9fb,0x709ef8c9,0x6971d2ea, + 0x8ee54ccd,0x1894fc5b,0x34a520fc,0xf757b4e5,0x000000fc }, + { 0x86b62347,0x5a5518cc,0x7bc2a928,0xec51c9d2,0x2966727f,0x2eea2b05, + 0x0ae43e6f,0xbc8a8e3a,0x05ca066b,0x80535b5e,0x8833986d,0x91ffcdb1, + 0x32374cdd,0x2f4a5bba,0x0d202243,0x08763a49,0x00000124 } }, + /* 140 */ + { { 0x4efac14d,0xe498b972,0xa79a9d3c,0xb6f4bf8d,0xd6e07c29,0x0f1e8dbd, + 0x71771538,0xfac30cfd,0x71b03263,0x4c91ed22,0x19b455f5,0xbf938335, + 0x127092bf,0x76a5e789,0xb4813bd9,0xa97674e1,0x00000128 }, + { 0x583e5924,0x29b63c41,0x8f171d06,0x61f9aff1,0xab227a28,0x2b45b3cd, + 0x8a11ab70,0x939d5dda,0xe8db6971,0x2bfb47b0,0x0ec10805,0x562379df, + 0x24ce1801,0xaf5a6481,0x34f94aba,0x8d98c434,0x00000150 } }, + /* 141 */ + { { 0xcfffc80f,0xdea9fe73,0xd43473f6,0xe23e2e9b,0xc9d37ba7,0x27fb3ed3, + 0x7a3fc357,0x733766d2,0x8e04a03d,0xd0db4cf3,0x2bbe0f43,0x8ce01752, + 0xda986f4f,0xd87eb719,0x2fe6b037,0x6d1b50ae,0x00000153 }, + { 0xda40bab1,0x371f5def,0x9b2bda63,0x07d6a8af,0x0d4aca87,0x5e8a5c89, + 0x643ff8ab,0x4d72f0ff,0x4bf8ec2f,0x9c4c10d9,0x0eb93e22,0x36b0eaba, + 0x1d2dfd01,0xbc4b0e8f,0x9d34a082,0x9f252e5a,0x00000142 } }, + /* 142 */ + { { 0x7d0e7020,0x4affd4c1,0xb5482168,0x9b169aaa,0x588f348f,0xdbe01708, + 0x885986bb,0xdaebf6ff,0x15f9c381,0xb33987f5,0x04a94a7b,0x7e455f2c, + 
0xa0ed6849,0x39a41442,0x1ef7798c,0x1c1ad4a6,0x00000154 }, + { 0x072709c4,0x7647b628,0x8810e5fe,0xb330d68b,0xe92e0f63,0xd1bd8874, + 0xf8bea9ba,0x144e4fb9,0x8318981a,0xc15afc18,0xb68c6a07,0xe19c5c82, + 0x36e00b66,0x858c57a2,0x07cb7aec,0x9b255110,0x00000011 } }, + /* 143 */ + { { 0xc887027d,0x121ced27,0x2bfab286,0x6050f335,0x19d511e2,0x6e373c1c, + 0x7f4c69f5,0x02d4c3a9,0x25226bb4,0xe6f356af,0x83e7ac30,0x3b9011c3, + 0x33d8fdfb,0x43b0c23d,0xaf2ea363,0xa8c390f7,0x0000000b }, + { 0x7e851bac,0xc430c3d6,0xa5f544fc,0x8991c389,0x67fba061,0x006bbc64, + 0x97cbdbf4,0xd49d024e,0x7734adad,0x4539b7dd,0x28cb6d2a,0x90ba8f9f, + 0x4de4b3ad,0x7a921830,0xa7b96928,0xb28732ef,0x0000006a } }, + /* 144 */ + { { 0x22ed5986,0x71dab52d,0x58533e06,0xdeee627a,0xcf155fe3,0xe8fee37a, + 0x7ae8b132,0xcd61490d,0x34a08b94,0x2706e185,0xf9c15c30,0xa85ffd52, + 0x51a5ad46,0xd5a224f3,0x54d700bb,0x44d1b6d5,0x000001e6 }, + { 0x862e4e9c,0x96830686,0x48763fe4,0xfe5cd76c,0xc0839caa,0x60309679, + 0x8d83d62d,0xc0e4cbeb,0x11bc4ae2,0x911e254e,0x64fca062,0x96a0d7c8, + 0xe9a27045,0xf5785dd5,0xf3e0412c,0x2f4677d0,0x000001be } }, + /* 145 */ + { { 0xab01a6dc,0x4c0012dd,0xae1adb69,0x391bd6c1,0xb9b05079,0x3ae7daec, + 0x62a1061f,0xc2714f9e,0xa96536b7,0x71978ee7,0x5e17654b,0xeec11bd0, + 0xefab3dd4,0xc71166e0,0x87edbf61,0x0f7aa572,0x000001d7 }, + { 0x51eb5932,0x26ea6f7d,0x5f882ca4,0x354ea0aa,0x7739f7dc,0x175b6097, + 0x9be57934,0xd335192a,0x78545ecc,0x9801f423,0x7b643c9d,0x32b8e256, + 0x23e3abec,0xb9411dd7,0xcf1c6509,0x656dea68,0x000000ee } }, + /* 146 */ + { { 0xa0890deb,0x4d38e140,0xbceb84bd,0xbf7bd87d,0xba041dec,0x51f0ff72, + 0xa6820be9,0xafeec70a,0x8c486298,0x755190a3,0xe7010ec4,0xecdba558, + 0x8c7879b1,0xced91db8,0xef5e215c,0x08de3e4c,0x0000014c }, + { 0x16266da2,0x9c1534ed,0x7b4c9009,0x9ce322eb,0x69927688,0x37decaef, + 0x05c2844d,0x6525097f,0x1ac519ab,0xd23b7e13,0x65a3cc86,0x682ebb72, + 0x628c4575,0x0c531db9,0x73805373,0x2e00e8b8,0x000000be } }, + /* 147 */ + { { 
0x57ed32e9,0x3807c800,0x7c024997,0x427e40cf,0xabb54830,0x58506abb, + 0xce820bf4,0x5649776f,0xb2c43e81,0xb5353293,0xcfef6648,0x671e8353, + 0x903bdca5,0x27217d3f,0xa813fd79,0x40a9c109,0x000001dc }, + { 0x3db21a38,0x6beaa6c3,0xd73ef7e4,0xcae222e1,0xbd1d507f,0x1ff684e7, + 0x587a77ab,0xf5bac664,0x0c64a4d6,0x58c74f62,0x6a7c378a,0x4ca837d9, + 0x3e42e409,0xf43df531,0xfb49e14f,0x8a9a4347,0x0000013f } }, + /* 148 */ + { { 0x992f8923,0x85ab4edf,0x6fd209f3,0xe24aa5e0,0x1b1340ee,0x27be9b87, + 0x91e0bb40,0x2957d11f,0xf3d4c62c,0x425afad2,0xc7ff7aaf,0x2d231286, + 0x0114cbe9,0x96412b2b,0xc3e23529,0x6706a231,0x0000019f }, + { 0x225c02af,0x06b3bbd2,0x3fa3e98d,0x53ebc166,0xb84f482e,0xa6df2b75, + 0x2bfc55df,0x912b4521,0x512a73da,0x30bdbd40,0x3d53eaa4,0xac0f43d9, + 0x0c27fd53,0xfc358fe4,0x919424b4,0x2cb183be,0x000000a3 } }, + /* 149 */ + { { 0x3fa6a746,0xe39b0c2d,0x1d5a24a8,0xe84a7922,0x78cdf2b5,0x70a58914, + 0x30666cb3,0x8a88067d,0xf6d71d06,0xb09a709e,0x0065d184,0x50007a3e, + 0xb8dc9448,0x7046af4b,0xc65493ac,0x2b6a3129,0x000001fd }, + { 0xe45f2771,0xd3d5d5bd,0xf432ed95,0x8542b08a,0xf232a6bb,0x2ecd40fb, + 0xe8beccb2,0x0fcb6143,0xbf8e247f,0xcecc513a,0x8da3039b,0x955d56f7, + 0x56c2a0df,0x9157c619,0x3031fe2a,0xa6d35cbf,0x0000018c } }, + /* 150 */ + { { 0xbe0c4923,0xdd800b1b,0x6902907b,0x046ae740,0x957bd0c7,0x2398b37f, + 0x9655f8b8,0xaa8e1a9d,0x500f4150,0xcd2927fa,0x202e7aee,0x826a9c6d, + 0x9f29692e,0xb4cf58b3,0xbf41577c,0x3093868c,0x0000011f }, + { 0x333ed442,0xadcb5e7a,0x906fef7b,0xae5c8e2f,0x3d98f228,0x2d9b0123, + 0x7ffe125c,0x4632f2da,0xba231835,0x59487731,0x12d2c512,0xa0caae5b, + 0x9857d9c4,0xbf00e658,0x54f200f6,0xc5d10086,0x00000172 } }, + /* 151 */ + { { 0x2fc283e0,0x58954046,0x7ee0880e,0xf7633984,0xb7fd1622,0xfaf1b40e, + 0xf598c5ed,0xecf5151e,0x7e00d9bb,0x6b4d92f7,0xa8c43fd4,0x7543e3b3, + 0x6511d1d2,0x3994e12c,0xaf05b6d3,0xdd841a1d,0x000000c6 }, + { 0x23b991ad,0x23da17e0,0x71fba514,0xaab2b213,0x0ddc1879,0xb417ec5a, + 0x5f63acdc,0x173bc8ad,0x1e2a7d50,0x2fcf5210,0x6106d008,0x63373fd0, 
+ 0x7db012cf,0x1e8211de,0x576545ef,0xa07766d9,0x0000018c } }, + /* 152 */ + { { 0xaf80dfaf,0x8e4347b9,0x9c4667f3,0xa80b631f,0x6ddbc238,0x6ff1db26, + 0xaa8718a0,0x6161e365,0xaf31c35f,0xe7f7ac90,0xfc6846e8,0xc03831d1, + 0x684175b4,0x1e669d10,0x934b731a,0x6da9d620,0x000000c7 }, + { 0xa3e4e78b,0x981f597b,0x55099f9a,0x2c14dedc,0x93088c61,0xbf373995, + 0x9b207458,0x7c568307,0xa2276900,0xc4440c47,0xf7e6daf3,0xb6df23c8, + 0x42929103,0x4f662c25,0x8b3b7963,0xf4ea6db1,0x000000f9 } }, + /* 153 */ + { { 0xced36049,0xc669eb88,0xf41b99f8,0x87a4ffe1,0x6a72e108,0x690b7563, + 0x65a0bb8a,0x67dd6a8c,0x96e42955,0x42cf8c58,0x1aabffad,0x5286b5f3, + 0x8f6f26a4,0x1f7dfaf2,0x0e1ae503,0xc5d9e0ac,0x00000120 }, + { 0xacc10da7,0xafbee3ff,0x944946e5,0x67e2d5f9,0x3c4220ff,0x8ec17e86, + 0xbd6f632e,0xfe6f7414,0xc3fc9ef4,0x4a9e3c0f,0x03bfb870,0x25ff3cba, + 0xbb03342d,0x18fd3600,0x0050cd2e,0x1e63e753,0x000001ac } }, + /* 154 */ + { { 0x8f3d6a02,0xdd83d07c,0x7ef4d0d1,0x71fc143c,0xd4c7af61,0xca994bf0, + 0x827c5cf0,0xc8a93e98,0x2b697882,0x4a102c7b,0x8a55e8ba,0x633c87d5, + 0xcc2d64f0,0x1ae8822f,0x986d01fc,0x2ce9b53f,0x000001c1 }, + { 0x95dc1b79,0x859639fd,0x3f4e616a,0x2728f754,0xede2fb9f,0x6e703c4c, + 0xd50fae9e,0x042f7680,0xc2d530ed,0x0546bc3b,0xcdd598ac,0x00a4006b, + 0xe1294910,0x3f3286c9,0xb6bf9629,0x77782255,0x00000146 } }, + /* 155 */ + { { 0xe30c98fe,0xaf81421e,0xfc2cd705,0xdeb0feb0,0x14df6ad2,0x9b2c4ca6, + 0x9ba314e8,0xd38134de,0x4f04b16d,0xa443deb8,0xf07f8ca8,0xfc556ee0, + 0x3a4f3917,0x3c1c83bb,0xb1adcd41,0x8397dd24,0x00000199 }, + { 0xdf4781e6,0xca01e17e,0x46f1f901,0x32d7c319,0xb53090da,0xa227a613, + 0xa7c8c607,0x2495b1dc,0xddc69709,0x1cf2fbee,0x45608098,0x1d3d82bb, + 0x085134d7,0xcfcddda3,0x96798c41,0x3dd171b5,0x000000d2 } }, + /* 156 */ + { { 0xd4dd7e96,0x97a40f84,0x8409fc0c,0x7114c8ea,0xa9d11393,0xc56f29e6, + 0x8fd8c6d6,0x3b606621,0x00269e7c,0xad3baa86,0x05929d5f,0x1413c6b0, + 0x222e365b,0xc1ad7e40,0x4798aaec,0x6a82621a,0x000001d3 }, + { 
0xc1003c81,0xaeac45c4,0xf43d8602,0x9ef9ef5a,0x60f77469,0x36a65f5e, + 0xbf5d2858,0xf312e7ab,0xc84acef1,0x2f53ec81,0x9d248b52,0x63e32ca2, + 0x81e65c60,0xfe9aa7c5,0x52841973,0xe3686c9a,0x00000017 } }, + /* 157 */ + { { 0x9e90de99,0x0b2efe65,0xad05ab63,0xbe4485bc,0xe14e4892,0xc48a6a52, + 0x22628687,0x2ad85430,0x5eb3db54,0x261f0e95,0xd45e5841,0x48e81863, + 0x8ed75739,0xcfe1ce0f,0x7d84ade4,0xbd6f1ff5,0x0000003f }, + { 0xd1bf968c,0xd43711dd,0x48dfa472,0xd558d7cd,0xe425a566,0x49f09223, + 0x5c26d041,0x0cf83338,0x7c2c1743,0xbe7b81f1,0x5143d9d9,0xe3bdc33e, + 0x94fd3fae,0xf385ac35,0x9fd1811a,0x7551cf42,0x00000113 } }, + /* 158 */ + { { 0x20193bb2,0x4928f55b,0x7310b872,0x96e579d0,0xd345d276,0x5ee06309, + 0xa871868a,0x9a43e432,0x11038683,0x28c113e1,0xa332f108,0x8286ecf3, + 0x0385cbb4,0x3348aa37,0xef158daf,0x698ffcaa,0x000000c6 }, + { 0xf6908745,0xa044c54a,0x6a3353fb,0xa6b336e4,0xd561e821,0x694c2852, + 0x3634917f,0x1b297970,0x81f61315,0x6e1023b9,0xef46a5ef,0x6817dc2b, + 0x8e114f7f,0x93dea0af,0xed72c5bf,0xc3cf3cd5,0x00000136 } }, + /* 159 */ + { { 0x7b080de4,0xbb8799ab,0xd69d8396,0x3b8f781d,0x986f8f63,0x76b42aaa, + 0xa54bc5ca,0x5d74c038,0xa9c2fbb9,0x76fcb605,0x80178930,0x8451b440, + 0x9d286f0d,0x40f00c38,0x0c543263,0x3038e952,0x0000014c }, + { 0x6977aad9,0xc94bc381,0xd7087be3,0xadbfd082,0x875fed08,0x06d0820c, + 0x345656fc,0xe1ce84d4,0x0fd6dd4e,0x71c4d8e0,0x6a5fab40,0x23338b22, + 0x0baeeb6f,0xd477eac1,0x5f80c26c,0xe4db08bb,0x00000078 } }, + /* 160 */ + { { 0x1078342a,0x0111d12a,0x559a1064,0x0534725e,0x0fd3ffdd,0xea459d59, + 0x06f0ac1f,0xcf694a9f,0x3e19bc69,0xf6d24adb,0xb9ddcd00,0x3ce38f5e, + 0xb632dd4e,0x38400f66,0xe15e1c55,0xcab8fdfb,0x00000085 }, + { 0x8d09422f,0x0a943f6b,0x0f988c3b,0x17d29756,0x2ef2e4d9,0x55a441fa, + 0x35f7c13f,0x6743523b,0xedaad3ff,0x274d3407,0x9347242d,0x59411435, + 0x3bb8615d,0x1cb27301,0xbd7794cd,0xa0437004,0x0000007d } }, + /* 161 */ + { { 0x2d712c44,0x824b99a6,0xa6962577,0x148368f8,0xd65e2287,0x8ed68432, + 
0x6f5bc5f8,0x14028306,0x4ec3479d,0xe6cf3121,0x9326db70,0x96db6f44, + 0xca32936b,0xca5ac098,0x2fea21af,0x69e248c7,0x0000004d }, + { 0xa71269fb,0x0aa89092,0x18650b60,0x2f6bdba8,0x9fb55db2,0x1d9cc2a3, + 0x6311e9d0,0x0fceb0df,0x90ac2c1d,0x6faeb79c,0xcb1f372a,0x2393b222, + 0xbc8c4193,0x62a6f3df,0x2fe8e674,0x9dea30b2,0x00000001 } }, + /* 162 */ + { { 0x12b3118b,0x7df689ac,0x6cb6ea56,0xd06ee39d,0x187cd978,0xcfcc22c2, + 0x8d537d87,0xb985b681,0xe9f56db2,0x75845152,0x5e098c15,0x0f839871, + 0x3b212cd2,0xbe96a5c8,0xd9ac1c47,0x3dda0338,0x000001fb }, + { 0xcfa0a9b8,0xf06b7fe0,0xe22dcf75,0x9478bac7,0x136887c8,0xf3815e04, + 0x914c54bc,0xed811dde,0x0f51ea64,0xc8c24160,0x4c870577,0x63914d83, + 0xa8abbcb4,0xed24e552,0x2644f52e,0x9e5eb9e8,0x00000001 } }, + /* 163 */ + { { 0x66d52313,0x1f65a04e,0x4d3f72bd,0xfd694545,0xa6b7ae11,0x2bc0ddaf, + 0x571ab247,0x921f79d8,0xae5a8d68,0xd4c5f966,0xaec5ce13,0xfde17716, + 0xb764bd39,0x70e6eda4,0x990d6783,0xffe94085,0x000001ef }, + { 0xd88f92e8,0xf3fa0e27,0x9c77123c,0xa21ef0fd,0x89274dba,0x6259974c, + 0xb9ba2762,0xd4cfa4a5,0x46ebcaf6,0x10c909d2,0x8f8e2870,0x0317a10d, + 0x453aeea2,0xb0771de1,0x68c6b0a3,0xdf0c4791,0x000000ea } }, + /* 164 */ + { { 0x4c854477,0x11bc1e48,0x8638e47c,0x2bec25b4,0x869c54d9,0x43d4e02b, + 0xbe1e7ed2,0xe318de32,0x6b460c4a,0xf5471eb0,0xaa426afe,0x38ae7bf3, + 0xd8452dc1,0x23ae26dd,0x5782de9d,0x9d3fc1d5,0x00000164 }, + { 0x0ade1979,0xd87cae31,0x3b4bc728,0xa847041d,0x56c3c9be,0x38923c40, + 0xd74ae467,0x36fe182a,0xecbe49ae,0x92bff6f4,0xdc41f9f5,0x6680db80, + 0xe4630715,0x35bac06f,0xd6d07307,0x6d68b4c7,0x000000c0 } }, + /* 165 */ + { { 0x854dfcf2,0xdbe22be7,0xa6ae3bd0,0xee21a7df,0xa521ec46,0xf4633ad1, + 0x41a9484c,0xee94527a,0x2aa123f3,0x1145eb9b,0xcae3ca92,0x5634a82a, + 0xfc85d925,0xe176aca0,0x19082d8c,0x504cf7fc,0x00000078 }, + { 0x3799793c,0xd74ce7c4,0xb5519fb5,0x74ddd618,0x95ff9808,0x2cf6df93, + 0xb8bf61e6,0x00ea45d1,0xdcfcf54f,0x26863613,0x030035b0,0x67423b76, + 0x4028a9cb,0x9fbc7534,0x051a077e,0x7b52ce37,0x000000f4 } }, + /* 
166 */ + { { 0x96bec962,0xebf7d8ad,0x17e0107a,0xd1cc81f6,0x214e1058,0x64c44509, + 0x42394c9f,0x6c298c43,0x1a660513,0xd910052d,0x90df8243,0xc3643754, + 0xfe5cdea4,0x2313be1e,0xd27fb7b1,0x249a60f7,0x00000076 }, + { 0x1cf593a0,0x74975838,0x8364c59e,0x0c9ceefb,0xe05c9991,0x2f5a1333, + 0x421808e3,0x30ea5e1f,0x4f5e8f4f,0x56fb3a4f,0xb6c0cb47,0x2cae6e2e, + 0x08bdcc6a,0x60b307fd,0x0ff8c117,0xee17901c,0x0000001a } }, + /* 167 */ + { { 0x89aa9e14,0xc048336b,0xf676700f,0x66634271,0x906b6980,0x4daa0433, + 0xebb7ab23,0x30247ee1,0xeb59a053,0x969b4aa7,0x8000f4d5,0xd78ef825, + 0x46026b5b,0xe5db38eb,0x7d6856c4,0x06a43e5d,0x0000003b }, + { 0xed2a0ee7,0xaa0ae838,0xf16e8813,0x04bbe528,0x4ea64137,0x8ab6df5c, + 0x06e29867,0x5be80cb6,0xf459ed2b,0xf19b1b72,0x1761521a,0x7a9cce4d, + 0xaa516f3b,0x39aff994,0xb3416925,0x97d92e86,0x00000007 } }, + /* 168 */ + { { 0x5af3a8ca,0x25aeede1,0xa5c351ec,0x33924782,0xf93ec080,0x41e7a3fb, + 0xe6f425b4,0xb04f93c4,0x81e76009,0xe4ec12ec,0x5180ffc6,0x797366d4, + 0x0e0aef3a,0xd293cbb5,0x68d71d91,0xa1496944,0x00000061 }, + { 0x675a67a1,0xf52c541c,0x8f5fe906,0x67d38d30,0xf6be988e,0x2a70bccc, + 0x18589886,0xae03ecbe,0x7067045b,0xecd02616,0x10ca8d96,0x1facdd99, + 0x30c0735d,0x7aa10a82,0x3328f21c,0x2a27e554,0x00000015 } }, + /* 169 */ + { { 0xe6057e27,0x3dd609e0,0xc7a454da,0x87e8b6a7,0x1f32dd5b,0xff599145, + 0xd0ef51e2,0xea397a88,0x25567546,0xc49866a1,0x3228b480,0xea45c8b1, + 0xdd01997a,0x3dbe0e77,0xc51867d2,0x0e2ea28f,0x000001f8 }, + { 0x69d0820b,0x6295412d,0x1ea65a18,0x03173127,0xeb06380d,0xc27c8221, + 0x75fe9706,0x7ffd4efc,0x5a71d250,0x7b396a57,0xc7cb7543,0x61c80051, + 0xad4dbee3,0xe07db4d7,0x9b192d45,0x1c7481f4,0x00000143 } }, + /* 170 */ + { { 0x08e1cc4d,0x5eab2d04,0xad2dc1ee,0xe93758d3,0x5c9c7393,0x0ceb7dfe, + 0xd3379683,0x530d86a9,0xe24f86d7,0xef5283ca,0xf0b1bb0b,0xab5d1a64, + 0x54db4e3c,0x96aabc1f,0x3bc00c59,0x3e3d87cc,0x00000144 }, + { 0x1d60e7b0,0xe50a8213,0x5d33d018,0xfc9b629b,0xfd05338d,0xc54aee42, + 
0xe821c6ea,0x0678f2c0,0x06ac09cb,0xe5c9d75f,0x53018df6,0x83357513, + 0x0bf8c667,0x81ca6fac,0x9d0ae2dd,0x7fc8020e,0x000000e1 } }, + /* 171 */ + { { 0x1baaa5eb,0x8add4741,0x79bd8036,0x02cbb759,0xcdffed22,0xd8680c40, + 0x4e091141,0x1c23a8f0,0x20748b87,0x65d141ed,0x659e9289,0x586a1575, + 0x5006dbfe,0x7c68d7cd,0x22569a74,0xda0ad0df,0x00000148 }, + { 0x7f9069d7,0xc8fcc5db,0x5c0531a4,0x2487d245,0xe9a2db3a,0xc5ab4899, + 0xb4fe9720,0x52bfd538,0xd27f35e4,0x73a04ca4,0xee2dac93,0x7cbbc549, + 0xff3ee7e2,0x0287229d,0x28da9360,0x3179878d,0x000000d0 } }, + /* 172 */ + { { 0x3b66c047,0x89b7e9bb,0x602a3e1d,0x22e65869,0xc8db9c00,0x44f82297, + 0xd08a74a3,0x0e76aca3,0xfcd398de,0xfbf1a71d,0x8320e66a,0x2fbb6eaa, + 0x179c9fc5,0xa82d0ebc,0x4e7ab2b4,0x4e00cf6f,0x0000000f }, + { 0x4890c439,0x424c0e9a,0xbc35a6b2,0x37564a2b,0xd9b7497d,0x95a4479d, + 0x612de942,0xa1ff3f0d,0xe60d0033,0x358627fc,0x522417da,0x815da8c0, + 0xef6b8385,0x506104d4,0xf16e96aa,0x800728d2,0x00000120 } }, + /* 173 */ + { { 0xab039042,0x976f2372,0x9fa084ed,0x10e6978c,0x58bec143,0xd03fdd2f, + 0xfe2045c3,0x3200c101,0xb0a5a928,0xe6868f7a,0xe61faff8,0x26c95d1d, + 0xb7b12265,0xa1e20127,0xc2a5ed17,0x8e63dd78,0x00000089 }, + { 0x22bba4ee,0xbb6533da,0xf496a574,0x3eff6397,0x14f2a6b9,0x409329f7, + 0x1dfdd73f,0xa08248bd,0x69bca1b1,0x62f33f2e,0xba2e0327,0x9a177e64, + 0x75ddf741,0xbc50e993,0x4a56bd1c,0xb87a979f,0x00000095 } }, + /* 174 */ + { { 0x67c1f177,0xe83736a9,0x600133c9,0x1b6d3508,0x6eac9a5b,0x9424bb92, + 0xc27ef31c,0x7a9c01a6,0x122b4870,0xad93bba5,0x9d1ac985,0x9eb94e2a, + 0xd53f175b,0x511c0206,0x5102d914,0xd13eb252,0x000000b1 }, + { 0x675a1171,0xcfe7dbeb,0x16c0d2b1,0xb228295c,0x057c88ca,0x8db25b5a, + 0xd300e9cf,0x73ea9e96,0x269552eb,0xb0e0037f,0x9e0f98df,0xea9d035c, + 0xd290480f,0x860e49b8,0xc036b319,0xa35e9512,0x00000037 } }, + /* 175 */ + { { 0x8f00df48,0xc56729ee,0x11ac8304,0xb89ca7b6,0x8b3a8123,0x497a57f9, + 0xc21ca3ea,0xe0431b19,0xe2bb3ce7,0x45a73deb,0xadc77819,0x2f86cc2b, + 
0xe5eb3df1,0x5ff005e4,0xdd27dcf0,0xf955dd7a,0x0000005e }, + { 0x00ee402f,0xe0c22ffa,0x3b30bb4c,0x5b335e2a,0x643cb101,0x542551d0, + 0x3cd19688,0xc6183f45,0xf0be54b4,0xc6664f22,0x4c20cde4,0xa5f4cfee, + 0x80a4c475,0xdcaa972f,0x59111ed9,0xde4af200,0x0000019c } }, + /* 176 */ + { { 0xd771f428,0x9e9d0bc8,0xe43ca382,0x3ac1ecd9,0xeb93acf0,0x8d5ee480, + 0x065a2a3f,0x16232f81,0x2f0b8a73,0x1fc04faa,0x025474a2,0x4a8df7e7, + 0x3bb15f6f,0x51ac4ff2,0xe0950e52,0x66e21b73,0x0000006b }, + { 0x67a41dee,0x59c98480,0x7b3e2b3f,0x2cfa95ae,0x891454e1,0x54d98386, + 0xeefca6a4,0xf0dddbdf,0x11e9cb75,0x5f691b24,0xfef208c3,0xa9b9e766, + 0x18b33cf6,0xe8df1000,0xd1c174a9,0xb8a55ac9,0x000001c4 } }, + /* 177 */ + { { 0x5c4cccb8,0xa99f5862,0x2ef4d3ef,0x70bf5209,0x89efc878,0x28f4e576, + 0xda14206e,0xa2366f96,0x7c52107d,0x90331a00,0xd4a0f0f0,0x478d4cea, + 0x472a47b0,0xb2899ee2,0x64207549,0xae96534e,0x00000110 }, + { 0xcced05b0,0x2cc1d655,0x01759543,0xabac3f09,0x8e577cd7,0xbaeb70a4, + 0x40e98d6d,0x84b00893,0x603d24f1,0x26983653,0x2572173d,0x6e145883, + 0x611141de,0x1d348b26,0xefa27f34,0xe52257dc,0x0000006b } }, + /* 178 */ + { { 0xc947e655,0x92678f33,0x08923795,0xff0fb76a,0x790239d1,0xb2dfe745, + 0x3cdbb7ce,0xea087492,0x05f6d41c,0x21326db9,0x79dc5588,0x5b1ae9ae, + 0xe9c31702,0xe145340c,0xa2c38a9c,0x07502c29,0x000000c3 }, + { 0xc156ace2,0x0c124f11,0x79ff2529,0x2c170fe7,0x6e1171b2,0x60df9a81, + 0x55de2797,0xa19bca83,0x7c6cc79d,0x1ad927ea,0x1d61f770,0x28590112, + 0x261c06bb,0xfe80c826,0xaa2642bb,0x4050d338,0x0000015e } }, + /* 179 */ + { { 0xeaad87bc,0xc9397829,0x81e84cbd,0xe0ac9367,0x6ade4fde,0xb579c24d, + 0x690d7f56,0x50b9aba5,0xd14fb0b9,0xf09b29d3,0x25a0e7b6,0xd0684f23, + 0x606f4ff3,0x0514e9d3,0xe8ad733b,0xe63bdd26,0x00000077 }, + { 0xe0d25c6d,0x0afd06ec,0x00ba2dcf,0xdd90021a,0x8c5bb398,0x1b025770, + 0x198ff8fc,0x077f06d8,0xb7e2cd68,0x87d50ff1,0x263a3572,0xef75e057, + 0xfa925a9a,0xbf257892,0x739d0e95,0x847d3df0,0x00000111 } }, + /* 180 */ + { { 
0xfec82924,0x52ab9cc7,0xa7220d69,0x1c76dd69,0xa06ef0e2,0xa63527de, + 0x27183904,0xab3e51c2,0x716807c8,0xf4db35ea,0x748f1246,0x8f3ede0a, + 0x41156095,0xf1493644,0x874b38de,0x5f6583d1,0x000000f7 }, + { 0x0b927eb7,0xa39189e1,0xc2e2f127,0xa87c6359,0x7fe966f4,0x0b72c233, + 0x105e5585,0x102b8382,0xe58c39f9,0x63fee006,0x991b5329,0x3f052ee3, + 0xcbaff97b,0x7f5b854c,0x5f805060,0x935e5f6c,0x0000016a } }, + /* 181 */ + { { 0xdfd88d38,0xf19a0355,0xc549df40,0x555cd8e3,0x04d006e1,0x322729e3, + 0xfd0b0ce6,0xf16b706c,0x35f2ad31,0xf156dc09,0xf7a3df9f,0xb30c5213, + 0xa55e5fb5,0x9f29cc92,0x2b858da2,0xa0ecfdd4,0x00000144 }, + { 0x52658a92,0xb5c115df,0xc4281616,0xbce3ed17,0x7fd92a91,0xa5595f70, + 0x9cd5d896,0x663c8bfd,0x5a9472b1,0x0776343f,0xb033e1bd,0x14e44ca8, + 0x1e5c02fb,0x27a1c986,0xcc4ffb32,0xece0f2c4,0x000001b5 } }, + /* 182 */ + { { 0x31211943,0x17127bab,0x5684325c,0x44a8cac6,0xd855fc3e,0xd2fe0b88, + 0xce91eea5,0x47abab0c,0x78ec7d12,0x5d23ddc4,0x0cd9fefa,0xa3986de7, + 0x82655766,0x32c7b867,0xeeaec7fa,0x3e54018b,0x00000087 }, + { 0xb38d17c1,0xc96e86f2,0x71fa040d,0x9cbfbd0c,0xf88499cb,0xe111ab79, + 0xf71ec80b,0x1d47c5ce,0x46c89692,0xacaa3bc1,0x3d316331,0x5f921c0e, + 0xe768765b,0x31fa081e,0x41eff270,0xd5dafd5f,0x000000fe } }, + /* 183 */ + { { 0x4cda1348,0x8af10b9d,0x25c3013a,0xb0769fd2,0x8957c22b,0x450aa5b1, + 0xf5acf1c4,0x5cafd6c7,0x9fef8029,0xcf71a140,0xee089f5d,0xe12029f5, + 0x0fbd2ba8,0x9752a8fb,0x6f70cb58,0x61e2275f,0x00000090 }, + { 0x1fbda16a,0xb70a4ac5,0xf1dfa2a2,0x79910e79,0xd9945f6f,0xba2ce132, + 0xeb4ba4ef,0x450d59ae,0x4bf2d53d,0x6a8e09b3,0xe620c7a8,0x76010204, + 0x0a53c6f4,0x63f8943d,0x87eaf56a,0x14c91d19,0x00000132 } }, + /* 184 */ + { { 0x490d66c3,0xe54fb120,0xa0dc8204,0xeaed7328,0x04b4294d,0xba014c38, + 0x31ddc467,0x3f2fa2ab,0x8342ed11,0x70ff55ea,0x23034e0e,0xb18da72f, + 0xbd8ae3c1,0xadc30dbe,0x3e945a02,0x179bdf6f,0x0000009c }, + { 0x7484c26f,0x46c928ef,0xef2adbb1,0x206b7db1,0x3f58dda7,0x0887f548, + 0x4bc7edb6,0xfde4e20c,0x975cafdc,0x484d121d,0x86beec20,0xc5b59670, 
+ 0xa6d6db67,0xb579aa88,0x41187488,0x22c6d87e,0x00000015 } }, + /* 185 */ + { { 0xc471d4ae,0x0a890757,0x43a1da76,0xfef4b1a5,0x6aa701a1,0xb892b182, + 0x59c65f93,0xbf4d4e52,0xd789df35,0x923af929,0x0b79c3f2,0x3ccb46c6, + 0xcf4cf130,0x95582ce7,0x257f0ec4,0x7da081b4,0x0000011c }, + { 0x9aeef274,0xf92c6ae5,0x1437c083,0xe6c5bf4f,0xe13c86af,0xaa74b023, + 0x2a225360,0xd21dace6,0x22589fa5,0xb3d572b8,0xdfa74b0f,0x3d4a3916, + 0xb12891a9,0xe76cd8dc,0x59f4cfbd,0xa0391a3f,0x0000019a } }, + /* 186 */ + { { 0x203fc3f1,0x054ba69e,0x62106a29,0x09168ccb,0xaad5fa9f,0xb0818540, + 0xbff7ed6f,0xecb8f20e,0xbef94afd,0x2c80a618,0xb0abd1db,0xe25d8ca0, + 0x028e0a7c,0x75e67a41,0xd6e95b9a,0xdd7662dd,0x000001b2 }, + { 0xf289d7ee,0x87dff279,0xeea2205c,0x4d755d59,0xc18adac6,0xaeb0fd54, + 0x7ec01019,0x3a8c46cf,0xb48d70a4,0x6fc90e7e,0x10b39ef8,0x965c53c1, + 0x38545a20,0x455777cc,0x57dd023e,0xa33430f7,0x0000016e } }, + /* 187 */ + { { 0x0ff53d2c,0xfa9f3949,0xb00349b9,0x8dc91596,0xd5997967,0xf10a5014, + 0xa8a6b78a,0x4dd72dab,0x8b517b10,0xef5de540,0xa6d39be0,0x142b90bc, + 0xeda17f70,0xcaeaa3e9,0x06b31118,0xa01689d6,0x0000016d }, + { 0xf46afff7,0xea6ca563,0x34a5e5f3,0x3945c7ba,0xaa998fd8,0xc1ffe4c8, + 0xb63f535e,0x42a60146,0xd1f509e5,0x50816888,0x9f8cd0db,0xd1918daa, + 0x78a36772,0x6505e6bb,0x9cc6dc66,0x4ab03a81,0x000001ef } }, + /* 188 */ + { { 0xd376d986,0x06089d14,0xa2dc35b0,0xd0f4e077,0x53ff2c86,0x1c11709a, + 0x123c3fc8,0xfef4ba45,0x1b656fc2,0x852cd5a7,0x1fefa8bb,0xb57c7489, + 0x48110b77,0x8f05383e,0x52c5a129,0x4b55d3ad,0x0000004c }, + { 0xf3827633,0x5110cff3,0xe00afe96,0x086784d5,0x3ead32fa,0xcb387882, + 0x2b91cd86,0x3dcf4d16,0xe6f3638a,0x078b6a58,0xe8b7fd42,0x33792112, + 0xee5683e7,0x6964044d,0x28e28433,0x3b84210f,0x00000122 } }, + /* 189 */ + { { 0xc3ebeb27,0x6c28a9a9,0x3ef590f8,0xd7bcdcb5,0x4dae7f37,0xe88a2e11, + 0x726ea7c9,0x033522e4,0x8c141388,0x99d50386,0x61621575,0x59b1aeca, + 0xfcc564d8,0x719fcfeb,0x1aeb8e36,0x3a577af1,0x00000043 }, + { 
0x6feba922,0xc3f26ce0,0x475a5693,0x5f6c83ee,0x28bf378e,0x7f796740, + 0xbdc3f6f1,0xd2a5e368,0xa6ed90ae,0x3d034a0a,0x4a47cbd5,0x3b1c3a4c, + 0x4dce2bc8,0xa4f0aa6e,0x74ca00eb,0x97c7af43,0x000001c0 } }, + /* 190 */ + { { 0x79c28de7,0x00377178,0xab9c330c,0x617aa2aa,0x66bc61eb,0x43081826, + 0x4d78b504,0xe0b5b5cf,0x9870fc72,0xd76a752d,0xd40b7bc5,0x3b4689f5, + 0x87f2d03a,0xa97fd867,0xfd6060a9,0x6ab7b5ee,0x000001c0 }, + { 0xffb71704,0xe99eadb1,0x390fe3b1,0x436e58bb,0xab4f19aa,0xeecab82c, + 0xe0f3d9dc,0xda492dfa,0x6e20ad12,0x2a0f54bd,0x7dbbd262,0xaf89fa0f, + 0xe8d2eb54,0xdcc50a1a,0xef7d0758,0x9799f816,0x000000b7 } }, + /* 191 */ + { { 0x104f98cc,0x9ec46462,0x72aedeae,0x45115922,0x7e62186f,0x7ae93dd0, + 0x8d6d69b6,0xd17ce026,0xfd43a8f3,0xb5347608,0x7c0ab797,0xe87f1c13, + 0x139f991d,0x3bf597a8,0xe547e0d6,0xe293a85b,0x0000008d }, + { 0x8ef668b1,0x0982add3,0x611c9764,0xc54e6b2d,0x1c1d4263,0x3ce76b12, + 0xeff64e73,0x3134b28e,0x2871612a,0xaf71a9ac,0xba093594,0x31c88af2, + 0xba9108e8,0x0b649112,0x5cf437da,0x8febc5c5,0x00000113 } }, + /* 192 */ + { { 0xc4a2daa2,0x7e9ca589,0x400f608c,0x18ea703c,0xd5175103,0x6f8cd058, + 0x4abb6f29,0x26493472,0x94296ab4,0x0be553e1,0xac51657d,0x9af9398f, + 0x4f880ea8,0xe232deec,0x67b1e1b1,0x2f81761e,0x00000137 }, + { 0x3a20f662,0x51014bc7,0x49ed9502,0x1fb7e77c,0xb62b9652,0x89f5096f, + 0xa2e8d37e,0x3a659c67,0x5804170e,0x0f2b2a26,0x9ed50a34,0x1674fce6, + 0xfdc3c00f,0xaaa4537e,0x4ce99d93,0xf3c3bfda,0x00000198 } }, + /* 193 */ + { { 0x81614189,0xbab1f5cd,0x24b259f7,0xc7d56c45,0x45fb415e,0xc7baa4b2, + 0x7af6bef9,0x302bc8dc,0x74b48e82,0x91b770e0,0x9b6d1b1f,0x4a1336e0, + 0xe6680c97,0x285c1357,0xc7ccb625,0x59bcb813,0x0000012d }, + { 0x7c019927,0xddad83b4,0x630dfd5b,0xe10f2667,0x31e05d23,0x15dbec5a, + 0x456ac460,0x2aa6e5fa,0x243cac82,0x46956529,0x4dc8c9e9,0xc69c9c7f, + 0xe24a4065,0xadb27e09,0xae41301b,0xdfa7a34e,0x000001cc } }, + /* 194 */ + { { 0x59cb1a7d,0x176a864d,0x6aefb8ee,0x4d864ca3,0x1c22b0d8,0x0ee83acb, + 
0xd980df1d,0x7e80a6eb,0x7f94ced9,0xf582acc4,0x3a72c115,0xa29cd123, + 0xc7107bb7,0xce12a2a8,0x4ed80a30,0x0229ca56,0x00000150 }, + { 0x2f1c180b,0x9774bad5,0xd749aa10,0xd08be998,0x56dbd1ba,0x978c48ab, + 0x0afbea9a,0x6ed3e3e4,0x153dc5fc,0x8a8be97b,0x9be93ed0,0xadc7f095, + 0x2cee23bd,0x8d242908,0xdc2729de,0x417523c6,0x00000016 } }, + /* 195 */ + { { 0x6c14a31e,0x74eeccf1,0xb2de3c2d,0x488e2534,0x7cec43c3,0xf9bb3599, + 0x916ac936,0x4210459d,0x9f7e4400,0x71d15c02,0x44553583,0x8c9c7c12, + 0xec94a467,0xcc97548d,0x3167bad9,0x4ca67818,0x0000014e }, + { 0x8d0312bf,0x033af055,0x54161e66,0xbd1bf4f5,0xfa41781d,0x259945a7, + 0x00eef1d5,0x33494da8,0x79c3b8d0,0x6c505ec0,0x1c9f6e69,0x70ae1ade, + 0x76830aaa,0x0288f0c1,0xa62a060c,0x7f4cfe3b,0x0000000c } }, + /* 196 */ + { { 0x057d6006,0x0d8b447d,0xfd71c8b0,0x38b976e6,0xabcf40f5,0x5e77e029, + 0xf103a783,0x13bee386,0x5e472c4b,0x20a6ac20,0x31fcb194,0x43b045f6, + 0xc00abf49,0xe5dc1d9f,0xa5556b79,0x28c0bc70,0x000001b5 }, + { 0x8a8640b8,0xba9d07ee,0xd0e34012,0x25611023,0xbe24ae89,0xc7ce655b, + 0xfa579dcd,0xe358e524,0x377bbfe5,0x57ce2715,0x3c0947e4,0x64651c6c, + 0xf4a97826,0x5fbd8d50,0xe2e1c15a,0x6fcdd28f,0x0000008d } }, + /* 197 */ + { { 0x5c7202c8,0xb564a2f6,0x5a54b0d8,0x7d634052,0x1434fbf5,0x8414d672, + 0x1d9830a3,0x8114215e,0x5ef0fbe1,0xc7a758d5,0xe6f57f9f,0x5705dcf8, + 0xd92269d3,0x5dd49a56,0xbdb49f97,0x8f015d7a,0x000000f1 }, + { 0xb4799ce6,0x07131110,0x2cbcb7db,0x35bbfb99,0xf7ba21e2,0xc1f00c9f, + 0xb18f49fe,0x009d6913,0xabcf959b,0x8da61951,0x0d42146e,0x0e687213, + 0xae5f23f1,0x55832817,0x9ae7386b,0xc9b5bb68,0x00000143 } }, + /* 198 */ + { { 0x48c74424,0x423328db,0xd19cb2eb,0x32616e11,0x40d6e217,0xe534192a, + 0x0cbdc752,0xdd83a94c,0xd733bb01,0x5c623050,0x5b7a4520,0xcd0d631a, + 0x9a4011c8,0xccdc0a25,0x646e7cd5,0x22f112cc,0x000001e6 }, + { 0x3e1e4c4b,0x47d6e29a,0x9fb1548a,0xd5f82538,0x4fd3e319,0x7e3705b5, + 0x0a08b966,0x8c4ce59a,0xd8cbe8db,0xbca749e7,0xaeec3d75,0xcc4496ea, + 0x8a1a313d,0x17dc723a,0x8ceb9360,0x250ff77a,0x000001a5 } }, + /* 
199 */ + { { 0xfe29bd79,0xa55a0726,0x4f990b34,0x6574a810,0xaad56983,0x6906946d, + 0x50d41fef,0x0e580ab9,0x6e6f7f45,0xbc75b514,0xf0f3718a,0x508cc97b, + 0xa5634087,0x51ba2ca4,0xe64d8910,0x75c39077,0x00000172 }, + { 0xf77ca6bd,0xf37cccaf,0xbdb18df5,0xe0a0df41,0x019e01f7,0x9f46cff8, + 0xaa65d72b,0xbe4f3d44,0x6e3663e9,0x7822d8ac,0x3ef9db6d,0x5f37f922, + 0xabe4a9aa,0x7f0ad39d,0xf69cc8ba,0xa0a57c70,0x00000098 } }, + /* 200 */ + { { 0x00fd5286,0xd9c50cf4,0x72a4b03c,0x1ea5b9d5,0x051ae73e,0xf5e60f9e, + 0x951b3824,0xfe9b5142,0x9fb4d667,0xb034b2d0,0xedc50856,0x4b537a80, + 0x8cb0022e,0x69ee1012,0x6a548aee,0x7c8b9e5c,0x000000ed }, + { 0xd933619b,0x746007bc,0x2b9dfe19,0x0ce7668e,0xcc6e2a2e,0xa9eed5d3, + 0x7eebf32f,0x35a14f5f,0x67cc4f64,0x75cb898d,0x7850c16c,0xcb2185fc, + 0x45f79c96,0x09874a76,0x27db4744,0x7468f8ae,0x00000139 } }, + /* 201 */ + { { 0xc88684f6,0xc5de68ad,0x619a7dbf,0x7c1edaab,0xb27a18f5,0x258d1735, + 0x8ecd89eb,0xb27e7b65,0xd879f7ea,0x3d8889c6,0x67d5befb,0xa8fdc96d, + 0x37bad73c,0xc84d86ae,0xce8e56d7,0xc7e91976,0x000001a4 }, + { 0x6319ffa6,0x5001a540,0x134ec04c,0x0cae64ec,0xd541242c,0x1f69a96c, + 0xbf2caeee,0x9da259ee,0x28bee805,0x88e7978c,0xb8e890e4,0xe9484beb, + 0xfb227fd9,0x0e5246d0,0x625d6318,0x8be2a54a,0x000001b7 } }, + /* 202 */ + { { 0xf472f13a,0xa223554a,0x5733e91c,0xfac993b7,0x96c168a2,0x26afe9f0, + 0x4b127535,0x7cfe761d,0xe77070ca,0x84301873,0xc7e7cdf6,0x66b6aaad, + 0xa1562ed4,0xda2dd5ea,0x39faf8d8,0xa81a2e00,0x00000016 }, + { 0x4e3de3bf,0xa880759c,0x52f3088a,0x0c1e2e11,0xaa7eba5b,0xcb2ded9a, + 0x9f9c11ca,0x4c65d553,0xb0dc5c19,0x0ab9bd87,0xca3f4b61,0xd32f8c96, + 0x28cb5f9f,0x49842fcc,0xb90e21df,0x31ae27cc,0x000000f4 } }, + /* 203 */ + { { 0x6a0ccd0a,0x3b2a0a0d,0x5993b555,0xa3eeec82,0x9de672a6,0xb13486fd, + 0x0da05dcf,0x8d9c5148,0x6739874d,0xc4aa444d,0xe29a35c9,0xd9cf35b2, + 0x89177ead,0xd6bd9b5f,0x2a0470a1,0x9af0f59d,0x000001d6 }, + { 0xba7535fd,0xb2f844c7,0xa842ff39,0x45bd4c3d,0xe951974b,0x5fe149ed, + 
0xfd4453ec,0x6982e997,0xe6c37c0e,0xa63f705d,0xd2c3ef6a,0x09b0f6a9, + 0x1776a8d6,0xbedd3586,0xede11b78,0x4048a46a,0x00000176 } }, + /* 204 */ + { { 0x51a251d1,0xa47c6ee5,0x0d279dfd,0xbef4bf12,0xec518a28,0x4c2d538c, + 0x3880be6e,0x1b2b7887,0x1be9b20b,0xc69ccf8e,0x3796a19e,0xe41dfeae, + 0xfb50bdea,0x25676fc9,0x03e180c0,0x8b815a05,0x0000016a }, + { 0x53f5ef65,0x2ca085f6,0x77b25105,0x61dfbbf9,0xa3346fe6,0x88ea87e3, + 0x1b95f7ef,0x25ddfdee,0x5b65eaec,0x22074e69,0x4c2e023b,0x11869a15, + 0x42e83bb5,0x8601b577,0xfa877e7d,0x1464652c,0x00000015 } }, + /* 205 */ + { { 0x57fa58f1,0x250853c8,0x4ca4c670,0xb58a4e68,0x1b81f40d,0x07b96d0a, + 0x558e8cbd,0xa4651e10,0x42e388cf,0x1a64046e,0x44436088,0x51b0d539, + 0xe26b8fd0,0xc2bf35b3,0x5702cfce,0x4ae78709,0x000000fd }, + { 0xdf53d498,0x3c79bc29,0x1137f624,0x4cf31c4e,0x17a3cedf,0x93b6856c, + 0x6cd9115d,0x2461131c,0x9228cddb,0xab30a453,0x8d202bf1,0xe97757b6, + 0xe6108612,0xa666de7c,0x4f6026b4,0xc200fe65,0x00000051 } }, + /* 206 */ + { { 0xb1a2b4b5,0xea96103c,0x843c0968,0x98dccbfe,0x986ffb5b,0x6a37072d, + 0x169d3ac2,0x2fa07af2,0x771371f1,0x8bb85b9a,0xe7c299ef,0xeae10d34, + 0xe2372efc,0x3d4bdc69,0x8dd856f1,0x378df75d,0x00000039 }, + { 0xde7ff5d9,0x31e902ff,0x325a09ca,0x0e9a85d2,0xf4192fcd,0xd71b93a6, + 0x15b076b3,0xf52a5737,0x6e711d1c,0xd726aa86,0x2c292819,0x0b61b1df, + 0xc8015de6,0x224e575c,0x18b79e47,0x68e893e1,0x000001dc } }, + /* 207 */ + { { 0x6ffeda73,0xb7924ff9,0xa0da2018,0xe709f406,0xf89584df,0x368e20ea, + 0x8355a040,0x0095112e,0xfd777d7c,0x259d4528,0x2bf8f2c8,0xb0c49565, + 0x44c5311b,0x7f631928,0x8466d9d5,0x698d0e4f,0x000000d5 }, + { 0x015d204d,0xe10d64fa,0x6dd10c53,0x7b626bfa,0xa7698c94,0x087f8e63, + 0x05337a56,0x525a6547,0xdf5c782f,0x558e2244,0x855fbaff,0x48aa1e41, + 0x47ee3830,0x48f2218e,0x138463d3,0xf2523959,0x0000004d } }, + /* 208 */ + { { 0xd8695310,0x76f4fd69,0x7e8768ea,0xe28eb09f,0xe0d532a8,0x039c1812, + 0xc572ac79,0xdda67744,0x785d6293,0x1f9800e0,0x3da76bb2,0x2bfe2a5a, + 
0xa2bc7217,0x6ed15b90,0xd1788a8e,0xd80e61bf,0x0000004c }, + { 0x16730056,0xb9f40370,0xdced3d43,0x46f45fef,0x1aa50742,0x0afd763c, + 0xff92ae73,0x21e5c652,0x1bb2063f,0x6ef0830d,0x12d22540,0x18306ecc, + 0x1f15001c,0x4edd9b3a,0xc0cc5424,0xe4eb25b8,0x000001f4 } }, + /* 209 */ + { { 0xa1db5c18,0xed61a714,0x7677074c,0x9454e61e,0x7bf685de,0xe970fbe5, + 0xd2145be5,0x221b0c53,0xee49a5f2,0xb931881b,0x14b11d03,0x00b91afa, + 0x3ec22137,0xc6aefe49,0x526200af,0x50554e94,0x0000013c }, + { 0x7364c92e,0xd42c45e7,0x735218e8,0xe0500265,0x84d3f3c5,0xd281da02, + 0xdbf7646b,0x312f8424,0x485f304f,0xe1a88f2a,0x1127a513,0x583f5631, + 0x1a60e0bc,0xed7950c7,0x4b7b70a4,0x92855e10,0x000000c6 } }, + /* 210 */ + { { 0x644614e7,0x8d06185c,0x4749a424,0x2e906cae,0x2587e528,0x585412ea, + 0xd12857cd,0x3763990a,0xba5593b5,0x770c7f70,0xdd5d2a46,0xc2cf6dc4, + 0x3b69a1ba,0x564da456,0x187895da,0x639f7e14,0x000001c8 }, + { 0xf8589620,0x05c96b02,0x41e44054,0x2fe468a3,0x096ad09c,0xbf22da11, + 0x9c652aee,0xbc73c298,0x547e1b8f,0xcdef9f8b,0x977dbf73,0x7073785a, + 0x7e13552d,0x0a92a1aa,0x3a393d3f,0x22761140,0x0000015b } }, + /* 211 */ + { { 0x1fbfaf32,0x89a5a7b0,0xbe661d21,0x5c5a62d0,0xf5e3b44d,0x47970f5e, + 0xf43bbf62,0x3ea001ed,0x260ae5a0,0xa8e74285,0x2697c62c,0xeb899ebd, + 0x751a7643,0x36a003e6,0xba0725a6,0xef178c51,0x000000ea }, + { 0x9bd51f28,0xaacf8e9f,0xa8712044,0x39febbdb,0x5bfc8365,0x8780ad3a, + 0x10e6f08f,0x408a34cd,0x8241ab0e,0x8104ca10,0x98a662a1,0x843e71ce, + 0x232048d6,0x9dce8514,0x1cf3d187,0x5cba23be,0x000001fa } }, + /* 212 */ + { { 0x2973a15c,0x2fe8c9d2,0xd42979f3,0x66fec8dd,0x0b6afb3e,0x39af4a39, + 0xab65ef22,0x0bb1e436,0x66c5fcdb,0x8f26201e,0x5af4870b,0x3cffe8a3, + 0x2bb44e24,0x65ae286f,0x51dd1722,0xda2e283a,0x00000114 }, + { 0xc1e3d708,0x4a9c9a56,0x1cb0efa6,0x4fe62d3f,0x97e87540,0xf0702984, + 0x3cea46fa,0x138b7d6b,0x83886263,0x0780634e,0x71c30909,0x27e84280, + 0xe5838647,0xf0af79d7,0xb236a267,0xc1b86582,0x00000104 } }, + /* 213 */ + { { 
0xa526c894,0x32ff09ed,0x14ac7d23,0x95abf120,0x3cd92934,0xb6f94dcd, + 0x92e6b556,0xffaaeb12,0x1036c31b,0x193796ea,0x707ff32e,0xa9d237e7, + 0x829d67b8,0xd65a5b0d,0xdb29248b,0x48edb556,0x000001b3 }, + { 0xded46575,0x6ee9f9b2,0xffa69acf,0x496ca08a,0xf16d37d1,0xd5aeb3a1, + 0x789e5d01,0x4a507db1,0xc827cc45,0x05e2ce29,0x2964e677,0x29b6e4a5, + 0x4c0e46f2,0x0563b0ba,0x4bc46485,0xe75c2448,0x000000a3 } }, + /* 214 */ + { { 0xd2f6615d,0x0fcb476f,0xd98da9a9,0x4b7f9b78,0xd2bdf107,0xe2fddf1c, + 0x9b956f31,0x2bda3086,0xb596eadf,0xf3cca2f7,0x355b2538,0x91c09f8b, + 0xc6c846db,0x46f3f6f3,0x2a14642e,0x9bb9398e,0x000001ff }, + { 0xa17bd645,0x5118d4f5,0xdbd6d552,0x57033eab,0x734d0957,0x007e86fc, + 0x5f53c435,0x98ca065f,0xfd27dd19,0x9949d9bf,0x6952d1ca,0xddc4e304, + 0x81ac101c,0x84cab4fb,0x4a56b007,0x46d079f9,0x00000003 } }, + /* 215 */ + { { 0xa6bfdedd,0x95eb8e4f,0x7a74c6f9,0x993a285e,0x3d09a252,0x8bd5d4d1, + 0x19a5f767,0xeaa10be6,0x0cebb340,0xd3db083e,0x1dbf7a83,0xc633a78b, + 0xc30f23e1,0x2664bc3e,0x07a08379,0x6630f8f1,0x000001c9 }, + { 0xdef86a80,0xbbf4cb4b,0x3f8259ab,0x1fa4ec78,0x609532c8,0xa4bf7604, + 0x8b909e92,0x71bb7acc,0x17884160,0xca1d7317,0xca1ab928,0x7f7f14be, + 0x5f8455a5,0xbfea016e,0xbf21e899,0x7b8c76b9,0x0000002d } }, + /* 216 */ + { { 0x4b9f8e7d,0x46860563,0x63fc58a8,0x201176b7,0x2feed68a,0xe7a5da7e, + 0x65183190,0xcc67763e,0xe9377ad6,0x7d7d0102,0x77032321,0xccfc4720, + 0x534bb505,0x573ee031,0x0f1a2769,0x1bf1ef8c,0x000000f3 }, + { 0x0c935667,0x635f5c4b,0x060d2b8b,0x74152c39,0x37c3a574,0xeffaac2e, + 0x0b72e0cd,0xfd5fcc4c,0xf4f60247,0xb743f9b9,0x79e16f33,0x05c2e354, + 0x3074ef9c,0xa2234c47,0x495aace3,0x4092f279,0x00000124 } }, + /* 217 */ + { { 0xb30f9170,0x5bfd7851,0x37fce5b1,0x715aa1e9,0x928437b9,0xcffd55e0, + 0xc32f1273,0x88acd259,0x48be1e34,0x5a145cf2,0x7a5bc62b,0x3a340860, + 0x18156f46,0x6296eb15,0x2774e1c3,0x397fad19,0x000001e7 }, + { 0x9c8225b5,0x362f99f4,0x46b77c4d,0x33efce49,0x8541e91b,0x451df530, + 0x38f3d693,0x0bd2d934,0xe727b54e,0x0b5de2d6,0x7622d940,0x42d929c2, 
+ 0x56f6a94b,0x36ace723,0xfccaf205,0x64a18cd5,0x00000044 } }, + /* 218 */ + { { 0xaba95d63,0x8dbe0aab,0x7b4b346d,0x92780c61,0x0e0d8142,0x6430f863, + 0xb56ef04c,0x875be02a,0x785e3633,0xc28feb95,0xc12c93e4,0xd5401795, + 0xe36f82a3,0x89ff51c1,0x10eeafd6,0x3c48c895,0x0000016b }, + { 0xd4f064be,0x79287eba,0x54ebda99,0x1a77d555,0x623727ea,0x46745ef2, + 0x89f366c6,0xa911f591,0xc59d6ebd,0x7e5435cd,0x7524d213,0x3a84daea, + 0x4395b38d,0xc7b1dd1c,0x1a823c49,0xca13e704,0x0000001c } }, + /* 219 */ + { { 0x874d64b0,0x6399860c,0x1653ce0c,0x3375b092,0xeaa11986,0x16700000, + 0x621cd15d,0x62c67909,0x77d70dcd,0xbe1d7dd6,0x305bd4cd,0xeff0f270, + 0x362f8f30,0x076ec621,0x7e445b78,0x81204816,0x000001d8 }, + { 0x161f9758,0x81749a0e,0xa3c4fce2,0xe60915fe,0x911dd8af,0xf537ce41, + 0x79a51a09,0xfe36a8ac,0x2ca5cf8e,0x67fb54b4,0xe49057f5,0x1bdcae07, + 0xa4244b64,0xb71ff0c5,0x4b606583,0x4815a536,0x00000106 } }, + /* 220 */ + { { 0xef39cc39,0x78c69c3e,0xfa6356d1,0x98304564,0x412fb990,0xbd3c3542, + 0x79dbb2a5,0xa1d531d3,0xe7e75e3d,0x4865f188,0x0b0147b1,0x2dac4e22, + 0x33d29ab0,0xf59e51ca,0x37b074ef,0xc964f7fe,0x000000f1 }, + { 0x0e301262,0x7080c0a6,0x5390a22d,0x9a458060,0xcc8a9029,0xda677f9a, + 0x14c0f1c2,0xdfae9057,0x6e66d9f7,0x3665ff16,0x47846924,0xc866dd8c, + 0xc4cc307c,0xc5afe98f,0xe0bf50e4,0x60e3ba63,0x00000039 } }, + /* 221 */ + { { 0x959ecdb3,0x1a785136,0xf9e959be,0x289af617,0xcde0dc88,0x5145b2b8, + 0x7c079e15,0xfe9070b0,0x50e22415,0xf77f04d3,0x358d6d42,0xb3ab7372, + 0xba7b629a,0x14fd41b9,0x7400fd25,0x7b32d80e,0x00000193 }, + { 0x7147886f,0xe5d80d4d,0x576c81ca,0xe08ced61,0x642717bb,0xe14e8692, + 0xabb4bd21,0x9dcdf198,0x6530308b,0x658be646,0xd99d19c7,0xfbf192da, + 0x304ab126,0x55a3d1b3,0xfa24de31,0x943f4be5,0x0000000e } }, + /* 222 */ + { { 0x7fe9ea48,0xc5424058,0x61b57486,0xaf24f825,0x78719740,0x9d2c413c, + 0x70eb874d,0x27a9be79,0xb62ba3aa,0x43fef8e0,0x2c1bf0ac,0x0a23f286, + 0x4af130e1,0x51c276f3,0xae55cebf,0xf6cd1e9a,0x00000185 }, + { 
0x40369093,0x24defa7f,0x58581e0a,0x11f1d9d6,0xe512ed9e,0x9900bf33, + 0xed120896,0xbf8a8459,0x8b73c399,0x8324555e,0x8f6f54fe,0x54a30569, + 0x3c252355,0x2a9d6da5,0x2a093b31,0xe6a6f904,0x0000016a } }, + /* 223 */ + { { 0x152cdd35,0xb2e123c9,0x86402ef1,0xae6e43a8,0xb9ce5bd5,0x892bf0df, + 0x75804914,0xb4acb84a,0xf502eec2,0x8c7f55ff,0xaa33ef4e,0x9c8a7b93, + 0xfd9d2001,0x06b10357,0x0ba3bceb,0x3e319ff0,0x00000027 }, + { 0xabe360a3,0x182c2f77,0xadfefca6,0x57ef5c84,0x650b6fcc,0x9a4f0ca6, + 0xaaf0b202,0x3f4f8e56,0xa24ef156,0x5c8508a0,0x1ea45f13,0xd8f62fd9, + 0x28036dbe,0xf2c923a0,0x1a4d103b,0x4a9ca4c0,0x0000018a } }, + /* 224 */ + { { 0x5448e339,0x2a3fb798,0x18a39976,0xde8770cf,0x7a69170c,0x1160574d, + 0x2b6067ac,0x4bb05c59,0x848138ab,0xde0d2db0,0x4909e794,0x149dab92, + 0x790315f7,0x83a336b6,0xa335a258,0xcd9074d9,0x0000013c }, + { 0xac1b784d,0xe839c5e0,0xee527ae1,0xab65c8c6,0xa1c88ec0,0xd3c86146, + 0x46c1bf58,0x2201f790,0x3fda502a,0x71cec627,0x225b9065,0xff3f88eb, + 0xc556dfcd,0x6c1f0c98,0x484fa5cc,0xaa3222aa,0x000000ac } }, + /* 225 */ + { { 0xc9b4dfd6,0x17e74bc3,0xf8e76293,0x25ba8053,0x9d8c3520,0x0307dc05, + 0xb85a20b4,0x1c9036cc,0x23871359,0xf2c63f0a,0xca95fb4e,0x1a99d9d8, + 0x9850c6c6,0x3d7c4f39,0x68299668,0x162969c9,0x00000169 }, + { 0xcb63ee53,0x7d13c267,0x75eac353,0x67b12e61,0x191abfca,0xb3369a11, + 0xee1af69f,0x5ad0649d,0x11dc11e7,0x4d7a6f00,0xdb9f9765,0x80f030b8, + 0xf0ab1332,0xa20001a3,0x39d8cc62,0xe17c98d2,0x00000194 } }, + /* 226 */ + { { 0x1d8fe898,0x720d80b4,0x32184534,0x8d7a28b7,0x04f21740,0xf1f3c385, + 0x166aa6af,0x5d381cd5,0xcc560e35,0x9cde6084,0x5e61e2cd,0xcb041f0a, + 0xd9b4951a,0x621116f5,0x7ee2ac2c,0x509e16d3,0x000000c4 }, + { 0x2c6fd79e,0xb82a20c4,0x3af78b0e,0x95b7ee4e,0xbad819ca,0x3d9b63c1, + 0x98552569,0x10d674de,0xf9c19d0f,0x17de64b2,0x47c5e6a9,0xa03fabaf, + 0x2ce2db6f,0x858bc4ad,0x1fc9d18e,0x76c2380a,0x000000c9 } }, + /* 227 */ + { { 0xb064f114,0x91171ef8,0x4f2f0f4c,0x83cb1565,0x57b262b7,0x30525854, + 
0x0f34936c,0x468c6701,0x99a41fed,0xef26d2fe,0xa7f7f6a9,0xf6da2267, + 0xa01bfc1b,0x2563b8db,0xc340ed40,0x14b36c85,0x0000000e }, + { 0x25db67e6,0x5e57e264,0x7f2e905f,0x85df4e89,0x026c4268,0x7832e514, + 0x3e875093,0x312be262,0x3c538691,0x856b5bd8,0x95734f9d,0x5b1cae55, + 0xd5aa4861,0x5a07bfe2,0xce8abb58,0x7a4c96f0,0x000001d0 } }, + /* 228 */ + { { 0x523aa2e9,0x7bf54d05,0xed3d0860,0xc8841e0c,0x7f9bfb69,0x5683f6e2, + 0x162bdf85,0xdcb07f44,0x07b0dcc9,0x62d17839,0x657a536e,0xa2cbb8ab, + 0x7cf47d3c,0x98b9a0d2,0x5eea6370,0xff154d68,0x000001f2 }, + { 0x56b232ac,0x568b768a,0x3f2a52ab,0x4e8d6e36,0x8837fc60,0xbae87a16, + 0xd10a7691,0xebc58a83,0xf9455fbe,0xad5e4af0,0x7d654e2e,0x1a20d6c3, + 0xda7c8255,0x8c40fcb9,0x60d9b931,0x6d7b3cd7,0x000000b2 } }, + /* 229 */ + { { 0xbb2eaf45,0x7b090c3e,0x62ffb92f,0xed24d91c,0xa736f23d,0xbf2a3ea4, + 0x6ff0fde3,0xb5b99ebd,0xca1102f5,0xbca2b55d,0x07e032a8,0xf6203cd8, + 0xa8bf17a8,0x5410b448,0xe1dc55b1,0xb86660a7,0x00000109 }, + { 0x02a2fbd8,0xb148b1da,0x3b22e8a5,0xfed85e8b,0x8712b509,0x1378a0e4, + 0xc6a3e516,0x68560148,0x1633b503,0x7100921c,0x25512711,0x93925143, + 0x07d31047,0x7b4931d2,0x8542e0bb,0x623e722b,0x000000ea } }, + /* 230 */ + { { 0x24972688,0x084823d3,0x003f5762,0x58b83c12,0x6d0d4528,0x194d6690, + 0x2c6f747e,0x84219584,0x0146d89a,0xc8f8a2e9,0x7451bbc2,0x29ec1de7, + 0xf7f284fa,0xf622b6b8,0x7b71e44f,0x83f1dbe9,0x00000060 }, + { 0x999dd56b,0x99649333,0x97a47de9,0x2cfac0ba,0xbbe8fb20,0x6660d8ae, + 0xf61d7bca,0x47c29dd8,0x85adc14d,0x6f5fb51d,0x4f9fd41c,0xe65ac788, + 0xff513e6c,0x1ce69dd4,0xffe59d3e,0x1ace591e,0x00000023 } }, + /* 231 */ + { { 0xa9fda771,0x2e67a438,0x8663100e,0x626f652c,0xe133f23b,0xdfb19e48, + 0x035d2d1f,0x599f88f2,0x8d13e878,0x1723a112,0xfb51ce07,0x890aa292, + 0xbbd9ba82,0xe5f3a70e,0x374514b4,0xdde82673,0x00000155 }, + { 0xd6f59a95,0x08b2b77e,0x02020420,0x93f853e3,0xebac7797,0x52252ac1, + 0xb56b6676,0x6ecdcb99,0x9722a500,0x4abdb9f9,0x04e2bad0,0x26210f3f, + 0x3034dd4d,0x0ca5a0ff,0xdac0b80d,0x333d8080,0x00000041 } }, + /* 
232 */ + { { 0x35a85a06,0xe8510709,0x42ef1b44,0x4e166e76,0xa07b3a6d,0x84a90b71, + 0x30329e6a,0xd6dd6c00,0x3d555259,0x20c4ba65,0x6f8ad05e,0xee3b26af, + 0x2ab4cccd,0x20e3d541,0xa9406424,0x79798934,0x000001bd }, + { 0x8e0c7ff0,0xf2a1d184,0x9543b340,0xbae85efc,0xf51d318b,0xe96431ae, + 0x75878fa6,0xe5d3ed4e,0xc2895f52,0x4d2a29db,0x1f11067c,0x3af27877, + 0x9e7f4ee5,0x6ccde964,0xa56d74da,0x35188da1,0x00000192 } }, + /* 233 */ + { { 0x03d310ed,0xb0832120,0x987b0311,0xd20ee8cc,0x84c558a8,0x9e549d26, + 0xb7167ec8,0x5e25f3ce,0x4bf55bb5,0xacf114f4,0x061c9017,0x819edc77, + 0xdeb343c0,0x759a44e6,0x04c9b5ed,0x58df9f7e,0x00000078 }, + { 0x3bf13222,0x4fa47ebb,0xea07da11,0x1e451dcd,0xc0d8242f,0x1be9fac3, + 0x36eb871e,0x93257d4d,0xbea3190d,0xf49e775a,0x4ebe2b33,0x406d191f, + 0x0c110096,0x67aac53c,0xd381ac78,0x5215cf8b,0x000001f4 } }, + /* 234 */ + { { 0xfa493b79,0x387e8a8e,0x4eb1c2ac,0xb20e270b,0x9ff22320,0x9f393fa0, + 0xa91c393d,0x5ee1baae,0x138a8d96,0xdeda961a,0x97bd50e4,0x69ab238c, + 0x2363c8e0,0xff68d48a,0xce4c4c16,0xaf8e00e5,0x00000158 }, + { 0xcfc509a1,0x6ccdcf06,0xc26cc075,0x60f411ef,0x4d9c57f0,0x6d0cdfd6, + 0x32e99cac,0xa9514853,0x8b8e9510,0x58f9ab3d,0xb10dc3fd,0xa7e98709, + 0x75ef3509,0x8390843d,0x5a9312c7,0x28ccc9d0,0x000001b6 } }, + /* 235 */ + { { 0xe341463f,0x1d934f00,0x150da7a0,0x14c8a6ce,0x4109553f,0xdb4860fc, + 0xa93f4a91,0xc23bde5a,0x2cd58067,0x9f47c787,0x8433dc80,0x1d330054, + 0x75a32a7d,0x0c0be7f9,0x88c75da9,0x08b777d5,0x0000012e }, + { 0x61a10d37,0xdfc12817,0x5c50f5a5,0xed7b6181,0x79477c60,0x28af95db, + 0x33c5310b,0xa0aa2b77,0x53118267,0x905faab8,0x6b41959f,0xf40e9816, + 0x16b37784,0x9ccb4252,0x69866acc,0x6835d77c,0x000000c5 } }, + /* 236 */ + { { 0x2b450a66,0xe9d714cb,0x7dbfdc14,0x1318885c,0xb466a0c0,0x655a8d85, + 0x5bdfc1a6,0x02a21e99,0xe67792d1,0x7a0d7c98,0xb550a797,0x2a01bb57, + 0x5d74d337,0x42c46233,0x88dad495,0x7be4e1c0,0x0000008b }, + { 0x95812273,0x1873b03f,0xee3f757f,0x2e26ed32,0x6da6217a,0x2c710eae, + 
0x261d9f4f,0x9b50b574,0xb7c1da2d,0x43971fa9,0xc4a85de7,0x22c4fb87, + 0xec22137b,0xf72c3451,0x77ba1926,0x1345668c,0x00000173 } }, + /* 237 */ + { { 0x8a3ba183,0x3e3e8c7a,0xfe389fa7,0x4e8cebbb,0x0f9ba60f,0x8ea44687, + 0xcb601a83,0x55176e35,0x12e52db4,0xf90bdc26,0x8f712bf1,0x95f9e459, + 0xbea054cd,0x9bd3200f,0xdd5fd40b,0x2cf19bf6,0x0000017a }, + { 0x66736feb,0x71cf6ca2,0xde7cfe2f,0xbde86f49,0xfc290563,0xc60abce8, + 0x726b6e4f,0xaae8a3ce,0x3f29235b,0xd2382445,0x650ffa5e,0xa4b557f5, + 0x113ef744,0xa1453e54,0x3e426dd2,0x7c676a53,0x000001b0 } }, + /* 238 */ + { { 0x35d96872,0xf5e603f2,0x3fa5b8ca,0xab1a23cc,0xe988dc5f,0x5459871b, + 0xd430c0bd,0xe32e8489,0x764d9cc3,0x7ec269e0,0xf2c0c40d,0xf7238212, + 0x887b83b4,0x2d946183,0x2f18a411,0x281fa671,0x00000010 }, + { 0x64858b37,0x8028048f,0x357de5d9,0xe0e149af,0x619ebb18,0xb2218791, + 0x9f2b0ba0,0x210200b3,0x1039cbae,0x5a87eae6,0x39579d1d,0x4efdcddb, + 0x2788515e,0x1b388eaa,0xc81878aa,0x1a552c3c,0x0000002c } }, + /* 239 */ + { { 0x0ea723dc,0x7ac7f500,0x42b15231,0x0a5f04f4,0xbe885c86,0x63d49445, + 0xff119702,0x61f9993f,0xc4c58cea,0xc3fba45c,0xb9cd6036,0xe6d151e6, + 0x57b923bb,0x75a3ab15,0xceb2fd46,0x4ec07c52,0x00000147 }, + { 0xed88239d,0xc46a3d32,0x835ae694,0x0d1b8ae6,0x9feeb2e7,0xf4fde325, + 0x43bc0bb5,0x223bf71c,0x8f62a705,0x3cd220b7,0x9fe799a5,0x2224860e, + 0x24ab7f93,0xd8558703,0xb594958b,0x8e0f7330,0x0000010f } }, + /* 240 */ + { { 0x3c67d520,0xaf35c7bb,0x23fca9ec,0xd8f4958b,0x8bbaa808,0x0778f194, + 0x2135e8ae,0x418c30ce,0xc888eff7,0xcdd8d9a9,0xf73144ab,0x72075df0, + 0x4506a534,0xb549c895,0x5fbb7fc5,0x4ef38979,0x0000011c }, + { 0x43f5e698,0x3fe2c9ac,0xe38a5e3a,0xce77fcbc,0x3089c2e1,0x6d05c90e, + 0xac1d5801,0x5a74f3ff,0x381b9d2a,0xaeeda220,0xf5f3960b,0xd958b143, + 0x0db7abbe,0x65ffd051,0x7a05b718,0x8e97e680,0x000000ce } }, + /* 241 */ + { { 0x8ce86a83,0x2251e61b,0xbf7e7160,0x8604159f,0x48f03377,0xfc127dd7, + 0x45052242,0x87cb2c37,0x934ea09b,0xbd4950f4,0xc4679441,0x5146c403, + 
0x23ba416a,0xe8ad4710,0xaf638eb1,0x89b81a60,0x000001b3 }, + { 0xe8150c69,0xe699934e,0xe27c14bc,0x74f75908,0x6a0194ff,0x5dc0a891, + 0x1bd51b76,0x38f49d32,0x18779630,0x6bc3305e,0xfd3b4a68,0xfe2f3fbf, + 0xd7caf189,0x1409b377,0x9b8f109b,0x029ea13b,0x000001b3 } }, + /* 242 */ + { { 0x25a2fd88,0xef7938d2,0xceba0603,0x890f2f7c,0xd7a6dff4,0x4c3e1c80, + 0x2883f986,0x00c78f36,0x998e5305,0xed92b592,0x325ddc73,0x018a8f1b, + 0xd5d3708a,0x6dffd987,0x0d1f28bb,0xdcd3554f,0x00000059 }, + { 0x23a74e7d,0x17c6e41d,0x5db32df6,0x94b61ebe,0x9e7ffa0b,0x3c2fffa7, + 0x2ebb7a0d,0x473662b7,0x01adf9c3,0xa86415ee,0x54679264,0x1502c326, + 0x2fa09c57,0x16911349,0x24749086,0x897f34aa,0x00000195 } }, + /* 243 */ + { { 0xabadc253,0x4845d359,0xc797c95e,0xe054b92c,0x9a218212,0x22a9b5bd, + 0xa52b8827,0x9bb80a5e,0x2e61c676,0xea38e78e,0x08b0f8b3,0xfb274b1a, + 0xdb9d854c,0xb6aa42e3,0x56012d73,0x8ba22523,0x00000163 }, + { 0x75c8c576,0x7cec0e6f,0xe4bc7dd2,0xabb20e7c,0x69d80726,0x0958a0c8, + 0x8a023eb7,0xa908c66a,0x76110b15,0xca9f50ea,0x186f61a6,0x668c9994, + 0x2a0a69d8,0x9ddf22ed,0xbbf8a10f,0xbfee1897,0x000001e0 } }, + /* 244 */ + { { 0x48319e4f,0x26d86818,0x5a586fa0,0x6be6f6b5,0x26713265,0xbef5d886, + 0x98529cfa,0xac252ac5,0x62b29cfb,0xe7cc45f1,0xa2a6358d,0xee050609, + 0x2940ac70,0xf7cb9ca4,0xa885b1f0,0xfb44aaec,0x000001ad }, + { 0xe798678e,0x66b7a936,0x99540438,0xca01e103,0x816860b7,0xf2491e37, + 0xb745d857,0xeeffd483,0xa4705ed6,0x5dbb3628,0xb2a5d0f7,0x57d68d49, + 0x2389fee3,0xd1a8529a,0x1a7fd686,0xdbbc2549,0x000001ad } }, + /* 245 */ + { { 0x969686a3,0xe10cba20,0xe3c053f5,0x308b1c55,0x26f47102,0x1712b134, + 0x49033038,0x1f9165b1,0x2d01527b,0x45b72017,0xaa9a34e2,0x6fcf6647, + 0xb0be35c8,0x51f54b94,0x5a15e382,0xfccb22a5,0x000000e3 }, + { 0x5b4dc0be,0xaa71e4ec,0xdb1cd5c4,0xbb136248,0x046e1007,0xf36bff43, + 0xda9c99a3,0x5a6806d7,0x8349bc50,0x9cbfc6ee,0xe13e0850,0x26871e73, + 0x67f448c1,0x5e6aa227,0x2da7baf9,0xba77787c,0x000001b9 } }, + /* 246 */ + { { 
0xc5a73375,0x1abe58ee,0x7a8ac438,0x175df69d,0xceca835a,0x2cf3150a, + 0xf507d30f,0xb87b0609,0xc60b0424,0x9ae53a2b,0x410f90ec,0x4931e182, + 0xadd689bb,0x452c7d0f,0x47631a8e,0xab453491,0x00000013 }, + { 0x8c84f3af,0xaf2dd856,0x1baae33e,0x829dc092,0x8b96b070,0x46542a85, + 0xe8a82516,0x42260d40,0x5c35322b,0xb9e5edac,0x39eda0d2,0xbca79560, + 0xb962b90a,0x86bd07c6,0xb1ec5302,0x2e22dac7,0x0000010a } }, + /* 247 */ + { { 0x239d8f0a,0x665fc09d,0xab8a1021,0x92b2e03c,0x0173477b,0xe4369768, + 0x8e361604,0xab38ed9f,0x9eb061be,0x79b0091d,0x3e845670,0xcd422654, + 0x2fe1a2e0,0xa0f77ec7,0x760a030f,0x1d242162,0x00000093 }, + { 0xf8646bc3,0xfa9f834c,0x40ae96f9,0x7df94a52,0x379177d1,0x901c3890, + 0xffeb66cd,0x9dfd0644,0x77b92465,0x81aec2ec,0xcd981d4f,0x2df3b7f2, + 0xf377b093,0xc9bc3f69,0xdd859d8b,0xdaef34f3,0x00000125 } }, + /* 248 */ + { { 0xa2c123bc,0xac08451b,0x0818fa54,0xd1e83a68,0x98957b8a,0x56dd5702, + 0xf0f12f16,0xcc7f2e34,0x0a9fa14d,0x1f6a9c33,0xb2fe782c,0xefc9a2bb, + 0x709f54dd,0xd319c697,0xd6460a53,0x0b8238cb,0x000001dc }, + { 0x44dfb6f6,0xf6492901,0x6e401d26,0x270d7cb4,0x48537ad8,0x1a70a40e, + 0x70d8dbd9,0x84d661b5,0xf170d58b,0xca27223a,0x6344e1d2,0xeeb4cf14, + 0xab9de1fa,0x2255fc95,0xdbdc5ea7,0xcd6e110a,0x000001f8 } }, + /* 249 */ + { { 0x78b8a0a7,0x2a57c6b9,0xe833edea,0x24b4aeb6,0x4bd13fe7,0x9e4617c1, + 0xfc2e8ee4,0xc4186888,0xfb147eef,0x8d398a49,0x2e662cfb,0xe9f191f1, + 0x958ba2ec,0x61872289,0x00b8d50d,0xbd6d0f1b,0x0000002d }, + { 0x24c93cc9,0x895cfdfe,0xb9e718e7,0x29ed7780,0x38baf7eb,0x01c8ba58, + 0x4ddcbf69,0x0225387e,0xa180d6bb,0x64b250bf,0xc947c7c2,0x6d68e548, + 0x9923f3cd,0x82a7b632,0x2d103cd2,0xb8f03613,0x0000000f } }, + /* 250 */ + { { 0x8cd9d494,0x8198b3f0,0x94f4f9f3,0x9b2065b9,0x3c738fa9,0x7664a220, + 0xd8d229cb,0x199f4c14,0xc51c54b3,0xddad75c4,0xd213a332,0x9a32ce0b, + 0x888c7b2f,0xf3a21085,0x5b1ff20a,0x6defa362,0x000000d1 }, + { 0x19a296eb,0x44e00548,0xd1a91313,0x1d94ff15,0xfeaa454f,0xd7dead2b, + 0x4d40bd7f,0xae65a803,0x1801a4af,0x604f147e,0xa5e0de77,0x983048f9, 
+ 0xff572ca0,0xa3b19ca5,0x1821d117,0xa237dba7,0x000000e0 } }, + /* 251 */ + { { 0x91630ee8,0xedbabf84,0x05eb5301,0xde6589c2,0xa051f47b,0x9f7d2b2d, + 0x212bbe81,0xaeaa9f96,0x94292124,0xdced3d5e,0xf4435e5b,0x691f5b89, + 0x9411f66f,0x19604c33,0x4356f0da,0xb7fc09ca,0x000000e6 }, + { 0xf74f811c,0x1294e413,0xdf8d8ddb,0x1a42d831,0x963418c9,0x27f57217, + 0x88ebcdec,0x5fde5218,0xea305bc9,0xfdd5e06e,0xac668b61,0xed1e6088, + 0xeb811861,0x333af016,0x15ddcebc,0x5ecb192d,0x000001c9 } }, + /* 252 */ + { { 0xe0bde442,0x927b37a3,0x66f7a73e,0xe0543fe8,0x8ed10c2e,0xd30d9d20, + 0xa6617a32,0xaf79c341,0xd1d5cf8b,0xe7367870,0xe3abcf8b,0x02d0dce9, + 0x772b5e7b,0xfe23d2dd,0x1ffc70c5,0x29fceea0,0x0000010b }, + { 0x62d803ff,0x31bcae4d,0xdbc306a9,0x93ee913f,0xd8c10662,0xaf1de7ab, + 0xe7a6d658,0xd485782a,0x102f4e06,0x9126592e,0x136fafe6,0x91a3127f, + 0x88371213,0x46b93440,0xa31e1634,0x53bb4380,0x000000ba } }, + /* 253 */ + { { 0xca5636b0,0x62e517fc,0x6aba15c7,0x4296e021,0x212e7b2d,0x5aa8fd7c, + 0x5717ad84,0x9517ce6d,0x98b2f357,0xe762b85b,0xdf59b07c,0x42f996b5, + 0xf37ef6f0,0xf3732abb,0x4542b489,0xa5d145ea,0x0000015d }, + { 0xaa7f6e3f,0x1e77c55e,0xaa4a05bc,0x3f4d99a7,0x45828227,0xa56d7d77, + 0x77b748fb,0xdb0895fb,0x0629f5d1,0x1c484cce,0x359803fb,0xf5b1c90a, + 0x1720b8d0,0x43ac4f29,0x72ac13f2,0x8c10bfe8,0x000000e9 } }, + /* 254 */ + { { 0xc06c4fd6,0x9d1c4785,0xd25c2b9d,0xbf4b9025,0xd4982f24,0x04135eb1, + 0xba4fef2b,0x3ab3edc2,0x98de07ab,0x55a5239f,0x096f4b7d,0xd5fc49ab, + 0x3844c815,0xc50a2960,0x15676b2b,0xdb1148d0,0x00000047 }, + { 0x10f3bad9,0xc49f9cc5,0x022901d4,0x490888fc,0xc47b44df,0x917a55eb, + 0xf39f2b68,0x20b2ebc6,0x04e9962a,0x0c58e3af,0x573dd5b7,0x52ab7c1b, + 0xa329f76c,0x2b54add6,0x82f4ca3b,0x59dad1eb,0x00000108 } }, + /* 255 */ + { { 0xa182d1ad,0x662c4128,0x20916c45,0x7751796e,0xba681647,0xa7704272, + 0xb92c85c1,0xfac8b0fa,0xaefb2e07,0x207ab2df,0x7861b32d,0xc73530a0, + 0x88aed145,0x63dbed65,0x0a53a49d,0x547bcdca,0x000000bd }, + { 
0x87056b51,0xa7c1382f,0x130f9912,0xc3d91edb,0xd3805b42,0xf7c7de46,
      0xfd31a995,0x456101eb,0xcd3fb8aa,0x1efd22b4,0x9eb17bce,0xfe391df7,
      0x616c0c32,0xb4d4c0c6,0x711beef4,0x19f023be,0x00000112 } },
};

/* Multiply the base point of P521 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * Stripe implementation.
 * Pre-generated: 2^0, 2^65, ...
 * Pre-generated: products of all combinations of above.
 * 8 doubles and adds (with qz=1)
 *
 * r    Resulting point.
 * k    Scalar to multiply by (17 32-bit digits, least significant first).
 * map  Indicates whether to convert result to affine.
 * ct   Constant time required.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
static int sp_521_ecc_mulmod_base_17(sp_point_521* r, const sp_digit* k,
        int map, int ct, void* heap)
{
    /* Thin wrapper: fixed-base multiply using the pre-computed stripe
     * table (p521_table) defined above. */
    return sp_521_ecc_mulmod_stripe_17(r, &p521_base, p521_table,
            k, map, ct, heap);
}

#endif

/* Multiply the base point of P521 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * km Scalar to multiply by.
 * r Resulting point.
 * map Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +int sp_ecc_mulmod_base_521(const mp_int* km, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[17]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 17, km); + + err = sp_521_ecc_mulmod_base_17(point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_17(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the base point of P521 by the scalar, add point a and return + * the result. If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, + int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[17 + 17 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (17 + 17 * 2 * 6), + heap, DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 17; + + sp_521_from_mp(k, 17, km); + sp_521_point_from_ecc_point_17(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_17(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_17(point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_17(point, point, addP, tmp); + + if (map) { + sp_521_map_17(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_17(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +/* Add 1 to a. (a = a + 1) + * + * a A single precision integer. 
/* Add 1 to a. (a = a + 1)
 *
 * Propagates the carry through all 17 32-bit digits.
 *
 * a A single precision integer (17 digits, least significant first).
 */
static void sp_521_add_one_17(sp_digit* a_p)
{
    /* Pin the pointer argument to r0 so the asm block can use r1-r4
     * freely. */
    register sp_digit* a asm ("r0") = (sp_digit*)a_p;

    __asm__ __volatile__ (
        /* Digits 0-3: add 1 to the lowest digit, then carry upward. */
        "ldm %[a], {r1, r2, r3, r4}\n\t"
        "adds r1, r1, #1\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4}\n\t"
        /* Digits 4-7: continue carry propagation only. */
        "ldm %[a], {r1, r2, r3, r4}\n\t"
        "adcs r1, r1, #0\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4}\n\t"
        /* Digits 8-11. */
        "ldm %[a], {r1, r2, r3, r4}\n\t"
        "adcs r1, r1, #0\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4}\n\t"
        /* Digits 12-15. */
        "ldm %[a], {r1, r2, r3, r4}\n\t"
        "adcs r1, r1, #0\n\t"
        "adcs r2, r2, #0\n\t"
        "adcs r3, r3, #0\n\t"
        "adcs r4, r4, #0\n\t"
        "stm %[a]!, {r1, r2, r3, r4}\n\t"
        /* Final digit 16. */
        "ldm %[a], {r1}\n\t"
        "adcs r1, r1, #0\n\t"
        "stm %[a]!, {r1}\n\t"
        : [a] "+r" (a)
        :
        : "memory", "r1", "r2", "r3", "r4", "cc"
    );
}

/* Read big endian unsigned byte array into r.
 *
 * r    A single precision integer (little-endian digit order).
 * size Number of digits available in r; digits beyond the converted
 *      data are zeroed. (Note: digits, not bytes.)
 * a    Byte array, most significant byte first.
 * n    Number of bytes in array to read.
 */
static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int i;
    int j;
    byte* d;

    /* Consume the input from the end (least significant bytes first),
     * packing 4 bytes per 32-bit digit. */
    for (i = n - 1,j = 0; i >= 3; i -= 4) {
        r[j] = ((sp_digit)a[i - 0] << 0) |
               ((sp_digit)a[i - 1] << 8) |
               ((sp_digit)a[i - 2] << 16) |
               ((sp_digit)a[i - 3] << 24);
        j++;
    }

    if (i >= 0) {
        /* 1-3 most significant bytes remain: place them into the top
         * partial digit byte-by-byte.
         * NOTE(review): indexing r through a byte pointer assumes a
         * little-endian sp_digit layout (true on ARM32 here) — confirm
         * if ever reused on a big-endian target. */
        r[j] = 0;

        d = (byte*)r;
        switch (i) {
            case 2: d[n - 1 - 2] = a[2]; /* fall-through */
            case 1: d[n - 1 - 1] = a[1]; /* fall-through */
            case 0: d[n - 1 - 0] = a[0]; /* fall-through */
        }
        j++;
    }

    /* Zero the remaining high digits. */
    for (; j < size; j++) {
        r[j] = 0;
    }
}

/* Generates a scalar that is in the range 1..order-1.
 *
 * rng Random number generator.
 * k Scalar value.
 * returns RNG failures, MEMORY_E when memory allocation fails and
 * MP_OKAY on success.
 */
static int sp_521_ecc_gen_k_17(WC_RNG* rng, sp_digit* k)
{
    int err;
    byte buf[66];       /* 66 bytes = 528 bits; enough for a 521-bit value */

    /* Rejection sampling: draw 521 random bits (top byte masked to 1 bit),
     * accept when value <= order-2, then add one so k is in 1..order-1. */
    do {
        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
        if (err == 0) {
            buf[0] &= 0x1;
            sp_521_from_bin(k, 17, buf, (int)sizeof(buf));
            if (sp_521_cmp_17(k, p521_order2) <= 0) {
                sp_521_add_one_17(k);
                break;
            }
        }
    }
    while (err == 0);

    return err;
}

/* Makes a random EC key pair.
 *
 * rng Random number generator.
 * priv Generated private value.
 * pub Generated public point.
 * heap Heap to use for allocation.
 * returns ECC_INF_E when the point does not have the correct order, RNG
 * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
{
#ifdef WOLFSSL_SP_SMALL_STACK
    sp_point_521* point = NULL;
    sp_digit* k = NULL;
#else
    /* Second point only needed to hold order*P when validating keygen. */
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521 point[2];
    #else
    sp_point_521 point[1];
    #endif
    sp_digit k[17];
#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521* infinity = NULL;
#endif
    int err = MP_OKAY;


    (void)heap;

#ifdef WOLFSSL_SP_SMALL_STACK
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, DYNAMIC_TYPE_ECC);
    #else
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, DYNAMIC_TYPE_ECC);
    #endif
    if (point == NULL)
        err = MEMORY_E;
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
        infinity = point + 1;
    #endif

        err = sp_521_ecc_gen_k_17(rng, k);
    }
    if (err == MP_OKAY) {
        err = sp_521_ecc_mulmod_base_17(point, k, 1, 1, NULL);
    }

#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    /* Validate: order * pub must be the point at infinity, and the public
     * point itself must not be at infinity. */
    if (err == MP_OKAY) {
        err = sp_521_ecc_mulmod_17(infinity, point, p521_order, 1, 1, NULL);
    }
    if (err == MP_OKAY) {
        if (sp_521_iszero_17(point->x) || sp_521_iszero_17(point->y))
        {
            err = ECC_INF_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        err = sp_521_to_mp(k, priv);
    }
    if (err == MP_OKAY) {
        err = sp_521_point_to_ecc_point_17(point, pub);
    }

#ifdef WOLFSSL_SP_SMALL_STACK
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point != NULL) {
        /* point is not sensitive, so no need to zeroize */
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}

#ifdef WOLFSSL_SP_NONBLOCK
/* State carried across non-blocking key generation calls. */
typedef struct sp_ecc_key_gen_521_ctx {
    int state;                          /* resume point: 0..3 */
    sp_521_ecc_mulmod_17_ctx mulmod_ctx;
    sp_digit k[17];                     /* generated private scalar */
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521 point[2];              /* [0]=pub, [1]=order*pub check */
#else
    sp_point_521 point[1];
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */
} sp_ecc_key_gen_521_ctx;

/* Non-blocking version of sp_ecc_make_key_521. Returns FP_WOULDBLOCK while
 * work remains; call again with the same sp_ctx until another code is
 * returned. */
int sp_ecc_make_key_521_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv,
    ecc_point* pub, void* heap)
{
    int err = FP_WOULDBLOCK;
    sp_ecc_key_gen_521_ctx* ctx = (sp_ecc_key_gen_521_ctx*)sp_ctx->data;
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521* infinity = ctx->point + 1;
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */

    /* Compile-time check that the context fits in sp_ctx->data (negative
     * array size on failure). */
    typedef char ctx_size_test[sizeof(sp_ecc_key_gen_521_ctx)
        >= sizeof(*sp_ctx) ?
        -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* generate scalar */
        err = sp_521_ecc_gen_k_17(rng, ctx->k);
        if (err == MP_OKAY) {
            err = FP_WOULDBLOCK;
            ctx->state = 1;
        }
        break;
    case 1: /* pub = k * G (non-blocking) */
        err = sp_521_ecc_mulmod_base_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
            ctx->point, ctx->k, 1, 1, heap);
        if (err == MP_OKAY) {
            err = FP_WOULDBLOCK;
        #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
            /* Reuse mulmod_ctx for the validation multiply. */
            XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
            ctx->state = 2;
        #else
            ctx->state = 3;
        #endif
        }
        break;
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    case 2: /* validate: order * pub == infinity */
        err = sp_521_ecc_mulmod_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
            infinity, ctx->point, p521_order, 1, 1);
        if (err == MP_OKAY) {
            if (sp_521_iszero_17(ctx->point->x) ||
                sp_521_iszero_17(ctx->point->y)) {
                err = ECC_INF_E;
            }
            else {
                err = FP_WOULDBLOCK;
                ctx->state = 3;
            }
        }
        break;
    #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */
    case 3: /* export private scalar and public point */
        err = sp_521_to_mp(ctx->k, priv);
        if (err == MP_OKAY) {
            err = sp_521_point_to_ecc_point_17(ctx->point, pub);
        }
        break;
    }

    /* On completion or error, wipe the context (it holds the private key). */
    if (err != FP_WOULDBLOCK) {
        XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_521_ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */

#ifdef HAVE_ECC_DHE
/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 66
 *
 * r A single precision integer.
 * a Byte array.
 */
static void sp_521_to_bin_17(sp_digit* r, byte* a)
{
    int i;
    int j = 0;

    /* Top digit holds only 9 bits: 2 bytes, then 16 full 4-byte digits
     * (2 + 16*4 = 66 bytes total). */
    a[j++] = r[16] >> 8;
    a[j++] = r[16] >> 0;
    for (i = 15; i >= 0; i--) {
        a[j++] = r[i] >> 24;
        a[j++] = r[i] >> 16;
        a[j++] = r[i] >> 8;
        a[j++] = r[i] >> 0;
    }
}

/* Multiply the point by the scalar and serialize the X ordinate.
 * The number is 0 padded to maximum size on output.
 *
 * priv Scalar to multiply the point by.
 * pub Point to multiply.
 * out Buffer to hold X ordinate.
 * outLen On entry, size of the buffer in bytes.
 * On exit, length of data in buffer in bytes.
 * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size, + * MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_secret_gen_521(const mp_int* priv, const ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[17]; +#endif + int err = MP_OKAY; + + if (*outLen < 65U) { + err = BUFFER_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 17, priv); + sp_521_point_from_ecc_point_17(point, pub); + err = sp_521_ecc_mulmod_17(point, point, k, 1, 1, heap); + } + if (err == MP_OKAY) { + sp_521_to_bin_17(point->x, out); + *outLen = 66; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_521_ctx { + int state; + union { + sp_521_ecc_mulmod_17_ctx mulmod_ctx; + }; + sp_digit k[17]; + sp_point_521 point; +} sp_ecc_sec_gen_521_ctx; + +int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_521_ctx* ctx = (sp_ecc_sec_gen_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_521_from_mp(ctx->k, 17, priv); + sp_521_point_from_ecc_point_17(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_521_ecc_mulmod_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_521_to_bin_17(ctx->point.x, out); + *outLen = 66; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register byte n asm ("r2") = (byte)n_p; + + __asm__ __volatile__ ( + "rsb r12, %[n], #32\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" +#else + "ldrd r4, r5, [%[a]]\n\t" +#endif + "lsr r4, r4, %[n]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #8]\n\t" + "str r4, [%[a]]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r5, [%[a], #4]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r6, [%[a], #8]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #20]\n\t" + "str r4, [%[a], #12]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r5, [%[a], #16]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r6, [%[a], #20]\n\t" + "lsl r3, r5, 
r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #32]\n\t" + "str r4, [%[a], #24]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r5, [%[a], #28]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r6, [%[a], #32]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #44]\n\t" + "str r4, [%[a], #36]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r5, [%[a], #40]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #52]\n\t" + "str r6, [%[a], #44]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #56]\n\t" + "str r4, [%[a], #48]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r5, [%[a], #52]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #64]\n\t" + "str r6, [%[a], #56]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[r], #60]\n\t" + "str r5, [%[r], #64]\n\t" +#else + "strd r4, r5, [%[r], #60]\n\t" +#endif + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) + : + : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + ); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register byte n asm ("r2") = (byte)n_p; + + __asm__ __volatile__ ( + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #64]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + 
        "ldr r4, [%[a], #60]\n\t"
        "str r6, [%[r], #68]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #56]\n\t"
        "str r5, [%[r], #64]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #52]\n\t"
        "str r4, [%[r], #60]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #48]\n\t"
        "str r6, [%[r], #56]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #44]\n\t"
        "str r5, [%[r], #52]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #40]\n\t"
        "str r4, [%[r], #48]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #36]\n\t"
        "str r6, [%[r], #44]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #32]\n\t"
        "str r5, [%[r], #40]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #28]\n\t"
        "str r4, [%[r], #36]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #24]\n\t"
        "str r6, [%[r], #32]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #20]\n\t"
        "str r5, [%[r], #28]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #16]\n\t"
        "str r4, [%[r], #24]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #12]\n\t"
        "str r6, [%[r], #20]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #8]\n\t"
        "str r5, [%[r], #16]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #4]\n\t"
        "str r4, [%[r], #12]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a]]\n\t"
        "str r6, [%[r], #8]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "str r4, [%[r]]\n\t"
        "str r5, [%[r], #4]\n\t"
        : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n)
        :
        : "memory", "r4", "r5", "r6", "r3", "r12", "cc"
    );
}

/* Shift a (34 words) left by n bits (0 < n < 32) into r; the overflow word
 * is written to r[34]. Same downward word-by-word scheme as
 * sp_521_lshift_17. */
static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p)
{
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register byte n asm ("r2") = (byte)n_p;

    __asm__ __volatile__ (
        /* High part extracted as (x >> 1) >> (31 - n) to stay defined for
         * n == 0. */
        "rsb r12, %[n], #31\n\t"
        "ldr r5, [%[a], #132]\n\t"
        "lsr r6, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r6, r6, r12\n\t"
        "ldr r4, [%[a], #128]\n\t"
        "str r6, [%[r], #136]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #124]\n\t"
        "str r5, [%[r], #132]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #120]\n\t"
        "str r4, [%[r], #128]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #116]\n\t"
        "str r6, [%[r], #124]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #112]\n\t"
        "str r5, [%[r], #120]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #108]\n\t"
        "str r4, [%[r], #116]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #104]\n\t"
        "str r6, [%[r], #112]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #100]\n\t"
        "str r5, [%[r], #108]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #96]\n\t"
        "str r4, [%[r], #104]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #92]\n\t"
        "str r6, [%[r], #100]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #88]\n\t"
        "str r5, [%[r], #96]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #84]\n\t"
        "str r4, [%[r], #92]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #80]\n\t"
        "str r6, [%[r], #88]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #76]\n\t"
        "str r5, [%[r], #84]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #72]\n\t"
        "str r4, [%[r], #80]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #68]\n\t"
        "str r6, [%[r], #76]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #64]\n\t"
        "str r5, [%[r], #72]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #60]\n\t"
        "str r4, [%[r], #68]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "str r6, [%[r], #64]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #52]\n\t"
        "str r5, [%[r], #60]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #48]\n\t"
        "str r4, [%[r], #56]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "str r6, [%[r], #52]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #40]\n\t"
        "str r5, [%[r], #48]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #36]\n\t"
        "str r4, [%[r], #44]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "str r6, [%[r], #40]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #28]\n\t"
        "str r5, [%[r], #36]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #24]\n\t"
        "str r4, [%[r], #32]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "str r6, [%[r], #28]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #16]\n\t"
        "str r5, [%[r], #24]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a], #12]\n\t"
        "str r4, [%[r], #20]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "str r6, [%[r], #16]\n\t"
        "lsr r3, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r5, r5, r3\n\t"
        "ldr r6, [%[a], #4]\n\t"
        "str r5, [%[r], #12]\n\t"
        "lsr r3, r6, #1\n\t"
        "lsl r6, r6, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r4, r4, r3\n\t"
        "ldr r5, [%[a]]\n\t"
        "str r4, [%[r], #8]\n\t"
        "lsr r3, r5, #1\n\t"
        "lsl r5, r5, %[n]\n\t"
        "lsr r3, r3, r12\n\t"
        "orr r6, r6, r3\n\t"
        "str r5, [%[r]]\n\t"
        "str r6, [%[r], #4]\n\t"
        : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n)
        :
        : "memory", "r4", "r5", "r6", "r3", "r12", "cc"
    );
}

#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p)
{
    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;

    /* Looped version: 4 blocks of 4 words (0x40 bytes), borrow carried
     * across iterations in r12, then one final word. Returns the borrow
     * (0 or all-ones). (The "pkace" in the label is a historical typo,
     * kept as-is - it is internal and consistent.) */
    __asm__ __volatile__ (
        "mov r12, #0\n\t"
        "add lr, %[a], #0x40\n\t"
        "\n"
    "L_sp_521_sub_in_pkace_17_word_%=: \n\t"
        /* Re-establish the carry flag from the saved borrow mask. */
        "rsbs r12, r12, #0\n\t"
        "ldm %[a], {r2, r3, r4, r5}\n\t"
        "ldm %[b]!, {r6, r7, r8, r9}\n\t"
        "sbcs r2, r2, r6\n\t"
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "stm %[a]!, {r2, r3, r4, r5}\n\t"
        "sbc r12, r12, r12\n\t"
        "cmp %[a], lr\n\t"
        "bne L_sp_521_sub_in_pkace_17_word_%=\n\t"
        "rsbs r12, r12, #0\n\t"
        "ldm %[a], {r2}\n\t"
        "ldm %[b]!, {r6}\n\t"
        "sbcs r2, r2, r6\n\t"
        "stm %[a]!, {r2}\n\t"
        "sbc %[a], %[a], %[a]\n\t"
        : [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc"
    );
    return (uint32_t)(size_t)a;
}

#else
/* Sub b from a into a. (a -= b)
 *
 * a A single precision integer and result.
 * b A single precision integer.
 */
static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p)
{
    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;

    /* Fully unrolled version: 4 blocks of 4 words then the final word,
     * with the borrow chained through "sbcs". Returns the final borrow
     * (0 or all-ones). */
    __asm__ __volatile__ (
        "ldm %[a], {r2, r3, r4, r5}\n\t"
        "ldm %[b]!, {r6, r7, r8, r9}\n\t"
        "subs r2, r2, r6\n\t"
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "stm %[a]!, {r2, r3, r4, r5}\n\t"
        "ldm %[a], {r2, r3, r4, r5}\n\t"
        "ldm %[b]!, {r6, r7, r8, r9}\n\t"
        "sbcs r2, r2, r6\n\t"
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "stm %[a]!, {r2, r3, r4, r5}\n\t"
        "ldm %[a], {r2, r3, r4, r5}\n\t"
        "ldm %[b]!, {r6, r7, r8, r9}\n\t"
        "sbcs r2, r2, r6\n\t"
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "stm %[a]!, {r2, r3, r4, r5}\n\t"
        "ldm %[a], {r2, r3, r4, r5}\n\t"
        "ldm %[b]!, {r6, r7, r8, r9}\n\t"
        "sbcs r2, r2, r6\n\t"
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "stm %[a]!, {r2, r3, r4, r5}\n\t"
        "ldm %[a], {r2}\n\t"
        "ldm %[b]!, {r6}\n\t"
        "sbcs r2, r2, r6\n\t"
        "stm %[a]!, {r2}\n\t"
        /* 0 - borrow into the return register. */
        "sbc %[a], r9, r9\n\t"
        : [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
    return (uint32_t)(size_t)a;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Mul a by digit b into r. (r = a * b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision digit.
 */
static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
{
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register sp_digit b asm ("r2") = (sp_digit)b_p;

    /* Looped version. The WOLFSSL_ARM_ARCH < 4 path synthesizes the 32x32
     * -> 64-bit product from four 16x16 multiplies (no umull/umlal on
     * those cores); otherwise umull is used. r3:r4:r5 hold the running
     * 96-bit accumulator window. */
    __asm__ __volatile__ (
        /* A[0] * B */
        "ldr r8, [%[a]]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r5, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r5, r5, #16\n\t"
        "mul r5, r6, r5\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r3, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r6, %[b], #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umull r5, r3, %[b], r8\n\t"
#endif
        "mov r4, #0\n\t"
        "str r5, [%[r]]\n\t"
        "mov r5, #0\n\t"
        "mov r9, #4\n\t"
        "\n"
    "L_sp_521_mul_d_17_word_%=: \n\t"
        /* A[i] * B */
        "ldr r8, [%[a], r9]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adcs r4, r4, #0\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
#else
        "umull r6, r7, %[b], r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
#endif
        "str r3, [%[r], r9]\n\t"
        /* Slide the accumulator window down one word. */
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "mov r5, #0\n\t"
        "add r9, r9, #4\n\t"
        "cmp r9, #0x44\n\t"
        "blt L_sp_521_mul_d_17_word_%=\n\t"
        "str r3, [%[r], #68]\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

#else
/* Mul a by digit b into r. (r = a * b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision digit.
 */
static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
{
    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
    register sp_digit b asm ("r2") = (sp_digit)b_p;

    /* Fully unrolled version. The carry word rotates through r3/r4/r5;
     * the WOLFSSL_ARM_ARCH < 4 path builds each 32x32 -> 64-bit product
     * from 16x16 multiplies, otherwise umull/umlal is used. */
    __asm__ __volatile__ (
        /* A[0] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r3, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r3, r3, #16\n\t"
        "mul r3, r6, r3\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r4, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r6, %[b], #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umull r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[1] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "mov r3, #0\n\t"
        /* A[2] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r5, r5, r7\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umlal r5, r3, %[b], r8\n\t"
#endif
        "stm %[r]!, {r5}\n\t"
        "mov r4, #0\n\t"
        /* A[3] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umlal r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[4] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "mov r3, #0\n\t"
        /* A[5] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r5, r5, r7\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umlal r5, r3, %[b], r8\n\t"
#endif
        "stm %[r]!, {r5}\n\t"
        "mov r4, #0\n\t"
        /* A[6] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umlal r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[7] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "mov r3, #0\n\t"
        /* A[8] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r5, r5, r7\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umlal r5, r3, %[b], r8\n\t"
#endif
        "stm %[r]!, {r5}\n\t"
        "mov r4, #0\n\t"
        /* A[9] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umlal r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[10] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "mov r3, #0\n\t"
        /* A[11] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r5, r5, r7\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umlal r5, r3, %[b], r8\n\t"
#endif
        "stm %[r]!, {r5}\n\t"
        "mov r4, #0\n\t"
        /* A[12] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umlal r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[13] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "mov r3, #0\n\t"
        /* A[14] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r5, r5, r7\n\t"
        "adc r3, r3, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r3, r3, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r5, r5, r6\n\t"
        "adc r3, r3, r7\n\t"
#else
        "umlal r5, r3, %[b], r8\n\t"
#endif
        "stm %[r]!, {r5}\n\t"
        "mov r4, #0\n\t"
        /* A[15] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r3, r3, r7\n\t"
        "adc r4, r4, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r4, r4, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r3, r3, r6\n\t"
        "adc r4, r4, r7\n\t"
#else
        "umlal r3, r4, %[b], r8\n\t"
#endif
        "stm %[r]!, {r3}\n\t"
        "mov r5, #0\n\t"
        /* A[16] * B */
        "ldm %[a]!, {r8}\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)
        "lsl r6, %[b], #16\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6, r7\n\t"
        "adds r4, r4, r7\n\t"
        "adc r5, r5, #0\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
        "lsr r6, %[b], #16\n\t"
        "lsr r7, r8, #16\n\t"
        "mul r7, r6, r7\n\t"
        "add r5, r5, r7\n\t"
        "lsl r7, r8, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7, r6\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "adds r4, r4, r6\n\t"
        "adc r5, r5, r7\n\t"
#else
        "umlal r4, r5, %[b], r8\n\t"
#endif
        "stm %[r]!, {r4}\n\t"
        "str r5, [%[r]]\n\t"
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_USE_UDIV
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * d1 The high order half of the number to divide.
 * d0 The low order half of the number to divide.
 * div The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div.
It may give an answer 1 larger. + */ +static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_521_word_17_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_521_word_17_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, 
%[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_521_mask_17(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<17; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } + r[16] = a[16] & m; +#endif +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided. + * d Number to divide with. 
+ * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) +{ + sp_digit t1[35]; + sp_digit t2[18]; + sp_digit sd[18]; + sp_digit div; + sp_digit r1; + int i; + + ASSERT_SAVED_VECTOR_REGISTERS(); + + (void)m; + div = (d[16] << 23) | (d[15] >> 9); + XMEMCPY(t1, a, sizeof(*t1) * 2 * 17); + r1 = sp_521_cmp_17(&t1[17], d) >= 0; + sp_521_cond_sub_17(&t1[17], &t1[17], d, (sp_digit)0 - r1); + sp_521_lshift_17(sd, d, 23); + sp_521_lshift_34(t1, t1, 23); + + for (i = 16; i >= 0; i--) { + sp_digit hi = t1[17 + i] - (t1[17 + i] == div); + r1 = div_521_word_17(hi, t1[17 + i - 1], div); + + sp_521_mul_d_17(t2, sd, r1); + t1[17 + i] += sp_521_sub_in_place_17(&t1[i], t2); + t1[17 + i] -= t2[17]; + sp_521_mask_17(t2, sd, t1[17 + i]); + t1[17 + i] += sp_521_add_17(&t1[i], &t1[i], t2); + sp_521_mask_17(t2, sd, t1[17 + i]); + t1[17 + i] += sp_521_add_17(&t1[i], &t1[i], t2); + } + + r1 = sp_521_cmp_17(t1, sd) >= 0; + sp_521_cond_sub_17(r, t1, sd, (sp_digit)0 - r1); + sp_521_rshift_17(r, r, 23); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_521_mod_17(sp_digit* r, const sp_digit* a, + const sp_digit* m) +{ + return sp_521_div_17(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P521 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. 
+ */ +static void sp_521_mont_mul_order_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_521_mul_17(r, a, b); + sp_521_mont_reduce_order_17(r, p521_order, p521_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P521 curve. */ +static const uint32_t p521_order_minus_2[17] = { + 0x91386407U,0xbb6fb71eU,0x899c47aeU,0x3bb5c9b8U,0xf709a5d0U,0x7fcc0148U, + 0xbf2f966bU,0x51868783U,0xfffffffaU,0xffffffffU,0xffffffffU,0xffffffffU, + 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0x000001ffU +}; +#else +/* The low half of the order-2 of the P521 curve. */ +static const uint32_t p521_order_low[9] = { + 0x91386407U,0xbb6fb71eU,0x899c47aeU,0x3bb5c9b8U,0xf709a5d0U,0x7fcc0148U, + 0xbf2f966bU,0x51868783U,0xfffffffaU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Square number mod the order of P521 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_order_17(sp_digit* r, const sp_digit* a) +{ + sp_521_sqr_17(r, a); + sp_521_mont_reduce_order_17(r, p521_order, p521_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P521 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_n_order_17(sp_digit* r, const sp_digit* a, int n) +{ + int i; + + sp_521_mont_sqr_order_17(r, a); + for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + XMEMCPY(t, a, sizeof(sp_digit) * 17); + ctx->i = 519; + ctx->state = 1; + break; + case 1: + sp_521_mont_sqr_order_17(t, t); + ctx->state = 2; + break; + case 2: + if ((p521_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) { + sp_521_mont_mul_order_17(t, t, a); + } + ctx->i--; + ctx->state = (ctx->i == 0) ? 
3 : 1; + break; + case 3: + XMEMCPY(r, t, sizeof(sp_digit) * 17U); + err = MP_OKAY; + break; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +static void sp_521_mont_inv_order_17(sp_digit* r, const sp_digit* a, + sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 17); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_order_17(t, t); + if ((p521_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_521_mont_mul_order_17(t, t, a); + } + } + XMEMCPY(r, t, sizeof(sp_digit) * 17U); +#else + sp_digit* t = td; + sp_digit* t2 = td + 2 * 17; + sp_digit* t3 = td + 4 * 17; + int i; + + /* t = a^2 */ + sp_521_mont_sqr_order_17(t, a); + /* t = a^3 = t * a */ + sp_521_mont_mul_order_17(t, t, a); + /* t= a^c = t ^ 2 ^ 2 */ + sp_521_mont_sqr_n_order_17(t2, t, 2); + /* t = a^f = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + + /* t3 = a^1e */ + sp_521_mont_sqr_order_17(t3, t); + /* t3 = a^1f = t3 * a */ + sp_521_mont_mul_order_17(t3, t3, a); + + /* t2= a^f0 = t ^ 2 ^ 4 */ + sp_521_mont_sqr_n_order_17(t2, t, 4); + /* t = a^ff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + /* t2= a^ff00 = t ^ 2 ^ 8 */ + sp_521_mont_sqr_n_order_17(t2, t, 8); + /* t3= a^ffff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + /* t2= a^ffff0000 = t ^ 2 ^ 16 */ + sp_521_mont_sqr_n_order_17(t2, t, 16); + /* t = a^ffffffff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + + /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */ + sp_521_mont_sqr_n_order_17(t2, t, 32); + /* t = a^ffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + /* t2= a^ffffffffffffffff0000000000000000 = t ^ 2 ^ 64 */ + sp_521_mont_sqr_n_order_17(t2, t, 64); + /* t = a^ffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + /* t2= a^ffffffffffffffffffffffffffffffff00000000000000000000000000000000 = t ^ 2 ^ 128 */ + sp_521_mont_sqr_n_order_17(t2, t, 128); + /* t = 
a^ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_17(t, t2, t); + + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 */ + sp_521_mont_sqr_n_order_17(t2, t, 5); + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t * t3 */ + sp_521_mont_mul_order_17(t2, t2, t3); + + for (i=259; i>=1; i--) { + sp_521_mont_sqr_order_17(t2, t2); + if ((p521_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_521_mont_mul_order_17(t2, t2, a); + } + } + sp_521_mont_sqr_order_17(t2, t2); + sp_521_mont_mul_order_17(r, t2, a); +#endif /* WOLFSSL_SP_SMALL */ +} + +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ +#endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +#ifdef HAVE_ECC_SIGN +#ifndef SP_ECC_MAX_SIG_GEN +#define SP_ECC_MAX_SIG_GEN 64 +#endif + +/* Calculate second signature value S from R, k and private value. + * + * s = (r * x + e) / k + * + * s Signature value. + * r First signature value. + * k Ephemeral private key. + * x Private key as a number. + * e Hash of message as a number. + * tmp Temporary storage for intermediate numbers. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_calc_s_17(sp_digit* s, const sp_digit* r, sp_digit* k, + sp_digit* x, const sp_digit* e, sp_digit* tmp) +{ + int err; + sp_digit carry; + sp_int32 c; + sp_digit* kInv = k; + + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_17(k, k, p521_norm_order); + err = sp_521_mod_17(k, k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_17(k); + + /* kInv = 1/k mod order */ + sp_521_mont_inv_order_17(kInv, k, tmp); + sp_521_norm_17(kInv); + + /* s = r * x + e */ + sp_521_mul_17(x, x, r); + err = sp_521_mod_17(x, x, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_17(x); + carry = sp_521_add_17(s, e, x); + sp_521_cond_sub_17(s, s, p521_order, 0 - carry); + sp_521_norm_17(s); + c = sp_521_cmp_17(s, p521_order); + sp_521_cond_sub_17(s, s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_17(s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_17(s, s, kInv); + sp_521_norm_17(s); + } + + return err; +} + +/* Sign the hash using the private key. + * e = [hash, 521 bits] from binary + * r = (k.G)->x mod order + * s = (r * x + e) / k mod order + * The hash is truncated to the first 521 bits. + * + * hash Hash to sign. + * hashLen Length of the hash data. + * rng Random number generator. + * priv Private part of key - scalar. + * rm First part of result as an mp_int. + * sm Sirst part of result as an mp_int. + * heap Heap to use for allocation. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. 
+ */ +int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_521* point = NULL; +#else + sp_digit e[7 * 2 * 17]; + sp_point_521 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 17, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 17; + k = e + 4 * 17; + r = e + 6 * 17; + tmp = e + 8 * 17; + s = e; + + if (hashLen > 66U) { + hashLen = 66U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_17(rng, k); + } + else { + sp_521_from_mp(k, 17, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_17(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 17U); + sp_521_norm_17(r); + c = sp_521_cmp_17(r, p521_order); + sp_521_cond_sub_17(r, r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_17(r); + + if (!sp_521_iszero_17(r)) { + /* x is modified in calculation of s. */ + sp_521_from_mp(x, 17, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_521_from_bin(e, 17, hash, (int)hashLen); + + /* Take 521 leftmost bits of hash. */ + if (hashLen == 66U) { + sp_521_rshift_17(e, e, 7); + } + + err = sp_521_calc_s_17(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_521_iszero_17(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 17); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_521)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sign_521_ctx { + int state; + union { + sp_521_ecc_mulmod_17_ctx mulmod_ctx; + sp_521_mont_inv_order_17_ctx mont_inv_order_ctx; + }; + sp_digit e[2*17]; + sp_digit x[2*17]; + sp_digit k[2*17]; + sp_digit r[2*17]; + sp_digit tmp[3 * 2*17]; + sp_point_521 point; + sp_digit* s; + sp_digit* kInv; + int i; +} sp_ecc_sign_521_ctx; + +int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, + mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sign_521_ctx* ctx = (sp_ecc_sign_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sign_521_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->s = ctx->e; + ctx->kInv = ctx->k; + + ctx->i = SP_ECC_MAX_SIG_GEN; + ctx->state = 1; + break; + case 1: /* GEN */ + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_17(rng, ctx->k); + } + else { + sp_521_from_mp(ctx->k, 17, km); + mp_zero(km); + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + break; + case 2: /* MULMOD */ + err = sp_521_ecc_mulmod_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &p521_base, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + ctx->state = 3; + } + break; + case 3: /* MODORDER */ + { + sp_int32 c; + /* r = point->x mod order */ + XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 17U); + sp_521_norm_17(ctx->r); + c = sp_521_cmp_17(ctx->r, p521_order); + sp_521_cond_sub_17(ctx->r, ctx->r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_17(ctx->r); + + if (hashLen > 66U) { + hashLen = 66U; + } + sp_521_from_mp(ctx->x, 17, priv); + sp_521_from_bin(ctx->e, 17, hash, (int)hashLen); + if (hashLen == 66U) { + sp_521_rshift_17(ctx->e, ctx->e, 7); + } + ctx->state = 4; + break; + } + case 4: /* KMODORDER */ + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_17(ctx->k, ctx->k, p521_norm_order); + err = sp_521_mod_17(ctx->k, ctx->k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_17(ctx->k); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 5; + } + break; + case 5: /* KINV */ + /* kInv = 1/k mod order */ + err = sp_521_mont_inv_order_17_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp); + if (err == MP_OKAY) { + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 6; + } + break; + case 6: /* KINVNORM */ + sp_521_norm_17(ctx->kInv); + ctx->state = 7; + break; + case 7: /* R */ + /* s = r * x + e */ + sp_521_mul_17(ctx->x, ctx->x, ctx->r); + ctx->state = 8; + break; + case 8: /* S1 */ + err = sp_521_mod_17(ctx->x, ctx->x, p521_order); + if (err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* S2 */ + { + sp_digit carry; + sp_int32 c; + sp_521_norm_17(ctx->x); + carry = sp_521_add_17(ctx->s, 
ctx->e, ctx->x); + sp_521_cond_sub_17(ctx->s, ctx->s, + p521_order, 0 - carry); + sp_521_norm_17(ctx->s); + c = sp_521_cmp_17(ctx->s, p521_order); + sp_521_cond_sub_17(ctx->s, ctx->s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_17(ctx->s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_17(ctx->s, ctx->s, ctx->kInv); + sp_521_norm_17(ctx->s); + + /* Check that signature is usable. */ + if (sp_521_iszero_17(ctx->s) == 0) { + ctx->state = 10; + break; + } + #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + ctx->i = 1; + #endif + + /* not usable gen, try again */ + ctx->i--; + if (ctx->i == 0) { + err = RNG_FAILURE_E; + } + ctx->state = 1; + break; + } + case 10: /* RES */ + err = sp_521_to_mp(ctx->r, rm); + if (err == MP_OKAY) { + err = sp_521_to_mp(ctx->s, sm); + } + break; + } + + if (err == MP_OKAY && ctx->state != 10) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 17U); + XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 17U); + XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 17U); + XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 17U); + XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 17U); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_SIGN */ + +#ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #0x40\n\t" + "\n" + "L_sp_521_sub_17_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_521_sub_17_word_%=\n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Divide the number by 2 mod the modulus. (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus. 
+ */ +static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r4}\n\t" + "ands r3, r4, #1\n\t" + "beq L_sp_521_div2_mod_17_even_%=\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm %[m]!, {r8}\n\t" + "adcs r4, r4, r8\n\t" + "stm %[r]!, {r4}\n\t" + "adc r3, r12, r12\n\t" + "b L_sp_521_div2_mod_17_div2_%=\n\t" + "\n" + "L_sp_521_div2_mod_17_even_%=: \n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "stm %[r]!, {r4}\n\t" + "\n" + "L_sp_521_div2_mod_17_div2_%=: \n\t" + "sub %[r], %[r], #0x44\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[r]]\n\t" + "ldr r9, [%[r], #4]\n\t" +#else + "ldrd r8, r9, [%[r]]\n\t" 
+#endif + "lsr r8, r8, #1\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #8]\n\t" + "str r8, [%[r]]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #12]\n\t" + "str r9, [%[r], #4]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #16]\n\t" + "str r10, [%[r], #8]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #20]\n\t" + "str r8, [%[r], #12]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #24]\n\t" + "str r9, [%[r], #16]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #28]\n\t" + "str r10, [%[r], #20]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #32]\n\t" + "str r8, [%[r], #24]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #36]\n\t" + "str r9, [%[r], #28]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #40]\n\t" + "str r10, [%[r], #32]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #44]\n\t" + "str r8, [%[r], #36]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #48]\n\t" + "str r9, [%[r], #40]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #52]\n\t" + "str r10, [%[r], #44]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #56]\n\t" + "str r8, [%[r], #48]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #60]\n\t" + "str r9, [%[r], #52]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #64]\n\t" + "str r10, [%[r], #56]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "orr r9, r9, r3, lsl #31\n\t" + "str r8, [%[r], #60]\n\t" + "str r9, [%[r], #64]\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + 
); +} + +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +static const unsigned char L_sp_521_num_bits_17_table[] = { + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_521_num_bits_17(const sp_digit* a_p) +{ + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register unsigned char* L_sp_521_num_bits_17_table_c asm ("r1") = (unsigned char*)&L_sp_521_num_bits_17_table; + + __asm__ __volatile__ ( + "mov 
lr, %[L_sp_521_num_bits_17_table]\n\t" + "ldr r1, [%[a], #64]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_16_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" +#else + "mov r2, #0x218\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, #0x210\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x208\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x200\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_%=: \n\t" + "ldr r1, [%[a], #60]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_15_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_15_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xf8\n\t" +#else + "mov r2, #0x1f8\n\t" +#endif + "ldrb r12, 
[lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_15_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xf0\n\t" +#else + "mov r2, #0x1f0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_15_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xe8\n\t" +#else + "mov r2, #0x1e8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xe0\n\t" +#else + "mov r2, #0x1e0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_%=: \n\t" + "ldr r1, [%[a], #56]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_14_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xd8\n\t" +#else + "mov r2, #0x1d8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xd0\n\t" +#else + "mov r2, 
#0x1d0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc8\n\t" +#else + "mov r2, #0x1c8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc0\n\t" +#else + "mov r2, #0x1c0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_%=: \n\t" + "ldr r1, [%[a], #52]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_13_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xb8\n\t" +#else + "mov r2, #0x1b8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xb0\n\t" +#else + "mov r2, #0x1b0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, 
#0xa8\n\t" +#else + "mov r2, #0x1a8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xa0\n\t" +#else + "mov r2, #0x1a0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_%=: \n\t" + "ldr r1, [%[a], #48]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_12_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x98\n\t" +#else + "mov r2, #0x198\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x90\n\t" +#else + "mov r2, #0x190\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x88\n\t" +#else + "mov r2, #0x188\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "ldrb 
r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_%=: \n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_11_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x78\n\t" +#else + "mov r2, #0x178\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x70\n\t" +#else + "mov r2, #0x170\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x68\n\t" +#else + "mov r2, #0x168\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_10_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, 
r2, #8\n\t" + "add r2, r2, #0x58\n\t" +#else + "mov r2, #0x158\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x50\n\t" +#else + "mov r2, #0x150\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x48\n\t" +#else + "mov r2, #0x148\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_9_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x38\n\t" +#else + "mov r2, #0x138\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, 
#0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x30\n\t" +#else + "mov r2, #0x130\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x28\n\t" +#else + "mov r2, #0x128\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_8_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_3_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" +#else + "mov r2, #0x118\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_2_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, #0x110\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_1_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x108\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq 
L_sp_521_num_bits_17_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_3_%=: \n\t" + "lsr 
r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_521_num_bits_17_18_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [L_sp_521_num_bits_17_table] "+r" (L_sp_521_num_bits_17_table_c) + : + : "memory", "r2", "r3", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)a; +} + +#else +static int sp_521_num_bits_17(const sp_digit* a_p) +{ + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "ldr r1, [%[a], #64]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_16_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x220\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_%=: \n\t" + "ldr r1, [%[a], #60]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_15_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x200\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_%=: \n\t" + "ldr r1, [%[a], #56]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_14_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, 
#0xe0\n\t" +#else + "mov r2, #0x1e0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_%=: \n\t" + "ldr r1, [%[a], #52]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_13_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc0\n\t" +#else + "mov r2, #0x1c0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_%=: \n\t" + "ldr r1, [%[a], #48]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_12_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xa0\n\t" +#else + "mov r2, #0x1a0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_%=: \n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_11_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_10_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_9_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b 
L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_8_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_7_%=\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_6_%=\n\t" + "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_%=: \n\t" + "ldr r1, 
[%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_521_num_bits_17_18_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + ); + return (uint32_t)(size_t)a; +} + +#endif /* WOLFSSL_ARM_ARCH && (WOLFSSL_ARM_ARCH < 7) */ +/* Non-constant time modular inversion. + * + * @param [out] r Resulting number. + * @param [in] a Number to invert. + * @param [in] m Modulus. + * @return MP_OKAY on success. + */ +static int sp_521_mod_inv_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit u[17]; + sp_digit v[17]; + sp_digit b[17]; + sp_digit d[17]; + int ut, vt; + sp_digit o; + + + XMEMCPY(u, m, sizeof(u)); + XMEMCPY(v, a, sizeof(v)); + + ut = sp_521_num_bits_17(u); + vt = sp_521_num_bits_17(v); + + XMEMSET(b, 0, sizeof(b)); + if ((v[0] & 1) == 0) { + sp_521_rshift1_17(v, v); + XMEMCPY(d, m, sizeof(u)); + d[0] += 1; + sp_521_rshift1_17(d, d); + vt--; + + while ((v[0] & 1) == 0) { + sp_521_rshift1_17(v, v); + sp_521_div2_mod_17(d, d, m); + vt--; + } + } + else { + XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit)); + d[0] = 1; + } + + while (ut > 1 && vt > 1) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_17(u, v) >= 0))) { + sp_521_sub_17(u, u, v); + o = sp_521_sub_17(b, b, d); + if (o != 0) + sp_521_add_17(b, b, m); + ut = sp_521_num_bits_17(u); + + do { + sp_521_rshift1_17(u, u); + sp_521_div2_mod_17(b, b, m); + ut--; + } + while (ut > 0 && (u[0] & 1) == 0); + } + else { + sp_521_sub_17(v, v, u); + o = sp_521_sub_17(d, d, b); + if (o != 0) + sp_521_add_17(d, d, m); + vt = sp_521_num_bits_17(v); + + do { + sp_521_rshift1_17(v, v); + sp_521_div2_mod_17(d, d, m); + vt--; + } + while (vt > 0 && (v[0] & 1) == 0); + } + } + + if (ut == 1) + XMEMCPY(r, b, 
sizeof(b)); + else + XMEMCPY(r, d, sizeof(d)); + + + return MP_OKAY; +} + +#endif /* WOLFSSL_SP_SMALL */ + +/* Add point p1 into point p2. Handles p1 == p2 and result at infinity. + * + * p1 First point to add and holds result. + * p2 Second point to add. + * tmp Temporary storage for intermediate numbers. + */ +static void sp_521_add_points_17(sp_point_521* p1, const sp_point_521* p2, + sp_digit* tmp) +{ + + sp_521_proj_point_add_17(p1, p1, p2, tmp); + if (sp_521_iszero_17(p1->z)) { + if (sp_521_iszero_17(p1->x) && sp_521_iszero_17(p1->y)) { + sp_521_proj_point_dbl_17(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + p1->x[4] = 0; + p1->x[5] = 0; + p1->x[6] = 0; + p1->x[7] = 0; + p1->x[8] = 0; + p1->x[9] = 0; + p1->x[10] = 0; + p1->x[11] = 0; + p1->x[12] = 0; + p1->x[13] = 0; + p1->x[14] = 0; + p1->x[15] = 0; + p1->x[16] = 0; + XMEMCPY(p1->z, p521_norm_mod, sizeof(p521_norm_mod)); + } + } +} + +/* Calculate the verification point: [e/s]G + [r/s]Q + * + * p1 Calculated point. + * p2 Public point and temporary. + * s Second part of signature as a number. + * u1 Temporary number. + * u2 Temporary number. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_calc_vfy_point_17(sp_point_521* p1, sp_point_521* p2, + sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap) +{ + int err; + +#ifndef WOLFSSL_SP_SMALL + err = sp_521_mod_inv_17(s, s, p521_order); + if (err == MP_OKAY) +#endif /* !WOLFSSL_SP_SMALL */ + { + sp_521_mul_17(s, s, p521_norm_order); + err = sp_521_mod_17(s, s, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_17(s); +#ifdef WOLFSSL_SP_SMALL + { + sp_521_mont_inv_order_17(s, s, tmp); + sp_521_mont_mul_order_17(u1, u1, s); + sp_521_mont_mul_order_17(u2, u2, s); + } +#else + { + sp_521_mont_mul_order_17(u1, u1, s); + sp_521_mont_mul_order_17(u2, u2, s); + } +#endif /* WOLFSSL_SP_SMALL */ + { + err = sp_521_ecc_mulmod_base_17(p1, u1, 0, 0, heap); + } + } + if ((err == MP_OKAY) && sp_521_iszero_17(p1->z)) { + p1->infinity = 1; + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_17(p2, p2, u2, 0, 0, heap); + } + if ((err == MP_OKAY) && sp_521_iszero_17(p2->z)) { + p2->infinity = 1; + } + + if (err == MP_OKAY) { + sp_521_add_points_17(p1, p2, tmp); + } + + return err; +} + +#ifdef HAVE_ECC_VERIFY +/* Verify the signature values with the hash and public key. + * e = Truncate(hash, 521) + * u1 = e/s mod order + * u2 = r/s mod order + * r == (u1.G + u2.Q)->x mod order + * Optimization: Leave point in projective form. + * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z') + * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' + * The hash is truncated to the first 521 bits. + * + * hash Hash to sign. + * hashLen Length of the hash data. + * rng Random number generator. + * priv Private part of key - scalar. + * rm First part of result as an mp_int. + * sm Sirst part of result as an mp_int. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_verify_521(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_521* p1 = NULL; +#else + sp_digit u1[18 * 17]; + sp_point_521 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_521* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 17, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 17; + s = u1 + 4 * 17; + tmp = u1 + 6 * 17; + p2 = p1 + 1; + + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(u1, 17, hash, (int)hashLen); + sp_521_from_mp(u2, 17, rm); + sp_521_from_mp(s, 17, sm); + sp_521_from_mp(p2->x, 17, pX); + sp_521_from_mp(p2->y, 17, pY); + sp_521_from_mp(p2->z, 17, pZ); + + if (hashLen == 66U) { + sp_521_rshift_17(u1, u1, 7); + } + + err = sp_521_calc_vfy_point_17(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(u2, 17, rm); + err = sp_521_mod_mul_norm_17(u2, u2, p521_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_17(p1->z, p1->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(u1, u2, p1->z, p521_mod, p521_mp_mod); + *res = (int)(sp_521_cmp_17(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_521_from_mp(u2, 17, rm); + carry = sp_521_add_17(u2, u2, p521_order); + /* Carry means result is greater than mod and is not valid. 
*/ + if (carry == 0) { + sp_521_norm_17(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_17(u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_17(u2, u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_521_mont_mul_17(u1, u2, p1->z, p521_mod, p521_mp_mod); + } + *res = (sp_521_cmp_17(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_verify_521_ctx { + int state; + union { + sp_521_ecc_mulmod_17_ctx mulmod_ctx; + sp_521_mont_inv_order_17_ctx mont_inv_order_ctx; + sp_521_proj_point_dbl_17_ctx dbl_ctx; + sp_521_proj_point_add_17_ctx add_ctx; + }; + sp_digit u1[2*17]; + sp_digit u2[2*17]; + sp_digit s[2*17]; + sp_digit tmp[2*17 * 6]; + sp_point_521 p1; + sp_point_521 p2; +} sp_ecc_verify_521_ctx; + +int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, + word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ, + const mp_int* rm, const mp_int* sm, int* res, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_verify_521_ctx* ctx = (sp_ecc_verify_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_verify_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(ctx->u1, 17, hash, (int)hashLen); + sp_521_from_mp(ctx->u2, 17, rm); + sp_521_from_mp(ctx->s, 17, sm); + sp_521_from_mp(ctx->p2.x, 17, pX); + sp_521_from_mp(ctx->p2.y, 17, pY); + sp_521_from_mp(ctx->p2.z, 17, pZ); + if (hashLen == 66U) { + sp_521_rshift_17(ctx->u1, ctx->u1, 7); + } + ctx->state = 1; + break; + case 1: /* NORMS0 */ + sp_521_mul_17(ctx->s, ctx->s, p521_norm_order); + err = sp_521_mod_17(ctx->s, ctx->s, p521_order); + if (err == MP_OKAY) + ctx->state = 2; + break; + case 2: /* NORMS1 */ + sp_521_norm_17(ctx->s); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 3; + break; + case 3: /* NORMS2 */ + err = sp_521_mont_inv_order_17_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp); + if (err == MP_OKAY) { + ctx->state = 4; + } + break; + case 4: /* NORMS3 */ + sp_521_mont_mul_order_17(ctx->u1, ctx->u1, ctx->s); + ctx->state = 5; + break; + case 5: /* NORMS4 */ + sp_521_mont_mul_order_17(ctx->u2, ctx->u2, ctx->s); + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 6; + break; + case 6: /* MULBASE */ + err = sp_521_ecc_mulmod_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p521_base, ctx->u1, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_17(ctx->p1.z)) { + ctx->p1.infinity = 1; + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 7; + } + break; + case 7: /* MULMOD */ + err = sp_521_ecc_mulmod_17_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_17(ctx->p2.z)) { + ctx->p2.infinity = 1; + } + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 8; + } + break; + case 8: /* ADD */ + err = sp_521_proj_point_add_17_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp); + if (err == MP_OKAY) + 
ctx->state = 9; + break; + case 9: /* MONT */ + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(ctx->u2, 17, rm); + err = sp_521_mod_mul_norm_17(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) + ctx->state = 10; + break; + case 10: /* SQR */ + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_17(ctx->p1.z, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: /* MUL */ + sp_521_mont_mul_17(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 12; + break; + case 12: /* RES */ + { + sp_int32 c = 0; + err = MP_OKAY; /* math okay, now check result */ + *res = (int)(sp_521_cmp_17(ctx->p1.x, ctx->u1) == 0); + if (*res == 0) { + sp_digit carry; + + /* Reload r and add order. */ + sp_521_from_mp(ctx->u2, 17, rm); + carry = sp_521_add_17(ctx->u2, ctx->u2, p521_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_521_norm_17(ctx->u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_17(ctx->u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_17(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + sp_521_mont_mul_17(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, + p521_mp_mod); + *res = (int)(sp_521_cmp_17(ctx->p1.x, ctx->u1) == 0); + } + } + break; + } + } /* switch */ + + if (err == MP_OKAY && ctx->state != 12) { + err = FP_WOULDBLOCK; + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_VERIFY */ + +#ifdef HAVE_ECC_CHECK_KEY +/* Check that the x and y ordinates are a valid point on the curve. + * + * point EC point. + * heap Heap to use if dynamically allocating. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +static int sp_521_ecc_is_point_17(const sp_point_521* point, + void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[17 * 4]; +#endif + sp_digit* t2 = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17 * 4, heap, DYNAMIC_TYPE_ECC); + if (t1 == NULL) + err = MEMORY_E; +#endif + (void)heap; + + if (err == MP_OKAY) { + t2 = t1 + 2 * 17; + + /* y^2 - x^3 - a.x = b */ + sp_521_sqr_17(t1, point->y); + (void)sp_521_mod_17(t1, t1, p521_mod); + sp_521_sqr_17(t2, point->x); + (void)sp_521_mod_17(t2, t2, p521_mod); + sp_521_mul_17(t2, t2, point->x); + (void)sp_521_mod_17(t2, t2, p521_mod); + sp_521_mont_sub_17(t1, t1, t2, p521_mod); + + /* y^2 - x^3 + 3.x = b, when a = -3 */ + sp_521_mont_add_17(t1, t1, point->x, p521_mod); + sp_521_mont_add_17(t1, t1, point->x, p521_mod); + sp_521_mont_add_17(t1, t1, point->x, p521_mod); + + + if (sp_521_cmp_17(t1, p521_b) != 0) { + err = MP_VAL; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the x and y ordinates are a valid point on the curve. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +int sp_ecc_is_point_521(const mp_int* pX, const mp_int* pY) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* pub = NULL; +#else + sp_point_521 pub[1]; +#endif + const byte one[1] = { 1 }; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(pub->x, 17, pX); + sp_521_from_mp(pub->y, 17, pY); + sp_521_from_bin(pub->z, 17, one, (int)sizeof(one)); + + err = sp_521_ecc_is_point_17(pub, NULL); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the private scalar generates the EC point (px, py), the point is + * on the curve and the point has the correct order. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * privm Private scalar that generates EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve, ECC_INF_E if the point does not have the correct order, + * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and + * MP_OKAY otherwise. + */ +int sp_ecc_check_key_521(const mp_int* pX, const mp_int* pY, + const mp_int* privm, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* priv = NULL; + sp_point_521* pub = NULL; +#else + sp_digit priv[17]; + sp_point_521 pub[2]; +#endif + sp_point_521* p = NULL; + const byte one[1] = { 1 }; + int err = MP_OKAY; + + + /* Quick check the lengs of public key ordinates and private key are in + * range. Proper check later. 
+ */ + if (((mp_count_bits(pX) > 521) || + (mp_count_bits(pY) > 521) || + ((privm != NULL) && (mp_count_bits(privm) > 521)))) { + err = ECC_OUT_OF_RANGE_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY && privm) { + priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17, heap, + DYNAMIC_TYPE_ECC); + if (priv == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = pub + 1; + + sp_521_from_mp(pub->x, 17, pX); + sp_521_from_mp(pub->y, 17, pY); + sp_521_from_bin(pub->z, 17, one, (int)sizeof(one)); + if (privm) + sp_521_from_mp(priv, 17, privm); + + /* Check point at infinitiy. */ + if ((sp_521_iszero_17(pub->x) != 0) && + (sp_521_iszero_17(pub->y) != 0)) { + err = ECC_INF_E; + } + } + + /* Check range of X and Y */ + if ((err == MP_OKAY) && + ((sp_521_cmp_17(pub->x, p521_mod) >= 0) || + (sp_521_cmp_17(pub->y, p521_mod) >= 0))) { + err = ECC_OUT_OF_RANGE_E; + } + + if (err == MP_OKAY) { + /* Check point is on curve */ + err = sp_521_ecc_is_point_17(pub, heap); + } + + if (err == MP_OKAY) { + /* Point * order = infinity */ + err = sp_521_ecc_mulmod_17(p, pub, p521_order, 1, 1, heap); + } + /* Check result is infinity */ + if ((err == MP_OKAY) && ((sp_521_iszero_17(p->x) == 0) || + (sp_521_iszero_17(p->y) == 0))) { + err = ECC_INF_E; + } + + if (privm) { + if (err == MP_OKAY) { + /* Base * private = point */ + err = sp_521_ecc_mulmod_base_17(p, priv, 1, 1, heap); + } + /* Check result is public key */ + if ((err == MP_OKAY) && + ((sp_521_cmp_17(p->x, pub->x) != 0) || + (sp_521_cmp_17(p->y, pub->y) != 0))) { + err = ECC_PRIV_KEY_E; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, heap, DYNAMIC_TYPE_ECC); + if (priv != NULL) + XFREE(priv, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL +/* Add two projective EC points together. 
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 17 * 6]; + sp_point_521 p[2]; +#endif + sp_point_521* q = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { + q = p + 1; + + sp_521_from_mp(p->x, 17, pX); + sp_521_from_mp(p->y, 17, pY); + sp_521_from_mp(p->z, 17, pZ); + sp_521_from_mp(q->x, 17, qX); + sp_521_from_mp(q->y, 17, qY); + sp_521_from_mp(q->z, 17, qZ); + p->infinity = sp_521_iszero_17(p->x) & + sp_521_iszero_17(p->y); + q->infinity = sp_521_iszero_17(q->x) & + sp_521_iszero_17(q->y); + + sp_521_proj_point_add_17(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Double a projective EC point. 
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 17 * 2]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 17, pX); + sp_521_from_mp(p->y, 17, pY); + sp_521_from_mp(p->z, 17, pZ); + p->infinity = sp_521_iszero_17(p->x) & + sp_521_iszero_17(p->y); + + sp_521_proj_point_dbl_17(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_map_521(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 17 * 5]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 17, pX); + sp_521_from_mp(p->y, 17, pY); + sp_521_from_mp(p->z, 17, pZ); + p->infinity = sp_521_iszero_17(p->x) & + sp_521_iszero_17(p->y); + + sp_521_map_17(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Square root power for the P521 curve. */ +static const uint32_t p521_sqrt_power[17] = { + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000080 +}; + +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_521_mont_sqrt_17(sp_digit* y) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t = NULL; +#else + sp_digit t[2 * 17]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 17, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + + { + int i; + + XMEMCPY(t, y, sizeof(sp_digit) * 17); + for (i=518; i>=0; i--) { + sp_521_mont_sqr_17(t, t, p521_mod, p521_mp_mod); + if (p521_sqrt_power[i / 32] & ((sp_digit)1 << (i % 32))) + sp_521_mont_mul_17(t, t, y, p521_mod, p521_mp_mod); + } + XMEMCPY(y, t, sizeof(sp_digit) * 17); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_521(mp_int* xm, int odd, mp_int* ym) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* x = NULL; +#else + sp_digit x[4 * 17]; +#endif + sp_digit* y = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 17, NULL, DYNAMIC_TYPE_ECC); + if (x == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + y = x + 2 * 17; + + sp_521_from_mp(x, 17, xm); + err = sp_521_mod_mul_norm_17(x, x, p521_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_521_mont_sqr_17(y, x, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, y, x, p521_mod, p521_mp_mod); + } + /* y = x^3 - 3x */ + sp_521_mont_sub_17(y, y, x, p521_mod); + sp_521_mont_sub_17(y, y, x, p521_mod); + sp_521_mont_sub_17(y, y, x, p521_mod); + /* y = x^3 - 3x + b */ + err = sp_521_mod_mul_norm_17(x, p521_b, p521_mod); + } + if (err == MP_OKAY) { + sp_521_mont_add_17(y, y, x, p521_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_521_mont_sqrt_17(y); + } + if (err == MP_OKAY) { + 
XMEMSET(y + 17, 0, 17U * sizeof(sp_digit)); + sp_521_mont_reduce_17(y, p521_mod, p521_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_521_mont_sub_17(y, p521_mod, y, p521_mod); + } + + err = sp_521_to_mp(y, ym); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (x != NULL) + XFREE(x, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_521 */ +#ifdef WOLFCRYPT_HAVE_SAKKE #ifdef WOLFSSL_SP_1024 /* Point structure to use. */ @@ -48528,1757 +126706,9742 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ -static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" - "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" - "umull r3, r4, r11, r12\n\t" - "mov r5, #0\n\t" - "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" - "ldr r11, [%[a], #4]\n\t" - "ldr r12, [%[b], #4]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - 
"str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" - "ldr r11, [%[a], #8]\n\t" - "ldr r12, [%[b], #8]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, 
r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" - "ldr r11, [%[a], #12]\n\t" - "ldr r12, [%[b], #12]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr 
r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" - "ldr r11, [%[a], #16]\n\t" - "ldr r12, [%[b], #16]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, 
r10\n\t" - "# A[3] * B[5]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #8]\n\t" - 
"umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" - "ldr r11, [%[a], #20]\n\t" - "ldr r12, [%[b], #20]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs 
r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" - "ldr r8, [%[a], 
#36]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #44]\n\t" - "# A[12] * B[0]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[1]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[2]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" - "ldr r11, [%[a], #24]\n\t" - "ldr r12, [%[b], #24]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, 
r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #48]\n\t" - "# A[0] * B[13]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[11]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[10]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, 
r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[1]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[0]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [sp, #52]\n\t" - "# A[14] * B[0]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[13] * B[1]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[2]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[3]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[4]\n\t" 
- "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" - "ldr r11, [%[a], #28]\n\t" - "ldr r12, [%[b], #28]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[13]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[14]\n\t" - "ldr r8, [%[a], #0]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [sp, #56]\n\t" - "# A[0] * B[15]\n\t" - "ldr r9, [%[b], 
#60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[14]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[13]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[12]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[11]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[10]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, 
r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[3]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[2]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[1]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[0]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #0]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [sp, #60]\n\t" - "# A[15] * B[1]\n\t" - "ldr r9, [%[b], #4]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[2]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[3]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[4]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[5]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[6]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" 
- "ldr r11, [%[a], #32]\n\t" - "ldr r12, [%[b], #32]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[12]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[13]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[14]\n\t" - "ldr r8, [%[a], #8]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[15]\n\t" - "ldr r8, [%[a], #4]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[2] * B[15]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[3] * B[14]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[13]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[12]\n\t" - "ldr r8, [%[a], 
#20]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[11]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[10]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[5]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[4]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[3]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[2]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #8]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[15] * B[3]\n\t" - "ldr r9, [%[b], #12]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[14] * B[4]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[5]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[6]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[7]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[8]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" - "ldr r11, [%[a], #36]\n\t" - "ldr r12, [%[b], #36]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[12]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[13]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[14]\n\t" - "ldr r8, [%[a], #16]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - 
"adc r5, r5, r10\n\t" - "# A[3] * B[15]\n\t" - "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[4] * B[15]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[5] * B[14]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[13]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[12]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[11]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[10]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[7]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[6]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[5]\n\t" - "ldr r8, [%[a], #56]\n\t" 
- "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[4]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #16]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[15] * B[5]\n\t" - "ldr r9, [%[b], #20]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[14] * B[6]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[7]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[8]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[9]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[10]\n\t" - "ldr r11, [%[a], #40]\n\t" - "ldr r12, [%[b], #40]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[12]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[13]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[14]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, 
r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[15]\n\t" - "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[6] * B[15]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[7] * B[14]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[13]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[12]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[11]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[10]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[9]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[8]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[7]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[6]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #24]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc 
r5, r5, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[15] * B[7]\n\t" - "ldr r9, [%[b], #28]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[8]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[9]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[10]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[11]\n\t" - "ldr r11, [%[a], #44]\n\t" - "ldr r12, [%[b], #44]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[12]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[13]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[14]\n\t" - "ldr r8, [%[a], #32]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[15]\n\t" - "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[8] * B[15]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[9] * B[14]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * 
B[13]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[12]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[11]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[10]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[9]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[8]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #32]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[15] * B[9]\n\t" - "ldr r9, [%[b], #36]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[14] * B[10]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[11]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[12]\n\t" - "ldr r11, [%[a], #48]\n\t" - "ldr r12, [%[b], #48]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[13]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[14]\n\t" - "ldr r8, [%[a], #40]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, 
r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[15]\n\t" - "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[10] * B[15]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[11] * B[14]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[13]\n\t" - "ldr r9, [%[b], #52]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[12]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[11]\n\t" - "ldr r8, [%[a], #56]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[10]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #40]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[15] * B[11]\n\t" - "ldr r9, [%[b], #44]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[14] * B[12]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[13]\n\t" - "ldr r11, [%[a], #52]\n\t" - "ldr r12, [%[b], #52]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[14]\n\t" - "ldr r8, [%[a], #48]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - 
"# A[11] * B[15]\n\t" - "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[12] * B[15]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[13] * B[14]\n\t" - "ldr r9, [%[b], #56]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[13]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[12]\n\t" - "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #48]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[15] * B[13]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[14]\n\t" - "ldr r11, [%[a], #56]\n\t" - "ldr r12, [%[b], #56]\n\t" - "umull r6, r7, r11, r12\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[15]\n\t" - "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #60]\n\t" - "umull r6, r7, r8, r9\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[14] * B[15]\n\t" - "umull r6, r7, r11, r9\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[15] * B[14]\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, r8, r12\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[15] * B[15]\n\t" - "umull r6, r7, r8, r9\n\t" + "sub sp, sp, #0x40\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr 
r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r3, r3, r6\n\t" "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else + "umull r3, r4, r11, r12\n\t" + "mov r5, #0\n\t" +#endif + "str r3, [sp]\n\t" + /* A[0] * B[1] */ + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ + "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #4]\n\t" + /* A[2] * B[0] */ + "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ + "ldr r11, [%[a], #4]\n\t" + "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #8]\n\t" + /* A[0] * B[3] */ + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ + "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl 
r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #12]\n\t" + /* A[4] * B[0] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ + "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ + "ldr r11, [%[a], #8]\n\t" + "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #16]\n\t" + /* A[0] * B[5] */ + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ + "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH 
< 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #20]\n\t" + /* A[6] * B[0] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ + "ldr r11, [%[a], #12]\n\t" + "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #24]\n\t" + /* A[0] * B[7] */ + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ + "ldr r9, 
[%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #28]\n\t" + /* A[8] * B[0] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, 
r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ + "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ + "ldr r11, [%[a], #16]\n\t" + "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] 
*/ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #32]\n\t" + /* A[0] * B[9] */ + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ + "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#endif + /* A[8] * B[1] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #36]\n\t" + /* A[10] * B[0] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, 
r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" 
+ "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ + "ldr r11, [%[a], #20]\n\t" + "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #40]\n\t" + /* A[0] * B[11] */ + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, 
r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b]]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #44]\n\t" + /* A[12] * B[0] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[1] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ + "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, 
r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ + "ldr r11, [%[a], #24]\n\t" + "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ 
+ "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[12] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #48]\n\t" + /* A[0] * B[13] */ + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[12] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[11] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ + "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+#endif + /* A[8] * B[5] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl 
r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[1] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[0] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [sp, #52]\n\t" + /* A[14] * B[0] */ + "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, 
r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[1] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[2] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[3] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ + "ldr r11, [%[a], #28]\n\t" + "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[12] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[13] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[14] */ + "ldr r8, [%[a]]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [sp, #56]\n\t" + /* A[0] * B[15] */ + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[14] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[13] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[12] */ + 
"ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[11] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[6] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[3] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[2] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[1] */ + 
"ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[0] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [sp, #60]\n\t" + /* A[15] * B[1] */ + "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[2] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[3] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[4] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[5] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ + "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" 
+ "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ + "ldr r11, [%[a], #32]\n\t" + "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[12] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[13] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[14] */ + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs 
r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[15] */ + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #64]\n\t" + /* A[2] * B[15] */ + "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[14] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[13] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[12] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[11] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ + "ldr r8, [%[a], 
#36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[5] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[4] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[3] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[2] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #68]\n\t" + /* A[15] * B[3] */ + "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[4] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[5] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[6] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#endif + /* A[11] * B[7] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ + "ldr r11, [%[a], #36]\n\t" + "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl 
r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[12] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[13] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[14] */ + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[15] */ + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #72]\n\t" + /* A[4] * B[15] */ + "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[14] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull 
r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[13] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[12] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[11] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], 
#44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, 
#0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[7] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[6] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[5] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[4] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #76]\n\t" + /* A[15] * B[5] */ + "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[6] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[7] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[8] */ + "ldr 
r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[9] */ + "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ + "ldr r11, [%[a], #40]\n\t" + "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[12] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[13] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[14] */ + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[15] */ + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #80]\n\t" + /* A[6] * B[15] */ + "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[14] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[13] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[12] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[11] */ + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ + "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, 
r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[9] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[8] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[7] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[6] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #84]\n\t" + /* A[15] * B[7] */ + "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[8] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[9] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[10] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" 
+ "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[11] */ + "ldr r11, [%[a], #44]\n\t" + "ldr r12, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[12] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[13] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[14] */ + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[15] */ + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #88]\n\t" + /* A[8] * B[15] */ + "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[14] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[13] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[12] */ + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[11] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[10] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[9] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[8] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #92]\n\t" 
+ /* A[15] * B[9] */ + "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[10] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[11] */ + "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[12] */ + "ldr r11, [%[a], #48]\n\t" + "ldr r12, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[13] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[14] */ + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[15] */ + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #96]\n\t" + /* A[10] * B[15] */ + "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[14] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[13] */ + "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[12] */ + "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" 
+ "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[11] */ + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[10] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #100]\n\t" + /* A[15] * B[11] */ + "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[12] */ + "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[13] */ + "ldr r11, [%[a], #52]\n\t" + "ldr r12, [%[b], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[14] */ + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[15] */ + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #104]\n\t" + /* A[12] * B[15] */ + "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[14] */ + "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" 
+ "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[13] */ + "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[12] */ + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#endif + "str r3, [%[r], #108]\n\t" + /* A[15] * B[13] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[14] */ + "ldr r11, [%[a], #56]\n\t" + "ldr r12, [%[b], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r11, r12\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + /* 
A[13] * B[15] */ + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#endif + "str r4, [%[r], #112]\n\t" + /* A[14] * B[15] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r11, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[14] */ + "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r6, r7, r8, r12\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#endif + "str r5, [%[r], #116]\n\t" + /* A[15] * B[15] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, r8, r9\n\t" +#endif "str r3, [%[r], #120]\n\t" "str r4, [%[r], #124]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -50289,10 +136452,9 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #64\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", 
"r12", "cc" ); } @@ -50301,112 +136463,521 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + "sub sp, sp, #0x40\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + 
"lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" 
+ "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, 
r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50414,66 +136985,294 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, 
#20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds 
r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50481,80 +137280,370 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, 
[%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, 
r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, 
r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + 
"lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50562,94 +137651,446 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, 
r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, 
r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl 
r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, 
r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50657,108 +138098,522 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[0] * A[12]\n\t" + /* A[0] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, 
r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, 
r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #48]\n\t" - "# A[0] * A[13]\n\t" + /* A[0] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[12]\n\t" + /* A[1] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" 
+ "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + 
"mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc 
r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50766,122 +138621,598 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #52]\n\t" - "# A[0] * A[14]\n\t" + /* A[0] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" 
+ "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[13]\n\t" + /* A[1] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + 
"lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, 
r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #56]\n\t" - "# A[0] * A[15]\n\t" + /* A[0] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl 
r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[14]\n\t" + /* A[1] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" 
+ "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + 
"adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul 
r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50889,115 +139220,560 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #60]\n\t" - "# A[1] * A[15]\n\t" + /* A[1] * A[15] */ "ldr r10, 
[%[a], #60]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[14]\n\t" + /* A[2] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, 
#16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, 
r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #64]\n\t" - "# A[2] * A[15]\n\t" + /* A[2] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull 
r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[14]\n\t" + /* A[3] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" 
+ "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + 
"lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -51005,101 +139781,484 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[3] * A[15]\n\t" + /* A[3] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[14]\n\t" + /* A[4] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], 
#16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #24]\n\t" - 
"umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, 
[%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #72]\n\t" - "# A[4] * A[15]\n\t" + /* A[4] * A[15] 
*/ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[14]\n\t" + /* A[5] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl 
r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -51107,87 +140266,408 @@ static void 
sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #76]\n\t" - "# A[5] * A[15]\n\t" + /* A[5] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[14]\n\t" + /* A[6] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[13] */ "ldr r10, [%[a], 
#52]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" 
+#endif + /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #80]\n\t" 
- "# A[6] * A[15]\n\t" + /* A[6] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[14]\n\t" + /* A[7] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, 
r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -51195,73 +140675,332 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #84]\n\t" - "# A[7] * A[15]\n\t" + /* A[7] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[8] * A[14]\n\t" + /* A[8] * A[14] */ "ldr r10, [%[a], #56]\n\t" - 
"ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* 
A[10] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #88]\n\t" - "# A[8] * 
A[15]\n\t" + /* A[8] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[9] * A[14]\n\t" + /* A[9] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -51269,59 +141008,256 @@ static void sp_1024_sqr_16(sp_digit* r, const 
sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #92]\n\t" - "# A[9] * A[15]\n\t" + /* A[9] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[10] * A[14]\n\t" + /* A[10] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], 
#44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[12] */ "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #96]\n\t" - "# A[10] * A[15]\n\t" + /* A[10] * A[15] */ "ldr r10, [%[a], #60]\n\t" 
- "ldr r8, [%[a], #40]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[11] * A[14]\n\t" + /* A[11] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -51329,101 +141265,428 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #100]\n\t" - "# A[11] * A[15]\n\t" + /* A[11] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[12] * A[14]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr 
r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[13] * A[13]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * A[13] */ "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, 
r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #104]\n\t" - "# A[12] * A[15]\n\t" + /* A[12] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[13] * A[14]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" 
+#endif + /* A[13] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #108]\n\t" - "# A[13] * A[15]\n\t" + /* A[13] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, 
r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[14] * A[14]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[14] * A[14] */ "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #112]\n\t" - "# A[14] * A[15]\n\t" + /* A[14] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + 
"adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #116]\n\t" - "# A[15] * A[15]\n\t" + /* A[15] * A[15] */ "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adc r3, r3, r9\n\t" +#endif "str r2, [%[r], #120]\n\t" "str r3, [%[r], #124]\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "ldm sp!, {r2, r3, r4, r8}\n\t" - "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], 
%[r], #64\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + "ldm sp!, {r2, r3, r4, r8}\n\t" + "stm %[r]!, {r2, r3, r4, r8}\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" ); } @@ -51433,84 +141696,48 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str 
r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a into a. (a -= b) @@ -51518,146 +141745,74 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. 
*/ -static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "ldr r8, [%[b], #8]\n\t" - "ldr r9, [%[b], #12]\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "subs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #0]\n\t" - "str r3, [%[a], #4]\n\t" - "str r4, [%[a], #8]\n\t" - "str r5, [%[a], #12]\n\t" - "ldr r2, [%[a], #16]\n\t" - "ldr r3, [%[a], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "ldr r8, [%[b], #24]\n\t" - "ldr r9, [%[b], #28]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #16]\n\t" - "str r3, [%[a], #20]\n\t" - "str r4, [%[a], #24]\n\t" - "str r5, [%[a], #28]\n\t" - "ldr r2, [%[a], #32]\n\t" - "ldr r3, [%[a], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "ldr r8, [%[b], #40]\n\t" - "ldr r9, [%[b], #44]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #32]\n\t" - "str r3, [%[a], #36]\n\t" - "str r4, [%[a], #40]\n\t" - "str r5, [%[a], #44]\n\t" - "ldr r2, [%[a], #48]\n\t" - "ldr r3, [%[a], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #48]\n\t" 
- "ldr r7, [%[b], #52]\n\t" - "ldr r8, [%[b], #56]\n\t" - "ldr r9, [%[b], #60]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #48]\n\t" - "str r3, [%[a], #52]\n\t" - "str r4, [%[a], #56]\n\t" - "str r5, [%[a], #60]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r3, [%[a], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "ldr r8, [%[b], #72]\n\t" - "ldr r9, [%[b], #76]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #64]\n\t" - "str r3, [%[a], #68]\n\t" - "str r4, [%[a], #72]\n\t" - "str r5, [%[a], #76]\n\t" - "ldr r2, [%[a], #80]\n\t" - "ldr r3, [%[a], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "ldr r8, [%[b], #88]\n\t" - "ldr r9, [%[b], #92]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #80]\n\t" - "str r3, [%[a], #84]\n\t" - "str r4, [%[a], #88]\n\t" - "str r5, [%[a], #92]\n\t" - "ldr r2, [%[a], #96]\n\t" - "ldr r3, [%[a], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "ldr r8, [%[b], #104]\n\t" - "ldr r9, [%[b], #108]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #96]\n\t" - "str r3, [%[a], #100]\n\t" - "str r4, [%[a], #104]\n\t" - "str r5, [%[a], #108]\n\t" - "ldr 
r2, [%[a], #112]\n\t" - "ldr r3, [%[a], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "ldr r8, [%[b], #120]\n\t" - "ldr r9, [%[b], #124]\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "str r2, [%[a], #112]\n\t" - "str r3, [%[a], #116]\n\t" - "str r4, [%[a], #120]\n\t" - "str r5, [%[a], #124]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -51666,148 +141821,76 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[b], #0]\n\t" - "ldr r9, [%[b], #4]\n\t" - "ldr r10, [%[b], #8]\n\t" - "ldr r14, [%[b], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[b], #16]\n\t" - "ldr r9, [%[b], #20]\n\t" - "ldr r10, [%[b], #24]\n\t" - "ldr r14, [%[b], #28]\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[b], #32]\n\t" - "ldr r9, [%[b], #36]\n\t" - "ldr r10, [%[b], #40]\n\t" - "ldr r14, [%[b], #44]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr 
r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[b], #48]\n\t" - "ldr r9, [%[b], #52]\n\t" - "ldr r10, [%[b], #56]\n\t" - "ldr r14, [%[b], #60]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[a], #72]\n\t" - "ldr r7, [%[a], #76]\n\t" - "ldr r8, [%[b], #64]\n\t" - "ldr r9, [%[b], #68]\n\t" - "ldr r10, [%[b], #72]\n\t" - "ldr r14, [%[b], #76]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - "str r6, [%[r], #72]\n\t" - "str r7, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[a], #88]\n\t" - "ldr r7, [%[a], #92]\n\t" - "ldr r8, [%[b], #80]\n\t" - "ldr r9, [%[b], #84]\n\t" - "ldr r10, [%[b], #88]\n\t" - "ldr r14, [%[b], #92]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" - "str r6, [%[r], #88]\n\t" - "str r7, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[a], #104]\n\t" - "ldr r7, [%[a], #108]\n\t" - "ldr r8, [%[b], #96]\n\t" - "ldr r9, [%[b], #100]\n\t" - "ldr r10, [%[b], #104]\n\t" - "ldr r14, [%[b], #108]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs 
r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" - "str r6, [%[r], #104]\n\t" - "str r7, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[a], #120]\n\t" - "ldr r7, [%[a], #124]\n\t" - "ldr r8, [%[b], #112]\n\t" - "ldr r9, [%[b], #116]\n\t" - "ldr r10, [%[b], #120]\n\t" - "ldr r14, [%[b], #124]\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" - "str r6, [%[r], #120]\n\t" - "str r7, [%[r], #124]\n\t" - "adc %[c], r12, r12\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "mov %[r], #0\n\t" + "adc %[r], %[r], #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -51853,7 +141936,7 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, sp_digit z1[32]; sp_digit a1[16]; sp_digit b1[16]; - sp_digit z2[32]; + sp_digit* z2 = r + 32; sp_digit u; sp_digit ca; sp_digit cb; @@ -51861,18 +141944,71 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, ca = sp_1024_add_16(a1, a, &a[16]); cb = sp_1024_add_16(b1, b, &b[16]); u = ca & cb; - sp_1024_mul_16(z1, a1, b1); + sp_1024_mul_16(z2, &a[16], &b[16]); sp_1024_mul_16(z0, a, b); - sp_1024_mask_16(r + 32, a1, 0 - cb); - sp_1024_mask_16(b1, b1, 0 - ca); - u += sp_1024_add_16(r + 32, r + 32, b1); - u += sp_1024_sub_in_place_32(z1, z2); + sp_1024_mul_16(z1, a1, b1); + u += sp_1024_sub_in_place_32(z1, z0); + u += sp_1024_sub_in_place_32(z1, z2); + sp_1024_mask_16(a1, a1, 0 - cb); + u += sp_1024_add_16(z1 + 16, z1 + 16, a1); + sp_1024_mask_16(b1, b1, 0 - ca); + u += sp_1024_add_16(z1 + 16, z1 + 16, b1); + u += sp_1024_add_32(r + 16, r + 16, z1); - r[48] = u; - XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - (void)sp_1024_add_32(r + 32, r + 32, z2); + XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (16 - 1)); + a1[0] = u; + (void)sp_1024_add_16(r + 48, r + 48, a1); +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + ); + return (uint32_t)(size_t)r; } /* Square a and put result in r. 
(r = a * a) @@ -51883,23 +142019,32 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[32]; + sp_digit* z2 = r + 32; sp_digit z1[32]; - sp_digit a1[16]; + sp_digit* a1 = z1; + sp_digit zero[16]; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 16); + + mask = sp_1024_sub_16(a1, a, &a[16]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_1024_sub_16(a1, p1, p2); - u = sp_1024_add_16(a1, a, &a[16]); - sp_1024_sqr_16(z1, a1); sp_1024_sqr_16(z2, &a[16]); sp_1024_sqr_16(z0, a); - sp_1024_mask_16(r + 32, a1, 0 - u); - u += sp_1024_add_16(r + 32, r + 32, r + 32); - u += sp_1024_sub_in_place_32(z1, z2); - u += sp_1024_sub_in_place_32(z1, z0); - u += sp_1024_add_32(r + 16, r + 16, z1); - r[48] = u; - XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - (void)sp_1024_add_32(r + 32, r + 32, z2); + sp_1024_sqr_16(z1, a1); + + u = 0; + u -= sp_1024_sub_in_place_32(z1, z2); + u -= sp_1024_sub_in_place_32(z1, z0); + u += sp_1024_sub_in_place_32(r + 16, z1); + zero[0] = u; + (void)sp_1024_add_16(r + 48, r + 48, zero); } #else @@ -51909,57 +142054,196 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" - "mov r5, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x100\n\t" + "ldr lr, [%[a]]\n\t" + "ldr r11, [%[b]]\n\t" + "umull r8, r6, lr, r11\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_1024_mul_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_1024_mul_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "ldr lr, [%[a], r4]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "cmp r3, r4\n\t" + "bgt L_sp_1024_mul_32_inner_done_%=\n\t" + "blt L_sp_1024_mul_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_1024_mul_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, 
r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf4\n\t" + "ble L_sp_1024_mul_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" + "ldr r11, [%[b], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adc r7, r7, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "add r7, r7, r10\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umlal r6, r7, lr, r11\n\t" +#endif "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_1024_mul_32_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_1024_mul_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -51968,73 +142252,155 @@ static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" - "mov r12, #0\n\t" - "mov r6, #0\n\t" + "sub sp, sp, #0x100\n\t" + "ldr lr, [%[a]]\n\t" + "umull r8, r6, lr, lr\n\t" + "str r8, [sp]\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "mov r5, #4\n\t" + "\n" + "L_sp_1024_sqr_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" - "movcc r3, r12\n\t" + "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "\n" + "L_sp_1024_sqr_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + 
"adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" - "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "bgt L_sp_1024_sqr_32_inner_done_%=\n\t" + "blt L_sp_1024_sqr_32_inner_%=\n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_1024_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf4\n\t" + "ble L_sp_1024_sqr_32_outer_%=\n\t" + "ldr lr, [%[a], #124]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, r10\n\t" +#endif 
"str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" - "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "add r5, r5, #4\n\t" + "str r7, [sp, r5]\n\t" + "\n" + "L_sp_1024_sqr_32_store_%=: \n\t" + "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" + "subs r5, r5, #32\n\t" + "bgt L_sp_1024_sqr_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" ); } @@ -52130,43 +142496,37 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a]]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x80\n\t" + "\n" + "L_sp_1024_sub_in_pkace_32_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[a]], #4\n\t" - "str r4, [%[a]], #4\n\t" - "str r5, [%[a]], #4\n\t" - "str r6, [%[a]], 
#4\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r12, r12\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -52175,204 +142535,176 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_1024_cond_sub_32_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" "and r5, r5, %[m]\n\t" "sbcs r4, r4, r5\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, 
lr, #4\n\t" + "cmp lr, #0x80\n\t" + "blt L_sp_1024_cond_sub_32_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "subs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and 
r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr 
r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "sbcs r4, r4, r5\n\t" - "sbcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + + __asm__ __volatile__ ( + "mov lr, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, 
r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -52380,375 +142712,1376 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; __asm__ __volatile__ ( - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x80\n\t" + "\n" + "L_sp_1024_add_32_word_%=: \n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" + "adc r3, r4, #0\n\t" "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_1024_add_32_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { -#ifdef WOLFSSL_SP_SMALL + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_1024_mul_d_32_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #128\n\t" - "blt 1b\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_1024_mul_d_32_word_%=\n\t" "str r3, [%[r], #128]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r6, 
r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "umull r6, r7, %[b], r8\n\t" - 
"adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds 
r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" - "str r4, [%[r], #124]\n\t" - "str r5, [%[r], #128]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +#else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit b asm ("r2") = (sp_digit)b_p; + + __asm__ __volatile__ ( + /* A[0] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umull r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[1] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, 
r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[2] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[3] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[4] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], 
#16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[5] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[6] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[7] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[8] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[9] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[10] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[11] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[12] * B */ + "ldm %[a]!, 
{r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[13] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[14] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl 
r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[15] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[16] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[17] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, 
r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[18] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[19] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" 
+#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[20] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[21] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[22] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[23] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[24] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[25] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[26] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[27] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[28] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "mov r3, #0\n\t" + /* A[29] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else + "umlal r5, r3, %[b], r8\n\t" +#endif + "stm %[r]!, {r5}\n\t" + "mov r4, #0\n\t" + /* A[30] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else + "umlal r3, r4, %[b], r8\n\t" +#endif + "stm %[r]!, {r3}\n\t" + "mov r5, #0\n\t" + /* A[31] * B */ + "ldm %[a]!, {r8}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umlal r4, r5, %[b], r8\n\t" +#endif + "stm %[r]!, {r4}\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ -static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { - sp_digit r = 0; + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + 
"lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +{ + register sp_digit d1 asm ("r0") = (sp_digit)d1_p; + register sp_digit d0 asm ("r1") = (sp_digit)d0_p; + register sp_digit div asm ("r2") = (sp_digit)div_p; + + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_1024_word_32_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_1024_word_32_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, 
r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), 
[d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. @@ -52786,400 +144119,395 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) { - sp_digit r = -1; - sp_digit one = 1; - + register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #124\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0x7c\n\t" + "\n" + "L_sp_1024_cmp_32_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_1024_cmp_32_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, 
r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, 
r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + 
"movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, 
lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - 
"subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" 
"it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + 
"subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -53191,8 +144519,8 @@ static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. 
* returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -53200,12 +144528,15 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig (void)m; - div = d[31]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); - for (i=31; i>=0; i--) { - sp_digit hi = t1[32 + i] - (t1[32 + i] == div); + r1 = sp_1024_cmp_32(&t1[32], d) >= 0; + sp_1024_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); + for (i = 31; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[32 + i] == div); + sp_digit hi = t1[32 + i] + mask; r1 = div_1024_word_32(hi, t1[32 + i - 1], div); + r1 |= mask; sp_1024_mul_d_32(t2, d, r1); t1[32 + i] += sp_1024_sub_in_place_32(&t1[i], t2); @@ -53229,7 +144560,8 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_1024_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_1024_mod_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_1024_div_32(a, m, NULL, r); } @@ -53262,7 +144594,8 @@ static int sp_1024_point_new_ex_32(void* heap, sp_point_1024* sp, { int ret = MP_OKAY; (void)heap; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) (void)sp; *p = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); #else @@ -53274,7 +144607,8 @@ static int sp_1024_point_new_ex_32(void* heap, sp_point_1024* sp, return ret; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* Allocate memory for point and return error. */ #define sp_1024_point_new_32(heap, sp, p) sp_1024_point_new_ex_32((heap), NULL, &(p)) #else @@ -53291,7 +144625,8 @@ static int sp_1024_point_new_ex_32(void* heap, sp_point_1024* sp, */ static void sp_1024_point_free_32(sp_point_1024* p, int clear, void* heap) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* If valid pointer then clear point data if requested and free data. 
*/ if (p != NULL) { if (clear != 0) { @@ -53318,20 +144653,23 @@ static void sp_1024_point_free_32(sp_point_1024* p, int clear, void* heap) static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 32 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 31); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 31); } #elif DIGIT_BIT > 32 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffff; s = 32U - s; @@ -53361,12 +144699,12 @@ static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 32) { r[j] &= 0xffffffff; @@ -53501,333 +144839,1502 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) return err; } +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) /* Reduce the number back to 1024 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { - sp_digit ca = 0; + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" - "umull r6, r7, r8, r11\n\t" +#if !(defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_1024_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" - "adc r4, r7, #0\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - 
"ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, 
r5\n\t" - "str r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], 
#32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + 
"lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, 
r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, 
r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, 
r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr 
r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + 
"mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, 
r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += 
m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) + "ldr r11, [%[m], #124]\n\t" +#else "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" - "umull r6, r7, r8, r7\n\t" +#endif + "ldr r10, [%[a], #124]\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" - "ldr r9, [%[a], #128]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #128]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #128\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - "ldr r6, [%[m], #124]\n\t" - "subs r9, r6, r9\n\t" - "neg %[ca], %[ca]\n\t" - "sbc r9, r9, r9\n\t" - "orr %[ca], %[ca], r9\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", 
"r10", "r14", "r12", "r11" + "cmp r9, #0x80\n\t" + "blt L_sp_1024_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "ldr r6, [%[m], #124]\n\t" + "subs r10, r6, r10\n\t" + "neg r3, r3\n\t" + "sbc r10, r10, r10\n\t" + "orr r3, r3, r10\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" ); - - sp_1024_cond_sub_32(a - 32, a, m, ca); + sp_1024_cond_sub_32(a - 32, a, m, mp); } +#elif defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) +/* Reduce the number back to 1024 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + "ldr r11, [%[m]]\n\t" + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_1024_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ + "mov r5, #0\n\t" + "umlal r12, r5, r8, r11\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" + "mov r4, #0\n\t" + "umlal lr, r4, r8, r7\n\t" + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" + "mov r5, #0\n\t" + "umlal lr, r5, r8, r7\n\t" + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc 
r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, 
[%[a], #52]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds 
r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umlal r10, r4, r8, r7\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umlal r10, r5, r8, r7\n\t" + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, 
r3\n\t" + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_1024_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "ldr r6, [%[m], #124]\n\t" + "subs r10, r6, r10\n\t" + "neg r3, r3\n\t" + "sbc r10, r10, r10\n\t" + "orr r3, r3, r10\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_1024_cond_sub_32(a - 32, a, m, mp); +} + +#else +/* Reduce the number back to 1024 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +{ + register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; + register sp_digit mp asm ("r2") = (sp_digit)mp_p; + + __asm__ __volatile__ ( + /* i = 0 */ + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "\n" + "L_sp_1024_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r11, %[mp], r4\n\t" + /* a[i+0] += m[0] * mu */ + "ldr r10, [%[m]]\n\t" + "mov r3, #0\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r10, [%[m], #4]\n\t" + "mov r4, r5\n\t" + "umaal r4, r3, r11, r10\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r10, [%[m], #8]\n\t" + "mov r5, r6\n\t" + "umaal r5, r3, r11, r10\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r10, [%[m], #12]\n\t" + "mov r6, r7\n\t" + "umaal r6, r3, r11, r10\n\t" + /* a[i+4] += m[4] * mu 
*/ + "ldr r10, [%[m], #16]\n\t" + "mov r7, r8\n\t" + "umaal r7, r3, r11, r10\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r10, [%[m], #20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umaal r8, r3, r11, r10\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r10, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #24]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r10, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #28]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r10, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #32]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r10, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #36]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r10, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #40]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r10, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #44]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r10, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #48]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r10, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #52]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r10, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #56]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r10, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #60]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r10, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #64]\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r10, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #68]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r10, [%[m], 
#72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #72]\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r10, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #76]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r10, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #80]\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r10, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #84]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r10, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #88]\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r10, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #92]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r10, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #96]\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r10, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #100]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r10, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #104]\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r10, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #108]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r10, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #112]\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r10, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #116]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r10, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umaal r9, r3, r11, r10\n\t" + "str r9, [%[a], #120]\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r10, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umaal r9, r3, r11, r10\n\t" + "ldr r11, 
[%[a], #128]\n\t" + "mov r10, #0\n\t" + "umaal r3, r11, r10, r10\n\t" + "str r9, [%[a], #124]\n\t" + "adds r3, r3, lr\n\t" + "adc lr, r11, #0\n\t" + "str r3, [%[a], #128]\n\t" + /* i += 1 */ + "add r12, r12, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_1024_mont_reduce_32_word_%=\n\t" + /* Loop Done */ + "str r4, [%[a]]\n\t" + "str r5, [%[a], #4]\n\t" + "str r6, [%[a], #8]\n\t" + "str r7, [%[a], #12]\n\t" + "str r8, [%[a], #16]\n\t" + "ldr r10, [%[m], #124]\n\t" + "subs r3, r10, r3\n\t" + "neg lr, lr\n\t" + "sbc r3, r3, r3\n\t" + "orr lr, lr, r3\n\t" + "mov %[mp], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); + sp_1024_cond_sub_32(a - 32, a, m, mp); +} + +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -53835,9 +146342,9 @@ SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_1024_mont_mul_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_1024_mul_32(r, a, b); @@ -53849,9 +146356,9 @@ static void sp_1024_mont_mul_32(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_1024_mont_sqr_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_1024_sqr_32(r, a); @@ -53891,11 +146398,14 @@ static const uint8_t p1024_mod_minus_2[] = { static void sp_1024_mont_inv_32(sp_digit* r, const sp_digit* a, sp_digit* td) { - sp_digit* t = td; + sp_digit* t = &td[32 * 2 * 32]; int i; int j; - sp_digit table[32][2 * 32]; + sp_digit* table[32]; + for (i = 0; i < 32; i++) { + table[i] = &td[2 * 32 * i]; + } XMEMCPY(table[0], a, sizeof(sp_digit) * 32); for (i = 1; i < 6; i++) { sp_1024_mont_sqr_32(table[0], table[0], p1024_mod, p1024_mp_mod); @@ -53942,27 +146452,24 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_32(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 32, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); - sp_1024_cond_sub_32(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->x, r->x, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->x); /* y /= z^3 */ sp_1024_mont_mul_32(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 32, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); - sp_1024_cond_sub_32(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->y, r->y, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -53972,166 +146479,169 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. 
* m Modulus (prime). */ -static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm 
%[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldr r14, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r14, r14, r7\n\t" - "neg r12, r12\n\t" - "sbc r14, r14, r14\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r14\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, 
r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" 
+ "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldr r11, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r11, r11, r7\n\t" + "neg r12, r12\n\t" + "sbc r11, r11, r11\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r12, r11\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + 
"ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" ); } @@ -54141,149 +146651,152 @@ static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to double in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adds r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r4, r4, r14\n\t" - "neg r12, r12\n\t" - "sbc r4, r4, r4\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r4\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - 
"sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, 
{r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldr r4, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r4, r4, r11\n\t" + "neg r12, r12\n\t" + "sbc r4, r4, r4\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r12, r4\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, 
r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, 
r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12", "cc" ); } @@ -54293,304 +146806,307 @@ static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adds r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, 
r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r4, r4, r14\n\t" - "neg r12, r12\n\t" - "sbc r4, r4, r4\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r4\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and 
r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" - "sub %[m], %[m], #128\n\t" - "sub %[a], %[a], #128\n\t" - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adds r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm 
%[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldr r7, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r7, r7, r14\n\t" - "neg r12, r12\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r7\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "subs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm 
%[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbc r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, 
r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldr r4, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r4, r4, r11\n\t" + "neg r12, r12\n\t" + "sbc r4, r4, r4\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r12, r4\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + 
"and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sub %[r], %[r], #0x80\n\t" + "sub %[m], %[m], #0x80\n\t" + "sub %[a], %[a], #0x80\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs 
r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldr r7, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r7, r7, r11\n\t" + "neg r12, r12\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r12, r7\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm 
%[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12", "cc" ); } @@ -54601,164 +147117,167 @@ static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, 
r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc r12, r12, r12\n\t" - "sub %[r], %[r], #128\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs 
r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, 
r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc r12, r12, r12\n\t" + "sub %[r], %[r], #0x80\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, 
r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" ); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -54767,214 +147286,188 @@ static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { - sp_digit c = 0; + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r5, [%[b], r8]\n\t" + "mov lr, #0\n\t" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_1024_cond_add_32_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_1024_cond_add_32_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" ); -#else - __asm__ __volatile__ ( - - "mov r9, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr 
r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r6, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #48]\n\t" - "str r6, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r6, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs 
r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r6, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #80]\n\t" - "str r6, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r6, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #96]\n\t" - "str r6, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r6, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #112]\n\t" - "str r6, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "adc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8", "r9" - ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } -static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a) 
+#else +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit m asm ("r3") = (sp_digit)m_p; + __asm__ __volatile__ ( - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" + "mov r8, #0\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm 
%[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "and r6, r6, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ +static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const 
sp_digit* a asm ("r1") = (const sp_digit*)a_p; + + __asm__ __volatile__ ( + "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" "ldr r2, [%[a], #12]\n\t" @@ -55095,9 +147588,9 @@ static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a) "lsr r3, r3, #1\n\t" "str r2, [%[r], #120]\n\t" "str r3, [%[r], #124]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4" + : "memory", "r2", "r3", "r4", "cc" ); } @@ -55107,7 +147600,7 @@ static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -55122,6 +147615,61 @@ static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_32(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_32(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_32(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_mont_div2_32(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_32(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_32(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_32_ctx { int state; @@ -55132,7 +147680,14 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { sp_digit* z; } sp_1024_proj_point_dbl_32_ctx; -static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; @@ -55206,7 +147761,7 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_32(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_32(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -55256,255 +147811,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_32(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_32(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_32(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - 
/* Y = Y - X */ - sp_1024_mont_sub_32(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_32(y, y, t2, p1024_mod); -} - -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldr r4, [%[a]], #4\n\t" - "ldr r5, [%[a]], #4\n\t" - "ldr r6, [%[a]], #4\n\t" - "ldr r7, [%[a]], #4\n\t" - "ldr r8, [%[b]], #4\n\t" - "ldr r9, [%[b]], #4\n\t" - "ldr r10, [%[b]], #4\n\t" - "ldr r14, [%[b]], #4\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r]], #4\n\t" - "str r5, [%[r]], #4\n\t" - "str r6, [%[r]], #4\n\t" - "str r7, [%[r]], #4\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" - ); - - return c; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "ldr r8, [%[b], #4]\n\t" - "ldr r9, [%[b], #8]\n\t" - "ldr r10, [%[b], #12]\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "ldr r8, [%[b], #20]\n\t" - "ldr r9, [%[b], #24]\n\t" - "ldr r10, [%[b], #28]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r7, [%[b], #32]\n\t" - "ldr r8, [%[b], #36]\n\t" - "ldr r9, [%[b], #40]\n\t" - "ldr r10, [%[b], #44]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #32]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[a], #56]\n\t" - "ldr r6, [%[a], #60]\n\t" - "ldr r7, [%[b], #48]\n\t" - "ldr r8, [%[b], #52]\n\t" - "ldr r9, [%[b], #56]\n\t" - "ldr r10, [%[b], #60]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #48]\n\t" - "str r4, [%[r], #52]\n\t" - "str r5, [%[r], #56]\n\t" - "str r6, [%[r], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[a], #72]\n\t" - "ldr r6, [%[a], #76]\n\t" - "ldr r7, [%[b], 
#64]\n\t" - "ldr r8, [%[b], #68]\n\t" - "ldr r9, [%[b], #72]\n\t" - "ldr r10, [%[b], #76]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #64]\n\t" - "str r4, [%[r], #68]\n\t" - "str r5, [%[r], #72]\n\t" - "str r6, [%[r], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[a], #88]\n\t" - "ldr r6, [%[a], #92]\n\t" - "ldr r7, [%[b], #80]\n\t" - "ldr r8, [%[b], #84]\n\t" - "ldr r9, [%[b], #88]\n\t" - "ldr r10, [%[b], #92]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #80]\n\t" - "str r4, [%[r], #84]\n\t" - "str r5, [%[r], #88]\n\t" - "str r6, [%[r], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[a], #104]\n\t" - "ldr r6, [%[a], #108]\n\t" - "ldr r7, [%[b], #96]\n\t" - "ldr r8, [%[b], #100]\n\t" - "ldr r9, [%[b], #104]\n\t" - "ldr r10, [%[b], #108]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #96]\n\t" - "str r4, [%[r], #100]\n\t" - "str r5, [%[r], #104]\n\t" - "str r6, [%[r], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[a], #120]\n\t" - "ldr r6, [%[a], #124]\n\t" - "ldr r7, [%[b], #112]\n\t" - "ldr r8, [%[b], #116]\n\t" - "ldr r9, [%[b], #120]\n\t" - "ldr r10, [%[b], #124]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "str r3, [%[r], #112]\n\t" - "str r4, [%[r], #116]\n\t" - "str r5, [%[r], #120]\n\t" - "str r6, [%[r], #124]\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - - return c; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Compare two numbers to determine if they are equal. * Constant time implementation. 
* @@ -55527,6 +147833,21 @@ static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b) (a[30] ^ b[30]) | (a[31] ^ b[31])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_32(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -55534,6 +147855,84 @@ static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*32; + sp_digit* t2 = t + 4*32; + sp_digit* t3 = t + 6*32; + sp_digit* t4 = t + 8*32; + sp_digit* t5 = t + 10*32; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(t2, t1) & + sp_1024_cmp_equal_32(t4, t3)) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_1024_mont_sub_32(t2, t2, 
t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_32_ctx { @@ -55546,11 +147945,19 @@ typedef struct sp_1024_proj_point_add_32_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_1024_proj_point_add_32_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -55569,261 +147976,168 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*32; - ctx->t3 = t + 4*32; - ctx->t4 = t + 6*32; - ctx->t5 = t + 8*32; + ctx->t6 = t; + ctx->t1 = t + 2*32; + ctx->t2 = t + 4*32; + ctx->t3 = t + 6*32; + ctx->t4 = t + 8*32; + ctx->t5 = t + 10*32; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); - sp_1024_norm_32(ctx->t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_1024_proj_point_dbl_32_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_1024)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<32; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<32; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<32; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; + break; + case 2: + sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, 
p1024_mp_mod); + ctx->state = 3; + break; + case 3: + sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_32(ctx->t1, ctx->t1, ctx->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_32(ctx->t4, ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_32(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_32(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(ctx->t3, ctx->t3, ctx->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(ctx->t5, 
ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_32(ctx->z, ctx->z, ctx->t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_dbl_32(ctx->t1, ctx->y, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t1, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->x, 
p1024_mod); + sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_32(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* t3 = t + 4*32; - sp_digit* t4 = t + 6*32; - sp_digit* t5 = t + 8*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_1024* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<32; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<32; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<32; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t1, t1, x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(t3, t3, y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, t2, p1024_mod, p1024_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, 
p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_dbl_32(t1, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t1, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); - } -} - /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. * @@ -55844,12 +148158,12 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, static int sp_1024_ecc_mulmod_fast_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; sp_digit* tmp = NULL; #else sp_point_1024 t[16 + 1]; - sp_digit tmp[2 * 32 * 5]; + sp_digit tmp[2 * 32 * 37]; #endif sp_point_1024* rt = NULL; sp_digit n; @@ -55862,13 +148176,13 @@ static int sp_1024_ecc_mulmod_fast_32(sp_point_1024* r, const sp_point_1024* g, (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * (16 + 1), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 37, heap, DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; @@ -55945,21 +148259,21 @@ static int sp_1024_ecc_mulmod_fast_32(sp_point_1024* r, const sp_point_1024* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) #endif { - ForceZero(tmp, sizeof(sp_digit) * 2 * 32 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + ForceZero(tmp, sizeof(sp_digit) * 2 * 32 * 37); + #ifdef 
WOLFSSL_SP_SMALL_STACK XFREE(tmp, heap, DYNAMIC_TYPE_ECC); #endif } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) #endif { ForceZero(t, sizeof(sp_point_1024) * 17); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif } @@ -55975,7 +148289,7 @@ static int sp_1024_ecc_mulmod_fast_32(sp_point_1024* r, const sp_point_1024* g, * n Number of times to double * t Temporary ordinate data. */ -static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, +static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_digit* t) { sp_digit* w = t; @@ -55986,6 +148300,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -55996,7 +148311,6 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, /* W = Z^4 */ sp_1024_mont_sqr_32(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_32(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -56014,9 +148328,12 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_32(t2, b, x, p1024_mod); + sp_1024_mont_dbl_32(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_32(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_32(t1, t1, p1024_mod, p1024_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -56026,9 +148343,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, sp_1024_mont_mul_32(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_32(y, b, x, p1024_mod); - sp_1024_mont_mul_32(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(y, y, p1024_mod); + 
sp_1024_mont_mul_32(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_32(y, y, t1, p1024_mod); } #ifndef WOLFSSL_SP_SMALL @@ -56043,18 +148358,19 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_32(t2, b, x, p1024_mod); + sp_1024_mont_dbl_32(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_32(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_32(t1, t1, p1024_mod, p1024_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_32(y, b, x, p1024_mod); - sp_1024_mont_mul_32(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(y, y, p1024_mod); + sp_1024_mont_mul_32(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_32(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_32(y, y, p1024_mod); + sp_1024_mont_div2_32(y, y, p1024_mod); } /* Convert the projective point to affine. @@ -56097,76 +148413,75 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* t3 = t + 4*32; - sp_digit* t4 = t + 6*32; - sp_digit* t5 = t + 8*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*32; + sp_digit* t6 = t + 4*32; + sp_digit* t1 = t + 6*32; + sp_digit* t4 = t + 8*32; + sp_digit* t5 = t + 10*32; - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(p->x, t2) & + sp_1024_cmp_equal_32(p->y, t4)) { sp_1024_proj_point_dbl_32(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<32; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<32; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<32; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = 
ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ - sp_1024_mont_sub_32(t2, t2, x, p1024_mod); + sp_1024_mont_sub_32(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ - sp_1024_mont_sub_32(t4, t4, y, p1024_mod); + sp_1024_mont_sub_32(t4, t4, p->y, p1024_mod); /* Z3 = H*Z1 */ - sp_1024_mont_mul_32(z, z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_1024_mont_sqr_32(t1, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t3, x, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, t1, t5, p1024_mod); - sp_1024_mont_dbl_32(t1, t3, p1024_mod); - sp_1024_mont_sub_32(x, x, t1, p1024_mod); + sp_1024_mont_sqr_32(t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t3, p->x, t1, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t1, t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(t2, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); + sp_1024_mont_dbl_32(t5, t3, p1024_mod); + sp_1024_mont_sub_32(x, t2, t5, p1024_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_1024_mont_sub_32(t3, t3, x, p1024_mod); sp_1024_mont_mul_32(t3, t3, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, t3, t5, p1024_mod); + sp_1024_mont_mul_32(t1, t1, p->y, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, t3, t1, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for 
(i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -56186,7 +148501,7 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, sp_table_entry_1024* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; #else sp_point_1024 t[3]; @@ -56199,7 +148514,7 @@ static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -56254,7 +148569,7 @@ static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -56283,12 +148598,12 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g const sp_table_entry_1024* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* rt = NULL; sp_digit* t = NULL; #else sp_point_1024 rt[2]; - sp_digit t[2 * 32 * 5]; + sp_digit t[2 * 32 * 37]; #endif sp_point_1024* p = NULL; int i; @@ -56303,13 +148618,13 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = 
(sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 37, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -56354,7 +148669,7 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -56397,7 +148712,7 @@ static THREAD_LS_T int sp_cache_1024_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache) @@ -56468,23 +148783,36 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 32 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 32 * 38]; +#endif sp_cache_1024_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_1024 == 0) { - wc_InitMutex(&sp_cache_1024_lock); - initCacheMutex_1024 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 38, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_1024 == 0) { + wc_InitMutex(&sp_cache_1024_lock); + initCacheMutex_1024 = 1; + } + if (wc_LockMutex(&sp_cache_1024_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_1024_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -56505,6 +148833,9 @@ static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -56525,7 +148856,7 @@ static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, sp_table_entry_1024* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; #else sp_point_1024 t[3]; @@ -56538,7 +148869,7 @@ static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, DYNAMIC_TYPE_ECC); if (t 
== NULL) @@ -56593,7 +148924,7 @@ static int sp_1024_gen_stripe_table_32(const sp_point_1024* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -56622,12 +148953,12 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g const sp_table_entry_1024* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* rt = NULL; sp_digit* t = NULL; #else sp_point_1024 rt[2]; - sp_digit t[2 * 32 * 5]; + sp_digit t[2 * 32 * 37]; #endif sp_point_1024* p = NULL; int i; @@ -56642,13 +148973,13 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 37, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -56693,7 +149024,7 @@ static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -56736,7 +149067,7 @@ static THREAD_LS_T int sp_cache_1024_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache) @@ -56807,23 +149138,36 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 32 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 32 * 38]; +#endif sp_cache_1024_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_1024 == 0) { - wc_InitMutex(&sp_cache_1024_lock); - initCacheMutex_1024 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 38, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_1024 == 0) { + wc_InitMutex(&sp_cache_1024_lock); + initCacheMutex_1024 = 1; + } + if (wc_LockMutex(&sp_cache_1024_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_1024_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -56844,6 +149188,9 @@ static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -56862,7 +149209,7 @@ static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -56871,7 +149218,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK 
point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -56894,7 +149241,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_1024_point_to_ecc_point_32(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -60501,7 +152848,7 @@ static int sp_1024_ecc_mulmod_base_32(sp_point_1024* r, const sp_digit* k, */ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -60510,7 +152857,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -60532,7 +152879,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_1024_point_to_ecc_point_32(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -60546,7 +152893,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -60556,25 +152903,25 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else sp_point_1024 point[2]; - sp_digit k[32 + 32 * 2 * 5]; + sp_digit k[32 + 32 * 2 * 37]; #endif sp_point_1024* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (32 + 32 * 2 * 5), + sizeof(sp_digit) * (32 + 32 * 2 * 37), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -60610,7 +152957,7 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, err = sp_1024_point_to_ecc_point_32(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -60633,12 +152980,12 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* t = NULL; #else sp_point_1024 point[1]; - sp_digit t[5 * 2 * 32]; + sp_digit t[38 * 2 * 32]; #endif int err = MP_OKAY; @@ -60654,7 +153001,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { 
point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); @@ -60662,7 +153009,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = MEMORY_E; } if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 32, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 2 * 32, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -60678,7 +153025,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, *len = sizeof(sp_table_entry_1024) * 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -60736,7 +153083,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -60745,7 +153092,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) { @@ -60774,7 +153121,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, err = sp_1024_point_to_ecc_point_32(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -60784,7 +153131,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, return err; } -/* Multiply p* in projective co-ordinates by q*. +/* Multiply p* in projective coordinates by q*. 
* * r.x = p.x - (p.y * q.y) * r.y = (p.x * q.y) + p.y @@ -60810,7 +153157,7 @@ static void sp_1024_proj_mul_qx1_32(sp_digit* px, sp_digit* py, sp_1024_mont_add_32(py, t1, py, p1024_mod); } -/* Square p* in projective co-ordinates. +/* Square p* in projective coordinates. * * px' = (p.x + p.y) * (p.x - p.y) = p.x^2 - p.y^2 * py' = 2 * p.x * p.y @@ -60849,8 +153196,8 @@ static void sp_1024_proj_sqr_32(sp_digit* px, sp_digit* py, sp_digit* t) */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; @@ -60858,7 +153205,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) sp_digit* b; sp_digit* e; #else - sp_digit t[4 * 2 * 32]; + sp_digit t[36 * 2 * 32]; sp_digit tx[2 * 32]; sp_digit ty[2 * 32]; sp_digit b[2 * 32]; @@ -60869,9 +153216,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) int bits; int i; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 32 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 40 * 32 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -60879,13 +153226,13 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 32 * 2; - ty = td + 5 * 32 * 2; - b = td + 6 * 32 * 2; - e = td + 7 * 32 * 2; + tx = td + 36 * 32 * 2; + ty = 
td + 37 * 32 * 2; + b = td + 38 * 32 * 2; + e = td + 39 * 32 * 2; #endif r = ty; @@ -60923,8 +153270,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -62748,14 +155095,14 @@ static const sp_digit sp_1024_g_table[256][32] = { */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; sp_digit* ty; #else - sp_digit t[4 * 2 * 32]; + sp_digit t[36 * 2 * 32]; sp_digit tx[2 * 32]; sp_digit ty[2 * 32]; #endif @@ -62767,9 +155114,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) (void)base; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 32 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 32 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -62777,11 +155124,11 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 32 * 2; - ty = td + 5 * 32 * 2; + tx = td + 36 * 32 * 2; + ty = td + 37 * 32 * 2; #endif r = ty; @@ -62821,8 +155168,8 @@ 
int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -62831,7 +155178,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) } #endif /* WOLFSSL_SP_SMALL */ -/* Multiply p* by q* in projective co-ordinates. +/* Multiply p* by q* in projective coordinates. * * p.x' = (p.x * q.x) - (p.y * q.y) * p.y' = (p.x * q.y) + (p.y * q.x) @@ -62946,7 +155293,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_32(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -62966,7 +155313,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_32(t1, t1, p1024_mod); + sp_1024_mont_div2_32(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -63092,15 +155439,15 @@ static void sp_1024_accumulate_line_add_one_32(sp_digit* vx, sp_digit* vy, int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err = MP_OKAY; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; sp_digit* vy; sp_digit* qx_px; #else - sp_digit t[6 * 2 * 32]; + sp_digit 
t[36 * 2 * 32]; sp_digit vx[2 * 32]; sp_digit vy[2 * 32]; sp_digit qx_px[2 * 32]; @@ -63122,10 +155469,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_32(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 32 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 32 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -63134,12 +155481,12 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 32 * 2; - vy = td + 7 * 32 * 2; - qx_px = td + 8 * 32 * 2; + vx = td + 36 * 32 * 2; + vy = td + 37 * 32 * 2; + qx_px = td + 38 * 32 * 2; #endif r = vy; @@ -63191,8 +155538,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -63384,7 +155731,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_32(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -63422,7 +155769,7 @@ static void 
sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_32(p->y, p->y, p1024_mod); + sp_1024_mont_div2_32(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -63470,8 +155817,8 @@ static const signed char sp_1024_order_op[] = { int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -63481,7 +155828,7 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) sp_digit (*pre_nvy)[64]; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 32]; + sp_digit t[36 * 2 * 32]; sp_digit vx[2 * 32]; sp_digit vy[2 * 32]; sp_digit pre_vx[16][64]; @@ -63507,10 +155854,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_32(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 32 * 2 + 16 * sizeof(sp_point_1024), NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 32 * 2 + 16 * sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -63519,15 +155866,15 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 32 * 2; - vy = td + 7 * 32 * 2; - pre_vx = (sp_digit(*)[64])(td + 8 * 32 * 2); - pre_vy = 
(sp_digit(*)[64])(td + 24 * 32 * 2); - pre_nvy = (sp_digit(*)[64])(td + 40 * 32 * 2); - pre_p = (sp_point_1024*)(td + 56 * 32 * 2); + vx = td + 36 * 32 * 2; + vy = td + 37 * 32 * 2; + pre_vx = (sp_digit(*)[64])(td + 38 * 32 * 2); + pre_vy = (sp_digit(*)[64])(td + 54 * 32 * 2); + pre_nvy = (sp_digit(*)[64])(td + 70 * 32 * 2); + pre_p = (sp_point_1024*)(td + 86 * 32 * 2); #endif r = vy; @@ -63618,8 +155965,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -63702,10 +156049,9 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, static void sp_1024_accum_dbl_calc_lc_32(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 32; - sp_digit* t2 = t + 2 * 2 * 32; - sp_digit* l = t + 4 * 2 * 32; - + sp_digit* t1 = t + 33 * 2 * 32; + sp_digit* t2 = t + 34 * 2 * 32; + sp_digit* l = t + 35 * 2 * 32; /* l = 1 / 2 * p.y */ sp_1024_mont_dbl_32(l, py, p1024_mod); @@ -63747,10 +156093,9 @@ static void sp_1024_accum_add_calc_lc_32(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, const sp_digit* cx, const sp_digit* cy, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 32; - sp_digit* c = t + 2 * 2 * 32; - sp_digit* l = t + 4 * 2 * 32; - + sp_digit* t1 = t + 33 * 2 * 32; + sp_digit* c = t + 34 * 2 * 32; + sp_digit* l = t + 35 * 2 * 32; /* l = 1 / (c.x - p.x) */ sp_1024_mont_sub_32(l, cx, px, p1024_mod); @@ -63861,13 +156206,13 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && 
!defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 32]; + sp_digit t[36 * 2 * 32]; sp_point_1024 pre_p[16]; sp_point_1024 pd; sp_point_1024 cd; @@ -63901,11 +156246,11 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, err = sp_1024_point_new_32(NULL, negd, neg); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 32 * 2 + 16 * sizeof(sp_point_1024), NULL, - DYNAMIC_TYPE_TMP_BUFFER); + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 32 * 2 + 16 * + sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; } @@ -63913,10 +156258,10 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - pre_p = (sp_point_1024*)(td + 6 * 32 * 2); + pre_p = (sp_point_1024*)(td + 36 * 32 * 2); #endif sp_1024_point_from_ecc_point_32(p, pm); @@ -63947,7 +156292,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024)); for (j = 0; j < sp_1024_order_op_pre[1]; j++) { - sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_32(c, c, t); sp_1024_mont_map_32(c, t); @@ -63976,7 +156322,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, } for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) { - sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, c->y, t); + 
sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_32(c, c, t); sp_1024_mont_map_32(c, t); @@ -63986,8 +156333,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, *len = sizeof(sp_table_entry_1024) * 1167; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -64021,8 +156368,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res, const byte* table, word32 len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -64031,7 +156378,7 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, sp_digit (*pre_vy)[64]; sp_digit (*pre_nvy)[64]; #else - sp_digit t[6 * 2 * 32]; + sp_digit t[36 * 2 * 32]; sp_digit vx[2 * 32]; sp_digit vy[2 * 32]; sp_digit pre_vx[16][64]; @@ -64064,10 +156411,10 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_point_new_32(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 32 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 32 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -64076,14 +156423,14 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - 
!defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 32 * 2; - vy = td + 7 * 32 * 2; - pre_vx = (sp_digit(*)[64])(td + 8 * 32 * 2); - pre_vy = (sp_digit(*)[64])(td + 24 * 32 * 2); - pre_nvy = (sp_digit(*)[64])(td + 40 * 32 * 2); + vx = td + 36 * 32 * 2; + vy = td + 37 * 32 * 2; + pre_vx = (sp_digit(*)[64])(td + 38 * 32 * 2); + pre_vy = (sp_digit(*)[64])(td + 54 * 32 * 2); + pre_nvy = (sp_digit(*)[64])(td + 70 * 32 * 2); #endif r = vy; @@ -64181,8 +156528,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -64194,20 +156541,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_32(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* @@ -64219,32 +156552,35 @@ static int sp_1024_iszero_32(const sp_digit* a) static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) { int i; - int j = 0; - word32 s = 0; + int j; + byte* d; - r[0] = 0; - for (i = n-1; i >= 0; i--) { - r[j] |= (((sp_digit)a[i]) << s); - if (s >= 24U) { - r[j] &= 0xffffffff; - s = 32U - s; - if (j + 1 >= size) { - break; - } - r[++j] = (sp_digit)a[i] >> s; - s = 8U - s; - } - else { - s += 8U; - } + for (i = n - 1,j = 0; i >= 3; i -= 4) { + r[j] = ((sp_digit)a[i - 0] << 0) | + ((sp_digit)a[i - 1] << 8) | + ((sp_digit)a[i - 2] << 16) | + ((sp_digit)a[i - 3] << 24); + j++; } - for (j++; j < size; j++) { + if (i >= 0) { + r[j] = 0; + + d = (byte*)r; + switch (i) { + case 2: d[n - 1 - 2] = a[2]; //fallthrough + case 1: d[n - 1 - 1] = a[1]; //fallthrough + case 0: d[n - 1 - 0] = a[0]; //fallthrough + } + j++; + } + + for (; j < size; j++) { r[j] = 0; } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -64254,7 +156590,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) static int sp_1024_ecc_is_point_32(const sp_point_1024* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[32 * 4]; @@ -64263,7 +156599,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, sp_int32 n; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -64273,29 +156609,30 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 32; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_32(t1, point->y); (void)sp_1024_mod_32(t1, t1, p1024_mod); sp_1024_sqr_32(t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); sp_1024_mul_32(t2, t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); - (void)sp_1024_sub_32(t2, p1024_mod, t2); - sp_1024_mont_add_32(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_32(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_32(t1, p1024_mod); - sp_1024_cond_sub_32(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); if (!sp_1024_iszero_32(t1)) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -64303,7 +156640,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. 
* * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -64312,7 +156649,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, */ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* pub = NULL; #else sp_point_1024 pub[1]; @@ -64320,7 +156657,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -64335,7 +156672,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) err = sp_1024_ecc_is_point_32(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -64357,7 +156694,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_1024* pub = NULL; #else @@ -64378,7 +156715,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); @@ -64444,7 +156781,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -64455,6 +156792,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } #endif 
#endif /* WOLFSSL_SP_1024 */ +#endif /* WOLFCRYPT_HAVE_SAKKE */ #endif /* WOLFSSL_HAVE_SP_ECC */ #endif /* WOLFSSL_SP_ARM32_ASM */ #endif /* WOLFSSL_HAVE_SP_RSA | WOLFSSL_HAVE_SP_DH | WOLFSSL_HAVE_SP_ECC */ diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 6df8b49f9..834ceb628 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -1,22 +1,12 @@ /* sp.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* Implementation by Sean Parkinson. 
*/ @@ -45,20 +35,39 @@ #endif #endif +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#undef WOLFSSL_SP_SMALL_STACK +#define WOLFSSL_SP_SMALL_STACK +#endif + #include +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif + #ifdef WOLFSSL_SP_ARM64_ASM -#define SP_PRINT_NUM(var, name, total, words, bits) \ - do { \ - int ii; \ - fprintf(stderr, name "=0x"); \ - for (ii = words - 1; ii >= 0; ii--) \ - fprintf(stderr, SP_PRINT_FMT, (var)[ii]); \ - fprintf(stderr, "\n"); \ +#define SP_PRINT_NUM(var, name, total, words, bits) \ + do { \ + int ii; \ + fprintf(stderr, name "=0x"); \ + for (ii = ((bits + 63) / 64) - 1; ii >= 0; ii--) \ + fprintf(stderr, SP_PRINT_FMT, (var)[ii]); \ + fprintf(stderr, "\n"); \ } while (0) -#define SP_PRINT_VAL(var, name) \ +#define SP_PRINT_VAL(var, name) \ fprintf(stderr, name "=0x" SP_PRINT_FMT "\n", var) + +#define SP_PRINT_INT(var, name) \ + fprintf(stderr, name "=%d\n", var) + #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) #ifndef WOLFSSL_SP_NO_2048 /* Read big endian unsigned byte array into r. 
@@ -70,41 +79,108 @@ */ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = a[6]; //fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. 
*/ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } /* Convert an mp_int to an array of sp_digit. 
@@ -116,20 +192,23 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -159,12 +238,12 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { r[j] &= 0xffffffffffffffffl; @@ -201,17 +280,19 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_2048_to_bin_32(sp_digit* r, byte* a) { int i; - int j; + int j = 0; - for (i = 31, j = 0; i >= 0; i--) { - a[j++] = r[i] >> 56; - a[j++] = r[i] >> 48; - a[j++] = r[i] >> 40; - a[j++] = r[i] >> 32; - a[j++] = r[i] >> 24; - a[j++] = r[i] >> 16; - a[j++] = r[i] >> 8; - a[j++] = r[i] >> 0; + for (i = 31; i >= 0; i--, j += 8) { + __asm__ __volatile__ ( + "ldr x4, [%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); } } @@ -643,250 +724,7 @@ static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "stp x5, x3, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", 
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24" - ); -} - -/* Square a and put result in r. (r = a * a) - * - * All registers version. - * - * r A single precision integer. - * a A single precision integer. - */ -static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) -{ - __asm__ __volatile__ ( - "ldp x21, x22, [%[a], 0]\n\t" - "ldp x23, x24, [%[a], 16]\n\t" - "ldp x25, x26, [%[a], 32]\n\t" - "ldp x27, x28, [%[a], 48]\n\t" - "# A[0] * A[1]\n\t" - "mul x6, x21, x22\n\t" - "umulh x7, x21, x22\n\t" - "# A[0] * A[2]\n\t" - "mul x4, x21, x23\n\t" - "umulh x5, x21, x23\n\t" - "adds x7, x7, x4\n\t" - "# A[0] * A[3]\n\t" - "mul x4, x21, x24\n\t" - "adc x8, xzr, x5\n\t" - "umulh x5, x21, x24\n\t" - "adds x8, x8, x4\n\t" - "# A[1] * A[2]\n\t" - "mul x4, x22, x23\n\t" - "adc x9, xzr, x5\n\t" - "umulh x5, x22, x23\n\t" - "adds x8, x8, x4\n\t" - "# A[0] * A[4]\n\t" - "mul x4, x21, x25\n\t" - "adcs x9, x9, x5\n\t" - "umulh x5, x21, x25\n\t" - "adc x10, xzr, xzr\n\t" - "adds x9, x9, x4\n\t" - "# A[1] * A[3]\n\t" - "mul x4, x22, x24\n\t" - "adc x10, x10, x5\n\t" - "umulh x5, x22, x24\n\t" - "adds x9, x9, x4\n\t" - "# A[0] * A[5]\n\t" - "mul x4, x21, x26\n\t" - "adcs x10, x10, x5\n\t" - "umulh x5, x21, x26\n\t" - "adc x11, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[1] * A[4]\n\t" - "mul x4, x22, x25\n\t" - "adc x11, x11, x5\n\t" - "umulh x5, x22, x25\n\t" - "adds x10, x10, x4\n\t" - "# A[2] * A[3]\n\t" - "mul x4, x23, x24\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x23, x24\n\t" - "adc x12, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[0] * A[6]\n\t" - "mul x4, x21, x27\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x21, x27\n\t" - "adc x12, x12, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * A[5]\n\t" - "mul x4, x22, x26\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x22, x26\n\t" - "adc x13, xzr, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[2] * A[4]\n\t" - "mul x4, x23, x25\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x23, x25\n\t" - "adc 
x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[0] * A[7]\n\t" - "mul x4, x21, x28\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x21, x28\n\t" - "adc x13, x13, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[1] * A[6]\n\t" - "mul x4, x22, x27\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x22, x27\n\t" - "adc x14, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * A[5]\n\t" - "mul x4, x23, x26\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x23, x26\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[3] * A[4]\n\t" - "mul x4, x24, x25\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x24, x25\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[1] * A[7]\n\t" - "mul x4, x22, x28\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x22, x28\n\t" - "adc x14, x14, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[2] * A[6]\n\t" - "mul x4, x23, x27\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x23, x27\n\t" - "adc x15, xzr, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[3] * A[5]\n\t" - "mul x4, x24, x26\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x24, x26\n\t" - "adc x15, x15, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[2] * A[7]\n\t" - "mul x4, x23, x28\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x23, x28\n\t" - "adc x15, x15, xzr\n\t" - "adds x14, x14, x4\n\t" - "# A[3] * A[6]\n\t" - "mul x4, x24, x27\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x24, x27\n\t" - "adc x16, xzr, xzr\n\t" - "adds x14, x14, x4\n\t" - "# A[4] * A[5]\n\t" - "mul x4, x25, x26\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x25, x26\n\t" - "adc x16, x16, xzr\n\t" - "adds x14, x14, x4\n\t" - "# A[3] * A[7]\n\t" - "mul x4, x24, x28\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x24, x28\n\t" - "adc x16, x16, xzr\n\t" - "adds x15, x15, x4\n\t" - "# A[4] * A[6]\n\t" - "mul x4, x25, x27\n\t" - "adcs x16, x16, x5\n\t" - "umulh x5, x25, x27\n\t" - "adc x17, xzr, xzr\n\t" - "adds x15, x15, x4\n\t" - "# A[4] * A[7]\n\t" - "mul x4, x25, x28\n\t" - "adcs x16, x16, x5\n\t" - "umulh x5, x25, x28\n\t" - "adc x17, x17, xzr\n\t" - 
"adds x16, x16, x4\n\t" - "# A[5] * A[6]\n\t" - "mul x4, x26, x27\n\t" - "adcs x17, x17, x5\n\t" - "umulh x5, x26, x27\n\t" - "adc x19, xzr, xzr\n\t" - "adds x16, x16, x4\n\t" - "# A[5] * A[7]\n\t" - "mul x4, x26, x28\n\t" - "adcs x17, x17, x5\n\t" - "umulh x5, x26, x28\n\t" - "adc x19, x19, xzr\n\t" - "adds x17, x17, x4\n\t" - "# A[6] * A[7]\n\t" - "mul x4, x27, x28\n\t" - "adcs x19, x19, x5\n\t" - "umulh x5, x27, x28\n\t" - "adc x20, xzr, xzr\n\t" - "adds x19, x19, x4\n\t" - "adc x20, x20, x5\n\t" - "# Double\n\t" - "adds x6, x6, x6\n\t" - "adcs x7, x7, x7\n\t" - "adcs x8, x8, x8\n\t" - "adcs x9, x9, x9\n\t" - "adcs x10, x10, x10\n\t" - "adcs x11, x11, x11\n\t" - "adcs x12, x12, x12\n\t" - "adcs x13, x13, x13\n\t" - "adcs x14, x14, x14\n\t" - "adcs x15, x15, x15\n\t" - "adcs x16, x16, x16\n\t" - "adcs x17, x17, x17\n\t" - "adcs x19, x19, x19\n\t" - "# A[0] * A[0]\n\t" - "mul x5, x21, x21\n\t" - "adcs x20, x20, x20\n\t" - "umulh x2, x21, x21\n\t" - "cset x21, cs\n\t" - "# A[1] * A[1]\n\t" - "mul x3, x22, x22\n\t" - "adds x6, x6, x2\n\t" - "umulh x4, x22, x22\n\t" - "adcs x7, x7, x3\n\t" - "# A[2] * A[2]\n\t" - "mul x2, x23, x23\n\t" - "adcs x8, x8, x4\n\t" - "umulh x3, x23, x23\n\t" - "adcs x9, x9, x2\n\t" - "# A[3] * A[3]\n\t" - "mul x4, x24, x24\n\t" - "adcs x10, x10, x3\n\t" - "umulh x2, x24, x24\n\t" - "adcs x11, x11, x4\n\t" - "# A[4] * A[4]\n\t" - "mul x3, x25, x25\n\t" - "adcs x12, x12, x2\n\t" - "umulh x4, x25, x25\n\t" - "adcs x13, x13, x3\n\t" - "# A[5] * A[5]\n\t" - "mul x2, x26, x26\n\t" - "adcs x14, x14, x4\n\t" - "umulh x3, x26, x26\n\t" - "adcs x15, x15, x2\n\t" - "# A[6] * A[6]\n\t" - "mul x4, x27, x27\n\t" - "adcs x16, x16, x3\n\t" - "umulh x2, x27, x27\n\t" - "adcs x17, x17, x4\n\t" - "# A[7] * A[7]\n\t" - "mul x3, x28, x28\n\t" - "adcs x19, x19, x2\n\t" - "umulh x4, x28, x28\n\t" - "adcs x20, x20, x3\n\t" - "stp x5, x6, [%[r], 0]\n\t" - "adc x21, x21, x4\n\t" - "stp x7, x8, [%[r], 16]\n\t" - "stp x9, x10, [%[r], 32]\n\t" - "stp x11, x12, [%[r], 
48]\n\t" - "stp x13, x14, [%[r], 64]\n\t" - "stp x15, x16, [%[r], 80]\n\t" - "stp x17, x19, [%[r], 96]\n\t" - "stp x20, x21, [%[r], 112]\n\t" - : - : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "cc" ); } @@ -920,15 +758,47 @@ static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 32]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 48]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_2048_add_word_8(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" + ); +} + /* Sub b from a into a. (a -= b) * * a A single precision integer and result. 
@@ -980,7 +850,7 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -1036,72 +906,66 @@ static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 96]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 112]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* AND m into each word of a and store in r. +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. */ -static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<8; i++) { - r[i] = a[i] & m; - } -#else - r[0] = a[0] & m; - r[1] = a[1] & m; - r[2] = a[2] & m; - r[3] = a[3] & m; - r[4] = a[4] & m; - r[5] = a[5] & m; - r[6] = a[6] & m; - r[7] = a[7] & m; -#endif -} - -/* Add digit to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static void sp_2048_add_zero_8(sp_digit* r, const sp_digit* a, - const sp_digit d) +static sp_digit sp_2048_cond_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 48]\n\t" - : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); + + return (sp_digit)r; } +#endif /* !WOLFSSL_SP_SMALL */ /* Multiply a and b into r. 
(r = a * b) * @@ -1116,119 +980,74 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, sp_digit z1[16]; sp_digit a1[8]; sp_digit b1[8]; - sp_digit z2[16]; - sp_digit u, ca, cb; + sp_digit* z2 = r + 16; + sp_digit u; + sp_digit ca; + sp_digit cb; ca = sp_2048_add_8(a1, a, &a[8]); cb = sp_2048_add_8(b1, b, &b[8]); u = ca & cb; - sp_2048_mul_8(z1, a1, b1); + sp_2048_mul_8(z2, &a[8], &b[8]); sp_2048_mul_8(z0, a, b); - sp_2048_mask_8(r + 16, a1, 0 - cb); - sp_2048_mask_8(b1, b1, 0 - ca); - u += sp_2048_add_8(r + 16, r + 16, b1); - u += sp_2048_sub_in_place_16(z1, z2); + sp_2048_mul_8(z1, a1, b1); + u += sp_2048_sub_in_place_16(z1, z0); + u += sp_2048_sub_in_place_16(z1, z2); + u += sp_2048_cond_add_8(z1 + 8, z1 + 8, a1, 0 - cb); + u += sp_2048_cond_add_8(z1 + 8, z1 + 8, b1, 0 - ca); + u += sp_2048_add_16(r + 8, r + 8, z1); - u += sp_2048_add_8(r + 16, r + 16, z2); - sp_2048_add_zero_8(r + 24, z2 + 8, u); + (void)sp_2048_add_word_8(r + 24, r + 24, u); } -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) +/* Add digit to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. */ -static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 64\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) - : - : "memory", "x3", "x4", "x5", "x6", "x11" - ); - - return c; -} - -#else -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a) +static void sp_2048_add_word_16(sp_digit* r, const sp_digit* a, + sp_digit b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 16]\n\t" "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 48]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 112]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" ); - - return (sp_digit)r; -} - -#endif /* WOLFSSL_SP_SMALL */ -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[16]; - sp_digit z1[16]; - sp_digit a1[8]; - sp_digit u; - - u = sp_2048_add_8(a1, a, &a[8]); - sp_2048_sqr_8(z1, a1); - sp_2048_sqr_8(z2, &a[8]); - sp_2048_sqr_8(z0, a); - sp_2048_mask_8(r + 16, a1, 0 - u); - u += sp_2048_dbl_8(r + 16, r + 16); - u += sp_2048_sub_in_place_16(z1, z2); - u += sp_2048_sub_in_place_16(z1, z0); - u += sp_2048_add_16(r + 8, r + 8, z1); - u += sp_2048_add_8(r + 16, r + 16, z2); - sp_2048_add_zero_8(r + 24, z2 + 8, u); } /* Sub b from a into a. (a -= b) @@ -1322,7 +1141,7 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -1418,92 +1237,94 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 224]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 240]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* AND m into each word of a and store in r. +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
*/ -static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<16; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 16; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } -#endif -} - -/* Add digit to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static void sp_2048_add_zero_16(sp_digit* r, const sp_digit* a, - const sp_digit d) +static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 112]\n\t" - : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, 
x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "ldp x10, x11, [%[b], 80]\n\t" + "ldp x4, x5, [%[a], 64]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 64]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 80]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "ldp x10, x11, [%[b], 112]\n\t" + "ldp x4, x5, [%[a], 96]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 96]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 112]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); + + return (sp_digit)r; } +#endif /* !WOLFSSL_SP_SMALL */ /* Multiply a and b into r. 
(r = a * b) * @@ -1518,113 +1339,1094 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, sp_digit z1[32]; sp_digit a1[16]; sp_digit b1[16]; - sp_digit z2[32]; - sp_digit u, ca, cb; + sp_digit* z2 = r + 32; + sp_digit u; + sp_digit ca; + sp_digit cb; ca = sp_2048_add_16(a1, a, &a[16]); cb = sp_2048_add_16(b1, b, &b[16]); u = ca & cb; - sp_2048_mul_16(z1, a1, b1); + sp_2048_mul_16(z2, &a[16], &b[16]); sp_2048_mul_16(z0, a, b); - sp_2048_mask_16(r + 32, a1, 0 - cb); - sp_2048_mask_16(b1, b1, 0 - ca); - u += sp_2048_add_16(r + 32, r + 32, b1); - u += sp_2048_sub_in_place_32(z1, z2); + sp_2048_mul_16(z1, a1, b1); + u += sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_sub_in_place_32(z1, z2); + u += sp_2048_cond_add_16(z1 + 16, z1 + 16, a1, 0 - cb); + u += sp_2048_cond_add_16(z1 + 16, z1 + 16, b1, 0 - ca); + u += sp_2048_add_32(r + 16, r + 16, z1); - u += sp_2048_add_16(r + 32, r + 32, z2); - sp_2048_add_zero_16(r + 48, z2 + 16, u); + (void)sp_2048_add_word_16(r + 48, r + 48, u); } -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) +/* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. 
*/ -static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add x11, %[a], 128\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "ldp x19, x20, [%[a], 64]\n\t" + "ldp x21, x22, [%[a], 80]\n\t" + "ldp x23, x24, [%[a], 96]\n\t" + "ldp x25, x26, [%[a], 112]\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x10, x10\n\t" + "umulh x3, x10, x10\n\t" + "str x2, [%[r]]\n\t" + "mov x4, xzr\n\t" + "# A[0] * A[1]\n\t" + "mul x8, x10, x11\n\t" + "umulh x9, x10, x11\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 8]\n\t" + "# A[0] * A[2]\n\t" + "mul x8, x10, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x10, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[1] * A[1]\n\t" + "mul x8, x11, x11\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x11, x11\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 16]\n\t" + "# A[0] * A[3]\n\t" + "mul x8, x10, x13\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x10, x13\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[1] * A[2]\n\t" + "mul x8, x11, x12\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x11, x12\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 24]\n\t" + "# A[0] * A[4]\n\t" + "mul x8, x10, x14\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x10, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs 
x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[1] * A[3]\n\t" + "mul x8, x11, x13\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x11, x13\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[2] * A[2]\n\t" + "mul x8, x12, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x12, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 32]\n\t" + "# A[0] * A[5]\n\t" + "mul x5, x10, x15\n\t" + "adcs x4, x4, x9\n\t" + "umulh x6, x10, x15\n\t" + "adc x2, x2, xzr\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[4]\n\t" + "mul x8, x11, x14\n\t" + "umulh x9, x11, x14\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[3]\n\t" + "mul x8, x12, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 40]\n\t" + "# A[0] * A[6]\n\t" + "mul x5, x10, x16\n\t" + "umulh x6, x10, x16\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[5]\n\t" + "mul x8, x11, x15\n\t" + "umulh x9, x11, x15\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[4]\n\t" + "mul x8, x12, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[3]\n\t" + "mul x8, x13, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 48]\n\t" + "# A[0] * A[7]\n\t" + "mul x5, x10, x17\n\t" + "umulh x6, x10, 
x17\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[6]\n\t" + "mul x8, x11, x16\n\t" + "umulh x9, x11, x16\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[5]\n\t" + "mul x8, x12, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[4]\n\t" + "mul x8, x13, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 56]\n\t" + "# A[0] * A[8]\n\t" + "mul x5, x10, x19\n\t" + "umulh x6, x10, x19\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[7]\n\t" + "mul x8, x11, x17\n\t" + "umulh x9, x11, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[6]\n\t" + "mul x8, x12, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[5]\n\t" + "mul x8, x13, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[4]\n\t" + "mul x8, x14, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 64]\n\t" + "# A[0] * A[9]\n\t" + "mul x5, x10, x20\n\t" + "umulh x6, x10, x20\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[8]\n\t" + "mul x8, x11, x19\n\t" + "umulh x9, x11, x19\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[7]\n\t" + "mul x8, x12, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[6]\n\t" + "mul x8, x13, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x16\n\t" + "adc x7, x7, 
xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[5]\n\t" + "mul x8, x14, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 72]\n\t" + "# A[0] * A[10]\n\t" + "mul x5, x10, x21\n\t" + "umulh x6, x10, x21\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[9]\n\t" + "mul x8, x11, x20\n\t" + "umulh x9, x11, x20\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[8]\n\t" + "mul x8, x12, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[7]\n\t" + "mul x8, x13, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[6]\n\t" + "mul x8, x14, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[5]\n\t" + "mul x8, x15, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 80]\n\t" + "# A[0] * A[11]\n\t" + "mul x5, x10, x22\n\t" + "umulh x6, x10, x22\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[10]\n\t" + "mul x8, x11, x21\n\t" + "umulh x9, x11, x21\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[9]\n\t" + "mul x8, x12, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[8]\n\t" + "mul x8, x13, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[7]\n\t" + "mul x8, x14, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, 
x14, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[6]\n\t" + "mul x8, x15, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 88]\n\t" + "# A[0] * A[12]\n\t" + "mul x5, x10, x23\n\t" + "umulh x6, x10, x23\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[11]\n\t" + "mul x8, x11, x22\n\t" + "umulh x9, x11, x22\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[10]\n\t" + "mul x8, x12, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[9]\n\t" + "mul x8, x13, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[8]\n\t" + "mul x8, x14, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[7]\n\t" + "mul x8, x15, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[6]\n\t" + "mul x8, x16, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 96]\n\t" + "# A[0] * A[13]\n\t" + "mul x5, x10, x24\n\t" + "umulh x6, x10, x24\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[12]\n\t" + "mul x8, x11, x23\n\t" + "umulh x9, x11, x23\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[11]\n\t" + "mul x8, x12, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[10]\n\t" + "mul x8, x13, x21\n\t" + 
"adcs x6, x6, x9\n\t" + "umulh x9, x13, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[9]\n\t" + "mul x8, x14, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[8]\n\t" + "mul x8, x15, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[7]\n\t" + "mul x8, x16, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 104]\n\t" + "# A[0] * A[14]\n\t" + "mul x5, x10, x25\n\t" + "umulh x6, x10, x25\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[13]\n\t" + "mul x8, x11, x24\n\t" + "umulh x9, x11, x24\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[12]\n\t" + "mul x8, x12, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[11]\n\t" + "mul x8, x13, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[10]\n\t" + "mul x8, x14, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[9]\n\t" + "mul x8, x15, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[8]\n\t" + "mul x8, x16, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[7]\n\t" + "mul x8, x17, x17\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x17\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs 
x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 112]\n\t" + "# A[0] * A[15]\n\t" + "mul x5, x10, x26\n\t" + "umulh x6, x10, x26\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[14]\n\t" + "mul x8, x11, x25\n\t" + "umulh x9, x11, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[13]\n\t" + "mul x8, x12, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[12]\n\t" + "mul x8, x13, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[11]\n\t" + "mul x8, x14, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[10]\n\t" + "mul x8, x15, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[9]\n\t" + "mul x8, x16, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[8]\n\t" + "mul x8, x17, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 120]\n\t" + "# A[1] * A[15]\n\t" + "mul x5, x11, x26\n\t" + "umulh x6, x11, x26\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[2] * A[14]\n\t" + "mul x8, x12, x25\n\t" + "umulh x9, x12, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[13]\n\t" + "mul x8, x13, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[12]\n\t" + "mul x8, x14, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[11]\n\t" + "mul x8, x15, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x22\n\t" + "adc x7, 
x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[10]\n\t" + "mul x8, x16, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[9]\n\t" + "mul x8, x17, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[8]\n\t" + "mul x8, x19, x19\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x19\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 128]\n\t" + "# A[2] * A[15]\n\t" + "mul x5, x12, x26\n\t" + "umulh x6, x12, x26\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[3] * A[14]\n\t" + "mul x8, x13, x25\n\t" + "umulh x9, x13, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[13]\n\t" + "mul x8, x14, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[12]\n\t" + "mul x8, x15, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[11]\n\t" + "mul x8, x16, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[10]\n\t" + "mul x8, x17, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[9]\n\t" + "mul x8, x19, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 136]\n\t" + "# A[3] * A[15]\n\t" + "mul x5, x13, x26\n\t" + "umulh x6, x13, x26\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[4] * A[14]\n\t" + 
"mul x8, x14, x25\n\t" + "umulh x9, x14, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[13]\n\t" + "mul x8, x15, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[12]\n\t" + "mul x8, x16, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[11]\n\t" + "mul x8, x17, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[10]\n\t" + "mul x8, x19, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[9] * A[9]\n\t" + "mul x8, x20, x20\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x20, x20\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 144]\n\t" + "# A[4] * A[15]\n\t" + "mul x5, x14, x26\n\t" + "umulh x6, x14, x26\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[5] * A[14]\n\t" + "mul x8, x15, x25\n\t" + "umulh x9, x15, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[6] * A[13]\n\t" + "mul x8, x16, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x16, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[12]\n\t" + "mul x8, x17, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[11]\n\t" + "mul x8, x19, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[9] * A[10]\n\t" + "mul x8, x20, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x20, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + 
"adc x2, x2, x7\n\t" + "str x3, [%[r], 152]\n\t" + "# A[5] * A[15]\n\t" + "mul x5, x15, x26\n\t" + "umulh x6, x15, x26\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[6] * A[14]\n\t" + "mul x8, x16, x25\n\t" + "umulh x9, x16, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[7] * A[13]\n\t" + "mul x8, x17, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x17, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[12]\n\t" + "mul x8, x19, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[9] * A[11]\n\t" + "mul x8, x20, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x20, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[10] * A[10]\n\t" + "mul x8, x21, x21\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x21, x21\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 160]\n\t" + "# A[6] * A[15]\n\t" + "mul x5, x16, x26\n\t" + "umulh x6, x16, x26\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[7] * A[14]\n\t" + "mul x8, x17, x25\n\t" + "umulh x9, x17, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[8] * A[13]\n\t" + "mul x8, x19, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x19, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[9] * A[12]\n\t" + "mul x8, x20, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x20, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[10] * A[11]\n\t" + "mul x8, x21, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x21, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 168]\n\t" + "# A[7] * A[15]\n\t" + "mul x5, x17, x26\n\t" + 
"umulh x6, x17, x26\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[8] * A[14]\n\t" + "mul x8, x19, x25\n\t" + "umulh x9, x19, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[9] * A[13]\n\t" + "mul x8, x20, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x20, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[10] * A[12]\n\t" + "mul x8, x21, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x21, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[11] * A[11]\n\t" + "mul x8, x22, x22\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x22, x22\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 176]\n\t" + "# A[8] * A[15]\n\t" + "mul x5, x19, x26\n\t" + "umulh x6, x19, x26\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[9] * A[14]\n\t" + "mul x8, x20, x25\n\t" + "umulh x9, x20, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[10] * A[13]\n\t" + "mul x8, x21, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x21, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[11] * A[12]\n\t" + "mul x8, x22, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x22, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 184]\n\t" + "# A[9] * A[15]\n\t" + "mul x5, x20, x26\n\t" + "umulh x6, x20, x26\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[10] * A[14]\n\t" + "mul x8, x21, x25\n\t" + "umulh x9, x21, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[11] * A[13]\n\t" + "mul x8, x22, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x22, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[12] * A[12]\n\t" + "mul x8, x23, x23\n\t" + "adcs x6, x6, x9\n\t" + "umulh 
x9, x23, x23\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 192]\n\t" + "# A[10] * A[15]\n\t" + "mul x5, x21, x26\n\t" + "umulh x6, x21, x26\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[11] * A[14]\n\t" + "mul x8, x22, x25\n\t" + "umulh x9, x22, x25\n\t" + "adds x5, x5, x8\n\t" + "# A[12] * A[13]\n\t" + "mul x8, x23, x24\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x23, x24\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 200]\n\t" + "# A[11] * A[15]\n\t" + "mul x8, x22, x26\n\t" + "umulh x9, x22, x26\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[12] * A[14]\n\t" + "mul x8, x23, x25\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x23, x25\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[13] * A[13]\n\t" + "mul x8, x24, x24\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x24, x24\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 208]\n\t" + "# A[12] * A[15]\n\t" + "mul x8, x23, x26\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x23, x26\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[13] * A[14]\n\t" + "mul x8, x24, x25\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x24, x25\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 216]\n\t" + "# A[13] * A[15]\n\t" + "mul x8, x24, x26\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x24, 
x26\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[14] * A[14]\n\t" + "mul x8, x25, x25\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x25, x25\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 224]\n\t" + "# A[14] * A[15]\n\t" + "mul x8, x25, x26\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x25, x26\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 232]\n\t" + "# A[15] * A[15]\n\t" + "mul x8, x26, x26\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x26, x26\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adc x3, x3, x9\n\t" + "stp x2, x3, [%[r], 240]\n\t" : - : "memory", "x3", "x4", "x5", "x6", "x11" + : [r] "r" (r), [a] "r" (a) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "cc" ); - - return c; } -#else -/* Double a into r. (r = a + a) +/* Sub b from a into r. (r = a - b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. 
*/ -static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a) +static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, + const sp_digit* b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 48]\n\t" "ldp x3, x4, [%[a], 64]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 80]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 88]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 80]\n\t" "ldp x3, x4, [%[a], 96]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 112]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 120]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 112]\n\t" - "cset %[r], cs\n\t" + "csetm %[r], cc\n\t" : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", 
"x6" + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -#endif /* WOLFSSL_SP_SMALL */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -1633,22 +2435,31 @@ static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a) SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[32]; + sp_digit* z2 = r + 32; sp_digit z1[32]; - sp_digit a1[16]; + sp_digit* a1 = z1; + sp_digit* zero = z1 + 16; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 16); + + mask = sp_2048_sub_16(a1, a, &a[16]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_16(a1, p1, p2); - u = sp_2048_add_16(a1, a, &a[16]); - sp_2048_sqr_16(z1, a1); sp_2048_sqr_16(z2, &a[16]); sp_2048_sqr_16(z0, a); - sp_2048_mask_16(r + 32, a1, 0 - u); - u += sp_2048_dbl_16(r + 32, r + 32); - u += sp_2048_sub_in_place_32(z1, z2); - u += sp_2048_sub_in_place_32(z1, z0); - u += sp_2048_add_32(r + 16, r + 16, z1); - u += sp_2048_add_16(r + 32, r + 32, z2); - sp_2048_add_zero_16(r + 48, z2 + 16, u); + sp_2048_sqr_16(z1, a1); + + u = 0; + u -= sp_2048_sub_in_place_32(z1, z2); + u -= sp_2048_sub_in_place_32(z1, z0); + u += sp_2048_sub_in_place_32(r + 16, z1); + sp_2048_add_word_16(r + 48, r + 48, u); } #endif /* !WOLFSSL_SP_SMALL */ @@ -1678,12 +2489,12 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -1719,7 +2530,7 
@@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -1738,10 +2549,10 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[64]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 248\n\t" "csel x3, xzr, x3, cc\n\t" @@ -1771,7 +2582,7 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -1787,10 +2598,10 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) sp_digit tmp[64]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 248\n\t" "csel x3, xzr, x3, cc\n\t" @@ -1836,7 +2647,7 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -1845,23 +2656,6 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) #endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) #ifdef WOLFSSL_SP_SMALL -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. 
- * m Mask to AND against each digit. - */ -static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) -{ - int i; - - for (i=0; i<16; i++) { - r[i] = a[i] & m; - } -} - -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -1887,12 +2681,12 @@ static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -1928,7 +2722,7 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -1947,10 +2741,10 @@ static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[32]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 120\n\t" "csel x3, xzr, x3, cc\n\t" @@ -1980,7 +2774,7 @@ static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -1996,10 +2790,10 @@ static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) sp_digit tmp[32]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, 
xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 120\n\t" "csel x3, xzr, x3, cc\n\t" @@ -2045,7 +2839,7 @@ static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -2054,7 +2848,7 @@ static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) #endif /* WOLFSSL_SP_SMALL */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -2090,9 +2884,9 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -2111,280 +2905,280 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "str x3, [%[r], 256]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, 
%[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov 
x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[16] * B\n\t" - "ldp x8, x9, [%[a], 128]\n\t" + "ldp x9, x10, [%[a], 128]\n\t" "str x3, [%[r], 120]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + 
"mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[17] * B\n\t" "str x4, [%[r], 128]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[18] * B\n\t" - "ldp x8, x9, [%[a], 144]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" "str x5, [%[r], 136]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[19] * B\n\t" "str x3, [%[r], 144]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[20] * B\n\t" - "ldp x8, x9, [%[a], 160]\n\t" + "ldp x9, x10, [%[a], 160]\n\t" "str x4, [%[r], 152]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[21] * B\n\t" "str x5, [%[r], 160]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[22] * B\n\t" - "ldp x8, x9, [%[a], 176]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" "str x3, [%[r], 168]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[23] * B\n\t" "str x4, [%[r], 176]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + 
"mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[24] * B\n\t" - "ldp x8, x9, [%[a], 192]\n\t" + "ldp x9, x10, [%[a], 192]\n\t" "str x5, [%[r], 184]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[25] * B\n\t" "str x3, [%[r], 192]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[26] * B\n\t" - "ldp x8, x9, [%[a], 208]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" "str x4, [%[r], 200]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[27] * B\n\t" "str x5, [%[r], 208]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[28] * B\n\t" - "ldp x8, x9, [%[a], 224]\n\t" + "ldp x9, x10, [%[a], 224]\n\t" "str x3, [%[r], 216]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[29] * B\n\t" "str x4, [%[r], 224]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[30] * B\n\t" - "ldp x8, x9, [%[a], 240]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" "str x5, [%[r], 232]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, 
%[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[31] * B\n\t" "str x3, [%[r], 240]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "adc x5, x5, x7\n\t" "stp x4, x5, [%[r], 248]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } @@ -2413,226 +3207,202 @@ static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m) SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" - "mov x3, xzr\n\t" - "# i = 16\n\t" - "mov x4, 16\n\t" "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "ldp x16, x17, [%[a], 32]\n\t" + "ldp x19, x20, [%[a], 48]\n\t" + "ldp x21, x22, [%[a], 64]\n\t" + "ldp x23, x24, [%[a], 80]\n\t" + "ldp x25, x26, [%[a], 96]\n\t" + "ldp x27, x28, [%[a], 112]\n\t" + "mov x3, xzr\n\t" + "# i = 0..15\n\t" + "mov x4, 16\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" "mul x9, %[mp], x12\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" + "ldp x10, x11, [%[m], 0]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" "adds x12, x12, x7\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" + "mul x7, x11, x9\n\t" + "umulh x8, x11, x9\n\t" "adds x12, x13, x7\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" + "ldp x11, x10, [%[m], 16]\n\t" "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" + 
"mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x13, x14, x7\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x14, x15, x7\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" + "ldp x11, x10, [%[m], 32]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" + "adds x14, x14, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x15, x16, x7\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" + "adds x15, x15, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x16, x17, x7\n\t" "# a[i+6] += m[6] * mu\n\t" - "ldr x11, [%[a], 48]\n\t" + "ldp x11, x10, [%[m], 48]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" + "adds x16, x16, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 40]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x17, x19, x7\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" + "adds x17, x17, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x19, x20, x7\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" + "ldp x11, x10, [%[m], 64]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" + "adds x19, x19, x6\n\t" + "mul x7, x11, 
x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x20, x21, x7\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" + "adds x20, x20, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x21, x22, x7\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" + "ldp x11, x10, [%[m], 80]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" + "adds x21, x21, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x22, x23, x7\n\t" "# a[i+11] += m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" + "adds x22, x22, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x23, x24, x7\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" + "ldp x11, x10, [%[m], 96]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" + "adds x23, x23, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x24, x25, x7\n\t" "# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" + "adds x24, x24, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x25, x26, x7\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" + "ldp x11, x10, [%[m], 112]\n\t" "adc x5, 
x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" + "adds x25, x25, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x26, x27, x7\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" + "ldr x10, [%[m], 120]\n\t" "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" + "adds x26, x26, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" + "umulh x8, x10, x9\n\t" "adds x6, x6, x7\n\t" "adcs x8, x8, x3\n\t" - "str x11, [%[a], 112]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 128]\n\t" - "str x10, [%[a], 120]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 128]\n\t" + "adc x3, xzr, xzr\n\t" + "adds x27, x28, x6\n\t" + "ldr x28, [%[a], 128]\n\t" + "adcs x28, x28, x8\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 128\n\t" + "neg x3, x3\n\t" + "mov x9, %[a]\n\t" + "sub %[a], %[a], 128\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, 
x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 112]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x12, x12, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x13, x13, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x14, x14, x6\n\t" + "stp x12, x13, [%[a], 0]\n\t" + "sbcs x15, x15, x7\n\t" + "stp x14, x15, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x16, x16, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x17, x17, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x19, x19, x6\n\t" + "stp x16, x17, [%[a], 32]\n\t" + "sbcs x20, x20, x7\n\t" + "stp x19, x20, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x21, x21, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x22, x22, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x23, x23, x6\n\t" + "stp x21, x22, [%[a], 64]\n\t" + "sbcs x24, x24, x7\n\t" + "stp x23, x24, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x25, x25, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x26, 
x26, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x27, x27, x6\n\t" + "stp x25, x26, [%[a], 96]\n\t" + "sbcs x28, x28, x7\n\t" + "stp x27, x28, [%[a], 112]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" ); } @@ -2644,9 +3414,9 @@ SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_16(r, a, b); @@ -2658,9 +3428,9 @@ static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_16(r, a); @@ -2696,7 +3466,7 @@ static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -2762,7 +3532,7 @@ static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; @@ -2784,9 +3554,9 @@ static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -2805,180 +3575,183 @@ static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a, "str x3, [%[r], 128]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str 
x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 0\n\t" - "mul x6, 
%[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "adc x4, x4, x7\n\t" "stp x3, x4, [%[r], 120]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", 
"x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. */ static sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -2987,23 +3760,52 @@ static sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] 
"r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); - return r; + return d1; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<16; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif } /* Compare a with b in constant time. @@ -3017,147 +3819,139 @@ static sp_int64 sp_2048_cmp_16(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 120\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #16\n\t" + "add %[a], %[a], #112\n\t" + "add %[b], %[b], #112\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", 
"x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - 
"ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 
16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -3173,8 +3967,8 @@ static sp_int64 sp_2048_cmp_16(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[32], t2[17]; sp_digit div, r1; @@ -3184,9 +3978,13 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig div = d[15]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 16); - for (i=15; i>=0; i--) { - sp_digit hi = t1[16 + i] - (t1[16 + i] == div); + r1 = sp_2048_cmp_16(&t1[16], d) >= 0; + sp_2048_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); + for (i = 15; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[16 + i] == div); + sp_digit hi = t1[16 + i] + mask; r1 = div_2048_word_16(hi, t1[16 + i - 1], div); + r1 |= mask; sp_2048_mul_d_16(t2, d, r1); t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2); @@ -3223,12 +4021,14 @@ static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 32]; @@ -3243,11 +4043,17 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 32), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 32), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3307,6 +4113,10 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -3348,7 +4158,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_16(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -3363,12 +4173,14 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[32 * 32]; @@ -3383,11 +4195,17 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 32), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 32), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3463,6 +4281,10 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -3505,7 +4327,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_16(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -3541,442 +4363,408 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" + "ldp x11, x12, [%[a], 0]\n\t" + "ldp x13, x14, [%[a], 16]\n\t" + "ldp x15, x16, [%[a], 32]\n\t" + "ldp x17, x19, [%[a], 48]\n\t" + "ldp x20, x21, [%[a], 64]\n\t" + "ldp x22, x23, [%[a], 80]\n\t" + "# No carry yet\n\t" "mov x3, xzr\n\t" - "# i = 32\n\t" + "# i = 0..31\n\t" "mov x4, 32\n\t" - "ldp x12, x13, [%[a], 0]\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" - "mul x9, %[mp], x12\n\t" + "mul x10, %[mp], x11\n\t" + "ldp x24, x25, [%[m], 0]\n\t" + "ldp x26, x27, [%[m], 16]\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" - "adds x12, x12, x7\n\t" + "mul x5, x24, x10\n\t" + "umulh x6, x24, x10\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x12, x13, x7\n\t" + "adds x11, x11, x5\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x11, x12, x5\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" - "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" - "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x11, x11, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, 
xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x12, x13, x5\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" - "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" - "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "adds x12, x12, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x13, x14, x5\n\t" + "ldp x24, x25, [%[m], 32]\n\t" + "ldp x26, x27, [%[m], 48]\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x13, x13, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x14, x15, x5\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "adds x14, x14, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x15, x16, x5\n\t" "# a[i+6] += m[6] * mu\n\t" - "ldr x11, [%[a], 48]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 40]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x15, x15, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x16, x17, x5\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "adds x16, x16, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x17, x19, x5\n\t" + "ldp x24, x25, [%[m], 64]\n\t" + "ldp x26, 
x27, [%[m], 80]\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x17, x17, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x19, x20, x5\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "adds x19, x19, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x20, x21, x5\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x20, x20, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x21, x22, x5\n\t" "# a[i+11] += m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, x10, x7\n\t" + "adds x21, x21, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x22, x23, x5\n\t" + "ldp x24, x25, [%[m], 96]\n\t" + "ldp x26, x27, [%[m], 112]\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x22, x22, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "ldr x23, [%[a], 96]\n\t" + "umulh x6, x24, x10\n\t" + "adds x23, x23, x5\n\t" 
"# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "adds x23, x23, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "ldp x8, x9, [%[a], 104]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 104]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 112]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 112]\n\t" + "ldp x8, x9, [%[a], 120]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 128]\n\t" + "ldp x26, x27, [%[m], 144]\n\t" "# a[i+16] += m[16] * mu\n\t" - "ldr x11, [%[a], 128]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 128]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 120]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 120]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+17] += m[17] * mu\n\t" - "ldr x10, [%[a], 136]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 136]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - 
"str x11, [%[a], 128]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 128]\n\t" + "ldp x8, x9, [%[a], 136]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+18] += m[18] * mu\n\t" - "ldr x11, [%[a], 144]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 144]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 136]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 136]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+19] += m[19] * mu\n\t" - "ldr x10, [%[a], 152]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 152]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 144]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 144]\n\t" + "ldp x8, x9, [%[a], 152]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 160]\n\t" + "ldp x26, x27, [%[m], 176]\n\t" "# a[i+20] += m[20] * mu\n\t" - "ldr x11, [%[a], 160]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 160]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 152]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 152]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+21] += m[21] * mu\n\t" - "ldr x10, [%[a], 168]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 168]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 160]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 160]\n\t" + 
"ldp x8, x9, [%[a], 168]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+22] += m[22] * mu\n\t" - "ldr x11, [%[a], 176]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 176]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 168]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 168]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+23] += m[23] * mu\n\t" - "ldr x10, [%[a], 184]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 184]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 176]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 176]\n\t" + "ldp x8, x9, [%[a], 184]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 192]\n\t" + "ldp x26, x27, [%[m], 208]\n\t" "# a[i+24] += m[24] * mu\n\t" - "ldr x11, [%[a], 192]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 192]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 184]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 184]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+25] += m[25] * mu\n\t" - "ldr x10, [%[a], 200]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 200]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 192]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 192]\n\t" + "ldp x8, x9, [%[a], 200]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+26] += m[26] * mu\n\t" - "ldr x11, [%[a], 208]\n\t" - "adc x5, x8, 
xzr\n\t" - "ldr x8, [%[m], 208]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 200]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 200]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+27] += m[27] * mu\n\t" - "ldr x10, [%[a], 216]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 216]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 208]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 208]\n\t" + "ldp x8, x9, [%[a], 216]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 224]\n\t" + "ldp x26, x27, [%[m], 240]\n\t" "# a[i+28] += m[28] * mu\n\t" - "ldr x11, [%[a], 224]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 224]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 216]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 216]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+29] += m[29] * mu\n\t" - "ldr x10, [%[a], 232]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 232]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 224]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 224]\n\t" + "ldp x8, x9, [%[a], 232]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+30] += m[30] * mu\n\t" - "ldr x11, [%[a], 240]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 240]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 
232]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 232]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+31] += m[31] * mu\n\t" - "ldr x10, [%[a], 248]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 248]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x8, x8, x3\n\t" - "str x11, [%[a], 240]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 256]\n\t" - "str x10, [%[a], 248]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 256]\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 240]\n\t" + "umulh x7, x27, x10\n\t" + "ldp x8, x9, [%[a], 248]\n\t" + "adds x5, x5, x6\n\t" + "adcs x7, x7, x3\n\t" + "adc x3, xzr, xzr\n\t" + "adds x8, x8, x5\n\t" + "str x8, [%[a], 248]\n\t" + "adcs x9, x9, x7\n\t" + "str x9, [%[a], 256]\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" - "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" + "b.ne 1b\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 256\n\t" + "neg x3, x3\n\t" + "mov %[mp], %[a]\n\t" + "sub %[a], %[a], 256\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 
48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 112]\n\t" - "ldp x5, x6, [%[m], 128]\n\t" - "ldp x7, x8, [%[m], 144]\n\t" - "ldp x12, x13, [x9, 128]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 144]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 128]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 144]\n\t" - "ldp x5, x6, [%[m], 160]\n\t" - "ldp x7, x8, [%[m], 176]\n\t" - "ldp x12, x13, [x9, 160]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 176]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 160]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 176]\n\t" - "ldp x5, x6, [%[m], 192]\n\t" - "ldp x7, x8, [%[m], 208]\n\t" - "ldp x12, x13, [x9, 192]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 208]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 192]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 208]\n\t" - "ldp x5, x6, [%[m], 224]\n\t" - "ldp x7, x8, [%[m], 240]\n\t" - "ldp x12, x13, [x9, 224]\n\t" - "and x5, x5, 
x3\n\t" - "ldp x11, x10, [x9, 240]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 224]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 240]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x11, x11, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x12, x12, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x13, x13, x6\n\t" + "stp x11, x12, [%[a], 0]\n\t" + "sbcs x14, x14, x7\n\t" + "stp x13, x14, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x15, x15, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x16, x16, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x17, x17, x6\n\t" + "stp x15, x16, [%[a], 32]\n\t" + "sbcs x19, x19, x7\n\t" + "stp x17, x19, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x20, x20, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x21, x21, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x22, x22, x6\n\t" + "stp x20, x21, [%[a], 64]\n\t" + "sbcs x23, x23, x7\n\t" + "stp x22, x23, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "ldp x8, x9, [%[mp], 96]\n\t" + "ldp x10, x11, [%[mp], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 96]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 112]\n\t" + "ldp x4, x5, [%[m], 128]\n\t" + "ldp x6, x7, [%[m], 144]\n\t" + "ldp x8, x9, [%[mp], 128]\n\t" + "ldp x10, x11, [%[mp], 144]\n\t" + "and 
x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 128]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 144]\n\t" + "ldp x4, x5, [%[m], 160]\n\t" + "ldp x6, x7, [%[m], 176]\n\t" + "ldp x8, x9, [%[mp], 160]\n\t" + "ldp x10, x11, [%[mp], 176]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 160]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 176]\n\t" + "ldp x4, x5, [%[m], 192]\n\t" + "ldp x6, x7, [%[m], 208]\n\t" + "ldp x8, x9, [%[mp], 192]\n\t" + "ldp x10, x11, [%[mp], 208]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 192]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 208]\n\t" + "ldp x4, x5, [%[m], 224]\n\t" + "ldp x6, x7, [%[m], 240]\n\t" + "ldp x8, x9, [%[mp], 224]\n\t" + "ldp x10, x11, [%[mp], 240]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 224]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 240]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x10", "x8", "x9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "cc" ); } @@ -3988,9 +4776,9 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_32(r, a, b); @@ -4002,9 +4790,9 @@ static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_32(r, a); @@ -4042,7 +4830,7 @@ static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -4142,45 +4930,45 @@ static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } #endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. 
*/ -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_32_cond(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "b.lt 1f\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + "1:\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -4189,23 +4977,25 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "cmp x3, x5\n\t" + "b.lt 2f\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + "2:\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); - return r; + return d1; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -4217,8 +5007,8 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -4228,9 +5018,20 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s div = d[31]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); - for (i=31; i>=0; i--) { - sp_digit hi = t1[32 + i] - (t1[32 + i] == div); - r1 = div_2048_word_32(hi, t1[32 + i - 1], div); + for (i = 31; i > 0; i--) { + if (t1[i + 32] != d[i]) + break; + } + if (t1[i + 32] >= d[i]) { + sp_2048_sub_in_place_32(&t1[32], d); + } + for (i = 31; i >= 0; i--) { + if (t1[32 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_2048_word_32_cond(t1[32 + i], t1[32 + i - 1], div); + } sp_2048_mul_d_32(t2, d, r1); t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); @@ -4298,7 +5099,7 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -4420,13 +5221,74 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; #endif /* WOLFSSL_SP_SMALL */ } +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. 
+ */ +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add %[d1], x6, x3\n\t" + + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" + ); + + return d1; +} + /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -4468,259 +5330,235 @@ static sp_int64 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 248\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #32\n\t" + "add %[a], %[a], #240\n\t" + "add %[b], %[b], #240\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 240]\n\t" - "ldp x7, x8, [%[b], 240]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 240]\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 224]\n\t" - "ldp x7, x8, [%[b], 224]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, 
x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 224]\n\t" + "ldp x8, x9, [%[b], 224]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 208]\n\t" - "ldp x7, x8, [%[b], 208]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 208]\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 192]\n\t" - "ldp x7, x8, [%[b], 192]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 192]\n\t" + "ldp x8, x9, [%[b], 192]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "ldp x7, x8, [%[b], 176]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + 
"csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 160]\n\t" - "ldp x7, x8, [%[b], 160]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 160]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "ldp x7, x8, [%[b], 144]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 128]\n\t" - "ldp x7, x8, [%[b], 128]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + 
"and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 128]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 
80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, 
x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -4736,8 
+5574,8 @@ static sp_int64 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -4747,9 +5585,13 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig div = d[31]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 32); - for (i=31; i>=0; i--) { - sp_digit hi = t1[32 + i] - (t1[32 + i] == div); + r1 = sp_2048_cmp_32(&t1[32], d) >= 0; + sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); + for (i = 31; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[32 + i] == div); + sp_digit hi = t1[32 + i] + mask; r1 = div_2048_word_32(hi, t1[32 + i - 1], div); + r1 |= mask; sp_2048_mul_d_32(t2, d, r1); t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2); @@ -4788,17 +5630,19 @@ static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else - sp_digit td[8 * 64]; + sp_digit td[32 * 64]; #endif - sp_digit* t[8]; + sp_digit* t[32]; sp_digit* norm = NULL; sp_digit mp = 1; sp_digit n; @@ -4808,147 +5652,22 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 64), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 64), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { norm = td; - for (i=0; i<8; i++) { - t[i] = td + i * 64; - } - - sp_2048_mont_setup(m, &mp); - sp_2048_mont_norm_32(norm, m); - - XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); - if (reduceA != 0) { - err = sp_2048_mod_32(t[1] + 32, a, m); - if (err == MP_OKAY) { - err = sp_2048_mod_32(t[1], t[1], m); - } - } - else { - XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); - err = sp_2048_mod_32(t[1], t[1], m); - } - } - - if (err == MP_OKAY) { - sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); - sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); - sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); - sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); - sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); - sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); - - i = (bits - 1) / 64; - n = e[i--]; - c = bits & 63; - if (c == 0) { - c = 64; - } - c -= bits % 3; - if (c == 64) { - c = 61; - } - if (c < 0) { - /* Number of bits in top word is less than number needed. 
*/ - c = -c; - y = (byte)(n << c); - n = e[i--]; - y |= (byte)(n >> (64 - c)); - n <<= c; - c = 64 - c; - } - else { - y = (byte)(n >> c); - n <<= 64 - c; - } - XMEMCPY(r, t[y], sizeof(sp_digit) * 32); - for (; i>=0 || c>=3; ) { - if (c == 0) { - n = e[i--]; - y = (byte)(n >> 61); - n <<= 3; - c = 61; - } - else if (c < 3) { - y = (byte)(n >> 61); - n = e[i--]; - c = 3 - c; - y |= (byte)(n >> (64 - c)); - n <<= c; - c = 64 - c; - } - else { - y = (byte)((n >> 61) & 0x7); - n <<= 3; - c -= 3; - } - - sp_2048_mont_sqr_32(r, r, m, mp); - sp_2048_mont_sqr_32(r, r, m, mp); - sp_2048_mont_sqr_32(r, r, m, mp); - - sp_2048_mont_mul_32(r, r, t[y], m, mp); - } - - XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); - sp_2048_mont_reduce_32(r, m, mp); - - mask = 0 - (sp_2048_cmp_32(r, m) >= 0); - sp_2048_cond_sub_32(r, r, m, mask); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (td != NULL) - XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); -#endif - - return err; -} -#else -/* Modular exponentiate a to the e mod m. (r = a^e mod m) - * - * r A single precision number that is the result of the operation. - * a A single precision number being exponentiated. - * e A single precision number that is the exponent. - * bits The number of bits in the exponent. - * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
- */ -static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, - int bits, const sp_digit* m, int reduceA) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* td = NULL; -#else - sp_digit td[16 * 64]; -#endif - sp_digit* t[16]; - sp_digit* norm = NULL; - sp_digit mp = 1; - sp_digit n; - sp_digit mask; - int i; - int c; - byte y; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 64), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; -#endif - - if (err == MP_OKAY) { - norm = td; - for (i=0; i<16; i++) { + for (i=0; i<32; i++) { t[i] = td + i * 64; } @@ -4983,6 +5702,22 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); i = (bits - 1) / 64; n = e[i--]; @@ -4990,9 +5725,9 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, if (c == 0) { c = 64; } - c -= bits % 4; + c -= bits % 5; if (c == 64) { - c = 60; + c = 59; } if (c < 0) { /* Number of bits in top word 
is less than number needed. */ @@ -5003,36 +5738,41 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; } XMEMCPY(r, t[y], sizeof(sp_digit) * 32); - for (; i>=0 || c>=4; ) { + for (; i>=0 || c>=5; ) { if (c == 0) { n = e[i--]; - y = (byte)(n >> 60); - n <<= 4; - c = 60; + y = (byte)(n >> 59); + n <<= 5; + c = 59; } - else if (c < 4) { - y = (byte)(n >> 60); + else if (c < 5) { + y = (byte)(n >> 59); n = e[i--]; - c = 4 - c; + c = 5 - c; y |= (byte)(n >> (64 - c)); n <<= c; c = 64 - c; } else { - y = (byte)((n >> 60) & 0xf); - n <<= 4; - c -= 4; + y = (byte)((n >> 59) & 0x1f); + n <<= 5; + c -= 5; } sp_2048_mont_sqr_32(r, r, m, mp); sp_2048_mont_sqr_32(r, r, m, mp); sp_2048_mont_sqr_32(r, r, m, mp); sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); sp_2048_mont_mul_32(r, r, t[y], m, mp); } @@ -5044,7 +5784,209 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_cond_sub_32(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
+ */ +static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* td = NULL; +#else + sp_digit td[64 * 64]; +#endif + sp_digit* t[64]; + sp_digit* norm = NULL; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c; + byte y; + int err = MP_OKAY; + + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (64 * 64), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + norm = td; + for (i=0; i<64; i++) { + t[i] = td + i * 64; + } + + sp_2048_mont_setup(m, &mp); + sp_2048_mont_norm_32(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 32U); + if (reduceA != 0) { + err = sp_2048_mod_32(t[1] + 32, a, m); + if (err == MP_OKAY) { + err = sp_2048_mod_32(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32); + err = sp_2048_mod_32(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp); + sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp); + sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp); + sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp); + sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp); + sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp); + sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp); + sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp); + sp_2048_mont_sqr_32(t[10], t[ 5], m, mp); + sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp); + sp_2048_mont_sqr_32(t[12], t[ 6], m, mp); + sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp); + sp_2048_mont_sqr_32(t[14], t[ 7], m, mp); + sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp); + sp_2048_mont_sqr_32(t[16], t[ 8], m, mp); + sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp); + sp_2048_mont_sqr_32(t[18], t[ 9], m, mp); + sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp); + sp_2048_mont_sqr_32(t[20], t[10], m, mp); + sp_2048_mont_mul_32(t[21], 
t[11], t[10], m, mp); + sp_2048_mont_sqr_32(t[22], t[11], m, mp); + sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp); + sp_2048_mont_sqr_32(t[24], t[12], m, mp); + sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp); + sp_2048_mont_sqr_32(t[26], t[13], m, mp); + sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp); + sp_2048_mont_sqr_32(t[28], t[14], m, mp); + sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp); + sp_2048_mont_sqr_32(t[30], t[15], m, mp); + sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp); + sp_2048_mont_sqr_32(t[32], t[16], m, mp); + sp_2048_mont_mul_32(t[33], t[17], t[16], m, mp); + sp_2048_mont_sqr_32(t[34], t[17], m, mp); + sp_2048_mont_mul_32(t[35], t[18], t[17], m, mp); + sp_2048_mont_sqr_32(t[36], t[18], m, mp); + sp_2048_mont_mul_32(t[37], t[19], t[18], m, mp); + sp_2048_mont_sqr_32(t[38], t[19], m, mp); + sp_2048_mont_mul_32(t[39], t[20], t[19], m, mp); + sp_2048_mont_sqr_32(t[40], t[20], m, mp); + sp_2048_mont_mul_32(t[41], t[21], t[20], m, mp); + sp_2048_mont_sqr_32(t[42], t[21], m, mp); + sp_2048_mont_mul_32(t[43], t[22], t[21], m, mp); + sp_2048_mont_sqr_32(t[44], t[22], m, mp); + sp_2048_mont_mul_32(t[45], t[23], t[22], m, mp); + sp_2048_mont_sqr_32(t[46], t[23], m, mp); + sp_2048_mont_mul_32(t[47], t[24], t[23], m, mp); + sp_2048_mont_sqr_32(t[48], t[24], m, mp); + sp_2048_mont_mul_32(t[49], t[25], t[24], m, mp); + sp_2048_mont_sqr_32(t[50], t[25], m, mp); + sp_2048_mont_mul_32(t[51], t[26], t[25], m, mp); + sp_2048_mont_sqr_32(t[52], t[26], m, mp); + sp_2048_mont_mul_32(t[53], t[27], t[26], m, mp); + sp_2048_mont_sqr_32(t[54], t[27], m, mp); + sp_2048_mont_mul_32(t[55], t[28], t[27], m, mp); + sp_2048_mont_sqr_32(t[56], t[28], m, mp); + sp_2048_mont_mul_32(t[57], t[29], t[28], m, mp); + sp_2048_mont_sqr_32(t[58], t[29], m, mp); + sp_2048_mont_mul_32(t[59], t[30], t[29], m, mp); + sp_2048_mont_sqr_32(t[60], t[30], m, mp); + sp_2048_mont_mul_32(t[61], t[31], t[30], m, mp); + sp_2048_mont_sqr_32(t[62], t[31], m, mp); + sp_2048_mont_mul_32(t[63], 
t[32], t[31], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 6; + if (c == 64) { + c = 58; + } + if (c < 0) { + /* Number of bits in top word is less than number needed. */ + c = -c; + y = (byte)(n << c); + n = e[i--]; + y |= (byte)(n >> (64 - c)); + n <<= c; + c = 64 - c; + } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } + else { + y = (byte)(n >> c); + n <<= 64 - c; + } + XMEMCPY(r, t[y], sizeof(sp_digit) * 32); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (byte)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (byte)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= (byte)(n >> (64 - c)); + n <<= c; + c = 64 - c; + } + else { + y = (byte)((n >> 58) & 0x3f); + n <<= 6; + c -= 6; + } + + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + sp_2048_mont_sqr_32(r, r, m, mp); + + sp_2048_mont_mul_32(r, r, t[y], m, mp); + } + + XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U); + sp_2048_mont_reduce_32(r, m, mp); + + mask = 0 - (sp_2048_cmp_32(r, m) >= 0); + sp_2048_cond_sub_32(r, r, m, mask); + } + +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -5071,7 +6013,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[32 * 5]; @@ -5093,7 +6035,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL, 
DYNAMIC_TYPE_RSA); @@ -5103,9 +6045,9 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 32; r = a + 32 * 2; m = r + 32 * 2; - ah = a + 32; sp_2048_from_bin(ah, 32, in, inLen); #if DIGIT_BIT >= 64 @@ -5123,7 +6065,38 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_2048_from_mp(m, 32, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_2048_mont_setup(m, &mp); + + /* Convert to Montgomery form. */ + XMEMSET(a, 0, sizeof(sp_digit) * 32); + err = sp_2048_mod_32_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_2048_mont_sqr_32(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_2048_mont_mul_32(r, r, ah, m, mp); + + for (i = 31; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_2048_sub_in_place_32(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_2048_sqr_32(r, ah); err = sp_2048_mod_32_cond(r, r, m); @@ -5151,7 +6124,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 32); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_2048_mont_sqr_32(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_2048_mont_mul_32(r, r, a, m, mp); @@ -5177,7 +6150,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -5186,6 +6159,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -5197,7 +6171,6 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { -#ifdef WOLFSSL_SP_SMALL sp_digit c = 0; __asm__ __volatile__ ( @@ -5215,78 +6188,12 @@ static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; -#else - __asm__ __volatile__ ( - - "ldp x5, x7, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "ldp x4, x6, [%[a], 0]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 16]\n\t" - "and x7, x7, %[m]\n\t" - "adds x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 0]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 16]\n\t" - "ldp x5, x7, [%[b], 32]\n\t" - "ldp x11, x12, [%[b], 48]\n\t" - "ldp x4, x6, [%[a], 32]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 48]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 32]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 48]\n\t" - "ldp x5, x7, [%[b], 64]\n\t" - "ldp x11, x12, [%[b], 80]\n\t" - "ldp x4, x6, [%[a], 64]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 80]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 64]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 80]\n\t" - "ldp x5, x7, [%[b], 96]\n\t" - "ldp x11, x12, [%[b], 112]\n\t" - "ldp x4, x6, [%[a], 96]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 112]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, 
x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 96]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 112]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" - ); - - return (sp_digit)r; -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * @@ -5310,7 +6217,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[32 * 4]; @@ -5344,7 +6251,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -5369,21 +6276,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 32); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[16 * 11]; @@ -5411,8 +6318,14 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if 
(mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -5460,12 +6373,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 16 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -5596,7 +6509,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 -static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n) +static void sp_2048_lshift_32(sp_digit* r, const sp_digit* a, byte n) { word64 n64 = n; __asm__ __volatile__ ( @@ -5792,11 +6705,10 @@ static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n) "lsl x2, x2, %[n]\n\t" "lsr x5, x5, x6\n\t" "orr x3, x3, x5\n\t" - "str x2, [%[r]]\n\t" - "str x3, [%[r], 8]\n\t" + "stp x2, x3, [%[r]]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n64) - : "memory", "x2", "x3", "x4", "x5", "x6" + : "memory", "x2", "x3", "x4", "x5", "x6", "cc" ); } @@ -5806,12 +6718,14 @@ static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[97]; @@ -5827,11 +6741,17 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5860,6 +6780,10 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -5907,7 +6831,7 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, sp_2048_cond_sub_32(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -6048,41 +6972,108 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, */ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = a[6]; 
//fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. */ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, 
[x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } /* Convert an mp_int to an array of sp_digit. @@ -6094,20 +7085,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -6137,12 +7131,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { r[j] &= 0xffffffffffffffffl; @@ -6179,17 +7173,19 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_3072_to_bin_48(sp_digit* r, byte* a) { int i; - int j; + int j = 0; - for (i = 47, j = 0; i >= 0; i--) { - a[j++] = r[i] >> 56; - a[j++] = r[i] >> 48; - a[j++] = r[i] >> 40; - a[j++] = r[i] >> 32; - a[j++] = r[i] >> 24; - a[j++] = r[i] >> 16; - a[j++] = r[i] >> 8; - a[j++] = r[i] >> 0; + for (i = 47; i >= 0; i--, j += 8) { + __asm__ __volatile__ ( + "ldr x4, 
[%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); } } @@ -6214,1670 +7210,355 @@ static void sp_3072_to_bin_48(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[12]; - __asm__ __volatile__ ( - "ldp x10, x11, [%[a], 0]\n\t" - "ldp x12, x13, [%[a], 16]\n\t" - "ldp x14, x15, [%[a], 32]\n\t" - "ldp x16, x17, [%[a], 48]\n\t" - "ldp x19, x20, [%[a], 64]\n\t" - "ldp x21, x22, [%[a], 80]\n\t" + "ldp x8, x9, [%[a], 0]\n\t" + "ldp x10, x11, [%[a], 16]\n\t" + "ldp x12, x13, [%[a], 32]\n\t" + "ldp x14, x15, [%[b], 0]\n\t" + "ldp x16, x17, [%[b], 16]\n\t" + "ldp x19, x20, [%[b], 32]\n\t" "# A[0] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "mul x4, x10, x9\n\t" - "umulh x5, x10, x9\n\t" - "mov x6, 0\n\t" - "str x4, [%[tmp]]\n\t" + "mul x3, x8, x14\n\t" + "umulh x4, x8, x14\n\t" + "str x3, [%[r]]\n\t" "# A[0] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x8, x15\n\t" + "umulh x7, x8, x15\n\t" + "adds x4, x4, x6\n\t" "# A[1] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[tmp], 8]\n\t" - "adc x4, x4, xzr\n\t" + "mul x6, x9, x14\n\t" + "adc x5, xzr, x7\n\t" + "umulh x7, x9, x14\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 8]\n\t" + "adc x3, xzr, xzr\n\t" "# A[0] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x8, x16\n\t" + "umulh x7, x8, x16\n\t" + "adds x5, x5, x6\n\t" "# A[1] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x4, x4, x8\n\t" - "mul 
x7, x11, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x9, x15\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x15\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" "# A[2] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[tmp], 16]\n\t" - "adc x5, x5, xzr\n\t" + "mul x6, x10, x14\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 16]\n\t" + "adc x4, x4, xzr\n\t" "# A[0] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x8, x17\n\t" + "umulh x7, x8, x17\n\t" + "adds x3, x3, x6\n\t" "# A[1] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x9, x16\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x9, x16\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" "# A[2] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x10, x15\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x15\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" "# A[3] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[tmp], 24]\n\t" - "adc x6, x6, xzr\n\t" + "mul x6, x11, x14\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x14\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 24]\n\t" + "adc x5, x5, xzr\n\t" "# A[0] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x5, x5, 
x7\n\t" + "mul x6, x8, x19\n\t" + "umulh x7, x8, x19\n\t" + "adds x4, x4, x6\n\t" "# A[1] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x9, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x9, x17\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" "# A[2] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x10, x16\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x10, x16\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" "# A[3] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x11, x15\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x15\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" "# A[4] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[tmp], 32]\n\t" - "adc x4, x4, xzr\n\t" + "mul x6, x12, x14\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x14\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 32]\n\t" + "adc x3, x3, xzr\n\t" "# A[0] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x8, x20\n\t" + "umulh x7, x8, x20\n\t" + "adds x5, x5, x6\n\t" "# A[1] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x9, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x19\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" "# A[2] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x5, 
x5, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x10, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" "# A[3] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x11, x16\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x11, x16\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" "# A[4] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x12, x15\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x15\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" "# A[5] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[tmp], 40]\n\t" - "adc x5, x5, xzr\n\t" - "# A[0] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x13, x14\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 40]\n\t" + "adc x4, x4, xzr\n\t" "# A[1] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x9, x20\n\t" + "umulh x7, x9, x20\n\t" + "adds x3, x3, x6\n\t" "# A[2] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x10, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x19\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" "# A[3] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x6, x6, xzr\n\t" - 
"umulh x8, x13, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x11, x17\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" "# A[4] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x12, x16\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x12, x16\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" "# A[5] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[6] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[tmp], 48]\n\t" - "adc x6, x6, xzr\n\t" - "# A[0] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[1] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x13, x15\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x15\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 48]\n\t" + "adc x5, x5, xzr\n\t" "# A[2] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x10, x20\n\t" + "umulh x7, x10, x20\n\t" + "adds x4, x4, x6\n\t" "# A[3] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x11, x19\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x19\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" "# A[4] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x14, 
x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x12, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" "# A[5] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[6] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[7] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[tmp], 56]\n\t" - "adc x4, x4, xzr\n\t" - "# A[0] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[1] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[2] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x13, x16\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x13, x16\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 56]\n\t" + "adc x3, x3, xzr\n\t" "# A[3] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x11, x20\n\t" + "umulh x7, x11, x20\n\t" + "adds x5, x5, x6\n\t" "# A[4] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x6, x6, x7\n\t" + "mul x6, x12, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x19\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" "# 
A[5] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[6] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[7] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[8] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[tmp], 64]\n\t" - "adc x5, x5, xzr\n\t" - "# A[0] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[1] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[2] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[3] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x13, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 64]\n\t" + "adc x4, x4, xzr\n\t" "# A[4] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x4, x4, x7\n\t" + "mul x6, x12, x20\n\t" + "umulh x7, x12, x20\n\t" + "adds x3, x3, x6\n\t" "# A[5] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x4, x4, 
x7\n\t" - "# A[6] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[7] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[8] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[9] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[tmp], 72]\n\t" - "adc x6, x6, xzr\n\t" - "# A[0] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[1] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[2] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[3] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[4] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x5, x5, x7\n\t" + "mul x6, x13, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x19\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 72]\n\t" + "adc x5, x5, xzr\n\t" "# A[5] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[6] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - 
"adcs x6, x6, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[7] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[8] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[9] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[10] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[tmp], 80]\n\t" - "adc x4, x4, xzr\n\t" - "# A[0] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x10, x9\n\t" - "umulh x8, x10, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[1] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x11, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x11, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[2] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[3] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[4] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[5] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[6] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x16, 
x9\n\t" - "adds x6, x6, x7\n\t" - "# A[7] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[8] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[9] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[10] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[11] * B[0]\n\t" - "ldr x9, [%[b], 0]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[tmp], 88]\n\t" - "adc x5, x5, xzr\n\t" - "# A[1] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x11, x9\n\t" - "umulh x8, x11, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[2] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x12, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x12, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[3] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[4] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[5] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[6] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[7] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - 
"adcs x5, x5, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[8] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[9] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[10] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[11] * B[1]\n\t" - "ldr x9, [%[b], 8]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[r], 96]\n\t" - "adc x6, x6, xzr\n\t" - "# A[2] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x12, x9\n\t" - "umulh x8, x12, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[3] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x13, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x13, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[4] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[5] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[6] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[7] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[8] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x19, 
x9\n\t" - "adds x5, x5, x7\n\t" - "# A[9] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[10] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[11] * B[2]\n\t" - "ldr x9, [%[b], 16]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[r], 104]\n\t" - "adc x4, x4, xzr\n\t" - "# A[3] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x13, x9\n\t" - "umulh x8, x13, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[4] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x14, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x14, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[5] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[6] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[7] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[8] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[9] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[10] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[11] * B[3]\n\t" - "ldr x9, [%[b], 24]\n\t" - 
"adcs x4, x4, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[r], 112]\n\t" - "adc x5, x5, xzr\n\t" - "# A[4] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[5] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x15, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[6] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[7] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[8] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[9] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[10] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[11] * B[4]\n\t" - "ldr x9, [%[b], 32]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[r], 120]\n\t" - "adc x6, x6, xzr\n\t" - "# A[5] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x15, x9\n\t" - "umulh x8, x15, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[6] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x16, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x16, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[7] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x17, x9\n\t" - "adc 
x4, x4, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[8] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[9] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[10] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[11] * B[5]\n\t" - "ldr x9, [%[b], 40]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[r], 128]\n\t" - "adc x4, x4, xzr\n\t" - "# A[6] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x16, x9\n\t" - "umulh x8, x16, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[7] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x17, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x17, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[8] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[9] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[10] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[11] * B[6]\n\t" - "ldr x9, [%[b], 48]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, [%[r], 136]\n\t" - "adc x5, x5, xzr\n\t" - "# A[7] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x17, x9\n\t" - "umulh x8, x17, x9\n\t" - "adds x4, 
x4, x7\n\t" - "# A[8] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x19, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x19, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[9] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[10] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[11] * B[7]\n\t" - "ldr x9, [%[b], 56]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[r], 144]\n\t" - "adc x6, x6, xzr\n\t" - "# A[8] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x19, x9\n\t" - "umulh x8, x19, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[9] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x20, x9\n\t" - "adc x4, xzr, xzr\n\t" - "umulh x8, x20, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[10] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x5, x5, x7\n\t" - "# A[11] * B[8]\n\t" - "ldr x9, [%[b], 64]\n\t" - "adcs x6, x6, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x4, x4, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x5, x5, x7\n\t" - "adcs x6, x6, x8\n\t" - "str x5, [%[r], 152]\n\t" - "adc x4, x4, xzr\n\t" - "# A[9] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x20, x9\n\t" - "umulh x8, x20, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[10] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x21, x9\n\t" - "adc x5, xzr, xzr\n\t" - "umulh x8, x21, x9\n\t" - "adds x6, x6, x7\n\t" - "# A[11] * B[9]\n\t" - "ldr x9, [%[b], 72]\n\t" - "adcs x4, x4, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x4, x4, x8\n\t" - "str x6, 
[%[r], 160]\n\t" - "adc x5, x5, xzr\n\t" - "# A[10] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x21, x9\n\t" - "umulh x8, x21, x9\n\t" - "adds x4, x4, x7\n\t" - "# A[11] * B[10]\n\t" - "ldr x9, [%[b], 80]\n\t" - "adcs x5, x5, x8\n\t" - "mul x7, x22, x9\n\t" - "adc x6, xzr, xzr\n\t" - "umulh x8, x22, x9\n\t" - "adds x4, x4, x7\n\t" - "adcs x5, x5, x8\n\t" - "str x4, [%[r], 168]\n\t" - "adc x6, x6, xzr\n\t" - "# A[11] * B[11]\n\t" - "ldr x9, [%[b], 88]\n\t" - "mul x7, x22, x9\n\t" - "umulh x8, x22, x9\n\t" - "adds x5, x5, x7\n\t" - "adc x6, x6, x8\n\t" - "stp x5, x6, [%[r], 176]\n\t" - "ldp x10, x11, [%[tmp], 0]\n\t" - "ldp x12, x13, [%[tmp], 16]\n\t" - "ldp x14, x15, [%[tmp], 32]\n\t" - "ldp x16, x17, [%[tmp], 48]\n\t" - "ldp x19, x20, [%[tmp], 64]\n\t" - "ldp x21, x22, [%[tmp], 80]\n\t" - "stp x10, x11, [%[r], 0]\n\t" - "stp x12, x13, [%[r], 16]\n\t" - "stp x14, x15, [%[r], 32]\n\t" - "stp x16, x17, [%[r], 48]\n\t" - "stp x19, x20, [%[r], 64]\n\t" - "stp x21, x22, [%[r], 80]\n\t" + "mul x6, x13, x20\n\t" + "umulh x7, x13, x20\n\t" + "adds x4, x4, x6\n\t" + "adc x5, x5, x7\n\t" + "stp x4, x5, [%[r], 80]\n\t" : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc" ); } -/* Square a and put result in r. (r = a * a) +/* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. 
*/ -static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +static sp_digit sp_3072_add_6(sp_digit* r, const sp_digit* a, + const sp_digit* b) { __asm__ __volatile__ ( - "ldp x10, x11, [%[a], 0]\n\t" - "ldp x12, x13, [%[a], 16]\n\t" - "ldp x14, x15, [%[a], 32]\n\t" - "ldp x16, x17, [%[a], 48]\n\t" - "ldp x19, x20, [%[a], 64]\n\t" - "ldp x21, x22, [%[a], 80]\n\t" - "# A[0] * A[0]\n\t" - "mul x2, x10, x10\n\t" - "umulh x3, x10, x10\n\t" - "str x2, [%[r]]\n\t" - "mov x4, 0\n\t" - "# A[0] * A[1]\n\t" - "mul x8, x10, x11\n\t" - "umulh x9, x10, x11\n\t" - "adds x3, x3, x8\n\t" - "adcs x4, x4, x9\n\t" - "adc x2, xzr, xzr\n\t" - "adds x3, x3, x8\n\t" - "str x3, [%[r], 8]\n\t" - "# A[0] * A[2]\n\t" - "mul x8, x10, x12\n\t" - "adcs x4, x4, x9\n\t" - "umulh x9, x10, x12\n\t" - "adc x2, x2, xzr\n\t" - "adds x4, x4, x8\n\t" - "adcs x2, x2, x9\n\t" - "adc x3, xzr, xzr\n\t" - "adds x4, x4, x8\n\t" - "# A[1] * A[1]\n\t" - "mul x8, x11, x11\n\t" - "adcs x2, x2, x9\n\t" - "umulh x9, x11, x11\n\t" - "adc x3, x3, xzr\n\t" - "adds x4, x4, x8\n\t" - "str x4, [%[r], 16]\n\t" - "# A[0] * A[3]\n\t" - "mul x8, x10, x13\n\t" - "adcs x2, x2, x9\n\t" - "umulh x9, x10, x13\n\t" - "adc x3, x3, xzr\n\t" - "adds x2, x2, x8\n\t" - "adcs x3, x3, x9\n\t" - "adc x4, xzr, xzr\n\t" - "adds x2, x2, x8\n\t" - "# A[1] * A[2]\n\t" - "mul x8, x11, x12\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x11, x12\n\t" - "adc x4, x4, xzr\n\t" - "adds x2, x2, x8\n\t" - "adcs x3, x3, x9\n\t" - "adc x4, x4, xzr\n\t" - "adds x2, x2, x8\n\t" - "str x2, [%[r], 24]\n\t" - "# A[0] * A[4]\n\t" - "mul x8, x10, x14\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x10, x14\n\t" - "adc x4, x4, xzr\n\t" - "adds x3, x3, x8\n\t" - "adcs x4, x4, x9\n\t" - "adc x2, xzr, xzr\n\t" - "adds x3, x3, x8\n\t" - "# A[1] * A[3]\n\t" - "mul x8, x11, x13\n\t" - "adcs x4, x4, x9\n\t" - "umulh x9, x11, x13\n\t" - "adc x2, x2, xzr\n\t" - "adds x3, x3, x8\n\t" - "adcs x4, x4, x9\n\t" - "adc x2, x2, xzr\n\t" - "adds x3, x3, x8\n\t" - "# A[2] * A[2]\n\t" - 
"mul x8, x12, x12\n\t" - "adcs x4, x4, x9\n\t" - "umulh x9, x12, x12\n\t" - "adc x2, x2, xzr\n\t" - "adds x3, x3, x8\n\t" + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "adds x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adcs x5, x5, x9\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, x10\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x7, [%[b], 32]\n\t" + "ldr x8, [%[b], 40]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" "str x3, [%[r], 32]\n\t" - "# A[0] * A[5]\n\t" - "mul x5, x10, x15\n\t" - "adcs x4, x4, x9\n\t" - "umulh x6, x10, x15\n\t" - "adc x2, x2, xzr\n\t" - "mov x3, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[4]\n\t" - "mul x8, x11, x14\n\t" - "umulh x9, x11, x14\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[3]\n\t" - "mul x8, x12, x13\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x13\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x4, x4, x5\n\t" - "adcs x2, x2, x6\n\t" - "adc x3, x3, x7\n\t" "str x4, [%[r], 40]\n\t" - "# A[0] * A[6]\n\t" - "mul x5, x10, x16\n\t" - "umulh x6, x10, x16\n\t" - "mov x4, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[5]\n\t" - "mul x8, x11, x15\n\t" - "umulh x9, x11, x15\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[4]\n\t" - "mul x8, x12, x14\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x14\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[3]\n\t" - "mul x8, x13, x13\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x13\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x2, x2, x5\n\t" - "adcs x3, x3, x6\n\t" - "adc x4, x4, x7\n\t" - "str x2, [%[r], 48]\n\t" - "# A[0] * A[7]\n\t" - "mul x5, x10, x17\n\t" - "umulh x6, x10, x17\n\t" - "mov 
x2, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[6]\n\t" - "mul x8, x11, x16\n\t" - "umulh x9, x11, x16\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[5]\n\t" - "mul x8, x12, x15\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x15\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[4]\n\t" - "mul x8, x13, x14\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x14\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x3, x3, x5\n\t" - "adcs x4, x4, x6\n\t" - "adc x2, x2, x7\n\t" - "str x3, [%[r], 56]\n\t" - "# A[0] * A[8]\n\t" - "mul x5, x10, x19\n\t" - "umulh x6, x10, x19\n\t" - "mov x3, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[7]\n\t" - "mul x8, x11, x17\n\t" - "umulh x9, x11, x17\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[6]\n\t" - "mul x8, x12, x16\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x16\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[5]\n\t" - "mul x8, x13, x15\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x15\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[4] * A[4]\n\t" - "mul x8, x14, x14\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x14\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x4, x4, x5\n\t" - "adcs x2, x2, x6\n\t" - "adc x3, x3, x7\n\t" - "str x4, [%[r], 64]\n\t" - "# A[0] * A[9]\n\t" - "mul x5, x10, x20\n\t" - "umulh x6, x10, x20\n\t" - "mov x4, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[8]\n\t" - "mul x8, x11, x19\n\t" - "umulh x9, x11, x19\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[7]\n\t" - "mul x8, x12, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x17\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[6]\n\t" - "mul x8, x13, x16\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x16\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" 
- "# A[4] * A[5]\n\t" - "mul x8, x14, x15\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x15\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x2, x2, x5\n\t" - "adcs x3, x3, x6\n\t" - "adc x4, x4, x7\n\t" - "str x2, [%[r], 72]\n\t" - "# A[0] * A[10]\n\t" - "mul x5, x10, x21\n\t" - "umulh x6, x10, x21\n\t" - "mov x2, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[9]\n\t" - "mul x8, x11, x20\n\t" - "umulh x9, x11, x20\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[8]\n\t" - "mul x8, x12, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[7]\n\t" - "mul x8, x13, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x17\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[4] * A[6]\n\t" - "mul x8, x14, x16\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x16\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[5] * A[5]\n\t" - "mul x8, x15, x15\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x15, x15\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x3, x3, x5\n\t" - "adcs x4, x4, x6\n\t" - "adc x2, x2, x7\n\t" - "str x3, [%[r], 80]\n\t" - "# A[0] * A[11]\n\t" - "mul x5, x10, x22\n\t" - "umulh x6, x10, x22\n\t" - "mov x3, 0\n\t" - "mov x7, 0\n\t" - "# A[1] * A[10]\n\t" - "mul x8, x11, x21\n\t" - "umulh x9, x11, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[2] * A[9]\n\t" - "mul x8, x12, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x12, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[8]\n\t" - "mul x8, x13, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[4] * A[7]\n\t" - "mul x8, x14, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x17\n\t" - "adc x7, x7, xzr\n\t" - 
"adds x5, x5, x8\n\t" - "# A[5] * A[6]\n\t" - "mul x8, x15, x16\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x15, x16\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x4, x4, x5\n\t" - "adcs x2, x2, x6\n\t" - "adc x3, x3, x7\n\t" - "str x4, [%[r], 88]\n\t" - "# A[1] * A[11]\n\t" - "mul x5, x11, x22\n\t" - "umulh x6, x11, x22\n\t" - "mov x4, 0\n\t" - "mov x7, 0\n\t" - "# A[2] * A[10]\n\t" - "mul x8, x12, x21\n\t" - "umulh x9, x12, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[3] * A[9]\n\t" - "mul x8, x13, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x13, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[4] * A[8]\n\t" - "mul x8, x14, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[5] * A[7]\n\t" - "mul x8, x15, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x15, x17\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[6] * A[6]\n\t" - "mul x8, x16, x16\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x16, x16\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x2, x2, x5\n\t" - "adcs x3, x3, x6\n\t" - "adc x4, x4, x7\n\t" - "str x2, [%[r], 96]\n\t" - "# A[2] * A[11]\n\t" - "mul x5, x12, x22\n\t" - "umulh x6, x12, x22\n\t" - "mov x2, 0\n\t" - "mov x7, 0\n\t" - "# A[3] * A[10]\n\t" - "mul x8, x13, x21\n\t" - "umulh x9, x13, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[4] * A[9]\n\t" - "mul x8, x14, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x14, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[5] * A[8]\n\t" - "mul x8, x15, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x15, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[6] * A[7]\n\t" - "mul x8, x16, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x16, x17\n\t" - 
"adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x3, x3, x5\n\t" - "adcs x4, x4, x6\n\t" - "adc x2, x2, x7\n\t" - "str x3, [%[r], 104]\n\t" - "# A[3] * A[11]\n\t" - "mul x5, x13, x22\n\t" - "umulh x6, x13, x22\n\t" - "mov x3, 0\n\t" - "mov x7, 0\n\t" - "# A[4] * A[10]\n\t" - "mul x8, x14, x21\n\t" - "umulh x9, x14, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[5] * A[9]\n\t" - "mul x8, x15, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x15, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[6] * A[8]\n\t" - "mul x8, x16, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x16, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[7] * A[7]\n\t" - "mul x8, x17, x17\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x17, x17\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x4, x4, x5\n\t" - "adcs x2, x2, x6\n\t" - "adc x3, x3, x7\n\t" - "str x4, [%[r], 112]\n\t" - "# A[4] * A[11]\n\t" - "mul x5, x14, x22\n\t" - "umulh x6, x14, x22\n\t" - "mov x4, 0\n\t" - "mov x7, 0\n\t" - "# A[5] * A[10]\n\t" - "mul x8, x15, x21\n\t" - "umulh x9, x15, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[6] * A[9]\n\t" - "mul x8, x16, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x16, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[7] * A[8]\n\t" - "mul x8, x17, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x17, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x2, x2, x5\n\t" - "adcs x3, x3, x6\n\t" - "adc x4, x4, x7\n\t" - "str x2, [%[r], 120]\n\t" - "# A[5] * A[11]\n\t" - "mul x5, x15, x22\n\t" - "umulh x6, x15, x22\n\t" - "mov x2, 0\n\t" - "mov x7, 0\n\t" - "# A[6] * A[10]\n\t" - "mul x8, x16, 
x21\n\t" - "umulh x9, x16, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[7] * A[9]\n\t" - "mul x8, x17, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x17, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "# A[8] * A[8]\n\t" - "mul x8, x19, x19\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x19, x19\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x3, x3, x5\n\t" - "adcs x4, x4, x6\n\t" - "adc x2, x2, x7\n\t" - "str x3, [%[r], 128]\n\t" - "# A[6] * A[11]\n\t" - "mul x5, x16, x22\n\t" - "umulh x6, x16, x22\n\t" - "mov x3, 0\n\t" - "mov x7, 0\n\t" - "# A[7] * A[10]\n\t" - "mul x8, x17, x21\n\t" - "umulh x9, x17, x21\n\t" - "adds x5, x5, x8\n\t" - "# A[8] * A[9]\n\t" - "mul x8, x19, x20\n\t" - "adcs x6, x6, x9\n\t" - "umulh x9, x19, x20\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x8\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, xzr\n\t" - "adds x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "adc x7, x7, x7\n\t" - "adds x4, x4, x5\n\t" - "adcs x2, x2, x6\n\t" - "adc x3, x3, x7\n\t" - "str x4, [%[r], 136]\n\t" - "# A[7] * A[11]\n\t" - "mul x8, x17, x22\n\t" - "umulh x9, x17, x22\n\t" - "adds x2, x2, x8\n\t" - "adcs x3, x3, x9\n\t" - "adc x4, xzr, xzr\n\t" - "adds x2, x2, x8\n\t" - "# A[8] * A[10]\n\t" - "mul x8, x19, x21\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x19, x21\n\t" - "adc x4, x4, xzr\n\t" - "adds x2, x2, x8\n\t" - "adcs x3, x3, x9\n\t" - "adc x4, x4, xzr\n\t" - "adds x2, x2, x8\n\t" - "# A[9] * A[9]\n\t" - "mul x8, x20, x20\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x20, x20\n\t" - "adc x4, x4, xzr\n\t" - "adds x2, x2, x8\n\t" - "str x2, [%[r], 144]\n\t" - "# A[8] * A[11]\n\t" - "mul x8, x19, x22\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x19, x22\n\t" - "adc x4, x4, xzr\n\t" - "adds x3, x3, x8\n\t" - "adcs x4, x4, x9\n\t" - "adc x2, xzr, xzr\n\t" - "adds x3, x3, x8\n\t" - "# A[9] * A[10]\n\t" - "mul x8, x20, x21\n\t" - "adcs x4, x4, 
x9\n\t" - "umulh x9, x20, x21\n\t" - "adc x2, x2, xzr\n\t" - "adds x3, x3, x8\n\t" - "adcs x4, x4, x9\n\t" - "adc x2, x2, xzr\n\t" - "adds x3, x3, x8\n\t" - "str x3, [%[r], 152]\n\t" - "# A[9] * A[11]\n\t" - "mul x8, x20, x22\n\t" - "adcs x4, x4, x9\n\t" - "umulh x9, x20, x22\n\t" - "adc x2, x2, xzr\n\t" - "adds x4, x4, x8\n\t" - "adcs x2, x2, x9\n\t" - "adc x3, xzr, xzr\n\t" - "adds x4, x4, x8\n\t" - "# A[10] * A[10]\n\t" - "mul x8, x21, x21\n\t" - "adcs x2, x2, x9\n\t" - "umulh x9, x21, x21\n\t" - "adc x3, x3, xzr\n\t" - "adds x4, x4, x8\n\t" - "str x4, [%[r], 160]\n\t" - "# A[10] * A[11]\n\t" - "mul x8, x21, x22\n\t" - "adcs x2, x2, x9\n\t" - "umulh x9, x21, x22\n\t" - "adc x3, x3, xzr\n\t" - "adds x2, x2, x8\n\t" - "adcs x3, x3, x9\n\t" - "adc x4, xzr, xzr\n\t" - "adds x2, x2, x8\n\t" - "str x2, [%[r], 168]\n\t" - "# A[11] * A[11]\n\t" - "mul x8, x22, x22\n\t" - "adcs x3, x3, x9\n\t" - "umulh x9, x22, x22\n\t" - "adc x4, x4, xzr\n\t" - "adds x3, x3, x8\n\t" - "adc x4, x4, x9\n\t" - "stp x3, x4, [%[r], 176]\n\t" - : - : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22" + "adc %[r], xzr, xzr\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); + + return (sp_digit)r; +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_3072_add_word_6(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "str x3, [%[r], 32]\n\t" + "str x4, [%[r], 40]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" + ); +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. + */ +static sp_digit sp_3072_sub_in_place_12(sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "subs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "sbcs x2, x2, x6\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "sbcs x5, x5, x9\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "csetm %[a], cc\n\t" + : [a] "+r" (a) + : [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" + ); + + return (sp_digit)a; } /* Add b to a into r. 
(r = a + b) @@ -7920,15 +7601,135 @@ static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 64]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 80]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + __asm__ __volatile__ ( + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" + ); + + return (sp_digit)r; +} +#endif /* !WOLFSSL_SP_SMALL */ + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[12]; + sp_digit a1[6]; + sp_digit b1[6]; + sp_digit* z2 = r + 12; + sp_digit u; + sp_digit ca; + sp_digit cb; + + ca = sp_3072_add_6(a1, a, &a[6]); + cb = sp_3072_add_6(b1, b, &b[6]); + u = ca & cb; + + sp_3072_mul_6(z2, &a[6], &b[6]); + sp_3072_mul_6(z0, a, b); + sp_3072_mul_6(z1, a1, b1); + + u += sp_3072_sub_in_place_12(z1, z0); + u += sp_3072_sub_in_place_12(z1, z2); + u += sp_3072_cond_add_6(z1 + 6, z1 + 6, a1, 0 - cb); + u += sp_3072_cond_add_6(z1 + 6, z1 + 6, b1, 0 - ca); + + u += sp_3072_add_12(r + 6, r + 6, z1); + (void)sp_3072_add_word_6(r + 18, r + 18, u); +} + +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_3072_add_word_12(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 80]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" + ); +} + /* Sub b from a into a. (a -= b) * * a A single precision integer and result. 
@@ -8000,7 +7801,7 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -8076,43 +7877,114 @@ static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 160]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 176]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* AND m into each word of a and store in r. +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_3072_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + __asm__ __volatile__ ( + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "ldp x10, x11, [%[b], 80]\n\t" + "ldp x4, x5, [%[a], 64]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 64]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 80]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" + ); + + return (sp_digit)r; +} +#endif /* !WOLFSSL_SP_SMALL */ + +/* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. - * m Mask to AND against each digit. + * b A single precision integer. 
*/ -static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) +SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) { -#ifdef WOLFSSL_SP_SMALL - int i; + sp_digit* z0 = r; + sp_digit z1[24]; + sp_digit a1[12]; + sp_digit b1[12]; + sp_digit* z2 = r + 24; + sp_digit u; + sp_digit ca; + sp_digit cb; - for (i=0; i<12; i++) { - r[i] = a[i] & m; - } -#else - r[0] = a[0] & m; - r[1] = a[1] & m; - r[2] = a[2] & m; - r[3] = a[3] & m; - r[4] = a[4] & m; - r[5] = a[5] & m; - r[6] = a[6] & m; - r[7] = a[7] & m; - r[8] = a[8] & m; - r[9] = a[9] & m; - r[10] = a[10] & m; - r[11] = a[11] & m; -#endif + ca = sp_3072_add_12(a1, a, &a[12]); + cb = sp_3072_add_12(b1, b, &b[12]); + u = ca & cb; + + sp_3072_mul_12(z2, &a[12], &b[12]); + sp_3072_mul_12(z0, a, b); + sp_3072_mul_12(z1, a1, b1); + + u += sp_3072_sub_in_place_24(z1, z0); + u += sp_3072_sub_in_place_24(z1, z2); + u += sp_3072_cond_add_12(z1 + 12, z1 + 12, a1, 0 - cb); + u += sp_3072_cond_add_12(z1 + 12, z1 + 12, b1, 0 - ca); + + u += sp_3072_add_24(r + 12, r + 12, z1); + (void)sp_3072_add_word_12(r + 36, r + 36, u); } /* Add digit to a into r. (r = a + b) @@ -8121,13 +7993,13 @@ static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ -static void sp_3072_add_zero_12(sp_digit* r, const sp_digit* a, - const sp_digit d) +static void sp_3072_add_word_24(sp_digit* r, const sp_digit* a, + sp_digit b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" + "adds x3, x3, %[b]\n\t" "adcs x4, x4, xzr\n\t" "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 0]\n\t" @@ -8149,149 +8021,36 @@ static void sp_3072_add_zero_12(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 64]\n\t" "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 176]\n\t" : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" ); } -/* Multiply a and b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit* z0 = r; - sp_digit z1[24]; - sp_digit a1[12]; - sp_digit b1[12]; - sp_digit z2[24]; - sp_digit u, ca, cb; - - ca = sp_3072_add_12(a1, a, &a[12]); - cb = sp_3072_add_12(b1, b, &b[12]); - u = ca & cb; - sp_3072_mul_12(z1, a1, b1); - sp_3072_mul_12(z2, &a[12], &b[12]); - sp_3072_mul_12(z0, a, b); - sp_3072_mask_12(r + 24, a1, 0 - cb); - sp_3072_mask_12(b1, b1, 0 - ca); - u += sp_3072_add_12(r + 24, r + 24, b1); - u += sp_3072_sub_in_place_24(z1, z2); - u += sp_3072_sub_in_place_24(z1, z0); - u += sp_3072_add_24(r + 12, r + 12, z1); - u += sp_3072_add_12(r + 24, r + 24, z2); - sp_3072_add_zero_12(r + 36, z2 + 12, u); -} - -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. - */ -static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 96\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) - : - : "memory", "x3", "x4", "x5", "x6", "x11" - ); - - return c; -} - -#else -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a) -{ - __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 80]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 88]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6" - ); - - return (sp_digit)r; -} - -#endif /* WOLFSSL_SP_SMALL */ -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z2[24]; - sp_digit z1[24]; - sp_digit a1[12]; - sp_digit u; - - u = sp_3072_add_12(a1, a, &a[12]); - sp_3072_sqr_12(z1, a1); - sp_3072_sqr_12(z2, &a[12]); - sp_3072_sqr_12(z0, a); - sp_3072_mask_12(r + 24, a1, 0 - u); - u += sp_3072_dbl_12(r + 24, r + 24); - u += sp_3072_sub_in_place_24(z1, z2); - u += sp_3072_sub_in_place_24(z1, z0); - u += sp_3072_add_24(r + 12, r + 12, z1); - u += sp_3072_add_12(r + 24, r + 24, z2); - sp_3072_add_zero_12(r + 36, z2 + 12, u); -} - /* Sub b from a into a. (a -= b) * * a A single precision integer and result. 
@@ -8423,7 +8182,7 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -8559,108 +8318,122 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 352]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 368]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* AND m into each word of a and store in r. +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. */ -static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<24; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 24; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } -#endif -} - -/* Add digit to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static void sp_3072_add_zero_24(sp_digit* r, const sp_digit* a, - const sp_digit d) +static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 112]\n\t" - "ldp x3, x4, [%[a], 128]\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 128]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 144]\n\t" - "ldp x3, x4, [%[a], 160]\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 160]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 176]\n\t" - : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, 
x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "ldp x10, x11, [%[b], 80]\n\t" + "ldp x4, x5, [%[a], 64]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 64]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 80]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "ldp x10, x11, [%[b], 112]\n\t" + "ldp x4, x5, [%[a], 96]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 96]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 112]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "ldp x10, x11, [%[b], 144]\n\t" + "ldp x4, x5, [%[a], 128]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 128]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 144]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + "ldp x10, x11, [%[b], 176]\n\t" + "ldp x4, x5, [%[a], 160]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 160]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 
176]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); + + return (sp_digit)r; } +#endif /* !WOLFSSL_SP_SMALL */ /* Multiply a and b into r. (r = a * b) * @@ -8675,131 +8448,2788 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, sp_digit z1[48]; sp_digit a1[24]; sp_digit b1[24]; - sp_digit z2[48]; - sp_digit u, ca, cb; + sp_digit* z2 = r + 48; + sp_digit u; + sp_digit ca; + sp_digit cb; ca = sp_3072_add_24(a1, a, &a[24]); cb = sp_3072_add_24(b1, b, &b[24]); u = ca & cb; - sp_3072_mul_24(z1, a1, b1); + sp_3072_mul_24(z2, &a[24], &b[24]); sp_3072_mul_24(z0, a, b); - sp_3072_mask_24(r + 48, a1, 0 - cb); - sp_3072_mask_24(b1, b1, 0 - ca); - u += sp_3072_add_24(r + 48, r + 48, b1); - u += sp_3072_sub_in_place_48(z1, z2); + sp_3072_mul_24(z1, a1, b1); + u += sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_sub_in_place_48(z1, z2); + u += sp_3072_cond_add_24(z1 + 24, z1 + 24, a1, 0 - cb); + u += sp_3072_cond_add_24(z1 + 24, z1 + 24, b1, 0 - ca); + u += sp_3072_add_48(r + 24, r + 24, z1); - u += sp_3072_add_24(r + 48, r + 48, z2); - sp_3072_add_zero_24(r + 72, z2 + 24, u); + (void)sp_3072_add_word_24(r + 72, r + 72, u); } -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) +/* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. 
*/ -static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) { - sp_digit c = 0; + sp_digit tmp[24]; __asm__ __volatile__ ( - "add x11, %[a], 192\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" + "# A[0] * A[0]\n\t" + "ldr x9, [%[a], 0]\n\t" + "mul x8, x9, x9\n\t" + "umulh x3, x9, x9\n\t" + "mov x4, xzr\n\t" + "str x8, [%[tmp]]\n\t" + "# A[0] * A[1]\n\t" + "ldr x9, [%[a], 8]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[tmp], 8]\n\t" + "# A[0] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "# A[1] * A[1]\n\t" + "ldr x9, [%[a], 8]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 16]\n\t" + "# A[0] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "# A[1] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "str x2, [%[tmp], 24]\n\t" + "# A[0] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, 
x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[1] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[2] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[tmp], 32]\n\t" + "# A[0] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 40]\n\t" + "# A[0] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 48]\n\t" + "# A[0] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 56]\n\t" + "# A[0] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 24]\n\t" + 
"mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 64]\n\t" + "# A[0] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 72]\n\t" + "# A[0] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[8]\n\t" + "ldr x9, [%[a], 
64]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 80]\n\t" + "# A[0] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, 
x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 88]\n\t" + "# A[0] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 96]\n\t" + "# A[0] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, 
x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 104]\n\t" + "# A[0] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + 
"adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 112]\n\t" + "# A[0] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[10]\n\t" + "ldr x9, [%[a], 
80]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 120]\n\t" + "# A[0] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, 
x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 128]\n\t" + "# A[0] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + 
"adc x7, x7, xzr\n\t" + "# A[7] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 136]\n\t" + "# A[0] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[11]\n\t" + "ldr x9, 
[%[a], 88]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 144]\n\t" + "# A[0] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 152]\n\t" + "# A[0] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, 
x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 160]\n\t" + "# A[0] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + 
"ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 168]\n\t" + "# A[0] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + 
"umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + 
"adc x2, x2, x7\n\t" + "str x3, [%[tmp], 176]\n\t" + "# A[0] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, 
x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 184]\n\t" + "# A[1] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[2] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * 
A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 192]\n\t" + "# A[2] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[3] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, 
x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 200]\n\t" + "# A[3] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[4] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds 
x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 208]\n\t" + "# A[4] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[5] * 
A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 216]\n\t" + "# A[5] * A[23]\n\t" + 
"ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[6] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" 
+ "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 224]\n\t" + "# A[6] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[7] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, 
x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 232]\n\t" + "# A[7] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[8] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 240]\n\t" + "# A[8] * 
A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[9] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 248]\n\t" + "# A[9] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[10] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul 
x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 256]\n\t" + "# A[10] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[11] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + 
"adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 264]\n\t" + "# A[11] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[12] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, 
xzr\n\t" + "# A[16] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 272]\n\t" + "# A[12] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[13] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 280]\n\t" + "# A[13] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul 
x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[14] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 288]\n\t" + "# A[14] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[15] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc 
x7, x7, xzr\n\t" + "# A[18] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 296]\n\t" + "# A[15] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[16] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 304]\n\t" + "# A[16] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[17] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh 
x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 312]\n\t" + "# A[17] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[18] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 320]\n\t" + "# A[18] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[19] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, 
x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 328]\n\t" + "# A[19] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "str x2, [%[r], 336]\n\t" + "# A[20] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[21] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[r], 344]\n\t" + "# A[21] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 352]\n\t" + "# A[22] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, 
[%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "str x2, [%[r], 360]\n\t" + "# A[23] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x3, x3, x8\n\t" + "adc x4, x4, x9\n\t" + "stp x3, x4, [%[r], 368]\n\t" + "ldp x9, x10, [%[tmp], 0]\n\t" + "stp x9, x10, [%[r], 0]\n\t" + "ldp x9, x10, [%[tmp], 16]\n\t" + "stp x9, x10, [%[r], 16]\n\t" + "ldp x9, x10, [%[tmp], 32]\n\t" + "stp x9, x10, [%[r], 32]\n\t" + "ldp x9, x10, [%[tmp], 48]\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "ldp x9, x10, [%[tmp], 64]\n\t" + "stp x9, x10, [%[r], 64]\n\t" + "ldp x9, x10, [%[tmp], 80]\n\t" + "stp x9, x10, [%[r], 80]\n\t" + "ldp x9, x10, [%[tmp], 96]\n\t" + "stp x9, x10, [%[r], 96]\n\t" + "ldp x9, x10, [%[tmp], 112]\n\t" + "stp x9, x10, [%[r], 112]\n\t" + "ldp x9, x10, [%[tmp], 128]\n\t" + "stp x9, x10, [%[r], 128]\n\t" + "ldp x9, x10, [%[tmp], 144]\n\t" + "stp x9, x10, [%[r], 144]\n\t" + "ldp x9, x10, [%[tmp], 160]\n\t" + "stp x9, x10, [%[r], 160]\n\t" + "ldp x9, x10, [%[tmp], 176]\n\t" + "stp x9, x10, [%[r], 176]\n\t" : - : "memory", "x3", "x4", "x5", "x6", "x11" + : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) + : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "cc" ); - - return c; } -#else -/* Double a into r. (r = a + a) +/* Sub b from a into r. (r = a - b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. 
*/ -static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a) +static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, + const sp_digit* b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 48]\n\t" "ldp x3, x4, [%[a], 64]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 80]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 88]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 80]\n\t" "ldp x3, x4, [%[a], 96]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 112]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 120]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 112]\n\t" "ldp x3, x4, [%[a], 128]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 144]\n\t" - "adcs x4, x4, x4\n\t" - 
"ldr x6, [%[a], 152]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 128]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 144]\n\t" "ldp x3, x4, [%[a], 160]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 176]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 184]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 160]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 176]\n\t" - "cset %[r], cs\n\t" + "csetm %[r], cc\n\t" : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6" + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -#endif /* WOLFSSL_SP_SMALL */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. 
@@ -8808,22 +11238,31 @@ static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a) SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[48]; + sp_digit* z2 = r + 48; sp_digit z1[48]; - sp_digit a1[24]; + sp_digit* a1 = z1; + sp_digit* zero = z1 + 24; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 24); + + mask = sp_3072_sub_24(a1, a, &a[24]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_3072_sub_24(a1, p1, p2); - u = sp_3072_add_24(a1, a, &a[24]); - sp_3072_sqr_24(z1, a1); sp_3072_sqr_24(z2, &a[24]); sp_3072_sqr_24(z0, a); - sp_3072_mask_24(r + 48, a1, 0 - u); - u += sp_3072_dbl_24(r + 48, r + 48); - u += sp_3072_sub_in_place_48(z1, z2); - u += sp_3072_sub_in_place_48(z1, z0); - u += sp_3072_add_48(r + 24, r + 24, z1); - u += sp_3072_add_24(r + 48, r + 48, z2); - sp_3072_add_zero_24(r + 72, z2 + 24, u); + sp_3072_sqr_24(z1, a1); + + u = 0; + u -= sp_3072_sub_in_place_48(z1, z2); + u -= sp_3072_sub_in_place_48(z1, z0); + u += sp_3072_sub_in_place_48(r + 24, z1); + sp_3072_add_word_24(r + 72, r + 72, u); } #endif /* !WOLFSSL_SP_SMALL */ @@ -8853,12 +11292,12 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -8894,7 +11333,7 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", 
"x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -8913,10 +11352,10 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[96]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 376\n\t" "csel x3, xzr, x3, cc\n\t" @@ -8946,7 +11385,7 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -8962,10 +11401,10 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) sp_digit tmp[96]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 376\n\t" "csel x3, xzr, x3, cc\n\t" @@ -9011,7 +11450,7 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -9020,23 +11459,6 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) #endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) #ifdef WOLFSSL_SP_SMALL -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. 
- */ -static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) -{ - int i; - - for (i=0; i<24; i++) { - r[i] = a[i] & m; - } -} - -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -9062,12 +11484,12 @@ static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -9103,7 +11525,7 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -9122,10 +11544,10 @@ static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[48]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 184\n\t" "csel x3, xzr, x3, cc\n\t" @@ -9155,7 +11577,7 @@ static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -9171,10 +11593,10 @@ static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) sp_digit tmp[48]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, 
xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 184\n\t" "csel x3, xzr, x3, cc\n\t" @@ -9220,7 +11642,7 @@ static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -9229,7 +11651,7 @@ static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) #endif /* WOLFSSL_SP_SMALL */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -9265,9 +11687,9 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -9286,416 +11708,417 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "str x3, [%[r], 384]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], 
x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], 
x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[16] * B\n\t" - "ldp x8, x9, [%[a], 128]\n\t" + "ldp x9, x10, [%[a], 128]\n\t" "str x3, [%[r], 120]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov 
x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[17] * B\n\t" "str x4, [%[r], 128]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[18] * B\n\t" - "ldp x8, x9, [%[a], 144]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" "str x5, [%[r], 136]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[19] * B\n\t" "str x3, [%[r], 144]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[20] * B\n\t" - "ldp x8, x9, [%[a], 160]\n\t" + "ldp x9, x10, [%[a], 160]\n\t" "str x4, [%[r], 152]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[21] * B\n\t" "str x5, [%[r], 160]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[22] * B\n\t" - "ldp x8, x9, [%[a], 176]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" "str x3, [%[r], 168]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[23] * B\n\t" "str x4, [%[r], 176]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, 
%[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[24] * B\n\t" - "ldp x8, x9, [%[a], 192]\n\t" + "ldp x9, x10, [%[a], 192]\n\t" "str x5, [%[r], 184]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[25] * B\n\t" "str x3, [%[r], 192]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[26] * B\n\t" - "ldp x8, x9, [%[a], 208]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" "str x4, [%[r], 200]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[27] * B\n\t" "str x5, [%[r], 208]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[28] * B\n\t" - "ldp x8, x9, [%[a], 224]\n\t" + "ldp x9, x10, [%[a], 224]\n\t" "str x3, [%[r], 216]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[29] * B\n\t" "str x4, [%[r], 224]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[30] * B\n\t" - "ldp x8, x9, [%[a], 240]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" "str x5, [%[r], 232]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], 
x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[31] * B\n\t" "str x3, [%[r], 240]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[32] * B\n\t" - "ldp x8, x9, [%[a], 256]\n\t" + "ldp x9, x10, [%[a], 256]\n\t" "str x4, [%[r], 248]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[33] * B\n\t" "str x5, [%[r], 256]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[34] * B\n\t" - "ldp x8, x9, [%[a], 272]\n\t" + "ldp x9, x10, [%[a], 272]\n\t" "str x3, [%[r], 264]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[35] * B\n\t" "str x4, [%[r], 272]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[36] * B\n\t" - "ldp x8, x9, [%[a], 288]\n\t" + "ldp x9, x10, [%[a], 288]\n\t" "str x5, [%[r], 280]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[37] * B\n\t" "str x3, [%[r], 288]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, 
xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[38] * B\n\t" - "ldp x8, x9, [%[a], 304]\n\t" + "ldp x9, x10, [%[a], 304]\n\t" "str x4, [%[r], 296]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[39] * B\n\t" "str x5, [%[r], 304]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[40] * B\n\t" - "ldp x8, x9, [%[a], 320]\n\t" + "ldp x9, x10, [%[a], 320]\n\t" "str x3, [%[r], 312]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[41] * B\n\t" "str x4, [%[r], 320]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[42] * B\n\t" - "ldp x8, x9, [%[a], 336]\n\t" + "ldp x9, x10, [%[a], 336]\n\t" "str x5, [%[r], 328]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[43] * B\n\t" "str x3, [%[r], 336]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[44] * B\n\t" - "ldp x8, x9, [%[a], 352]\n\t" + "ldp x9, x10, [%[a], 352]\n\t" "str x4, [%[r], 344]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], 
x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[45] * B\n\t" "str x5, [%[r], 352]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[46] * B\n\t" - "ldp x8, x9, [%[a], 368]\n\t" + "ldp x9, x10, [%[a], 368]\n\t" "str x3, [%[r], 360]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[47] * B\n\t" "str x4, [%[r], 368]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "adc x3, x3, x7\n\t" - "stp x5, x3, [%[r], 376]\n\t" + "str x5, [%[r], 376]\n\t" + "str x3, [%[r], 384]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } @@ -9724,334 +12147,308 @@ static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m) SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" + "ldp x11, x12, [%[a], 0]\n\t" + "ldp x13, x14, [%[a], 16]\n\t" + "ldp x15, x16, [%[a], 32]\n\t" + "ldp x17, x19, [%[a], 48]\n\t" + "ldp x20, x21, [%[a], 64]\n\t" + "ldp x22, x23, [%[a], 80]\n\t" + "# No carry yet\n\t" "mov x3, xzr\n\t" - "# i = 24\n\t" + "# i = 0..23\n\t" "mov x4, 24\n\t" - "ldp x12, x13, [%[a], 0]\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" 
- "mul x9, %[mp], x12\n\t" + "mul x10, %[mp], x11\n\t" + "ldp x24, x25, [%[m], 0]\n\t" + "ldp x26, x27, [%[m], 16]\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" - "adds x12, x12, x7\n\t" + "mul x5, x24, x10\n\t" + "umulh x6, x24, x10\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x12, x13, x7\n\t" + "adds x11, x11, x5\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x11, x12, x5\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" - "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" - "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x11, x11, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x12, x13, x5\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" - "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" - "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "adds x12, x12, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x13, x14, x5\n\t" + "ldp x24, x25, [%[m], 32]\n\t" + "ldp x26, x27, [%[m], 48]\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x13, x13, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x14, x15, x5\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "adds x14, x14, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, 
x10\n\t" + "adds x15, x16, x5\n\t" "# a[i+6] += m[6] * mu\n\t" - "ldr x11, [%[a], 48]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 40]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x15, x15, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x16, x17, x5\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "adds x16, x16, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x17, x19, x5\n\t" + "ldp x24, x25, [%[m], 64]\n\t" + "ldp x26, x27, [%[m], 80]\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x17, x17, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x19, x20, x5\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "adds x19, x19, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x20, x21, x5\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x20, x20, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x21, x22, x5\n\t" "# a[i+11] += 
m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, x10, x7\n\t" + "adds x21, x21, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x22, x23, x5\n\t" + "ldp x24, x25, [%[m], 96]\n\t" + "ldp x26, x27, [%[m], 112]\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x22, x22, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "ldr x23, [%[a], 96]\n\t" + "umulh x6, x24, x10\n\t" + "adds x23, x23, x5\n\t" "# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "adds x23, x23, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "ldp x8, x9, [%[a], 104]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 104]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 112]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" 
+ "str x9, [%[a], 112]\n\t" + "ldp x8, x9, [%[a], 120]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 128]\n\t" + "ldp x26, x27, [%[m], 144]\n\t" "# a[i+16] += m[16] * mu\n\t" - "ldr x11, [%[a], 128]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 128]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 120]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 120]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+17] += m[17] * mu\n\t" - "ldr x10, [%[a], 136]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 136]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 128]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 128]\n\t" + "ldp x8, x9, [%[a], 136]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+18] += m[18] * mu\n\t" - "ldr x11, [%[a], 144]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 144]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 136]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 136]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+19] += m[19] * mu\n\t" - "ldr x10, [%[a], 152]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 152]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 144]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 144]\n\t" + "ldp x8, x9, [%[a], 152]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 160]\n\t" + "ldp x26, 
x27, [%[m], 176]\n\t" "# a[i+20] += m[20] * mu\n\t" - "ldr x11, [%[a], 160]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 160]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 152]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 152]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+21] += m[21] * mu\n\t" - "ldr x10, [%[a], 168]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 168]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 160]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 160]\n\t" + "ldp x8, x9, [%[a], 168]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+22] += m[22] * mu\n\t" - "ldr x11, [%[a], 176]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 176]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 168]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 168]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+23] += m[23] * mu\n\t" - "ldr x10, [%[a], 184]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 184]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x8, x8, x3\n\t" - "str x11, [%[a], 176]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 192]\n\t" - "str x10, [%[a], 184]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 192]\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 176]\n\t" + "umulh x7, x27, x10\n\t" + "ldp x8, x9, [%[a], 184]\n\t" + "adds x5, x5, x6\n\t" + "adcs x7, x7, x3\n\t" 
+ "adc x3, xzr, xzr\n\t" + "adds x8, x8, x5\n\t" + "str x8, [%[a], 184]\n\t" + "adcs x9, x9, x7\n\t" + "str x9, [%[a], 192]\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" - "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" + "b.ne 1b\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 192\n\t" + "neg x3, x3\n\t" + "mov %[mp], %[a]\n\t" + "sub %[a], %[a], 192\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 112]\n\t" - "ldp x5, x6, [%[m], 128]\n\t" - "ldp x7, x8, [%[m], 144]\n\t" - "ldp x12, x13, [x9, 128]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 144]\n\t" - "and 
x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 128]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 144]\n\t" - "ldp x5, x6, [%[m], 160]\n\t" - "ldp x7, x8, [%[m], 176]\n\t" - "ldp x12, x13, [x9, 160]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 176]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 160]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 176]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x11, x11, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x12, x12, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x13, x13, x6\n\t" + "stp x11, x12, [%[a], 0]\n\t" + "sbcs x14, x14, x7\n\t" + "stp x13, x14, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x15, x15, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x16, x16, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x17, x17, x6\n\t" + "stp x15, x16, [%[a], 32]\n\t" + "sbcs x19, x19, x7\n\t" + "stp x17, x19, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x20, x20, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x21, x21, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x22, x22, x6\n\t" + "stp x20, x21, [%[a], 64]\n\t" + "sbcs x23, x23, x7\n\t" + "stp x22, x23, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "ldp x8, x9, [%[mp], 96]\n\t" + "ldp x10, x11, [%[mp], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + 
"sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 96]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 112]\n\t" + "ldp x4, x5, [%[m], 128]\n\t" + "ldp x6, x7, [%[m], 144]\n\t" + "ldp x8, x9, [%[mp], 128]\n\t" + "ldp x10, x11, [%[mp], 144]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 128]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 144]\n\t" + "ldp x4, x5, [%[m], 160]\n\t" + "ldp x6, x7, [%[m], 176]\n\t" + "ldp x8, x9, [%[mp], 160]\n\t" + "ldp x10, x11, [%[mp], 176]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 160]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 176]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x10", "x8", "x9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "cc" ); } @@ -10063,9 +12460,9 @@ SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_24(r, a, b); @@ -10077,9 +12474,9 @@ static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_24(r, a); @@ -10115,7 +12512,7 @@ static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -10209,7 +12606,7 @@ static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; @@ -10231,9 +12628,9 @@ static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -10252,248 +12649,252 @@ static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a, "str x3, [%[r], 192]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 
16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 
0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[16] * B\n\t" - "ldp x8, x9, [%[a], 128]\n\t" + "ldp x9, x10, [%[a], 128]\n\t" "str 
x3, [%[r], 120]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[17] * B\n\t" "str x4, [%[r], 128]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[18] * B\n\t" - "ldp x8, x9, [%[a], 144]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" "str x5, [%[r], 136]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[19] * B\n\t" "str x3, [%[r], 144]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[20] * B\n\t" - "ldp x8, x9, [%[a], 160]\n\t" + "ldp x9, x10, [%[a], 160]\n\t" "str x4, [%[r], 152]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[21] * B\n\t" "str x5, [%[r], 160]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[22] * B\n\t" - "ldp x8, x9, [%[a], 176]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" "str x3, [%[r], 168]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[23] * B\n\t" "str x4, [%[r], 176]\n\t" - "mul 
x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "adc x3, x3, x7\n\t" - "stp x5, x3, [%[r], 184]\n\t" + "str x5, [%[r], 184]\n\t" + "str x3, [%[r], 192]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. */ static sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -10502,23 +12903,52 @@ static sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 
32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); - return r; + return d1; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<24; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 24; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif } /* Compare a with b in constant time. 
@@ -10532,203 +12962,187 @@ static sp_int64 sp_3072_cmp_24(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 184\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #24\n\t" + "add %[a], %[a], #176\n\t" + "add %[b], %[b], #176\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "ldp x7, x8, [%[b], 176]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 160]\n\t" - "ldp x7, x8, [%[b], 160]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, 
x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 160]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "ldp x7, x8, [%[b], 144]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 128]\n\t" - "ldp x7, x8, [%[b], 128]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 128]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + 
"csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, 
x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, 
x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -10744,8 +13158,8 @@ static sp_int64 sp_3072_cmp_24(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[48], t2[25]; sp_digit div, r1; @@ -10755,9 +13169,13 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig div = d[23]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 24); - for (i=23; i>=0; i--) { - sp_digit hi = t1[24 + i] - (t1[24 + i] == div); + r1 = sp_3072_cmp_24(&t1[24], d) >= 0; + sp_3072_cond_sub_24(&t1[24], &t1[24], d, (sp_digit)0 - r1); + for (i = 23; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[24 + i] == div); + sp_digit hi = t1[24 + i] + mask; r1 = div_3072_word_24(hi, t1[24 + i - 1], div); + r1 |= mask; sp_3072_mul_d_24(t2, d, r1); t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2); @@ -10794,12 +13212,14 @@ static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 48]; @@ -10814,11 +13234,17 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 48), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 48), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10878,6 +13304,10 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -10919,7 +13349,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_24(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -10934,12 +13364,14 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[32 * 48]; @@ -10954,11 +13386,17 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 48), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 48), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -11034,6 +13472,10 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -11076,7 +13518,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_24(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -11112,658 +13554,608 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" + "ldp x11, x12, [%[a], 0]\n\t" + "ldp x13, x14, [%[a], 16]\n\t" + "ldp x15, x16, [%[a], 32]\n\t" + "ldp x17, x19, [%[a], 48]\n\t" + "ldp x20, x21, [%[a], 64]\n\t" + "ldp x22, x23, [%[a], 80]\n\t" + "# No carry yet\n\t" "mov x3, xzr\n\t" - "# i = 48\n\t" + "# i = 0..47\n\t" "mov x4, 48\n\t" - "ldp x12, x13, [%[a], 0]\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" - "mul x9, %[mp], x12\n\t" + "mul x10, %[mp], x11\n\t" + "ldp x24, x25, [%[m], 0]\n\t" + "ldp x26, x27, [%[m], 16]\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" - "adds x12, x12, x7\n\t" + "mul x5, x24, x10\n\t" + "umulh x6, x24, x10\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x12, x13, x7\n\t" + "adds x11, x11, x5\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x11, x12, x5\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" - "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" - "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x11, x11, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, 
xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x12, x13, x5\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" - "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" - "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "adds x12, x12, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x13, x14, x5\n\t" + "ldp x24, x25, [%[m], 32]\n\t" + "ldp x26, x27, [%[m], 48]\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x13, x13, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x14, x15, x5\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "adds x14, x14, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x15, x16, x5\n\t" "# a[i+6] += m[6] * mu\n\t" - "ldr x11, [%[a], 48]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 40]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x15, x15, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x16, x17, x5\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "adds x16, x16, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x17, x19, x5\n\t" + "ldp x24, x25, [%[m], 64]\n\t" + "ldp x26, 
x27, [%[m], 80]\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x17, x17, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x19, x20, x5\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "adds x19, x19, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x20, x21, x5\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x20, x20, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x21, x22, x5\n\t" "# a[i+11] += m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, x10, x7\n\t" + "adds x21, x21, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x22, x23, x5\n\t" + "ldp x24, x25, [%[m], 96]\n\t" + "ldp x26, x27, [%[m], 112]\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x22, x22, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "ldr x23, [%[a], 96]\n\t" + "umulh x6, x24, x10\n\t" + "adds x23, x23, x5\n\t" 
"# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "adds x23, x23, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "ldp x8, x9, [%[a], 104]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 104]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 112]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 112]\n\t" + "ldp x8, x9, [%[a], 120]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 128]\n\t" + "ldp x26, x27, [%[m], 144]\n\t" "# a[i+16] += m[16] * mu\n\t" - "ldr x11, [%[a], 128]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 128]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 120]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 120]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+17] += m[17] * mu\n\t" - "ldr x10, [%[a], 136]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 136]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - 
"str x11, [%[a], 128]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 128]\n\t" + "ldp x8, x9, [%[a], 136]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+18] += m[18] * mu\n\t" - "ldr x11, [%[a], 144]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 144]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 136]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 136]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+19] += m[19] * mu\n\t" - "ldr x10, [%[a], 152]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 152]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 144]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 144]\n\t" + "ldp x8, x9, [%[a], 152]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 160]\n\t" + "ldp x26, x27, [%[m], 176]\n\t" "# a[i+20] += m[20] * mu\n\t" - "ldr x11, [%[a], 160]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 160]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 152]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 152]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+21] += m[21] * mu\n\t" - "ldr x10, [%[a], 168]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 168]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 160]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 160]\n\t" + 
"ldp x8, x9, [%[a], 168]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+22] += m[22] * mu\n\t" - "ldr x11, [%[a], 176]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 176]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 168]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 168]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+23] += m[23] * mu\n\t" - "ldr x10, [%[a], 184]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 184]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 176]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 176]\n\t" + "ldp x8, x9, [%[a], 184]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 192]\n\t" + "ldp x26, x27, [%[m], 208]\n\t" "# a[i+24] += m[24] * mu\n\t" - "ldr x11, [%[a], 192]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 192]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 184]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 184]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+25] += m[25] * mu\n\t" - "ldr x10, [%[a], 200]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 200]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 192]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 192]\n\t" + "ldp x8, x9, [%[a], 200]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+26] += m[26] * mu\n\t" - "ldr x11, [%[a], 208]\n\t" - "adc x5, x8, 
xzr\n\t" - "ldr x8, [%[m], 208]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 200]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 200]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+27] += m[27] * mu\n\t" - "ldr x10, [%[a], 216]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 216]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 208]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 208]\n\t" + "ldp x8, x9, [%[a], 216]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 224]\n\t" + "ldp x26, x27, [%[m], 240]\n\t" "# a[i+28] += m[28] * mu\n\t" - "ldr x11, [%[a], 224]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 224]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 216]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 216]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+29] += m[29] * mu\n\t" - "ldr x10, [%[a], 232]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 232]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 224]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 224]\n\t" + "ldp x8, x9, [%[a], 232]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+30] += m[30] * mu\n\t" - "ldr x11, [%[a], 240]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 240]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 
232]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 232]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+31] += m[31] * mu\n\t" - "ldr x10, [%[a], 248]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 248]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 240]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 240]\n\t" + "ldp x8, x9, [%[a], 248]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 256]\n\t" + "ldp x26, x27, [%[m], 272]\n\t" "# a[i+32] += m[32] * mu\n\t" - "ldr x11, [%[a], 256]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 256]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 248]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 248]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+33] += m[33] * mu\n\t" - "ldr x10, [%[a], 264]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 264]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 256]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 256]\n\t" + "ldp x8, x9, [%[a], 264]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+34] += m[34] * mu\n\t" - "ldr x11, [%[a], 272]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 272]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 264]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 
264]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+35] += m[35] * mu\n\t" - "ldr x10, [%[a], 280]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 280]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 272]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 272]\n\t" + "ldp x8, x9, [%[a], 280]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 288]\n\t" + "ldp x26, x27, [%[m], 304]\n\t" "# a[i+36] += m[36] * mu\n\t" - "ldr x11, [%[a], 288]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 288]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 280]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 280]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+37] += m[37] * mu\n\t" - "ldr x10, [%[a], 296]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 296]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 288]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 288]\n\t" + "ldp x8, x9, [%[a], 296]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+38] += m[38] * mu\n\t" - "ldr x11, [%[a], 304]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 304]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 296]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 296]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+39] += m[39] * mu\n\t" - "ldr x10, [%[a], 312]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, 
[%[m], 312]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 304]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 304]\n\t" + "ldp x8, x9, [%[a], 312]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 320]\n\t" + "ldp x26, x27, [%[m], 336]\n\t" "# a[i+40] += m[40] * mu\n\t" - "ldr x11, [%[a], 320]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 320]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 312]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 312]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+41] += m[41] * mu\n\t" - "ldr x10, [%[a], 328]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 328]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 320]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 320]\n\t" + "ldp x8, x9, [%[a], 328]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+42] += m[42] * mu\n\t" - "ldr x11, [%[a], 336]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 336]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 328]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 328]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+43] += m[43] * mu\n\t" - "ldr x10, [%[a], 344]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 344]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 336]\n\t" - "adds x10, x10, 
x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 336]\n\t" + "ldp x8, x9, [%[a], 344]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 352]\n\t" + "ldp x26, x27, [%[m], 368]\n\t" "# a[i+44] += m[44] * mu\n\t" - "ldr x11, [%[a], 352]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 352]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 344]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 344]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+45] += m[45] * mu\n\t" - "ldr x10, [%[a], 360]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 360]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 352]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 352]\n\t" + "ldp x8, x9, [%[a], 360]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+46] += m[46] * mu\n\t" - "ldr x11, [%[a], 368]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 368]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 360]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 360]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+47] += m[47] * mu\n\t" - "ldr x10, [%[a], 376]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 376]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x8, x8, x3\n\t" - "str x11, [%[a], 368]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 384]\n\t" - "str x10, [%[a], 376]\n\t" - "adcs x11, x11, x8\n\t" 
- "str x11, [%[a], 384]\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 368]\n\t" + "umulh x7, x27, x10\n\t" + "ldp x8, x9, [%[a], 376]\n\t" + "adds x5, x5, x6\n\t" + "adcs x7, x7, x3\n\t" + "adc x3, xzr, xzr\n\t" + "adds x8, x8, x5\n\t" + "str x8, [%[a], 376]\n\t" + "adcs x9, x9, x7\n\t" + "str x9, [%[a], 384]\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" - "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" + "b.ne 1b\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 384\n\t" + "neg x3, x3\n\t" + "mov %[mp], %[a]\n\t" + "sub %[a], %[a], 384\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, 
x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 112]\n\t" - "ldp x5, x6, [%[m], 128]\n\t" - "ldp x7, x8, [%[m], 144]\n\t" - "ldp x12, x13, [x9, 128]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 144]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 128]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 144]\n\t" - "ldp x5, x6, [%[m], 160]\n\t" - "ldp x7, x8, [%[m], 176]\n\t" - "ldp x12, x13, [x9, 160]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 176]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 160]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 176]\n\t" - "ldp x5, x6, [%[m], 192]\n\t" - "ldp x7, x8, [%[m], 208]\n\t" - "ldp x12, x13, [x9, 192]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 208]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 192]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 208]\n\t" - "ldp x5, x6, [%[m], 224]\n\t" - "ldp x7, x8, [%[m], 240]\n\t" - "ldp x12, x13, [x9, 224]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 240]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 224]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 240]\n\t" - "ldp x5, x6, [%[m], 256]\n\t" - "ldp x7, x8, [%[m], 272]\n\t" - "ldp x12, x13, [x9, 256]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 272]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 256]\n\t" - "sbcs x10, x10, 
x8\n\t" - "stp x11, x10, [%[a], 272]\n\t" - "ldp x5, x6, [%[m], 288]\n\t" - "ldp x7, x8, [%[m], 304]\n\t" - "ldp x12, x13, [x9, 288]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 304]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 288]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 304]\n\t" - "ldp x5, x6, [%[m], 320]\n\t" - "ldp x7, x8, [%[m], 336]\n\t" - "ldp x12, x13, [x9, 320]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 336]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 320]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 336]\n\t" - "ldp x5, x6, [%[m], 352]\n\t" - "ldp x7, x8, [%[m], 368]\n\t" - "ldp x12, x13, [x9, 352]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 368]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 352]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 368]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x11, x11, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x12, x12, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x13, x13, x6\n\t" + "stp x11, x12, [%[a], 0]\n\t" + "sbcs x14, x14, x7\n\t" + "stp x13, x14, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x15, x15, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x16, x16, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x17, x17, x6\n\t" + 
"stp x15, x16, [%[a], 32]\n\t" + "sbcs x19, x19, x7\n\t" + "stp x17, x19, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x20, x20, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x21, x21, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x22, x22, x6\n\t" + "stp x20, x21, [%[a], 64]\n\t" + "sbcs x23, x23, x7\n\t" + "stp x22, x23, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "ldp x8, x9, [%[mp], 96]\n\t" + "ldp x10, x11, [%[mp], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 96]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 112]\n\t" + "ldp x4, x5, [%[m], 128]\n\t" + "ldp x6, x7, [%[m], 144]\n\t" + "ldp x8, x9, [%[mp], 128]\n\t" + "ldp x10, x11, [%[mp], 144]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 128]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 144]\n\t" + "ldp x4, x5, [%[m], 160]\n\t" + "ldp x6, x7, [%[m], 176]\n\t" + "ldp x8, x9, [%[mp], 160]\n\t" + "ldp x10, x11, [%[mp], 176]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 160]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 176]\n\t" + "ldp x4, x5, [%[m], 192]\n\t" + "ldp x6, x7, [%[m], 208]\n\t" + "ldp x8, x9, [%[mp], 192]\n\t" + "ldp x10, x11, [%[mp], 208]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 192]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 208]\n\t" + "ldp x4, x5, [%[m], 
224]\n\t" + "ldp x6, x7, [%[m], 240]\n\t" + "ldp x8, x9, [%[mp], 224]\n\t" + "ldp x10, x11, [%[mp], 240]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 224]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 240]\n\t" + "ldp x4, x5, [%[m], 256]\n\t" + "ldp x6, x7, [%[m], 272]\n\t" + "ldp x8, x9, [%[mp], 256]\n\t" + "ldp x10, x11, [%[mp], 272]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 256]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 272]\n\t" + "ldp x4, x5, [%[m], 288]\n\t" + "ldp x6, x7, [%[m], 304]\n\t" + "ldp x8, x9, [%[mp], 288]\n\t" + "ldp x10, x11, [%[mp], 304]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 288]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 304]\n\t" + "ldp x4, x5, [%[m], 320]\n\t" + "ldp x6, x7, [%[m], 336]\n\t" + "ldp x8, x9, [%[mp], 320]\n\t" + "ldp x10, x11, [%[mp], 336]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 320]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 336]\n\t" + "ldp x4, x5, [%[m], 352]\n\t" + "ldp x6, x7, [%[m], 368]\n\t" + "ldp x8, x9, [%[mp], 352]\n\t" + "ldp x10, x11, [%[mp], 368]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 352]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 368]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", 
"x4", "x5", "x6", "x7", "x10", "x8", "x9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "cc" ); } @@ -11775,9 +14167,9 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_48(r, a, b); @@ -11789,9 +14181,9 @@ static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_48(r, a); @@ -11829,7 +14221,7 @@ static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -11969,45 +14361,45 @@ static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } #endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. 
* * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. */ -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_48_cond(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "b.lt 1f\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + "1:\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -12016,23 +14408,25 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "cmp x3, x5\n\t" + "b.lt 2f\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + "2:\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); - return r; + return d1; } /* Divide d in a and put remainder into r (m*d + 
r = a) @@ -12044,8 +14438,8 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -12055,9 +14449,20 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s div = d[47]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); - for (i=47; i>=0; i--) { - sp_digit hi = t1[48 + i] - (t1[48 + i] == div); - r1 = div_3072_word_48(hi, t1[48 + i - 1], div); + for (i = 47; i > 0; i--) { + if (t1[i + 48] != d[i]) + break; + } + if (t1[i + 48] >= d[i]) { + sp_3072_sub_in_place_48(&t1[48], d); + } + for (i = 47; i >= 0; i--) { + if (t1[48 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_3072_word_48_cond(t1[48 + i], t1[48 + i - 1], div); + } sp_3072_mul_d_48(t2, d, r1); t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); @@ -12125,7 +14530,7 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -12303,13 +14708,74 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; #endif /* WOLFSSL_SP_SMALL */ } +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. 
+ * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + */ +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add %[d1], x6, x3\n\t" + + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" + ); + + return d1; +} + /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -12351,371 +14817,331 @@ static sp_int64 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 376\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #48\n\t" + "add %[a], %[a], #368\n\t" + "add %[b], %[b], #368\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 368]\n\t" - "ldp x7, x8, [%[b], 368]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 368]\n\t" + "ldp x8, x9, [%[b], 368]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 352]\n\t" - "ldp x7, x8, [%[b], 352]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, 
x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 352]\n\t" + "ldp x8, x9, [%[b], 352]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 336]\n\t" - "ldp x7, x8, [%[b], 336]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 336]\n\t" + "ldp x8, x9, [%[b], 336]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 320]\n\t" - "ldp x7, x8, [%[b], 320]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 320]\n\t" + "ldp x8, x9, [%[b], 320]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 304]\n\t" - "ldp x7, x8, [%[b], 304]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + 
"csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 304]\n\t" + "ldp x8, x9, [%[b], 304]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 288]\n\t" - "ldp x7, x8, [%[b], 288]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 288]\n\t" + "ldp x8, x9, [%[b], 288]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 272]\n\t" - "ldp x7, x8, [%[b], 272]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 272]\n\t" + "ldp x8, x9, [%[b], 272]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 256]\n\t" - "ldp x7, x8, [%[b], 256]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + 
"and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 256]\n\t" + "ldp x8, x9, [%[b], 256]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 240]\n\t" - "ldp x7, x8, [%[b], 240]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 240]\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 224]\n\t" - "ldp x7, x8, [%[b], 224]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 224]\n\t" + "ldp x8, x9, [%[b], 224]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 208]\n\t" - "ldp x7, x8, [%[b], 208]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 
208]\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 192]\n\t" - "ldp x7, x8, [%[b], 192]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 192]\n\t" + "ldp x8, x9, [%[b], 192]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "ldp x7, x8, [%[b], 176]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 160]\n\t" - "ldp x7, x8, [%[b], 160]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 160]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + 
"subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "ldp x7, x8, [%[b], 144]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 128]\n\t" - "ldp x7, x8, [%[b], 128]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 128]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, 
lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + 
"and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, 
x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -12731,8 +15157,8 @@ static sp_int64 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -12742,9 +15168,13 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig div = d[47]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 48); - for (i=47; i>=0; i--) { - sp_digit hi = t1[48 + i] - (t1[48 + i] == div); + r1 = sp_3072_cmp_48(&t1[48], d) >= 0; + sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1); + for (i = 47; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[48 + i] == div); + sp_digit hi = t1[48 + i] + mask; r1 = div_3072_word_48(hi, t1[48 + i - 1], div); + r1 |= mask; sp_3072_mul_d_48(t2, d, r1); t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2); @@ -12783,12 +15213,14 @@ static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[8 * 96]; @@ -12803,11 +15235,17 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 96), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 96), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12859,6 +15297,10 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -12899,7 +15341,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_48(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -12914,12 +15356,14 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 96]; @@ -12934,11 +15378,17 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12998,6 +15448,10 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -13039,7 +15493,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_cond_sub_48(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -13066,7 +15520,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[48 * 5]; @@ -13088,7 +15542,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -13098,9 +15552,9 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 48; r = a + 48 * 2; m = r + 48 * 2; - ah = a + 48; sp_3072_from_bin(ah, 48, in, inLen); #if DIGIT_BIT >= 64 @@ -13118,7 +15572,38 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_3072_from_mp(m, 48, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_3072_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 48); + err = sp_3072_mod_48_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_3072_mont_sqr_48(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_3072_mont_mul_48(r, r, ah, m, mp); + + for (i = 47; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_3072_sub_in_place_48(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_3072_sqr_48(r, ah); err = sp_3072_mod_48_cond(r, r, m); @@ -13146,7 +15631,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 48); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_3072_mont_sqr_48(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_3072_mont_mul_48(r, r, a, m, mp); @@ -13172,7 +15657,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -13181,6 +15666,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -13192,7 +15678,6 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { -#ifdef WOLFSSL_SP_SMALL sp_digit c = 0; __asm__ __volatile__ ( @@ -13210,106 +15695,12 @@ static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; -#else - __asm__ __volatile__ ( - - "ldp x5, x7, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "ldp x4, x6, [%[a], 0]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 16]\n\t" - "and x7, x7, %[m]\n\t" - "adds x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 0]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 16]\n\t" - "ldp x5, x7, [%[b], 32]\n\t" - "ldp x11, x12, [%[b], 48]\n\t" - "ldp x4, x6, [%[a], 32]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 48]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 32]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 48]\n\t" - "ldp x5, x7, [%[b], 64]\n\t" - "ldp x11, x12, [%[b], 80]\n\t" - "ldp x4, x6, [%[a], 64]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 80]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 64]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 80]\n\t" - "ldp x5, x7, [%[b], 96]\n\t" - "ldp x11, x12, [%[b], 112]\n\t" - "ldp x4, x6, [%[a], 96]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 112]\n\t" - "and x7, x7, %[m]\n\t" - "adcs 
x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 96]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 112]\n\t" - "ldp x5, x7, [%[b], 128]\n\t" - "ldp x11, x12, [%[b], 144]\n\t" - "ldp x4, x6, [%[a], 128]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 144]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 128]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 144]\n\t" - "ldp x5, x7, [%[b], 160]\n\t" - "ldp x11, x12, [%[b], 176]\n\t" - "ldp x4, x6, [%[a], 160]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 176]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 160]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 176]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" - ); - - return (sp_digit)r; -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. 
* @@ -13333,7 +15724,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[48 * 4]; @@ -13367,7 +15758,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -13392,21 +15783,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 48); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[24 * 11]; @@ -13434,8 +15825,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -13483,12 +15880,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { 
ForceZero(a, sizeof(sp_digit) * 24 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -13619,7 +16016,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 -static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n) +static void sp_3072_lshift_48(sp_digit* r, const sp_digit* a, byte n) { word64 n64 = n; __asm__ __volatile__ ( @@ -13911,11 +16308,10 @@ static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n) "lsl x4, x4, %[n]\n\t" "lsr x5, x5, x6\n\t" "orr x2, x2, x5\n\t" - "str x4, [%[r]]\n\t" - "str x2, [%[r], 8]\n\t" + "stp x4, x2, [%[r]]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n64) - : "memory", "x2", "x3", "x4", "x5", "x6" + : "memory", "x2", "x3", "x4", "x5", "x6", "cc" ); } @@ -13925,12 +16321,14 @@ static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[145]; @@ -13946,11 +16344,17 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13979,6 +16383,10 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -14026,7 +16434,7 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, sp_3072_cond_sub_48(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -14167,41 +16575,108 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, */ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = 
a[6]; //fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. */ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + 
"strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } /* Convert an mp_int to an array of sp_digit. @@ -14213,20 +16688,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -14256,12 +16734,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { r[j] &= 0xffffffffffffffffl; @@ -14298,17 +16776,19 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) static void sp_4096_to_bin_64(sp_digit* r, byte* a) { int i; - int j; + int j = 0; - for (i = 63, j = 0; i >= 0; i--) { - a[j++] = r[i] >> 56; - a[j++] = r[i] >> 48; - a[j++] = r[i] >> 40; - a[j++] = r[i] >> 32; - a[j++] = r[i] >> 24; - a[j++] = r[i] >> 16; - a[j++] = r[i] >> 8; - a[j++] = r[i] >> 0; + for (i = 63; i >= 0; i--, j += 8) { + __asm__ __volatile__ 
( + "ldr x4, [%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); } } @@ -14327,103 +16807,84 @@ static void sp_4096_to_bin_64(sp_digit* r, byte* a) #define sp_4096_norm_64(a) #ifndef WOLFSSL_SP_SMALL -/* Add b to a into r. (r = a + b) +/* Add digit to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static void sp_4096_add_word_32(sp_digit* r, const sp_digit* a, + sp_digit b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "adds x3, x3, x7\n\t" "ldp x5, x6, [%[a], 16]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 16]\n\t" - "adcs x5, x5, x9\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 16]\n\t" "ldp x3, x4, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 48]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 48]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 48]\n\t" "ldp x3, x4, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 80]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 80]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 80]\n\t" "ldp x3, x4, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 112]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 112]\n\t" - "adcs x5, x5, x9\n\t" + "adcs 
x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 112]\n\t" "ldp x3, x4, [%[a], 128]\n\t" - "ldp x7, x8, [%[b], 128]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 144]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 144]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 128]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 144]\n\t" "ldp x3, x4, [%[a], 160]\n\t" - "ldp x7, x8, [%[b], 160]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 176]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 176]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 160]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 176]\n\t" "ldp x3, x4, [%[a], 192]\n\t" - "ldp x7, x8, [%[b], 192]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 208]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 208]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 192]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 208]\n\t" "ldp x3, x4, [%[a], 224]\n\t" - "ldp x7, x8, [%[b], 224]\n\t" - "adcs x3, x3, x7\n\t" "ldp x5, x6, [%[a], 240]\n\t" - "adcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 240]\n\t" - "adcs x5, x5, x9\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" "stp x3, x4, [%[r], 224]\n\t" - "adcs x6, x6, x10\n\t" + "adcs x6, x6, xzr\n\t" "stp x5, x6, [%[r], 240]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" ); - - return (sp_digit)r; } /* Sub b from a into a. 
(a -= b) @@ -14597,7 +17058,7 @@ static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -14773,94 +17234,150 @@ static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 480]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 496]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* Add digit to a into r. (r = a + b) +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
*/ -static void sp_4096_add_zero_32(sp_digit* r, const sp_digit* a, - const sp_digit d) +static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 112]\n\t" - "ldp x3, x4, [%[a], 128]\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 128]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 144]\n\t" - "ldp x3, x4, [%[a], 160]\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 160]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 176]\n\t" - "ldp x3, x4, [%[a], 192]\n\t" - "ldp x5, x6, [%[a], 208]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 192]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 208]\n\t" - "ldp x3, x4, [%[a], 224]\n\t" - "ldp x5, x6, [%[a], 240]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 224]\n\t" - "adcs x6, x6, xzr\n\t" - "stp 
x5, x6, [%[r], 240]\n\t" - : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "ldp x10, x11, [%[b], 80]\n\t" + "ldp x4, x5, [%[a], 64]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 64]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 80]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "ldp x10, x11, [%[b], 112]\n\t" + "ldp x4, x5, [%[a], 96]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 96]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 112]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "ldp x10, x11, [%[b], 144]\n\t" + "ldp x4, x5, [%[a], 128]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 
128]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 144]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + "ldp x10, x11, [%[b], 176]\n\t" + "ldp x4, x5, [%[a], 160]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 160]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 176]\n\t" + "ldp x8, x9, [%[b], 192]\n\t" + "ldp x10, x11, [%[b], 208]\n\t" + "ldp x4, x5, [%[a], 192]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 208]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 192]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 208]\n\t" + "ldp x8, x9, [%[b], 224]\n\t" + "ldp x10, x11, [%[b], 240]\n\t" + "ldp x4, x5, [%[a], 224]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 240]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 224]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 240]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); + + return (sp_digit)r; } +#endif /* !WOLFSSL_SP_SMALL */ /* Multiply a and b into r. 
(r = a * b) * @@ -14875,149 +17392,28 @@ SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, sp_digit z1[64]; sp_digit a1[32]; sp_digit b1[32]; - sp_digit z2[64]; - sp_digit u, ca, cb; + sp_digit* z2 = r + 64; + sp_digit u; + sp_digit ca; + sp_digit cb; ca = sp_2048_add_32(a1, a, &a[32]); cb = sp_2048_add_32(b1, b, &b[32]); u = ca & cb; - sp_2048_mul_32(z1, a1, b1); + sp_2048_mul_32(z2, &a[32], &b[32]); sp_2048_mul_32(z0, a, b); - sp_2048_mask_32(r + 64, a1, 0 - cb); - sp_2048_mask_32(b1, b1, 0 - ca); - u += sp_2048_add_32(r + 64, r + 64, b1); - u += sp_4096_sub_in_place_64(z1, z2); + sp_2048_mul_32(z1, a1, b1); + u += sp_4096_sub_in_place_64(z1, z0); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_cond_add_32(z1 + 32, z1 + 32, a1, 0 - cb); + u += sp_4096_cond_add_32(z1 + 32, z1 + 32, b1, 0 - ca); + u += sp_4096_add_64(r + 32, r + 32, z1); - u += sp_4096_add_32(r + 64, r + 64, z2); - sp_4096_add_zero_32(r + 96, z2 + 32, u); + (void)sp_4096_add_word_32(r + 96, r + 96, u); } -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. - */ -static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 256\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) - : - : "memory", "x3", "x4", "x5", "x6", "x11" - ); - - return c; -} - -#else -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a) -{ - __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 80]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 88]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 112]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 120]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 112]\n\t" - "ldp x3, x4, [%[a], 128]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 144]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 152]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 128]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 144]\n\t" - "ldp x3, x4, [%[a], 160]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 176]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 184]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 160]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 176]\n\t" - "ldp x3, x4, [%[a], 192]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 208]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 216]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 192]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 208]\n\t" - "ldp x3, x4, [%[a], 224]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 240]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 248]\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r], 
224]\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r], 240]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6" - ); - - return (sp_digit)r; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -15026,22 +17422,31 @@ static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a) SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[64]; + sp_digit* z2 = r + 64; sp_digit z1[64]; - sp_digit a1[32]; + sp_digit* a1 = z1; + sp_digit* zero = z1 + 32; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 32); + + mask = sp_2048_sub_32(a1, a, &a[32]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_2048_sub_32(a1, p1, p2); - u = sp_2048_add_32(a1, a, &a[32]); - sp_2048_sqr_32(z1, a1); sp_2048_sqr_32(z2, &a[32]); sp_2048_sqr_32(z0, a); - sp_2048_mask_32(r + 64, a1, 0 - u); - u += sp_2048_dbl_32(r + 64, r + 64); - u += sp_4096_sub_in_place_64(z1, z2); - u += sp_4096_sub_in_place_64(z1, z0); - u += sp_4096_add_64(r + 32, r + 32, z1); - u += sp_4096_add_32(r + 64, r + 64, z2); - sp_4096_add_zero_32(r + 96, z2 + 32, u); + sp_2048_sqr_32(z1, a1); + + u = 0; + u -= sp_4096_sub_in_place_64(z1, z2); + u -= sp_4096_sub_in_place_64(z1, z0); + u += sp_4096_sub_in_place_64(r + 32, z1); + sp_4096_add_word_32(r + 96, r + 96, u); } #endif /* !WOLFSSL_SP_SMALL */ @@ -15071,12 +17476,12 @@ static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", 
"x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -15112,7 +17517,7 @@ static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -15131,10 +17536,10 @@ static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[128]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 504\n\t" "csel x3, xzr, x3, cc\n\t" @@ -15164,7 +17569,7 @@ static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -15180,10 +17585,10 @@ static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) sp_digit tmp[128]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 504\n\t" "csel x3, xzr, x3, cc\n\t" @@ -15229,14 +17634,14 @@ static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); } #endif /* WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. 
@@ -15272,9 +17677,9 @@ static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -15293,552 +17698,552 @@ static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, "str x3, [%[r], 512]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, 
x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, 
%[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[16] * B\n\t" - "ldp x8, x9, [%[a], 128]\n\t" + "ldp x9, x10, [%[a], 128]\n\t" "str x3, [%[r], 120]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[17] * B\n\t" "str x4, [%[r], 128]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[18] * B\n\t" - "ldp x8, x9, [%[a], 144]\n\t" + "ldp x9, x10, [%[a], 144]\n\t" "str x5, [%[r], 136]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[19] * B\n\t" "str x3, [%[r], 144]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, 
x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[20] * B\n\t" - "ldp x8, x9, [%[a], 160]\n\t" + "ldp x9, x10, [%[a], 160]\n\t" "str x4, [%[r], 152]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[21] * B\n\t" "str x5, [%[r], 160]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[22] * B\n\t" - "ldp x8, x9, [%[a], 176]\n\t" + "ldp x9, x10, [%[a], 176]\n\t" "str x3, [%[r], 168]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[23] * B\n\t" "str x4, [%[r], 176]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[24] * B\n\t" - "ldp x8, x9, [%[a], 192]\n\t" + "ldp x9, x10, [%[a], 192]\n\t" "str x5, [%[r], 184]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[25] * B\n\t" "str x3, [%[r], 192]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[26] * B\n\t" - "ldp x8, x9, [%[a], 208]\n\t" + "ldp x9, x10, [%[a], 208]\n\t" "str x4, [%[r], 200]\n\t" - "mov x4, 0\n\t" 
- "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[27] * B\n\t" "str x5, [%[r], 208]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[28] * B\n\t" - "ldp x8, x9, [%[a], 224]\n\t" + "ldp x9, x10, [%[a], 224]\n\t" "str x3, [%[r], 216]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[29] * B\n\t" "str x4, [%[r], 224]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[30] * B\n\t" - "ldp x8, x9, [%[a], 240]\n\t" + "ldp x9, x10, [%[a], 240]\n\t" "str x5, [%[r], 232]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[31] * B\n\t" "str x3, [%[r], 240]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[32] * B\n\t" - "ldp x8, x9, [%[a], 256]\n\t" + "ldp x9, x10, [%[a], 256]\n\t" "str x4, [%[r], 248]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[33] * B\n\t" "str x5, [%[r], 256]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" 
"adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[34] * B\n\t" - "ldp x8, x9, [%[a], 272]\n\t" + "ldp x9, x10, [%[a], 272]\n\t" "str x3, [%[r], 264]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[35] * B\n\t" "str x4, [%[r], 272]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[36] * B\n\t" - "ldp x8, x9, [%[a], 288]\n\t" + "ldp x9, x10, [%[a], 288]\n\t" "str x5, [%[r], 280]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[37] * B\n\t" "str x3, [%[r], 288]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[38] * B\n\t" - "ldp x8, x9, [%[a], 304]\n\t" + "ldp x9, x10, [%[a], 304]\n\t" "str x4, [%[r], 296]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[39] * B\n\t" "str x5, [%[r], 304]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[40] * B\n\t" - "ldp x8, x9, [%[a], 320]\n\t" + "ldp x9, x10, [%[a], 320]\n\t" "str x3, [%[r], 312]\n\t" - 
"mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[41] * B\n\t" "str x4, [%[r], 320]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[42] * B\n\t" - "ldp x8, x9, [%[a], 336]\n\t" + "ldp x9, x10, [%[a], 336]\n\t" "str x5, [%[r], 328]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[43] * B\n\t" "str x3, [%[r], 336]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[44] * B\n\t" - "ldp x8, x9, [%[a], 352]\n\t" + "ldp x9, x10, [%[a], 352]\n\t" "str x4, [%[r], 344]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[45] * B\n\t" "str x5, [%[r], 352]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[46] * B\n\t" - "ldp x8, x9, [%[a], 368]\n\t" + "ldp x9, x10, [%[a], 368]\n\t" "str x3, [%[r], 360]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[47] * B\n\t" "str x4, [%[r], 368]\n\t" - "mov x4, 0\n\t" - "mul x6, 
%[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[48] * B\n\t" - "ldp x8, x9, [%[a], 384]\n\t" + "ldp x9, x10, [%[a], 384]\n\t" "str x5, [%[r], 376]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[49] * B\n\t" "str x3, [%[r], 384]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[50] * B\n\t" - "ldp x8, x9, [%[a], 400]\n\t" + "ldp x9, x10, [%[a], 400]\n\t" "str x4, [%[r], 392]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[51] * B\n\t" "str x5, [%[r], 400]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[52] * B\n\t" - "ldp x8, x9, [%[a], 416]\n\t" + "ldp x9, x10, [%[a], 416]\n\t" "str x3, [%[r], 408]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[53] * B\n\t" "str x4, [%[r], 416]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[54] * B\n\t" - "ldp x8, x9, [%[a], 432]\n\t" + "ldp x9, x10, [%[a], 432]\n\t" "str x5, [%[r], 
424]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[55] * B\n\t" "str x3, [%[r], 432]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[56] * B\n\t" - "ldp x8, x9, [%[a], 448]\n\t" + "ldp x9, x10, [%[a], 448]\n\t" "str x4, [%[r], 440]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[57] * B\n\t" "str x5, [%[r], 448]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[58] * B\n\t" - "ldp x8, x9, [%[a], 464]\n\t" + "ldp x9, x10, [%[a], 464]\n\t" "str x3, [%[r], 456]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[59] * B\n\t" "str x4, [%[r], 464]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[60] * B\n\t" - "ldp x8, x9, [%[a], 480]\n\t" + "ldp x9, x10, [%[a], 480]\n\t" "str x5, [%[r], 472]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[61] * B\n\t" "str x3, [%[r], 480]\n\t" - "mov x3, 0\n\t" 
- "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[62] * B\n\t" - "ldp x8, x9, [%[a], 496]\n\t" + "ldp x9, x10, [%[a], 496]\n\t" "str x4, [%[r], 488]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[63] * B\n\t" "str x5, [%[r], 496]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "adc x4, x4, x7\n\t" "stp x3, x4, [%[r], 504]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } @@ -15868,874 +18273,808 @@ static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" + "ldp x11, x12, [%[a], 0]\n\t" + "ldp x13, x14, [%[a], 16]\n\t" + "ldp x15, x16, [%[a], 32]\n\t" + "ldp x17, x19, [%[a], 48]\n\t" + "ldp x20, x21, [%[a], 64]\n\t" + "ldp x22, x23, [%[a], 80]\n\t" + "# No carry yet\n\t" "mov x3, xzr\n\t" - "# i = 64\n\t" + "# i = 0..63\n\t" "mov x4, 64\n\t" - "ldp x12, x13, [%[a], 0]\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" - "mul x9, %[mp], x12\n\t" + "mul x10, %[mp], x11\n\t" + "ldp x24, x25, [%[m], 0]\n\t" + "ldp x26, x27, [%[m], 16]\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" - "adds x12, x12, x7\n\t" + "mul x5, x24, 
x10\n\t" + "umulh x6, x24, x10\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" - "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" - "adds x12, x13, x7\n\t" + "adds x11, x11, x5\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x11, x12, x5\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" - "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" - "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" - "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x11, x11, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x12, x13, x5\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" - "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" - "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "adds x12, x12, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x13, x14, x5\n\t" + "ldp x24, x25, [%[m], 32]\n\t" + "ldp x26, x27, [%[m], 48]\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x13, x13, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x14, x15, x5\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "adds x14, x14, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x15, x16, x5\n\t" "# a[i+6] += m[6] * mu\n\t" - "ldr x11, [%[a], 48]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 
40]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x15, x15, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x16, x17, x5\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "adds x16, x16, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x17, x19, x5\n\t" + "ldp x24, x25, [%[m], 64]\n\t" + "ldp x26, x27, [%[m], 80]\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x17, x17, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x24, x10\n\t" + "adds x19, x20, x5\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "adds x19, x19, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x25, x10\n\t" + "adds x20, x21, x5\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x20, x20, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "umulh x6, x26, x10\n\t" + "adds x21, x22, x5\n\t" "# a[i+11] += m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, x10, x7\n\t" + "adds x21, x21, 
x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "umulh x7, x27, x10\n\t" + "adds x22, x23, x5\n\t" + "ldp x24, x25, [%[m], 96]\n\t" + "ldp x26, x27, [%[m], 112]\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" - "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x22, x22, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "ldr x23, [%[a], 96]\n\t" + "umulh x6, x24, x10\n\t" + "adds x23, x23, x5\n\t" "# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" - "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "adds x23, x23, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "ldp x8, x9, [%[a], 104]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 104]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 112]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 112]\n\t" + "ldp x8, x9, [%[a], 120]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 128]\n\t" + "ldp x26, x27, [%[m], 144]\n\t" "# a[i+16] += m[16] * mu\n\t" - "ldr x11, [%[a], 128]\n\t" 
- "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 128]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 120]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 120]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+17] += m[17] * mu\n\t" - "ldr x10, [%[a], 136]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 136]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 128]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 128]\n\t" + "ldp x8, x9, [%[a], 136]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+18] += m[18] * mu\n\t" - "ldr x11, [%[a], 144]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 144]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 136]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 136]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+19] += m[19] * mu\n\t" - "ldr x10, [%[a], 152]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 152]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 144]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 144]\n\t" + "ldp x8, x9, [%[a], 152]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 160]\n\t" + "ldp x26, x27, [%[m], 176]\n\t" "# a[i+20] += m[20] * mu\n\t" - "ldr x11, [%[a], 160]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 160]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str 
x10, [%[a], 152]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 152]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+21] += m[21] * mu\n\t" - "ldr x10, [%[a], 168]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 168]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 160]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 160]\n\t" + "ldp x8, x9, [%[a], 168]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+22] += m[22] * mu\n\t" - "ldr x11, [%[a], 176]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 176]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 168]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 168]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+23] += m[23] * mu\n\t" - "ldr x10, [%[a], 184]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 184]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 176]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 176]\n\t" + "ldp x8, x9, [%[a], 184]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 192]\n\t" + "ldp x26, x27, [%[m], 208]\n\t" "# a[i+24] += m[24] * mu\n\t" - "ldr x11, [%[a], 192]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 192]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 184]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, 
[%[a], 184]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+25] += m[25] * mu\n\t" - "ldr x10, [%[a], 200]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 200]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 192]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 192]\n\t" + "ldp x8, x9, [%[a], 200]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+26] += m[26] * mu\n\t" - "ldr x11, [%[a], 208]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 208]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 200]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 200]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+27] += m[27] * mu\n\t" - "ldr x10, [%[a], 216]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 216]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 208]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 208]\n\t" + "ldp x8, x9, [%[a], 216]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 224]\n\t" + "ldp x26, x27, [%[m], 240]\n\t" "# a[i+28] += m[28] * mu\n\t" - "ldr x11, [%[a], 224]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 224]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 216]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 216]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+29] += m[29] * mu\n\t" - "ldr x10, [%[a], 232]\n\t" - "adc x6, x8, xzr\n\t" - "ldr 
x8, [%[m], 232]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 224]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 224]\n\t" + "ldp x8, x9, [%[a], 232]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+30] += m[30] * mu\n\t" - "ldr x11, [%[a], 240]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 240]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 232]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 232]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+31] += m[31] * mu\n\t" - "ldr x10, [%[a], 248]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 248]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 240]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 240]\n\t" + "ldp x8, x9, [%[a], 248]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 256]\n\t" + "ldp x26, x27, [%[m], 272]\n\t" "# a[i+32] += m[32] * mu\n\t" - "ldr x11, [%[a], 256]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 256]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 248]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 248]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+33] += m[33] * mu\n\t" - "ldr x10, [%[a], 264]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 264]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 256]\n\t" - "adds x10, 
x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 256]\n\t" + "ldp x8, x9, [%[a], 264]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+34] += m[34] * mu\n\t" - "ldr x11, [%[a], 272]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 272]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 264]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 264]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+35] += m[35] * mu\n\t" - "ldr x10, [%[a], 280]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 280]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 272]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 272]\n\t" + "ldp x8, x9, [%[a], 280]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 288]\n\t" + "ldp x26, x27, [%[m], 304]\n\t" "# a[i+36] += m[36] * mu\n\t" - "ldr x11, [%[a], 288]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 288]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 280]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 280]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+37] += m[37] * mu\n\t" - "ldr x10, [%[a], 296]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 296]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 288]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 288]\n\t" + "ldp x8, x9, [%[a], 296]\n\t" + "umulh x7, 
x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+38] += m[38] * mu\n\t" - "ldr x11, [%[a], 304]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 304]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 296]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 296]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+39] += m[39] * mu\n\t" - "ldr x10, [%[a], 312]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 312]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 304]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 304]\n\t" + "ldp x8, x9, [%[a], 312]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 320]\n\t" + "ldp x26, x27, [%[m], 336]\n\t" "# a[i+40] += m[40] * mu\n\t" - "ldr x11, [%[a], 320]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 320]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 312]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 312]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+41] += m[41] * mu\n\t" - "ldr x10, [%[a], 328]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 328]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 320]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 320]\n\t" + "ldp x8, x9, [%[a], 328]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+42] += m[42] * mu\n\t" - "ldr x11, [%[a], 336]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 336]\n\t" - "adds 
x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 328]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 328]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+43] += m[43] * mu\n\t" - "ldr x10, [%[a], 344]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 344]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 336]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 336]\n\t" + "ldp x8, x9, [%[a], 344]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 352]\n\t" + "ldp x26, x27, [%[m], 368]\n\t" "# a[i+44] += m[44] * mu\n\t" - "ldr x11, [%[a], 352]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 352]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 344]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 344]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+45] += m[45] * mu\n\t" - "ldr x10, [%[a], 360]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 360]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 352]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 352]\n\t" + "ldp x8, x9, [%[a], 360]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+46] += m[46] * mu\n\t" - "ldr x11, [%[a], 368]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 368]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 360]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, 
xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 360]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+47] += m[47] * mu\n\t" - "ldr x10, [%[a], 376]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 376]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 368]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 368]\n\t" + "ldp x8, x9, [%[a], 376]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 384]\n\t" + "ldp x26, x27, [%[m], 400]\n\t" "# a[i+48] += m[48] * mu\n\t" - "ldr x11, [%[a], 384]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 384]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 376]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 376]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+49] += m[49] * mu\n\t" - "ldr x10, [%[a], 392]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 392]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 384]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 384]\n\t" + "ldp x8, x9, [%[a], 392]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+50] += m[50] * mu\n\t" - "ldr x11, [%[a], 400]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 400]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 392]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 392]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, 
x5\n\t" "# a[i+51] += m[51] * mu\n\t" - "ldr x10, [%[a], 408]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 408]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 400]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 400]\n\t" + "ldp x8, x9, [%[a], 408]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 416]\n\t" + "ldp x26, x27, [%[m], 432]\n\t" "# a[i+52] += m[52] * mu\n\t" - "ldr x11, [%[a], 416]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 416]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 408]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 408]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+53] += m[53] * mu\n\t" - "ldr x10, [%[a], 424]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 424]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 416]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 416]\n\t" + "ldp x8, x9, [%[a], 424]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+54] += m[54] * mu\n\t" - "ldr x11, [%[a], 432]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 432]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 424]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 424]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+55] += m[55] * mu\n\t" - "ldr x10, [%[a], 440]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 440]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, 
x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 432]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 432]\n\t" + "ldp x8, x9, [%[a], 440]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 448]\n\t" + "ldp x26, x27, [%[m], 464]\n\t" "# a[i+56] += m[56] * mu\n\t" - "ldr x11, [%[a], 448]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 448]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 440]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 440]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+57] += m[57] * mu\n\t" - "ldr x10, [%[a], 456]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 456]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 448]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 448]\n\t" + "ldp x8, x9, [%[a], 456]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+58] += m[58] * mu\n\t" - "ldr x11, [%[a], 464]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 464]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 456]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 456]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+59] += m[59] * mu\n\t" - "ldr x10, [%[a], 472]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 472]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 464]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x27, x10\n\t" 
+ "adc x6, x6, xzr\n\t" + "str x9, [%[a], 464]\n\t" + "ldp x8, x9, [%[a], 472]\n\t" + "umulh x7, x27, x10\n\t" + "adds x8, x8, x5\n\t" + "ldp x24, x25, [%[m], 480]\n\t" + "ldp x26, x27, [%[m], 496]\n\t" "# a[i+60] += m[60] * mu\n\t" - "ldr x11, [%[a], 480]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 480]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 472]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x24, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 472]\n\t" + "umulh x6, x24, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+61] += m[61] * mu\n\t" - "ldr x10, [%[a], 488]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 488]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x11, [%[a], 480]\n\t" - "adds x10, x10, x7\n\t" + "adds x9, x9, x7\n\t" + "mul x5, x25, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 480]\n\t" + "ldp x8, x9, [%[a], 488]\n\t" + "umulh x7, x25, x10\n\t" + "adds x8, x8, x5\n\t" "# a[i+62] += m[62] * mu\n\t" - "ldr x11, [%[a], 496]\n\t" - "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 496]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" - "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 488]\n\t" - "adds x11, x11, x7\n\t" + "adc x7, x7, xzr\n\t" + "adds x8, x8, x6\n\t" + "mul x5, x26, x10\n\t" + "adc x7, x7, xzr\n\t" + "str x8, [%[a], 488]\n\t" + "umulh x6, x26, x10\n\t" + "adds x9, x9, x5\n\t" "# a[i+63] += m[63] * mu\n\t" - "ldr x10, [%[a], 504]\n\t" - "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 504]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" - "adds x6, x6, x7\n\t" - "adcs x8, x8, x3\n\t" - "str x11, [%[a], 496]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 512]\n\t" - "str x10, [%[a], 504]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 512]\n\t" + "adds x9, x9, x7\n\t" + 
"mul x5, x27, x10\n\t" + "adc x6, x6, xzr\n\t" + "str x9, [%[a], 496]\n\t" + "umulh x7, x27, x10\n\t" + "ldp x8, x9, [%[a], 504]\n\t" + "adds x5, x5, x6\n\t" + "adcs x7, x7, x3\n\t" + "adc x3, xzr, xzr\n\t" + "adds x8, x8, x5\n\t" + "str x8, [%[a], 504]\n\t" + "adcs x9, x9, x7\n\t" + "str x9, [%[a], 512]\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" - "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" + "b.ne 1b\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 512\n\t" + "neg x3, x3\n\t" + "mov %[mp], %[a]\n\t" + "sub %[a], %[a], 512\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, 
x10, [%[a], 112]\n\t" - "ldp x5, x6, [%[m], 128]\n\t" - "ldp x7, x8, [%[m], 144]\n\t" - "ldp x12, x13, [x9, 128]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 144]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 128]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 144]\n\t" - "ldp x5, x6, [%[m], 160]\n\t" - "ldp x7, x8, [%[m], 176]\n\t" - "ldp x12, x13, [x9, 160]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 176]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 160]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 176]\n\t" - "ldp x5, x6, [%[m], 192]\n\t" - "ldp x7, x8, [%[m], 208]\n\t" - "ldp x12, x13, [x9, 192]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 208]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 192]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 208]\n\t" - "ldp x5, x6, [%[m], 224]\n\t" - "ldp x7, x8, [%[m], 240]\n\t" - "ldp x12, x13, [x9, 224]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 240]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 224]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 240]\n\t" - "ldp x5, x6, [%[m], 256]\n\t" - "ldp x7, x8, [%[m], 272]\n\t" - "ldp x12, x13, [x9, 256]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 272]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 256]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 272]\n\t" - "ldp x5, x6, 
[%[m], 288]\n\t" - "ldp x7, x8, [%[m], 304]\n\t" - "ldp x12, x13, [x9, 288]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 304]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 288]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 304]\n\t" - "ldp x5, x6, [%[m], 320]\n\t" - "ldp x7, x8, [%[m], 336]\n\t" - "ldp x12, x13, [x9, 320]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 336]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 320]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 336]\n\t" - "ldp x5, x6, [%[m], 352]\n\t" - "ldp x7, x8, [%[m], 368]\n\t" - "ldp x12, x13, [x9, 352]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 368]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 352]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 368]\n\t" - "ldp x5, x6, [%[m], 384]\n\t" - "ldp x7, x8, [%[m], 400]\n\t" - "ldp x12, x13, [x9, 384]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 400]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 384]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 400]\n\t" - "ldp x5, x6, [%[m], 416]\n\t" - "ldp x7, x8, [%[m], 432]\n\t" - "ldp x12, x13, [x9, 416]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 432]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 416]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 432]\n\t" - "ldp x5, x6, [%[m], 448]\n\t" - "ldp x7, x8, [%[m], 
464]\n\t" - "ldp x12, x13, [x9, 448]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 464]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 448]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 464]\n\t" - "ldp x5, x6, [%[m], 480]\n\t" - "ldp x7, x8, [%[m], 496]\n\t" - "ldp x12, x13, [x9, 480]\n\t" - "and x5, x5, x3\n\t" - "ldp x11, x10, [x9, 496]\n\t" - "and x6, x6, x3\n\t" - "sbcs x12, x12, x5\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x6\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 480]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 496]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x11, x11, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x12, x12, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x13, x13, x6\n\t" + "stp x11, x12, [%[a], 0]\n\t" + "sbcs x14, x14, x7\n\t" + "stp x13, x14, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x15, x15, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x16, x16, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x17, x17, x6\n\t" + "stp x15, x16, [%[a], 32]\n\t" + "sbcs x19, x19, x7\n\t" + "stp x17, x19, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x20, x20, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x21, x21, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x22, x22, x6\n\t" + "stp x20, x21, [%[a], 64]\n\t" + "sbcs x23, x23, x7\n\t" + "stp x22, x23, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "ldp x8, 
x9, [%[mp], 96]\n\t" + "ldp x10, x11, [%[mp], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 96]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 112]\n\t" + "ldp x4, x5, [%[m], 128]\n\t" + "ldp x6, x7, [%[m], 144]\n\t" + "ldp x8, x9, [%[mp], 128]\n\t" + "ldp x10, x11, [%[mp], 144]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 128]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 144]\n\t" + "ldp x4, x5, [%[m], 160]\n\t" + "ldp x6, x7, [%[m], 176]\n\t" + "ldp x8, x9, [%[mp], 160]\n\t" + "ldp x10, x11, [%[mp], 176]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 160]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 176]\n\t" + "ldp x4, x5, [%[m], 192]\n\t" + "ldp x6, x7, [%[m], 208]\n\t" + "ldp x8, x9, [%[mp], 192]\n\t" + "ldp x10, x11, [%[mp], 208]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 192]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 208]\n\t" + "ldp x4, x5, [%[m], 224]\n\t" + "ldp x6, x7, [%[m], 240]\n\t" + "ldp x8, x9, [%[mp], 224]\n\t" + "ldp x10, x11, [%[mp], 240]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 224]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 240]\n\t" + "ldp x4, x5, [%[m], 256]\n\t" + "ldp x6, x7, [%[m], 272]\n\t" + "ldp x8, x9, [%[mp], 256]\n\t" + "ldp x10, x11, [%[mp], 
272]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 256]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 272]\n\t" + "ldp x4, x5, [%[m], 288]\n\t" + "ldp x6, x7, [%[m], 304]\n\t" + "ldp x8, x9, [%[mp], 288]\n\t" + "ldp x10, x11, [%[mp], 304]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 288]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 304]\n\t" + "ldp x4, x5, [%[m], 320]\n\t" + "ldp x6, x7, [%[m], 336]\n\t" + "ldp x8, x9, [%[mp], 320]\n\t" + "ldp x10, x11, [%[mp], 336]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 320]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 336]\n\t" + "ldp x4, x5, [%[m], 352]\n\t" + "ldp x6, x7, [%[m], 368]\n\t" + "ldp x8, x9, [%[mp], 352]\n\t" + "ldp x10, x11, [%[mp], 368]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 352]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 368]\n\t" + "ldp x4, x5, [%[m], 384]\n\t" + "ldp x6, x7, [%[m], 400]\n\t" + "ldp x8, x9, [%[mp], 384]\n\t" + "ldp x10, x11, [%[mp], 400]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 384]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 400]\n\t" + "ldp x4, x5, [%[m], 416]\n\t" + "ldp x6, x7, [%[m], 432]\n\t" + "ldp x8, x9, [%[mp], 416]\n\t" + "ldp x10, x11, [%[mp], 432]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, 
x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 416]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 432]\n\t" + "ldp x4, x5, [%[m], 448]\n\t" + "ldp x6, x7, [%[m], 464]\n\t" + "ldp x8, x9, [%[mp], 448]\n\t" + "ldp x10, x11, [%[mp], 464]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 448]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 464]\n\t" + "ldp x4, x5, [%[m], 480]\n\t" + "ldp x6, x7, [%[m], 496]\n\t" + "ldp x8, x9, [%[mp], 480]\n\t" + "ldp x10, x11, [%[mp], 496]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x8, x8, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x9, x9, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x10, x10, x6\n\t" + "stp x8, x9, [%[a], 480]\n\t" + "sbcs x11, x11, x7\n\t" + "stp x10, x11, [%[a], 496]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x10", "x8", "x9", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "cc" ); } @@ -16747,9 +19086,9 @@ SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_64(r, a, b); @@ -16761,9 +19100,9 @@ static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_64(r, a); @@ -16801,7 +19140,7 @@ static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -16981,45 +19320,45 @@ static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } #endif /* WOLFSSL_SP_SMALL */ -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. 
*/ -static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_4096_word_64_cond(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "b.lt 1f\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + "1:\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -17028,23 +19367,25 @@ static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "cmp x3, x5\n\t" + "b.lt 2f\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + "2:\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); - return r; + return d1; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -17056,8 +19397,8 @@ static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -17067,9 +19408,20 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s div = d[63]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); - for (i=63; i>=0; i--) { - sp_digit hi = t1[64 + i] - (t1[64 + i] == div); - r1 = div_4096_word_64(hi, t1[64 + i - 1], div); + for (i = 63; i > 0; i--) { + if (t1[i + 64] != d[i]) + break; + } + if (t1[i + 64] >= d[i]) { + sp_4096_sub_in_place_64(&t1[64], d); + } + for (i = 63; i >= 0; i--) { + if (t1[64 + i] == div) { + r1 = SP_DIGIT_MAX; + } + else { + r1 = div_4096_word_64_cond(t1[64 + i], t1[64 + i - 1], div); + } sp_4096_mul_d_64(t2, d, r1); t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); @@ -17137,7 +19489,7 @@ static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -17371,13 +19723,74 @@ static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; #endif /* WOLFSSL_SP_SMALL */ } +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. 
+ */ +static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "extr x3, %[d1], %[d0], 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add %[d1], x6, x3\n\t" + + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" + ); + + return d1; +} + /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -17419,483 +19832,427 @@ static sp_int64 sp_4096_cmp_64(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 504\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #64\n\t" + "add %[a], %[a], #496\n\t" + "add %[b], %[b], #496\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 496]\n\t" - "ldp x7, x8, [%[b], 496]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 496]\n\t" + "ldp x8, x9, [%[b], 496]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 480]\n\t" - "ldp x7, x8, [%[b], 480]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, 
x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 480]\n\t" + "ldp x8, x9, [%[b], 480]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 464]\n\t" - "ldp x7, x8, [%[b], 464]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 464]\n\t" + "ldp x8, x9, [%[b], 464]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 448]\n\t" - "ldp x7, x8, [%[b], 448]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 448]\n\t" + "ldp x8, x9, [%[b], 448]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 432]\n\t" - "ldp x7, x8, [%[b], 432]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + 
"csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 432]\n\t" + "ldp x8, x9, [%[b], 432]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 416]\n\t" - "ldp x7, x8, [%[b], 416]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 416]\n\t" + "ldp x8, x9, [%[b], 416]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 400]\n\t" - "ldp x7, x8, [%[b], 400]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 400]\n\t" + "ldp x8, x9, [%[b], 400]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 384]\n\t" - "ldp x7, x8, [%[b], 384]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + 
"and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 384]\n\t" + "ldp x8, x9, [%[b], 384]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 368]\n\t" - "ldp x7, x8, [%[b], 368]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 368]\n\t" + "ldp x8, x9, [%[b], 368]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 352]\n\t" - "ldp x7, x8, [%[b], 352]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 352]\n\t" + "ldp x8, x9, [%[b], 352]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 336]\n\t" - "ldp x7, x8, [%[b], 336]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 
336]\n\t" + "ldp x8, x9, [%[b], 336]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 320]\n\t" - "ldp x7, x8, [%[b], 320]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 320]\n\t" + "ldp x8, x9, [%[b], 320]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 304]\n\t" - "ldp x7, x8, [%[b], 304]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 304]\n\t" + "ldp x8, x9, [%[b], 304]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 288]\n\t" - "ldp x7, x8, [%[b], 288]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 288]\n\t" + "ldp x8, x9, [%[b], 288]\n\t" + 
"subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 272]\n\t" - "ldp x7, x8, [%[b], 272]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 272]\n\t" + "ldp x8, x9, [%[b], 272]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 256]\n\t" - "ldp x7, x8, [%[b], 256]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 256]\n\t" + "ldp x8, x9, [%[b], 256]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 240]\n\t" - "ldp x7, x8, [%[b], 240]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 240]\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, 
lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 224]\n\t" - "ldp x7, x8, [%[b], 224]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 224]\n\t" + "ldp x8, x9, [%[b], 224]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 208]\n\t" - "ldp x7, x8, [%[b], 208]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 208]\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 192]\n\t" - "ldp x7, x8, [%[b], 192]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 192]\n\t" + "ldp x8, x9, [%[b], 192]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, 
x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 176]\n\t" - "ldp x7, x8, [%[b], 176]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 176]\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 160]\n\t" - "ldp x7, x8, [%[b], 160]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 160]\n\t" + "ldp x8, x9, [%[b], 160]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 144]\n\t" - "ldp x7, x8, [%[b], 144]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 144]\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, 
x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 128]\n\t" - "ldp x7, x8, [%[b], 128]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 128]\n\t" + "ldp x8, x9, [%[b], 128]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel 
x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and 
x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, 
x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -17911,8 +20268,8 @@ static sp_int64 sp_4096_cmp_64(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -17922,9 +20279,13 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig div = d[63]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); - for (i=63; i>=0; i--) { - sp_digit hi = t1[64 + i] - (t1[64 + i] == div); + r1 = sp_4096_cmp_64(&t1[64], d) >= 0; + sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1); + for (i = 63; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[64 + i] == div); + sp_digit hi = t1[64 + i] + mask; r1 = div_4096_word_64(hi, t1[64 + i - 1], div); + r1 |= mask; sp_4096_mul_d_64(t2, d, r1); t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); @@ -17963,12 +20324,14 @@ static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_dig * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[8 * 128]; @@ -17983,11 +20346,17 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 128), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 128), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -18039,6 +20408,10 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -18079,7 +20452,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -18094,12 +20467,14 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[16 * 128]; @@ -18114,11 +20489,17 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -18178,6 +20559,10 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. 
*/ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -18219,7 +20604,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -18246,7 +20631,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[64 * 5]; @@ -18268,7 +20653,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -18278,9 +20663,9 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, #endif if (err == MP_OKAY) { + ah = a + 64; r = a + 64 * 2; m = r + 64 * 2; - ah = a + 64; sp_4096_from_bin(ah, 64, in, inLen); #if DIGIT_BIT >= 64 @@ -18298,7 +20683,38 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_4096_from_mp(m, 64, mm); - if (e[0] == 0x3) { + if (e[0] == 0x10001) { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_4096_mod_64_cond(r, a, m); + /* Montgomery form: r = a.R mod m */ + + if (err == MP_OKAY) { + /* r = a ^ 0x10000 => r = a squared 16 times */ + for (i = 15; i >= 0; i--) { + sp_4096_mont_sqr_64(r, r, m, mp); + } + /* mont_red(r.R.R) = (r.R.R / R) mod m = r.R mod m + * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m + */ + sp_4096_mont_mul_64(r, r, ah, m, mp); + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) { + break; + } + } + if (r[i] >= m[i]) { + sp_4096_sub_in_place_64(r, m); + } + } + } + else if (e[0] == 0x3) { if (err == MP_OKAY) { sp_4096_sqr_64(r, ah); err = sp_4096_mod_64_cond(r, r, m); @@ -18326,7 +20742,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } XMEMCPY(r, a, sizeof(sp_digit) * 64); - for (i--; i>=0; i--) { + for (i--; i >= 0; i--) { sp_4096_mont_sqr_64(r, r, m, mp); if (((e[0] >> i) & 1) == 1) { sp_4096_mont_mul_64(r, r, a, m, mp); @@ -18352,7 +20768,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif @@ -18361,6 +20777,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -18372,7 +20789,6 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { -#ifdef WOLFSSL_SP_SMALL sp_digit c = 0; __asm__ __volatile__ ( @@ -18390,134 +20806,12 @@ static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; -#else - __asm__ __volatile__ ( - - "ldp x5, x7, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "ldp x4, x6, [%[a], 0]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 16]\n\t" - "and x7, x7, %[m]\n\t" - "adds x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 0]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 16]\n\t" - "ldp x5, x7, [%[b], 32]\n\t" - "ldp x11, x12, [%[b], 48]\n\t" - "ldp x4, x6, [%[a], 32]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 48]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 32]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 48]\n\t" - "ldp x5, x7, [%[b], 64]\n\t" - "ldp x11, x12, [%[b], 80]\n\t" - "ldp x4, x6, [%[a], 64]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 80]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 64]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 80]\n\t" - "ldp x5, x7, [%[b], 96]\n\t" - "ldp x11, x12, [%[b], 112]\n\t" - "ldp x4, x6, [%[a], 96]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 112]\n\t" - "and x7, x7, %[m]\n\t" - "adcs 
x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 96]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 112]\n\t" - "ldp x5, x7, [%[b], 128]\n\t" - "ldp x11, x12, [%[b], 144]\n\t" - "ldp x4, x6, [%[a], 128]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 144]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 128]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 144]\n\t" - "ldp x5, x7, [%[b], 160]\n\t" - "ldp x11, x12, [%[b], 176]\n\t" - "ldp x4, x6, [%[a], 160]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 176]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 160]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 176]\n\t" - "ldp x5, x7, [%[b], 192]\n\t" - "ldp x11, x12, [%[b], 208]\n\t" - "ldp x4, x6, [%[a], 192]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 208]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 192]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 208]\n\t" - "ldp x5, x7, [%[b], 224]\n\t" - "ldp x11, x12, [%[b], 240]\n\t" - "ldp x4, x6, [%[a], 224]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 240]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 224]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 240]\n\t" - "cset %[r], cs\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" - ); - - return (sp_digit)r; -#endif 
/* WOLFSSL_SP_SMALL */ } +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * @@ -18541,7 +20835,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, const mp_int* qim, const mp_int* mm, byte* out, word32* outLen) { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[64 * 4]; @@ -18575,7 +20869,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -18600,21 +20894,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 64); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[32 * 11]; @@ -18642,8 +20936,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, DYNAMIC_TYPE_RSA); @@ -18691,12 +20991,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 32 * 11); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -18827,7 +21127,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 -static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n) +static void sp_4096_lshift_64(sp_digit* r, const sp_digit* a, byte n) { word64 n64 = n; __asm__ __volatile__ ( @@ -19215,11 +21515,10 @@ static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n) "lsl x3, x3, %[n]\n\t" "lsr x5, x5, x6\n\t" "orr x4, x4, x5\n\t" - "str x3, [%[r]]\n\t" - "str x4, [%[r], 8]\n\t" + "stp x3, x4, [%[r]]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n64) - : "memory", "x2", "x3", "x4", "x5", "x6" + : "memory", "x2", "x3", "x4", "x5", "x6", "cc" ); } @@ -19229,12 +21528,14 @@ static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[193]; @@ -19250,11 +21551,17 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19283,6 +21590,10 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, n <<= c; c = 64 - c; } + else if (c == 0) { + /* All bits in top word used. */ + y = (byte)n; + } else { y = (byte)(n >> c); n <<= 64 - c; @@ -19330,7 +21641,7 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, sp_4096_cond_sub_64(r, r, m, mask); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19505,10 +21816,10 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[8]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 24\n\t" "csel x3, xzr, x3, cc\n\t" @@ -19538,7 +21849,7 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", 
"x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -19551,114 +21862,104 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +SP_NOINLINE static void sp_256_mul_4(sp_digit* r, const sp_digit* a, + const sp_digit* b) { - sp_digit tmp[4]; - __asm__ __volatile__ ( - "ldp x16, x17, [%[a], 0]\n\t" - "ldp x21, x22, [%[b], 0]\n\t" - "# A[0] * B[0]\n\t" - "mul x8, x16, x21\n\t" - "ldr x19, [%[a], 16]\n\t" - "umulh x9, x16, x21\n\t" - "ldr x23, [%[b], 16]\n\t" - "# A[0] * B[1]\n\t" - "mul x4, x16, x22\n\t" - "ldr x20, [%[a], 24]\n\t" - "umulh x5, x16, x22\n\t" - "ldr x24, [%[b], 24]\n\t" - "adds x9, x9, x4\n\t" - "# A[1] * B[0]\n\t" - "mul x4, x17, x21\n\t" - "adc x10, xzr, x5\n\t" - "umulh x5, x17, x21\n\t" - "adds x9, x9, x4\n\t" - "# A[0] * B[2]\n\t" - "mul x4, x16, x23\n\t" - "adcs x10, x10, x5\n\t" - "umulh x5, x16, x23\n\t" - "adc x11, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[1] * B[1]\n\t" - "mul x4, x17, x22\n\t" - "adc x11, x11, x5\n\t" - "umulh x5, x17, x22\n\t" - "adds x10, x10, x4\n\t" - "# A[2] * B[0]\n\t" - "mul x4, x19, x21\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x19, x21\n\t" - "adc x12, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[0] * B[3]\n\t" - "mul x4, x16, x24\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x16, x24\n\t" - "adc x12, x12, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * B[2]\n\t" - "mul x4, x17, x23\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x17, x23\n\t" - "adc x13, xzr, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[2] * B[1]\n\t" - "mul x4, x19, x22\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x19, x22\n\t" - "adc x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[3] * B[0]\n\t" - "mul x4, x20, x21\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x20, x21\n\t" - "adc x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * B[3]\n\t" - "mul x4, x17, x24\n\t" - "adcs x12, 
x12, x5\n\t" - "umulh x5, x17, x24\n\t" - "adc x13, x13, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * B[2]\n\t" - "mul x4, x19, x23\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x19, x23\n\t" - "adc x14, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[3] * B[1]\n\t" - "mul x4, x20, x22\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x20, x22\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * B[3]\n\t" - "mul x4, x19, x24\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x19, x24\n\t" - "adc x14, x14, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[3] * B[2]\n\t" - "mul x4, x20, x23\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x20, x23\n\t" - "adc x15, xzr, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[3] * B[3]\n\t" - "mul x4, x20, x24\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x20, x24\n\t" - "adc x15, x15, xzr\n\t" - "adds x14, x14, x4\n\t" - "adc x15, x15, x5\n\t" - "stp x8, x9, [%[r], 0]\n\t" - "stp x10, x11, [%[r], 16]\n\t" - "stp x12, x13, [%[r], 32]\n\t" - "stp x14, x15, [%[r], 48]\n\t" + "ldp x13, x14, [%[a], 0]\n\t" + "ldp x15, x16, [%[a], 16]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "# A[0] * B[0]\n\t" + "umulh x6, x13, x17\n\t" + "mul x5, x13, x17\n\t" + "# A[2] * B[0]\n\t" + "umulh x8, x15, x17\n\t" + "mul x7, x15, x17\n\t" + "# A[1] * B[0]\n\t" + "mul x3, x14, x17\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x7, x7, x4\n\t" + "adc x8, x8, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x3, x13, x20\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x13, x20\n\t" + "adcs x8, x8, x4\n\t" + "# A[1] * B[3]\n\t" + "mul x9, x14, x21\n\t" + "adcs x9, x9, xzr\n\t" + "umulh x10, x14, x21\n\t" + "adc x10, x10, xzr\n\t" + "# A[0] * B[1]\n\t" + "mul x3, x13, x19\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x13, x19\n\t" + "adcs x7, x7, x4\n\t" + "# A[2] * B[1]\n\t" + "mul x3, x15, x19\n\t" + "adcs x8, x8, x3\n\t" + "umulh x4, x15, x19\n\t" + "adcs x9, x9, x4\n\t" + "adc x10, x10, xzr\n\t" + "# A[1] * B[2]\n\t" + "mul x3, x14, x20\n\t" + 
"adds x8, x8, x3\n\t" + "umulh x4, x14, x20\n\t" + "adcs x9, x9, x4\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, xzr, xzr\n\t" + "# A[1] * B[1]\n\t" + "mul x3, x14, x19\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x14, x19\n\t" + "adcs x8, x8, x4\n\t" + "# A[3] * B[1]\n\t" + "mul x3, x16, x19\n\t" + "adcs x9, x9, x3\n\t" + "umulh x4, x16, x19\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + "# A[2] * B[2]\n\t" + "mul x3, x15, x20\n\t" + "adds x9, x9, x3\n\t" + "umulh x4, x15, x20\n\t" + "adcs x10, x10, x4\n\t" + "# A[3] * B[3]\n\t" + "mul x3, x16, x21\n\t" + "adcs x11, x11, x3\n\t" + "umulh x12, x16, x21\n\t" + "adc x12, x12, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x3, x13, x21\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x13, x21\n\t" + "adcs x9, x9, x4\n\t" + "# A[2] * B[3]\n\t" + "mul x3, x15, x21\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x15, x21\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + "# A[3] * B[0]\n\t" + "mul x3, x16, x17\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x9, x9, x4\n\t" + "# A[3] * B[2]\n\t" + "mul x3, x16, x20\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x16, x20\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + "stp x5, x6, [%[r], 0]\n\t" + "stp x7, x8, [%[r], 16]\n\t" + "stp x9, x10, [%[r], 32]\n\t" + "stp x11, x12, [%[r], 48]\n\t" : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15" + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); } @@ -19668,75 +21969,71 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. 
*/ -static void sp_256_sqr_4(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_256_sqr_4(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "ldp x16, x17, [%[a], 0]\n\t" - "# A[0] * A[1]\n\t" - "mul x9, x16, x17\n\t" - "ldr x19, [%[a], 16]\n\t" - "umulh x10, x16, x17\n\t" - "ldr x20, [%[a], 24]\n\t" - "# A[0] * A[2]\n\t" - "mul x4, x16, x19\n\t" - "umulh x5, x16, x19\n\t" - "adds x10, x10, x4\n\t" - "# A[0] * A[3]\n\t" - "mul x4, x16, x20\n\t" - "adc x11, xzr, x5\n\t" - "umulh x5, x16, x20\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * A[2]\n\t" - "mul x4, x17, x19\n\t" - "adc x12, xzr, x5\n\t" - "umulh x5, x17, x19\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * A[3]\n\t" - "mul x4, x17, x20\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x17, x20\n\t" - "adc x13, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * A[3]\n\t" - "mul x4, x19, x20\n\t" - "adc x13, x13, x5\n\t" - "umulh x5, x19, x20\n\t" - "adds x13, x13, x4\n\t" - "adc x14, xzr, x5\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "# A[0] * A[1]\n\t" + "umulh x6, x12, x13\n\t" + "mul x5, x12, x13\n\t" + "# A[0] * A[3]\n\t" + "umulh x8, x12, x15\n\t" + "mul x7, x12, x15\n\t" + "# A[0] * A[2]\n\t" + "mul x2, x12, x14\n\t" + "adds x6, x6, x2\n\t" + "umulh x3, x12, x14\n\t" + "adcs x7, x7, x3\n\t" + "# A[1] * A[3]\n\t" + "mul x2, x13, x15\n\t" + "adcs x8, x8, x2\n\t" + "umulh x9, x13, x15\n\t" + "adc x9, x9, xzr\n\t" + "# A[1] * A[2]\n\t" + "mul x2, x13, x14\n\t" + "adds x7, x7, x2\n\t" + "umulh x3, x13, x14\n\t" + "adcs x8, x8, x3\n\t" + "# A[2] * A[3]\n\t" + "mul x2, x14, x15\n\t" + "adcs x9, x9, x2\n\t" + "umulh x10, x14, x15\n\t" + "adc x10, x10, xzr\n\t" "# Double\n\t" - "adds x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" "adcs x10, x10, x10\n\t" - "adcs x11, x11, x11\n\t" - "adcs x12, x12, x12\n\t" - "adcs x13, x13, x13\n\t" - "# A[0] * A[0]\n\t" - "mul x8, x16, x16\n\t" - "adcs x14, 
x14, x14\n\t" - "umulh x3, x16, x16\n\t" - "cset x15, cs\n\t" - "# A[1] * A[1]\n\t" - "mul x4, x17, x17\n\t" - "adds x9, x9, x3\n\t" - "umulh x5, x17, x17\n\t" - "adcs x10, x10, x4\n\t" - "# A[2] * A[2]\n\t" - "mul x6, x19, x19\n\t" - "adcs x11, x11, x5\n\t" - "umulh x7, x19, x19\n\t" - "adcs x12, x12, x6\n\t" - "# A[3] * A[3]\n\t" - "mul x16, x20, x20\n\t" - "adcs x13, x13, x7\n\t" - "umulh x17, x20, x20\n\t" - "adcs x14, x14, x16\n\t" - "adc x15, x15, x17\n\t" - "stp x8, x9, [%[r], 0]\n\t" - "stp x10, x11, [%[r], 16]\n\t" - "stp x12, x13, [%[r], 32]\n\t" - "stp x14, x15, [%[r], 48]\n\t" + "adc x11, xzr, xzr\n\t" + "# A[0] * A[0]\n\t" + "umulh x3, x12, x12\n\t" + "mul x4, x12, x12\n\t" + "# A[1] * A[1]\n\t" + "mul x2, x13, x13\n\t" + "adds x5, x5, x3\n\t" + "umulh x3, x13, x13\n\t" + "adcs x6, x6, x2\n\t" + "# A[2] * A[2]\n\t" + "mul x2, x14, x14\n\t" + "adcs x7, x7, x3\n\t" + "umulh x3, x14, x14\n\t" + "adcs x8, x8, x2\n\t" + "# A[3] * A[3]\n\t" + "mul x2, x15, x15\n\t" + "adcs x9, x9, x3\n\t" + "umulh x3, x15, x15\n\t" + "adcs x10, x10, x2\n\t" + "adc x11, x11, x3\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "stp x8, x9, [%[r], 32]\n\t" + "stp x10, x11, [%[r], 48]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "cc" ); } @@ -19760,10 +22057,10 @@ static sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 0]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; @@ -19792,7 +22089,7 @@ static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a, "csetm %[r], cc\n\t" 
: [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; @@ -19857,6 +22154,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* t[5] += t[4] >> 32; t[4] &= 0xffffffff; t[6] += t[5] >> 32; t[5] &= 0xffffffff; t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; r[0] = (t[1] << 32) | t[0]; r[1] = (t[3] << 32) | t[2]; r[2] = (t[5] << 32) | t[4]; @@ -19874,20 +22183,23 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -19917,12 +22229,12 @@ static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { 
r[j] &= 0xffffffffffffffffl; @@ -20087,7 +22399,7 @@ static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m) "stp x5, x6, [%[r], 16]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); } @@ -20098,187 +22410,173 @@ static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m, sp_digit mp) { (void)m; (void)mp; __asm__ __volatile__ ( - "ldp x16, x17, [%[a], 0]\n\t" - "ldp x21, x22, [%[b], 0]\n\t" - "# A[0] * B[0]\n\t" - "mul x8, x16, x21\n\t" - "ldr x19, [%[a], 16]\n\t" - "umulh x9, x16, x21\n\t" - "ldr x23, [%[b], 16]\n\t" - "# A[0] * B[1]\n\t" - "mul x4, x16, x22\n\t" - "ldr x20, [%[a], 24]\n\t" - "umulh x5, x16, x22\n\t" - "ldr x24, [%[b], 24]\n\t" - "adds x9, x9, x4\n\t" - "# A[1] * B[0]\n\t" - "mul x4, x17, x21\n\t" - "adc x10, xzr, x5\n\t" - "umulh x5, x17, x21\n\t" - "adds x9, x9, x4\n\t" - "# A[0] * B[2]\n\t" - "mul x4, x16, x23\n\t" - "adcs x10, x10, x5\n\t" - "umulh x5, x16, x23\n\t" - "adc x11, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[1] * B[1]\n\t" - "mul x4, x17, x22\n\t" - "adc x11, x11, x5\n\t" - "umulh x5, x17, x22\n\t" - "adds x10, x10, x4\n\t" - "# A[2] * B[0]\n\t" - "mul x4, x19, x21\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x19, x21\n\t" - "adc x12, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" - "# A[0] * B[3]\n\t" - "mul x4, x16, x24\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x16, x24\n\t" - "adc x12, x12, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * B[2]\n\t" - 
"mul x4, x17, x23\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x17, x23\n\t" - "adc x13, xzr, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[2] * B[1]\n\t" - "mul x4, x19, x22\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x19, x22\n\t" - "adc x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[3] * B[0]\n\t" - "mul x4, x20, x21\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x20, x21\n\t" - "adc x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * B[3]\n\t" - "mul x4, x17, x24\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x17, x24\n\t" - "adc x13, x13, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * B[2]\n\t" - "mul x4, x19, x23\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x19, x23\n\t" - "adc x14, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[3] * B[1]\n\t" - "mul x4, x20, x22\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x20, x22\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * B[3]\n\t" - "mul x4, x19, x24\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x19, x24\n\t" - "adc x14, x14, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[3] * B[2]\n\t" - "mul x4, x20, x23\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x20, x23\n\t" - "adc x15, xzr, xzr\n\t" - "adds x13, x13, x4\n\t" - "# A[3] * B[3]\n\t" - "mul x4, x20, x24\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x20, x24\n\t" - "adc x15, x15, xzr\n\t" - "adds x14, x14, x4\n\t" - "mov x4, x8\n\t" - "adc x15, x15, x5\n\t" + "ldp x13, x14, [%[a], 0]\n\t" + "ldp x15, x16, [%[a], 16]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "# A[0] * B[0]\n\t" + "umulh x6, x13, x17\n\t" + "mul x5, x13, x17\n\t" + "# A[2] * B[0]\n\t" + "umulh x8, x15, x17\n\t" + "mul x7, x15, x17\n\t" + "# A[1] * B[0]\n\t" + "mul x3, x14, x17\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x7, x7, x4\n\t" + "adc x8, x8, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x3, x13, x20\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x13, x20\n\t" + "adcs x8, x8, x4\n\t" + "# A[1] * B[3]\n\t" + "mul x9, x14, x21\n\t" + "adcs x9, x9, 
xzr\n\t" + "umulh x10, x14, x21\n\t" + "adc x10, x10, xzr\n\t" + "# A[0] * B[1]\n\t" + "mul x3, x13, x19\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x13, x19\n\t" + "adcs x7, x7, x4\n\t" + "# A[2] * B[1]\n\t" + "mul x3, x15, x19\n\t" + "adcs x8, x8, x3\n\t" + "umulh x4, x15, x19\n\t" + "adcs x9, x9, x4\n\t" + "adc x10, x10, xzr\n\t" + "# A[1] * B[2]\n\t" + "mul x3, x14, x20\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x14, x20\n\t" + "adcs x9, x9, x4\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, xzr, xzr\n\t" + "# A[1] * B[1]\n\t" + "mul x3, x14, x19\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x14, x19\n\t" + "adcs x8, x8, x4\n\t" + "# A[3] * B[1]\n\t" + "mul x3, x16, x19\n\t" + "adcs x9, x9, x3\n\t" + "umulh x4, x16, x19\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + "# A[2] * B[2]\n\t" + "mul x3, x15, x20\n\t" + "adds x9, x9, x3\n\t" + "umulh x4, x15, x20\n\t" + "adcs x10, x10, x4\n\t" + "# A[3] * B[3]\n\t" + "mul x3, x16, x21\n\t" + "adcs x11, x11, x3\n\t" + "umulh x12, x16, x21\n\t" + "adc x12, x12, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x3, x13, x21\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x13, x21\n\t" + "adcs x9, x9, x4\n\t" + "# A[2] * B[3]\n\t" + "mul x3, x15, x21\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x15, x21\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + "# A[3] * B[0]\n\t" + "mul x3, x16, x17\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x9, x9, x4\n\t" + "# A[3] * B[2]\n\t" + "mul x3, x16, x20\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x16, x20\n\t" + "adcs x11, x11, x4\n\t" + "mov x3, x5\n\t" + "adc x12, x12, xzr\n\t" "# Start Reduction\n\t" - "mov x5, x9\n\t" - "mov x6, x10\n\t" + "mov x4, x6\n\t" + "mov x13, x7\n\t" "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t" "# - a[0] << 32 << 192\n\t" "# + (a[0] * 2) << 192\n\t" "# a[0]-a[2] << 32\n\t" - "lsl x10, x10, 32\n\t" - "add x7, x11, x8\n\t" - "eor x10, x10, x9, lsr #32\n\t" - "lsl x9, x9, 32\n\t" - "add x7, x7, x8\n\t" - "eor x9, x9, x8, lsr 
#32\n\t" + "lsl x15, x5, #32\n\t" + "extr x17, x7, x6, 32\n\t" + "add x14, x8, x5\n\t" + "extr x16, x6, x5, 32\n\t" + "add x14, x14, x5\n\t" "# + a[0]-a[2] << 32 << 64\n\t" "# - a[0] << 32 << 192\n\t" - "adds x5, x5, x8, lsl #32\n\t" - "sub x7, x7, x8, lsl #32\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, x10\n\t" + "adds x4, x4, x15\n\t" + "sub x14, x14, x15\n\t" + "adcs x13, x13, x16\n\t" + "adc x14, x14, x17\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += mu << 256\n\t" - "adds x12, x12, x4\n\t" - "adcs x13, x13, x5\n\t" - "adcs x14, x14, x6\n\t" - "adcs x15, x15, x7\n\t" - "cset x8, cs\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adcs x11, x11, x13\n\t" + "adcs x12, x12, x14\n\t" + "adc x5, xzr, xzr\n\t" "# a += mu << 192\n\t" "# mu <<= 32\n\t" "# a += (mu << 32) << 64\n\t" - "adds x11, x11, x4\n\t" - "adcs x12, x12, x5\n\t" - "adcs x13, x13, x6\n\t" - "lsr x16, x7, 32\n\t" - "adcs x14, x14, x7\n\t" - "lsl x7, x7, 32\n\t" - "adcs x15, x15, xzr\n\t" - "eor x7, x7, x6, lsr #32\n\t" - "adc x8, x8, xzr\n\t" - "lsl x6, x6, 32\n\t" - "eor x6, x6, x5, lsr #32\n\t" - "adds x11, x11, x6\n\t" - "lsl x5, x5, 32\n\t" - "adcs x12, x12, x7\n\t" - "eor x5, x5, x4, lsr #32\n\t" - "adcs x13, x13, x16\n\t" - "lsl x4, x4, 32\n\t" - "adcs x14, x14, xzr\n\t" - "adcs x15, x15, xzr\n\t" - "adc x8, x8, xzr\n\t" + "adds x8, x8, x3\n\t" + "extr x16, x14, x13, 32\n\t" + "adcs x9, x9, x4\n\t" + "extr x15, x13, x4, 32\n\t" + "adcs x10, x10, x13\n\t" + "extr x4, x4, x3, 32\n\t" + "adcs x11, x11, x14\n\t" + "lsl x3, x3, 32\n\t" + "adc x13, xzr, xzr\n\t" + "adds x6, x6, x3\n\t" + "lsr x17, x14, 32\n\t" + "adcs x7, x7, x4\n\t" + "adcs x8, x8, x15\n\t" + "adcs x9, x9, x16\n\t" + "adcs x10, x10, x17\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, x13\n\t" + "adc x5, x5, xzr\n\t" "# a -= (mu << 32) << 192\n\t" - "subs x11, x11, x4\n\t" - "sbcs x12, x12, x5\n\t" - "sbcs x13, x13, x6\n\t" - "sub x8, xzr, x8\n\t" - "sbcs x14, x14, x7\n\t" - "sub x8, 
x8, #1\n\t" - "sbcs x15, x15, x16\n\t" - "mov x19, 0xffffffff00000001\n\t" - "adc x8, x8, xzr\n\t" + "subs x8, x8, x3\n\t" + "sbcs x9, x9, x4\n\t" + "sbcs x10, x10, x15\n\t" + "sbcs x11, x11, x16\n\t" + "sbcs x12, x12, x17\n\t" + "sbc x5, x5, xzr\n\t" + "neg x5, x5\n\t" "# mask m and sub from result if overflow\n\t" "# m[0] = -1 & mask = mask\n\t" - "subs x12, x12, x8\n\t" + "subs x9, x9, x5\n\t" "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t" - "lsr x17, x8, 32\n\t" - "sbcs x13, x13, x17\n\t" - "and x19, x19, x8\n\t" + "lsr x16, x5, 32\n\t" + "sbcs x10, x10, x16\n\t" + "sub x17, xzr, x16\n\t" "# m[2] = 0 & mask = 0\n\t" - "sbcs x14, x14, xzr\n\t" - "stp x12, x13, [%[r], 0]\n\t" + "sbcs x11, x11, xzr\n\t" + "stp x9, x10, [%[r], 0]\n\t" "# m[3] = 0xffffffff00000001 & mask\n\t" - "sbc x15, x15, x19\n\t" - "stp x14, x15, [%[r], 16]\n\t" - : [a] "+r" (a), [b] "+r" (b) - : [r] "r" (r) - : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15" + "sbc x12, x12, x17\n\t" + "stp x11, x12, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); } @@ -20287,150 +22585,141 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, - sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, + const sp_digit* m, sp_digit mp) { (void)m; (void)mp; __asm__ __volatile__ ( - "ldp x16, x17, [%[a], 0]\n\t" - "# A[0] * A[1]\n\t" - "mul x9, x16, x17\n\t" - "ldr x19, [%[a], 16]\n\t" - "umulh x10, x16, x17\n\t" - "ldr x20, [%[a], 24]\n\t" - "# A[0] * A[2]\n\t" - "mul x4, x16, x19\n\t" - "umulh x5, x16, x19\n\t" - "adds x10, x10, x4\n\t" - "# A[0] * A[3]\n\t" - "mul x4, x16, x20\n\t" - "adc x11, xzr, x5\n\t" - "umulh x5, x16, x20\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * A[2]\n\t" - "mul x4, x17, x19\n\t" - "adc x12, xzr, x5\n\t" - "umulh x5, x17, x19\n\t" - "adds x11, x11, x4\n\t" - "# A[1] * A[3]\n\t" - "mul x4, x17, x20\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x17, x20\n\t" - "adc x13, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" - "# A[2] * A[3]\n\t" - "mul x4, x19, x20\n\t" - "adc x13, x13, x5\n\t" - "umulh x5, x19, x20\n\t" - "adds x13, x13, x4\n\t" - "adc x14, xzr, x5\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "# A[0] * A[1]\n\t" + "umulh x6, x12, x13\n\t" + "mul x5, x12, x13\n\t" + "# A[0] * A[3]\n\t" + "umulh x8, x12, x15\n\t" + "mul x7, x12, x15\n\t" + "# A[0] * A[2]\n\t" + "mul x2, x12, x14\n\t" + "adds x6, x6, x2\n\t" + "umulh x3, x12, x14\n\t" + "adcs x7, x7, x3\n\t" + "# A[1] * A[3]\n\t" + "mul x2, x13, x15\n\t" + "adcs x8, x8, x2\n\t" + "umulh x9, x13, x15\n\t" + "adc x9, x9, xzr\n\t" + "# A[1] * A[2]\n\t" + "mul x2, x13, x14\n\t" + "adds x7, x7, x2\n\t" + "umulh x3, x13, x14\n\t" + "adcs x8, x8, x3\n\t" + "# A[2] * A[3]\n\t" + "mul x2, x14, x15\n\t" + "adcs x9, x9, x2\n\t" + "umulh x10, x14, x15\n\t" + "adc x10, x10, xzr\n\t" "# Double\n\t" - "adds x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" "adcs x10, x10, x10\n\t" - "adcs x11, x11, x11\n\t" - 
"adcs x12, x12, x12\n\t" - "adcs x13, x13, x13\n\t" - "# A[0] * A[0]\n\t" - "mul x8, x16, x16\n\t" - "adcs x14, x14, x14\n\t" - "umulh x3, x16, x16\n\t" - "cset x15, cs\n\t" - "# A[1] * A[1]\n\t" - "mul x4, x17, x17\n\t" - "adds x9, x9, x3\n\t" - "umulh x5, x17, x17\n\t" - "adcs x10, x10, x4\n\t" - "# A[2] * A[2]\n\t" - "mul x6, x19, x19\n\t" - "adcs x11, x11, x5\n\t" - "umulh x7, x19, x19\n\t" - "adcs x12, x12, x6\n\t" - "# A[3] * A[3]\n\t" - "mul x16, x20, x20\n\t" - "adcs x13, x13, x7\n\t" - "umulh x17, x20, x20\n\t" - "adcs x14, x14, x16\n\t" - "mov x3, x8\n\t" - "adc x15, x15, x17\n\t" + "adc x11, xzr, xzr\n\t" + "# A[0] * A[0]\n\t" + "umulh x3, x12, x12\n\t" + "mul x4, x12, x12\n\t" + "# A[1] * A[1]\n\t" + "mul x2, x13, x13\n\t" + "adds x5, x5, x3\n\t" + "umulh x3, x13, x13\n\t" + "adcs x6, x6, x2\n\t" + "# A[2] * A[2]\n\t" + "mul x2, x14, x14\n\t" + "adcs x7, x7, x3\n\t" + "umulh x3, x14, x14\n\t" + "adcs x8, x8, x2\n\t" + "# A[3] * A[3]\n\t" + "mul x2, x15, x15\n\t" + "adcs x9, x9, x3\n\t" + "umulh x3, x15, x15\n\t" + "adcs x10, x10, x2\n\t" + "mov x2, x4\n\t" + "adc x11, x11, x3\n\t" "# Start Reduction\n\t" - "mov x4, x9\n\t" - "mov x5, x10\n\t" + "mov x3, x5\n\t" + "mov x12, x6\n\t" "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t" "# - a[0] << 32 << 192\n\t" "# + (a[0] * 2) << 192\n\t" "# a[0]-a[2] << 32\n\t" - "lsl x10, x10, 32\n\t" - "add x6, x11, x8\n\t" - "eor x10, x10, x9, lsr #32\n\t" - "lsl x9, x9, 32\n\t" - "add x6, x6, x8\n\t" - "eor x9, x9, x8, lsr #32\n\t" + "lsl x14, x4, #32\n\t" + "extr x16, x6, x5, 32\n\t" + "add x13, x7, x4\n\t" + "extr x15, x5, x4, 32\n\t" + "add x13, x13, x4\n\t" "# + a[0]-a[2] << 32 << 64\n\t" "# - a[0] << 32 << 192\n\t" - "adds x4, x4, x8, lsl #32\n\t" - "sub x6, x6, x8, lsl #32\n\t" - "adcs x5, x5, x9\n\t" - "adc x6, x6, x10\n\t" + "adds x3, x3, x14\n\t" + "sub x13, x13, x14\n\t" + "adcs x12, x12, x15\n\t" + "adc x13, x13, x16\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" 
"# a += mu << 256\n\t" - "adds x12, x12, x3\n\t" - "adcs x13, x13, x4\n\t" - "adcs x14, x14, x5\n\t" - "adcs x15, x15, x6\n\t" - "cset x8, cs\n\t" + "adds x8, x8, x2\n\t" + "adcs x9, x9, x3\n\t" + "adcs x10, x10, x12\n\t" + "adcs x11, x11, x13\n\t" + "adc x4, xzr, xzr\n\t" "# a += mu << 192\n\t" "# mu <<= 32\n\t" "# a += (mu << 32) << 64\n\t" - "adds x11, x11, x3\n\t" - "adcs x12, x12, x4\n\t" - "adcs x13, x13, x5\n\t" - "lsr x7, x6, 32\n\t" - "adcs x14, x14, x6\n\t" - "lsl x6, x6, 32\n\t" - "adcs x15, x15, xzr\n\t" - "eor x6, x6, x5, lsr #32\n\t" - "adc x8, x8, xzr\n\t" - "lsl x5, x5, 32\n\t" - "eor x5, x5, x4, lsr #32\n\t" - "adds x11, x11, x5\n\t" - "lsl x4, x4, 32\n\t" - "adcs x12, x12, x6\n\t" - "eor x4, x4, x3, lsr #32\n\t" - "adcs x13, x13, x7\n\t" - "lsl x3, x3, 32\n\t" - "adcs x14, x14, xzr\n\t" - "adcs x15, x15, xzr\n\t" - "adc x8, x8, xzr\n\t" + "adds x7, x7, x2\n\t" + "extr x15, x13, x12, 32\n\t" + "adcs x8, x8, x3\n\t" + "extr x14, x12, x3, 32\n\t" + "adcs x9, x9, x12\n\t" + "extr x3, x3, x2, 32\n\t" + "adcs x10, x10, x13\n\t" + "lsl x2, x2, 32\n\t" + "adc x12, xzr, xzr\n\t" + "adds x5, x5, x2\n\t" + "lsr x16, x13, 32\n\t" + "adcs x6, x6, x3\n\t" + "adcs x7, x7, x14\n\t" + "adcs x8, x8, x15\n\t" + "adcs x9, x9, x16\n\t" + "adcs x10, x10, xzr\n\t" + "adcs x11, x11, x12\n\t" + "adc x4, x4, xzr\n\t" "# a -= (mu << 32) << 192\n\t" - "subs x11, x11, x3\n\t" - "sbcs x12, x12, x4\n\t" - "sbcs x13, x13, x5\n\t" - "sub x8, xzr, x8\n\t" - "sbcs x14, x14, x6\n\t" - "sub x8, x8, #1\n\t" - "sbcs x15, x15, x7\n\t" - "mov x17, 0xffffffff00000001\n\t" - "adc x8, x8, xzr\n\t" + "subs x7, x7, x2\n\t" + "sbcs x8, x8, x3\n\t" + "sbcs x9, x9, x14\n\t" + "sbcs x10, x10, x15\n\t" + "sbcs x11, x11, x16\n\t" + "sbc x4, x4, xzr\n\t" + "neg x4, x4\n\t" "# mask m and sub from result if overflow\n\t" "# m[0] = -1 & mask = mask\n\t" - "subs x12, x12, x8\n\t" + "subs x8, x8, x4\n\t" "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t" - "lsr x16, x8, 32\n\t" - "sbcs 
x13, x13, x16\n\t" - "and x17, x17, x8\n\t" + "lsr x15, x4, 32\n\t" + "sbcs x9, x9, x15\n\t" + "sub x16, xzr, x15\n\t" "# m[2] = 0 & mask = 0\n\t" - "sbcs x14, x14, xzr\n\t" - "stp x12, x13, [%[r], 0]\n\t" + "sbcs x10, x10, xzr\n\t" + "stp x8, x9, [%[r], 0]\n\t" "# m[3] = 0xffffffff00000001 & mask\n\t" - "sbc x15, x15, x17\n\t" - "stp x14, x15, [%[r], 16]\n\t" + "sbc x11, x11, x16\n\t" + "stp x10, x11, [%[r], 16]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "cc" ); } @@ -20441,10 +22730,10 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_4(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_4(r, a, m, mp); for (; n > 1; n--) { @@ -20452,7 +22741,7 @@ static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P256 curve. 
*/ static const uint64_t p256_mod_minus_2[4] = { @@ -20541,63 +22830,67 @@ static sp_int64 sp_256_cmp_4(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 24\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #4\n\t" + "add %[a], %[a], #16\n\t" + "add %[b], %[b], #16\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[a], 16]\n\t" - "ldp x9, x10, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "and x8, x8, x4\n\t" - "and x12, x12, x4\n\t" - "subs x8, x8, x12\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x7, x7, x4\n\t" - "and x11, x11, x4\n\t" - "subs x7, x7, x11\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x6, x6, x4\n\t" - "and x10, x10, x4\n\t" - "subs x6, x6, x10\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and 
x9, x9, x4\n\t" - "subs x5, x5, x9\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -20640,14 +22933,12 @@ static sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; } -#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4 - /* Reduce the number back to 256 bits using Montgomery reduction. * * a A single precision number to reduce in place. 
@@ -20658,152 +22949,83 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m, sp_digit mp) { __asm__ __volatile__ ( - "ldp x9, x10, [%[a], 0]\n\t" - "ldp x11, x12, [%[a], 16]\n\t" - "ldp x17, x19, [%[m], 0]\n\t" - "ldp x20, x21, [%[m], 16]\n\t" - "mov x8, xzr\n\t" - "# mu = a[0] * mp\n\t" - "mul x5, %[mp], x9\n\t" - "ldr x13, [%[a], 32]\n\t" - "# a[0+0] += m[0] * mu\n\t" - "mul x3, x17, x5\n\t" - "ldr x14, [%[a], 40]\n\t" - "umulh x6, x17, x5\n\t" - "ldr x15, [%[a], 48]\n\t" - "adds x9, x9, x3\n\t" - "ldr x16, [%[a], 56]\n\t" - "adc x6, x6, xzr\n\t" - "# a[0+1] += m[1] * mu\n\t" - "mul x3, x19, x5\n\t" - "umulh x7, x19, x5\n\t" - "adds x3, x3, x6\n\t" - "adc x7, x7, xzr\n\t" - "adds x10, x10, x3\n\t" - "adc x7, x7, xzr\n\t" - "# a[0+2] += m[2] * mu\n\t" - "mul x3, x20, x5\n\t" - "umulh x6, x20, x5\n\t" - "adds x3, x3, x7\n\t" - "adc x6, x6, xzr\n\t" - "adds x11, x11, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[0+3] += m[3] * mu\n\t" - "mul x3, x21, x5\n\t" - "umulh x4, x21, x5\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x8\n\t" - "cset x8, cs\n\t" - "adds x12, x12, x3\n\t" - "adcs x13, x13, x4\n\t" - "adc x8, x8, xzr\n\t" - "# mu = a[1] * mp\n\t" - "mul x5, %[mp], x10\n\t" - "# a[1+0] += m[0] * mu\n\t" - "mul x3, x17, x5\n\t" - "umulh x6, x17, x5\n\t" - "adds x10, x10, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[1+1] += m[1] * mu\n\t" - "mul x3, x19, x5\n\t" - "umulh x7, x19, x5\n\t" - "adds x3, x3, x6\n\t" - "adc x7, x7, xzr\n\t" - "adds x11, x11, x3\n\t" - "adc x7, x7, xzr\n\t" - "# a[1+2] += m[2] * mu\n\t" - "mul x3, x20, x5\n\t" - "umulh x6, x20, x5\n\t" - "adds x3, x3, x7\n\t" - "adc x6, x6, xzr\n\t" - "adds x12, x12, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[1+3] += m[3] * mu\n\t" - "mul x3, x21, x5\n\t" - "umulh x4, x21, x5\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x8\n\t" - "cset x8, cs\n\t" - "adds x13, x13, x3\n\t" - "adcs x14, x14, x4\n\t" - "adc x8, x8, xzr\n\t" - "# mu = a[2] * mp\n\t" - "mul x5, %[mp], x11\n\t" - "# a[2+0] += m[0] * 
mu\n\t" - "mul x3, x17, x5\n\t" - "umulh x6, x17, x5\n\t" - "adds x11, x11, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[2+1] += m[1] * mu\n\t" - "mul x3, x19, x5\n\t" - "umulh x7, x19, x5\n\t" - "adds x3, x3, x6\n\t" - "adc x7, x7, xzr\n\t" - "adds x12, x12, x3\n\t" - "adc x7, x7, xzr\n\t" - "# a[2+2] += m[2] * mu\n\t" - "mul x3, x20, x5\n\t" - "umulh x6, x20, x5\n\t" - "adds x3, x3, x7\n\t" - "adc x6, x6, xzr\n\t" - "adds x13, x13, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[2+3] += m[3] * mu\n\t" - "mul x3, x21, x5\n\t" - "umulh x4, x21, x5\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x8\n\t" - "cset x8, cs\n\t" + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "mov x3, x10\n\t" + "# Start Reduction\n\t" + "mov x4, x11\n\t" + "mov x5, x12\n\t" + "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t" + "# - a[0] << 32 << 192\n\t" + "# + (a[0] * 2) << 192\n\t" + "# a[0]-a[2] << 32\n\t" + "lsl x7, x10, #32\n\t" + "extr x9, x12, x11, 32\n\t" + "add x6, x13, x10\n\t" + "extr x8, x11, x10, 32\n\t" + "add x6, x6, x10\n\t" + "# + a[0]-a[2] << 32 << 64\n\t" + "# - a[0] << 32 << 192\n\t" + "adds x4, x4, x7\n\t" + "sub x6, x6, x7\n\t" + "adcs x5, x5, x8\n\t" + "adc x6, x6, x9\n\t" + "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" + "# a += mu << 256\n\t" "adds x14, x14, x3\n\t" "adcs x15, x15, x4\n\t" - "adc x8, x8, xzr\n\t" - "# mu = a[3] * mp\n\t" - "mul x5, %[mp], x12\n\t" - "# a[3+0] += m[0] * mu\n\t" - "mul x3, x17, x5\n\t" - "umulh x6, x17, x5\n\t" - "adds x12, x12, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[3+1] += m[1] * mu\n\t" - "mul x3, x19, x5\n\t" - "umulh x7, x19, x5\n\t" - "adds x3, x3, x6\n\t" - "adc x7, x7, xzr\n\t" + "adcs x16, x16, x5\n\t" + "adcs x17, x17, x6\n\t" + "adc x10, xzr, xzr\n\t" + "# a += mu << 192\n\t" + "# mu <<= 32\n\t" + "# a += (mu << 32) << 64\n\t" "adds x13, x13, x3\n\t" - "adc x7, x7, xzr\n\t" - "# a[3+2] += m[2] * mu\n\t" - "mul x3, 
x20, x5\n\t" - "umulh x6, x20, x5\n\t" - "adds x3, x3, x7\n\t" - "adc x6, x6, xzr\n\t" - "adds x14, x14, x3\n\t" - "adc x6, x6, xzr\n\t" - "# a[3+3] += m[3] * mu\n\t" - "mul x3, x21, x5\n\t" - "umulh x4, x21, x5\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x8\n\t" - "cset x8, cs\n\t" - "adds x15, x15, x3\n\t" - "adcs x16, x16, x4\n\t" - "adc x8, x8, xzr\n\t" - "sub x3, xzr, x8\n\t" - "and x17, x17, x3\n\t" - "and x19, x19, x3\n\t" - "and x20, x20, x3\n\t" - "and x21, x21, x3\n\t" - "subs x13, x13, x17\n\t" - "sbcs x14, x14, x19\n\t" - "sbcs x15, x15, x20\n\t" - "stp x13, x14, [%[a], 0]\n\t" - "sbc x16, x16, x21\n\t" - "stp x15, x16, [%[a], 16]\n\t" + "extr x8, x6, x5, 32\n\t" + "adcs x14, x14, x4\n\t" + "extr x7, x5, x4, 32\n\t" + "adcs x15, x15, x5\n\t" + "extr x4, x4, x3, 32\n\t" + "adcs x16, x16, x6\n\t" + "lsl x3, x3, 32\n\t" + "adc x5, xzr, xzr\n\t" + "adds x11, x11, x3\n\t" + "lsr x9, x6, 32\n\t" + "adcs x12, x12, x4\n\t" + "adcs x13, x13, x7\n\t" + "adcs x14, x14, x8\n\t" + "adcs x15, x15, x9\n\t" + "adcs x16, x16, xzr\n\t" + "adcs x17, x17, x5\n\t" + "adc x10, x10, xzr\n\t" + "# a -= (mu << 32) << 192\n\t" + "subs x13, x13, x3\n\t" + "sbcs x14, x14, x4\n\t" + "sbcs x15, x15, x7\n\t" + "sbcs x16, x16, x8\n\t" + "sbcs x17, x17, x9\n\t" + "sbc x10, x10, xzr\n\t" + "neg x10, x10\n\t" + "# mask m and sub from result if overflow\n\t" + "# m[0] = -1 & mask = mask\n\t" + "subs x14, x14, x10\n\t" + "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t" + "lsr x8, x10, 32\n\t" + "sbcs x15, x15, x8\n\t" + "sub x9, xzr, x8\n\t" + "# m[2] = 0 & mask = 0\n\t" + "sbcs x16, x16, xzr\n\t" + "stp x14, x15, [%[a], 0]\n\t" + "# m[3] = 0xffffffff00000001 & mask\n\t" + "sbc x17, x17, x9\n\t" + "stp x16, x17, [%[a], 16]\n\t" : : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) - : "memory", "x3", "x4", "x5", "x8", "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21" + : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", 
"x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x19", "x20", "cc" ); } - /* Map the Montgomery form projective coordinate point to an affine point. * * r Resulting affine coordinate point. @@ -20824,62 +23046,24 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); - sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->x, r->x, p256_mod, ~(n >> 63)); sp_256_norm_4(r->x); /* y /= z^3 */ sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); - sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->y, r->y, p256_mod, ~(n >> 63)); sp_256_norm_4(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - -} - -/* Add two Montgomery form numbers (r = a + b % m). - * - * r Result of addition. - * a First number to add in Montgomery form. - * b Second number to add in Montgomery form. - * m Modulus (prime). 
- */ -static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) -{ - __asm__ __volatile__ ( - "ldp x4, x5, [%[a], 0]\n\t" - "ldp x8, x9, [%[b], 0]\n\t" - "adds x4, x4, x8\n\t" - "ldp x6, x7, [%[a], 16]\n\t" - "adcs x5, x5, x9\n\t" - "ldp x10, x11, [%[b], 16]\n\t" - "adcs x6, x6, x10\n\t" - "adcs x7, x7, x11\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cs\n\t" - "subs x4, x4, x14\n\t" - "lsr x12, x14, 32\n\t" - "sbcs x5, x5, x12\n\t" - "and x13, x13, x14\n\t" - "sbcs x6, x6, xzr\n\t" - "stp x4, x5, [%[r],0]\n\t" - "sbc x7, x7, x13\n\t" - "stp x6, x7, [%[r],16]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" - ); } /* Double a Montgomery form number (r = a + a % m). @@ -20888,28 +23072,36 @@ static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x3, x4, [%[a]]\n\t" "ldp x5, x6, [%[a],16]\n\t" - "adds x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "adcs x6, x6, x6\n\t" - "mov x8, 0xffffffff00000001\n\t" - "csetm x9, cs\n\t" - "subs x3, x3, x9\n\t" - "lsr x7, x9, 32\n\t" - "sbcs x4, x4, x7\n\t" - "and x8, x8, x9\n\t" - "sbcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r],0]\n\t" - "sbc x6, x6, x8\n\t" - "stp x5, x6, [%[r],16]\n\t" + "lsl x9, x3, #1\n\t" + "extr x10, x4, x3, #63\n\t" + "extr x11, x5, x4, #63\n\t" + "asr x13, x6, #63\n\t" + "extr x12, x6, x5, #63\n\t" + "subs x9, x9, x13\n\t" + "lsr x7, x13, 32\n\t" + "sbcs x10, x10, x7\n\t" + "sub x8, xzr, x7\n\t" + "sbcs x11, x11, xzr\n\t" + "sbcs x12, x12, x8\n\t" + "sbc x8, xzr, xzr\n\t" + "sub x13, x13, x8\n\t" + "subs x9, x9, x13\n\t" + "lsr x7, x13, 32\n\t" + 
"sbcs x10, x10, x7\n\t" + "sub x8, xzr, x7\n\t" + "sbcs x11, x11, xzr\n\t" + "stp x9, x10, [%[r],0]\n\t" + "sbc x12, x12, x8\n\t" + "stp x11, x12, [%[r],16]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x13", "x9", "x10", "x11", "x12", "cc" ); (void)m; @@ -20921,41 +23113,50 @@ static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( - "ldp x10, x11, [%[a]]\n\t" - "adds x3, x10, x10\n\t" - "ldr x12, [%[a], 16]\n\t" - "adcs x4, x11, x11\n\t" - "ldr x13, [%[a], 24]\n\t" - "adcs x5, x12, x12\n\t" - "adcs x6, x13, x13\n\t" - "mov x8, 0xffffffff00000001\n\t" - "csetm x9, cs\n\t" - "subs x3, x3, x9\n\t" - "lsr x7, x9, 32\n\t" + "ldp x9, x10, [%[a]]\n\t" + "ldp x11, x12, [%[a], 16]\n\t" + "lsl x3, x9, #1\n\t" + "extr x4, x10, x9, #63\n\t" + "extr x5, x11, x10, #63\n\t" + "asr x13, x12, #63\n\t" + "extr x6, x12, x11, #63\n\t" + "subs x3, x3, x13\n\t" + "lsr x7, x13, 32\n\t" "sbcs x4, x4, x7\n\t" - "and x8, x8, x9\n\t" + "sub x8, xzr, x7\n\t" "sbcs x5, x5, xzr\n\t" - "sbc x6, x6, x8\n\t" - "adds x3, x3, x10\n\t" - "adcs x4, x4, x11\n\t" - "adcs x5, x5, x12\n\t" - "adcs x6, x6, x13\n\t" - "mov x8, 0xffffffff00000001\n\t" - "csetm x9, cs\n\t" - "subs x3, x3, x9\n\t" - "lsr x7, x9, 32\n\t" + "sbcs x6, x6, x8\n\t" + "neg x13, x13\n\t" + "sbc x13, x13, xzr\n\t" + "adds x3, x3, x9\n\t" + "adcs x4, x4, x10\n\t" + "adcs x5, x5, x11\n\t" + "adcs x6, x6, x12\n\t" + "adc x13, x13, xzr\n\t" + "neg x13, x13\n\t" + "subs x3, x3, x13, asr #1\n\t" + "lsr x7, x13, 32\n\t" "sbcs x4, x4, x7\n\t" - "and x8, x8, x9\n\t" + "sub x8, xzr, x7\n\t" "sbcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" + "sbcs x6, x6, 
x8\n\t" + "sbc x8, xzr, xzr\n\t" + "sub x13, x13, x8\n\t" + "subs x3, x3, x13\n\t" + "lsr x7, x13, 32\n\t" + "sbcs x4, x4, x7\n\t" + "sub x8, xzr, x7\n\t" + "sbcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r],0]\n\t" "sbc x6, x6, x8\n\t" - "stp x5, x6, [%[r], 16]\n\t" + "stp x5, x6, [%[r],16]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x10", "x11", "x12", "x13", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x9", "x10", "x11", "x12", "x3", "x4", "x5", "x6", "x7", "x8", "x13", "cc" ); (void)m; @@ -20968,32 +23169,40 @@ static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" - "ldp x8, x9, [%[b], 0]\n\t" - "subs x4, x4, x8\n\t" "ldp x6, x7, [%[a], 16]\n\t" - "sbcs x5, x5, x9\n\t" + "ldp x8, x9, [%[b], 0]\n\t" "ldp x10, x11, [%[b], 16]\n\t" + "subs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" "sbcs x6, x6, x10\n\t" "sbcs x7, x7, x11\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cc\n\t" + "sbc x14, xzr, xzr\n\t" "adds x4, x4, x14\n\t" "lsr x12, x14, 32\n\t" "adcs x5, x5, x12\n\t" - "and x13, x13, x14\n\t" + "sub x13, xzr, x12\n\t" + "adcs x6, x6, xzr\n\t" + "adcs x7, x7, x13\n\t" + "adc x14, x14, xzr\n\t" + "adds x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x5, x5, x12\n\t" + "sub x13, xzr, x12\n\t" "adcs x6, x6, xzr\n\t" "stp x4, x5, [%[r],0]\n\t" "adc x7, x7, x13\n\t" "stp x6, x7, [%[r],16]\n\t" : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "cc" ); + + (void)m; } /* Divide 
the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -21002,35 +23211,171 @@ static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "and x9, x3, 1\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "sub x10, xzr, x9\n\t" - "lsr x7, x10, 32\n\t" - "adds x3, x3, x10\n\t" - "and x8, x10, 0xffffffff00000001\n\t" - "adcs x4, x4, x7\n\t" - "lsr x3, x3, 1\n\t" - "adcs x5, x5, xzr\n\t" - "lsr x7, x4, 1\n\t" - "adcs x6, x6, x8\n\t" - "lsr x8, x5, 1\n\t" - "cset x9, cs\n\t" - "lsr x10, x6, 1\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "orr x4, x7, x5, lsl 63\n\t" - "orr x5, x8, x6, lsl 63\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "orr x6, x10, x9, lsl 63\n\t" - "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbfx x8, x3, 0, 1\n\t" + "adds x3, x3, x8\n\t" + "lsr x7, x8, 32\n\t" + "adcs x4, x4, x7\n\t" + "sub x8, xzr, x7\n\t" + "adcs x5, x5, xzr\n\t" + "extr x3, x4, x3, 1\n\t" + "adcs x6, x6, x8\n\t" + "extr x4, x5, x4, 1\n\t" + "adc x9, xzr, xzr\n\t" + "extr x5, x6, x5, 1\n\t" + "extr x6, x9, x6, 1\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x9", "x8", "cc" + ); +} + +/* Double number and subtract (r = (a - 2.b) % m). + * + * r Result of subtration. + * a Number to subtract from in Montgomery form. + * b Number to subtract with in Montgomery form. + * m Modulus (prime). 
+ */ +SP_NOINLINE static void sp_256_mont_rsb_sub_dbl_4(sp_digit* r, + const sp_digit* a, sp_digit* b, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x8, x9, [%[b]]\n\t" + "ldp x10, x11, [%[b],16]\n\t" + "lsl x15, x8, #1\n\t" + "extr x16, x9, x8, #63\n\t" + "extr x17, x10, x9, #63\n\t" + "asr x14, x11, #63\n\t" + "extr x19, x11, x10, #63\n\t" + "ldp x4, x5, [%[a]]\n\t" + "ldp x6, x7, [%[a],16]\n\t" + "subs x15, x15, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x16, x16, x12\n\t" + "sub x13, xzr, x12\n\t" + "sbcs x17, x17, xzr\n\t" + "sbcs x19, x19, x13\n\t" + "neg x14, x14\n\t" + "sbc x14, x14, xzr\n\t" + "subs x15, x4, x15\n\t" + "sbcs x16, x5, x16\n\t" + "sbcs x17, x6, x17\n\t" + "sbcs x19, x7, x19\n\t" + "sbc x14, xzr, x14\n\t" + "adds x15, x15, x14, asr #1\n\t" + "lsr x12, x14, 32\n\t" + "adcs x16, x16, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x17, x17, xzr\n\t" + "adcs x19, x19, x13\n\t" + "adc x14, x14, xzr\n\t" + "adds x15, x15, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x16, x16, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x17, x17, xzr\n\t" + "stp x15, x16, [%[r],0]\n\t" + "adc x19, x19, x13\n\t" + "stp x17, x19, [%[r],16]\n\t" + "subs x15, x8, x15\n\t" + "sbcs x16, x9, x16\n\t" + "sbcs x17, x10, x17\n\t" + "sbcs x19, x11, x19\n\t" + "sbc x14, xzr, xzr\n\t" + "adds x15, x15, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x16, x16, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x17, x17, xzr\n\t" + "adcs x19, x19, x13\n\t" + "adc x14, x14, xzr\n\t" + "adds x15, x15, x14\n\t" + "lsr x12, x14, 32\n\t" + "adcs x16, x16, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x17, x17, xzr\n\t" + "stp x15, x16, [%[b],0]\n\t" + "adc x19, x19, x13\n\t" + "stp x17, x19, [%[b],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "cc" ); + (void)m; +} + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * ra Result of addition. + * rs Result of subtration. 
+ * a Number to subtract from in Montgomery form. + * b Number to subtract with in Montgomery form. + * m Modulus (prime). + */ +SP_NOINLINE static void sp_256_mont_add_sub_4(sp_digit* ra, + sp_digit* rs, const sp_digit* a, const sp_digit* b, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x4, x5, [%[a], 0]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "adds x14, x4, x8\n\t" + "adcs x15, x5, x9\n\t" + "adcs x16, x6, x10\n\t" + "adcs x17, x7, x11\n\t" + "csetm x19, cs\n\t" + "subs x14, x14, x19\n\t" + "lsr x12, x19, 32\n\t" + "sbcs x15, x15, x12\n\t" + "sub x13, xzr, x12\n\t" + "sbcs x16, x16, xzr\n\t" + "sbcs x17, x17, x13\n\t" + "sbc x13, xzr, xzr\n\t" + "sub x19, x19, x13\n\t" + "subs x14, x14, x19\n\t" + "lsr x12, x19, 32\n\t" + "sbcs x15, x15, x12\n\t" + "sub x13, xzr, x12\n\t" + "sbcs x16, x16, xzr\n\t" + "stp x14, x15, [%[ra],0]\n\t" + "sbc x17, x17, x13\n\t" + "stp x16, x17, [%[ra],16]\n\t" + "subs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "sbcs x6, x6, x10\n\t" + "sbcs x7, x7, x11\n\t" + "sbc x19, xzr, xzr\n\t" + "adds x4, x4, x19\n\t" + "lsr x12, x19, 32\n\t" + "adcs x5, x5, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x6, x6, xzr\n\t" + "adcs x7, x7, x13\n\t" + "adc x19, x19, xzr\n\t" + "adds x4, x4, x19\n\t" + "lsr x12, x19, 32\n\t" + "adcs x5, x5, x12\n\t" + "sub x13, xzr, x12\n\t" + "adcs x6, x6, xzr\n\t" + "stp x4, x5, [%[rs],0]\n\t" + "adc x7, x7, x13\n\t" + "stp x6, x7, [%[rs],16]\n\t" + : + : [ra] "r" (ra), [rs] "r" (rs), [a] "r" (a), [b] "r" (b) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x19", "x14", "x15", "x16", "x17", "cc" + ); + + (void)m; } /* Double the Montgomery form projective point p. @@ -21039,6 +23384,56 @@ static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. 
*/ +static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T1/T2 = X +/- T1 */ + sp_256_mont_add_sub_4(t1, t2, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_mont_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - 2*Y */ + /* Y = Y - X */ + sp_256_mont_rsb_sub_dbl_4(x, x, y, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_4_ctx { int state; @@ -21049,7 +23444,14 @@ typedef struct sp_256_proj_point_dbl_4_ctx { sp_digit* z; } sp_256_proj_point_dbl_4_ctx; -static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_4_ctx* ctx = (sp_256_proj_point_dbl_4_ctx*)sp_ctx->data; @@ -21087,13 +23489,11 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con ctx->state = 4; break; case 4: - /* T2 = X - T1 */ - sp_256_mont_sub_4(ctx->t2, p->x, ctx->t1, p256_mod); + /* T1/T2 = X +/- T1 */ + sp_256_mont_add_sub_4(ctx->t1, ctx->t2, p->x, ctx->t1, p256_mod); ctx->state = 5; break; case 5: - /* T1 = X + T1 */ - sp_256_mont_add_4(ctx->t1, p->x, ctx->t1, p256_mod); ctx->state = 6; break; case 6: @@ -21123,7 +23523,7 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_4(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_4(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -21137,18 +23537,15 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con ctx->state = 14; break; case 14: - /* X = X - Y */ - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->y, p256_mod); + /* X = X - 2*Y */ + /* Y = Y - X */ + sp_256_mont_rsb_sub_dbl_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 15; break; case 15: - /* X = X - Y */ - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 16; break; case 16: - /* Y = Y - X */ - sp_256_mont_sub_4(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 17; break; case 17: @@ -21173,155 +23570,6 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. 
*/ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_4(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_4(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_4(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_4(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_4(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_4(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_4(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_4(y, y, t2, p256_mod); -} - -/* Subtract two Montgomery form numbers (r = a - b % m). - * - * r Result of subtration. - * a Number to subtract from in Montgomery form. - * b Number to subtract with in Montgomery form. - * m Modulus (prime). 
- */ -static void sp_256_mont_sub_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) -{ - __asm__ __volatile__ ( - "ldp x8, x9, [%[b]]\n\t" - "ldp x10, x11, [%[b],16]\n\t" - "adds x8, x8, x8\n\t" - "ldp x4, x5, [%[a]]\n\t" - "adcs x9, x9, x9\n\t" - "ldp x6, x7, [%[a],16]\n\t" - "adcs x10, x10, x10\n\t" - "adcs x11, x11, x11\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cs\n\t" - "subs x8, x8, x14\n\t" - "lsr x12, x14, 32\n\t" - "sbcs x9, x9, x12\n\t" - "and x13, x13, x14\n\t" - "sbcs x10, x10, xzr\n\t" - "sbc x11, x11, x13\n\t" - "subs x4, x4, x8\n\t" - "sbcs x5, x5, x9\n\t" - "sbcs x6, x6, x10\n\t" - "sbcs x7, x7, x11\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cc\n\t" - "adds x4, x4, x14\n\t" - "lsr x12, x14, 32\n\t" - "adcs x5, x5, x12\n\t" - "and x13, x13, x14\n\t" - "adcs x6, x6, xzr\n\t" - "stp x4, x5, [%[r],0]\n\t" - "adc x7, x7, x13\n\t" - "stp x6, x7, [%[r],16]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" - ); -} - -/* Subtract two Montgomery form numbers (r = a - b % m). - * - * r Result of subtration. - * a Number to subtract from in Montgomery form. - * b Number to subtract with in Montgomery form. - * m Modulus (prime). 
- */ -static void sp_256_mont_dbl_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) -{ - __asm__ __volatile__ ( - "ldp x4, x5, [%[a]]\n\t" - "ldp x6, x7, [%[a],16]\n\t" - "adds x4, x4, x4\n\t" - "ldp x8, x9, [%[b]]\n\t" - "adcs x5, x5, x5\n\t" - "ldp x10, x11, [%[b],16]\n\t" - "adcs x6, x6, x6\n\t" - "adcs x7, x7, x7\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cs\n\t" - "subs x4, x4, x14\n\t" - "lsr x12, x14, 32\n\t" - "sbcs x5, x5, x12\n\t" - "and x13, x13, x14\n\t" - "sbcs x6, x6, xzr\n\t" - "sbc x7, x7, x13\n\t" - "subs x4, x4, x8\n\t" - "sbcs x5, x5, x9\n\t" - "sbcs x6, x6, x10\n\t" - "sbcs x7, x7, x11\n\t" - "mov x13, 0xffffffff00000001\n\t" - "csetm x14, cc\n\t" - "adds x4, x4, x14\n\t" - "lsr x12, x14, 32\n\t" - "adcs x5, x5, x12\n\t" - "and x13, x13, x14\n\t" - "adcs x6, x6, xzr\n\t" - "stp x4, x5, [%[r],0]\n\t" - "adc x7, x7, x13\n\t" - "stp x6, x7, [%[r],16]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" - ); -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -21329,7 +23577,7 @@ static void sp_256_mont_dbl_sub_4(sp_digit* r, const sp_digit* a, const sp_digit * n Number of times to double * t Temporary ordinate data. 
*/ -static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, +static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, sp_digit* t) { sp_digit* w = t; @@ -21339,6 +23587,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -21349,7 +23598,6 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, /* W = Z^4 */ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -21365,10 +23613,12 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod); /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, b, p256_mod); + sp_256_mont_rsb_sub_dbl_4(x, x, b, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_dbl_4(b, b, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -21378,9 +23628,8 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_4(y, b, x, p256_mod); - sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_sub_4(y, y, t1, p256_mod); + sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); } #ifndef WOLFSSL_SP_SMALL /* A = 3*(X^2 - W) */ @@ -21392,18 +23641,19 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod); /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, b, p256_mod); + sp_256_mont_rsb_sub_dbl_4(x, x, b, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_dbl_4(b, b, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); - /* t2 = 
Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_4(y, b, x, p256_mod); - sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_sub_4(y, y, t1, p256_mod); -#endif + sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t1, p256_mod); +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_4(y, y, p256_mod); + sp_256_mont_div2_4(y, y, p256_mod); } /* Compare two numbers to determine if they are equal. @@ -21419,6 +23669,18 @@ static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_4(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -21426,6 +23688,125 @@ static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. 
*/ +static void sp_256_proj_point_add_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*4; + sp_digit* t2 = t + 4*4; + sp_digit* t3 = t + 6*4; + sp_digit* t4 = t + 8*4; + sp_digit* t5 = t + 10*4; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(t2, t1) & + sp_256_cmp_equal_4(t4, t3)) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_rsb_sub_dbl_4(x, x, y, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); +{ + __asm__ __volatile__ ( + "ldrsw x10, [%[p], #192]\n\t" + "ldrsw x11, [%[q], #192]\n\t" + "ldp x12, x13, [%[x], #0]\n\t" + "ldp x14, x15, [%[x], #16]\n\t" + 
"ldp x16, x17, [%[y], #0]\n\t" + "ldp x19, x20, [%[y], #16]\n\t" + "ldp x21, x22, [%[z], #0]\n\t" + "ldp x23, x24, [%[z], #16]\n\t" + "bics xzr, x11, x10\n\t" + "ldp x25, x26, [%[p], #0]\n\t" + "ldp x27, x28, [%[p], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[p], #64]\n\t" + "ldp x27, x28, [%[p], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[p], #128]\n\t" + "ldp x27, x28, [%[p], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "bics xzr, x10, x11\n\t" + "and x10, x10, x11\n\t" + "ldp x25, x26, [%[q], #0]\n\t" + "ldp x27, x28, [%[q], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[q], #64]\n\t" + "ldp x27, x28, [%[q], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[q], #128]\n\t" + "ldp x27, x28, [%[q], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "orr x21, x21, x10\n\t" + "stp x12, x13, [%[r], #0]\n\t" + "stp x14, x15, [%[r], #16]\n\t" + "stp x16, x17, [%[r], #64]\n\t" + "stp x19, x20, [%[r], #80]\n\t" + "stp x21, x22, [%[r], #128]\n\t" + "stp x23, x24, [%[r], #144]\n\t" + "str w10, [%[r], #192]\n\t" + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (x), + [y] "r" (y), [z] "r" (z) + : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" + ); +} + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_4_ctx { @@ -21438,11 +23819,19 @@ typedef struct 
sp_256_proj_point_add_4_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_256_proj_point_add_4_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -21461,261 +23850,209 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*4; - ctx->t3 = t + 4*4; - ctx->t4 = t + 6*4; - ctx->t5 = t + 8*4; + ctx->t6 = t; + ctx->t1 = t + 2*4; + ctx->t2 = t + 4*4; + ctx->t3 = t + 6*4; + ctx->t4 = t + 8*4; + ctx->t5 = t + 10*4; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_4(ctx->t1, p256_mod, q->y); - sp_256_norm_4(ctx->t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_256_proj_point_dbl_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<4; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<4; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<4; i++) { - r->z[i] = 
ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_256_mont_sqr_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; + break; + case 2: + sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; + break; + case 3: + sp_256_mont_mul_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_4(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_4(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(ctx->t2, ctx->t1) & + sp_256_cmp_equal_4(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_4(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + 
sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_4(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_rsb_sub_dbl_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_dbl_4(ctx->t1, ctx->y, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t1, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - 
S1*H^3 */ - sp_256_mont_sub_4(ctx->y, ctx->y, ctx->x, p256_mod); + sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); + { +{ + __asm__ __volatile__ ( + "ldrsw x10, [%[p], #192]\n\t" + "ldrsw x11, [%[q], #192]\n\t" + "ldp x12, x13, [%[x], #0]\n\t" + "ldp x14, x15, [%[x], #16]\n\t" + "ldp x16, x17, [%[y], #0]\n\t" + "ldp x19, x20, [%[y], #16]\n\t" + "ldp x21, x22, [%[z], #0]\n\t" + "ldp x23, x24, [%[z], #16]\n\t" + "bics xzr, x11, x10\n\t" + "ldp x25, x26, [%[p], #0]\n\t" + "ldp x27, x28, [%[p], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[p], #64]\n\t" + "ldp x27, x28, [%[p], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[p], #128]\n\t" + "ldp x27, x28, [%[p], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "bics xzr, x10, x11\n\t" + "and x10, x10, x11\n\t" + "ldp x25, x26, [%[q], #0]\n\t" + "ldp x27, x28, [%[q], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[q], #64]\n\t" + "ldp x27, x28, [%[q], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[q], #128]\n\t" + "ldp x27, x28, [%[q], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "orr x21, x21, x10\n\t" + "stp x12, x13, [%[r], #0]\n\t" + "stp x14, x15, [%[r], #16]\n\t" + "stp x16, x17, [%[r], #64]\n\t" + "stp x19, x20, [%[r], #80]\n\t" + "stp x21, x22, [%[r], #128]\n\t" + "stp x23, x24, [%[r], #144]\n\t" + "str w10, 
[%[r], #192]\n\t" + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (ctx->x), + [y] "r" (ctx->y), [z] "r" (ctx->z) + : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" + ); +} ctx->state = 25; break; + } case 25: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_4(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* t3 = t + 4*4; - sp_digit* t4 = t + 6*4; - sp_digit* t5 = t + 8*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_256* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { - sp_256_proj_point_dbl_4(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<4; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<4; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<4; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_4(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_4(t4, t4, t3, p256_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); - sp_256_mont_dbl_4(t1, y, 
p256_mod); - sp_256_mont_sub_4(x, x, t1, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -21730,7 +24067,6 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, sp_digit* a = t + 2*4; sp_digit* b = t + 4*4; sp_digit* t1 = t + 6*4; - sp_digit* t2 = t + 8*4; sp_digit* x = r[2*m].x; sp_digit* y = r[(1<x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, xa, p256_mod, p256_mp_mod); /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(t2, za, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, za, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, t3, ya, p256_mod, p256_mp_mod); /* S2 = Y2*Z1^3 */ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - U1 */ sp_256_mont_sub_4(t2, t2, t1, p256_mod); - /* RS = S2 + S1 */ - sp_256_mont_add_4(t6, t4, t3, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_4(t4, t4, t3, p256_mod); + 
/* RS/R = S2 +/ S1 */ + sp_256_mont_add_sub_4(t6, t4, t4, t3, p256_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_256_mont_mul_4(za, za, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(za, za, t2, p256_mod, p256_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(xa, t4, p256_mod, p256_mp_mod); sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod); sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ya, t1, t5, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_sub_4(xa, xa, t5, p256_mod); sp_256_mont_sub_4(xs, xs, t5, p256_mod); - sp_256_mont_dbl_4(t1, y, p256_mod); - sp_256_mont_sub_4(x, x, t1, p256_mod); + sp_256_mont_dbl_4(t1, ya, p256_mod); + sp_256_mont_sub_4(xa, xa, t1, p256_mod); sp_256_mont_sub_4(xs, xs, t1, p256_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_256_mont_sub_4(ys, y, xs, p256_mod); - sp_256_mont_sub_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(ys, ya, xs, p256_mod); + sp_256_mont_sub_4(ya, ya, xa, p256_mod); + sp_256_mont_mul_4(ya, ya, t4, p256_mod, p256_mp_mod); sp_256_sub_4(t6, p256_mod, t6); sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); + sp_256_mont_sub_4(ya, ya, t5, p256_mod); sp_256_mont_sub_4(ys, ys, t5, p256_mod); } @@ -21943,42 +24275,69 @@ static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v) /* Touch each possible point that could be being copied. * * r Point to copy into. 
- * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_256_get_point_33_4(sp_point_256* r, const sp_point_256* table, - int idx) +SP_NOINLINE static void sp_256_get_point_33_4(sp_point_256* r, + const sp_point_256* table, int idx) { - int i; - sp_digit mask; - - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - r->z[0] = 0; - r->z[1] = 0; - r->z[2] = 0; - r->z[3] = 0; - for (i = 1; i < 33; i++) { - mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - r->z[0] |= mask & table[i].z[0]; - r->z[1] |= mask & table[i].z[1]; - r->z[2] |= mask & table[i].z[2]; - r->z[3] |= mask & table[i].z[3]; - } + __asm__ __volatile__ ( + "mov w30, #1\n\t" + "add %[table], %[table], #200\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x15, x16, [%[table], #0]\n\t" + "ldp x17, x19, [%[table], #16]\n\t" + "csel x3, xzr, x15, ne\n\t" + "csel x4, xzr, x16, ne\n\t" + "csel x5, xzr, x17, ne\n\t" + "csel x6, xzr, x19, ne\n\t" + "ldp x15, x16, [%[table], #64]\n\t" + "ldp x17, x19, [%[table], #80]\n\t" + "csel x7, xzr, x15, ne\n\t" + "csel x8, xzr, x16, ne\n\t" + "csel x9, xzr, x17, ne\n\t" + "csel x10, xzr, x19, ne\n\t" + "ldp x15, x16, [%[table], #128]\n\t" + "ldp x17, x19, [%[table], #144]\n\t" + "csel x11, xzr, x15, ne\n\t" + "csel x12, xzr, x16, ne\n\t" + "csel x13, xzr, x17, ne\n\t" + "csel x14, xzr, x19, ne\n\t" + "1:\n\t" + "add %[table], %[table], #200\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x15, x16, [%[table], #0]\n\t" + "ldp x17, x19, [%[table], #16]\n\t" + "csel x3, x3, x15, ne\n\t" + "csel x4, x4, x16, ne\n\t" + "csel x5, x5, x17, 
ne\n\t" + "csel x6, x6, x19, ne\n\t" + "ldp x15, x16, [%[table], #64]\n\t" + "ldp x17, x19, [%[table], #80]\n\t" + "csel x7, x7, x15, ne\n\t" + "csel x8, x8, x16, ne\n\t" + "csel x9, x9, x17, ne\n\t" + "csel x10, x10, x19, ne\n\t" + "ldp x15, x16, [%[table], #128]\n\t" + "ldp x17, x19, [%[table], #144]\n\t" + "csel x11, x11, x15, ne\n\t" + "csel x12, x12, x16, ne\n\t" + "csel x13, x13, x17, ne\n\t" + "csel x14, x14, x19, ne\n\t" + "cmp w30, #33\n\t" + "b.ne 1b\n\t" + "stp x3, x4, [%[r], #0]\n\t" + "stp x5, x6, [%[r], #16]\n\t" + "stp x7, x8, [%[r], #64]\n\t" + "stp x9, x10, [%[r], #80]\n\t" + "stp x11, x12, [%[r], #128]\n\t" + "stp x13, x14, [%[r], #144]\n\t" + : [table] "+r" (table) + : [r] "r" (r), [idx] "r" (idx) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" + ); } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. @@ -22002,7 +24361,7 @@ static void sp_256_get_point_33_4(sp_point_256* r, const sp_point_256* table, static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; sp_digit* tmp = NULL; #else @@ -22020,8 +24379,8 @@ static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -22116,7 +24475,7 @@ static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if 
(tmp != NULL) @@ -22126,15 +24485,12 @@ static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* return err; } -#ifndef WC_NO_CACHE_RESISTANT /* A table entry for pre-computed points. */ typedef struct sp_table_entry_256 { sp_digit x[4]; sp_digit y[4]; } sp_table_entry_256; -#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) -#endif /* FP_ECC | WOLFSSL_SP_SMALL */ /* Add two Montgomery form projective points. The second point has a q value of * one. * Only the first point can be the same pointer as the result point. @@ -22144,79 +24500,121 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* t3 = t + 4*4; - sp_digit* t4 = t + 6*4; - sp_digit* t5 = t + 8*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*4; + sp_digit* t6 = t + 4*4; + sp_digit* t1 = t + 6*4; + sp_digit* t4 = t + 8*4; - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(p->x, t2) & + sp_256_cmp_equal_4(p->y, t4)) { sp_256_proj_point_dbl_4(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<4; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<4; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<4; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ - sp_256_mont_sub_4(t2, t2, x, p256_mod); + sp_256_mont_sub_4(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ - sp_256_mont_sub_4(t4, t4, y, p256_mod); + sp_256_mont_sub_4(t4, t4, p->y, p256_mod); /* Z3 = H*Z1 */ - sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, t1, t5, p256_mod); - sp_256_mont_dbl_4(t1, t3, p256_mod); - sp_256_mont_sub_4(x, x, t1, p256_mod); + 
sp_256_mont_sqr_4(t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, p->x, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(t2, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + sp_256_mont_rsb_sub_dbl_4(x, t2, t3, p256_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ - sp_256_mont_sub_4(t3, t3, x, p256_mod); sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, t3, t5, p256_mod); + sp_256_mont_mul_4(t1, t1, p->y, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, t3, t1, p256_mod); +{ + __asm__ __volatile__ ( + "ldrsw x10, [%[p], #192]\n\t" + "ldrsw x11, [%[q], #192]\n\t" + "ldp x12, x13, [%[x], #0]\n\t" + "ldp x14, x15, [%[x], #16]\n\t" + "ldp x16, x17, [%[y], #0]\n\t" + "ldp x19, x20, [%[y], #16]\n\t" + "ldp x21, x22, [%[z], #0]\n\t" + "ldp x23, x24, [%[z], #16]\n\t" + "bics xzr, x11, x10\n\t" + "ldp x25, x26, [%[p], #0]\n\t" + "ldp x27, x28, [%[p], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[p], #64]\n\t" + "ldp x27, x28, [%[p], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[p], #128]\n\t" + "ldp x27, x28, [%[p], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "bics xzr, x10, x11\n\t" + "and x10, x10, x11\n\t" + "ldp x25, x26, [%[q], #0]\n\t" + "ldp x27, x28, [%[q], #16]\n\t" + "csel x12, x12, x25, eq\n\t" + "csel x13, x13, x26, eq\n\t" + "csel x14, x14, x27, eq\n\t" + "csel x15, x15, x28, eq\n\t" + "ldp x25, x26, [%[q], #64]\n\t" + "ldp x27, x28, [%[q], #80]\n\t" + "csel x16, x16, x25, eq\n\t" + "csel x17, x17, x26, eq\n\t" + "csel x19, x19, x27, eq\n\t" + "csel x20, x20, x28, eq\n\t" + "ldp x25, x26, [%[q], 
#128]\n\t" + "ldp x27, x28, [%[q], #144]\n\t" + "csel x21, x21, x25, eq\n\t" + "csel x22, x22, x26, eq\n\t" + "csel x23, x23, x27, eq\n\t" + "csel x24, x24, x28, eq\n\t" + "orr x21, x21, x10\n\t" + "stp x12, x13, [%[r], #0]\n\t" + "stp x14, x15, [%[r], #16]\n\t" + "stp x16, x17, [%[r], #64]\n\t" + "stp x19, x20, [%[r], #80]\n\t" + "stp x21, x22, [%[r], #128]\n\t" + "stp x23, x24, [%[r], #144]\n\t" + "str w10, [%[r], #192]\n\t" + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q), [x] "r" (x), + [y] "r" (y), [z] "r" (z) + : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", + "cc" + ); +} } } +#ifndef WC_NO_CACHE_RESISTANT +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC | WOLFSSL_SP_SMALL */ #ifdef FP_ECC /* Convert the projective point to affine. * Ordinates are in Montgomery form. @@ -22254,7 +24652,7 @@ static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t) static int sp_256_gen_stripe_table_4(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -22267,7 +24665,7 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -22322,7 +24720,7 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -22336,34 +24734,55 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. 
- * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_256_get_entry_64_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_64_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { - int i; - sp_digit mask; - - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - for (i = 1; i < 64; i++) { - mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - } + __asm__ __volatile__ ( + "mov w30, #1\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, xzr, x11, ne\n\t" + "csel x4, xzr, x12, ne\n\t" + "csel x5, xzr, x13, ne\n\t" + "csel x6, xzr, x14, ne\n\t" + "csel x7, xzr, x15, ne\n\t" + "csel x8, xzr, x16, ne\n\t" + "csel x9, xzr, x17, ne\n\t" + "csel x10, xzr, x19, ne\n\t" + "1:\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, x3, x11, ne\n\t" + "csel x4, x4, x12, ne\n\t" + "csel x5, x5, x13, ne\n\t" + "csel x6, x6, x14, ne\n\t" + "csel x7, x7, x15, ne\n\t" + "csel x8, x8, x16, ne\n\t" + "csel x9, x9, x17, ne\n\t" + "csel x10, x10, x19, ne\n\t" + "cmp w30, #64\n\t" + "b.ne 1b\n\t" + "stp x3, x4, [%[r], #0]\n\t" + "stp x5, x6, [%[r], #16]\n\t" + "stp x7, x8, [%[r], #64]\n\t" + "stp x9, x10, [%[r], #80]\n\t" + : [table] "+r" (table) + : [r] "r" (r), [idx] 
"r" (idx) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" + ); } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. @@ -22386,7 +24805,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else @@ -22406,7 +24825,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -22472,7 +24891,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -22516,7 +24935,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -22587,23 +25006,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 4 * 5]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -22624,6 +25056,9 @@ static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -22646,7 +25081,7 @@ static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_ static int sp_256_gen_stripe_table_4(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -22659,7 +25094,7 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -22714,7 +25149,7 @@ static int 
sp_256_gen_stripe_table_4(const sp_point_256* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -22728,34 +25163,55 @@ static int sp_256_gen_stripe_table_4(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_256_get_entry_256_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_256_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { - int i; - sp_digit mask; - - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - for (i = 1; i < 256; i++) { - mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - } + __asm__ __volatile__ ( + "mov w30, #1\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, xzr, x11, ne\n\t" + "csel x4, xzr, x12, ne\n\t" + "csel x5, xzr, x13, ne\n\t" + "csel x6, xzr, x14, ne\n\t" + "csel x7, xzr, x15, ne\n\t" + "csel x8, xzr, x16, ne\n\t" + "csel x9, xzr, x17, ne\n\t" + "csel x10, xzr, x19, ne\n\t" + "1:\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, x3, x11, ne\n\t" + "csel x4, x4, x12, ne\n\t" + "csel 
x5, x5, x13, ne\n\t" + "csel x6, x6, x14, ne\n\t" + "csel x7, x7, x15, ne\n\t" + "csel x8, x8, x16, ne\n\t" + "csel x9, x9, x17, ne\n\t" + "csel x10, x10, x19, ne\n\t" + "cmp w30, #256\n\t" + "b.ne 1b\n\t" + "stp x3, x4, [%[r], #0]\n\t" + "stp x5, x6, [%[r], #16]\n\t" + "stp x7, x8, [%[r], #64]\n\t" + "stp x9, x10, [%[r], #80]\n\t" + : [table] "+r" (table) + : [r] "r" (r), [idx] "r" (idx) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" + ); } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. @@ -22778,7 +25234,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else @@ -22798,7 +25254,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -22864,7 +25320,7 @@ static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -22908,7 +25364,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -22979,23 +25435,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 4 * 5]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -23016,6 +25485,9 @@ static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -23034,7 +25506,7 @@ static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -23043,7 +25515,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), 
heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -23066,7 +25538,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_256_point_to_ecc_point_4(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -23081,7 +25553,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -23091,25 +25563,25 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[4 + 4 * 2 * 5]; + sp_digit k[4 + 4 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (4 + 4 * 2 * 5), heap, + sizeof(sp_digit) * (4 + 4 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -23146,7 +25618,7 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, err = sp_256_point_to_ecc_point_4(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != 
NULL) @@ -24890,34 +27362,55 @@ static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v) /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ -static void sp_256_get_entry_65_4(sp_point_256* r, +SP_NOINLINE static void sp_256_get_entry_65_4(sp_point_256* r, const sp_table_entry_256* table, int idx) { - int i; - sp_digit mask; - - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - for (i = 1; i < 65; i++) { - mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - } + __asm__ __volatile__ ( + "mov w30, #1\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, xzr, x11, ne\n\t" + "csel x4, xzr, x12, ne\n\t" + "csel x5, xzr, x13, ne\n\t" + "csel x6, xzr, x14, ne\n\t" + "csel x7, xzr, x15, ne\n\t" + "csel x8, xzr, x16, ne\n\t" + "csel x9, xzr, x17, ne\n\t" + "csel x10, xzr, x19, ne\n\t" + "1:\n\t" + "add %[table], %[table], #64\n\t" + "cmp %w[idx], w30\n\t" + "add w30, w30, #1\n\t" + "ldp x11, x12, [%[table], #0]\n\t" + "ldp x13, x14, [%[table], #16]\n\t" + "ldp x15, x16, [%[table], #32]\n\t" + "ldp x17, x19, [%[table], #48]\n\t" + "csel x3, x3, x11, ne\n\t" + "csel x4, x4, x12, ne\n\t" + "csel x5, x5, x13, ne\n\t" + "csel x6, x6, x14, ne\n\t" + "csel x7, x7, x15, ne\n\t" + "csel x8, x8, x16, ne\n\t" + "csel x9, x9, x17, ne\n\t" + "csel x10, x10, x19, ne\n\t" + "cmp w30, #65\n\t" + 
"b.ne 1b\n\t" + "stp x3, x4, [%[r], #0]\n\t" + "stp x5, x6, [%[r], #16]\n\t" + "stp x7, x8, [%[r], #64]\n\t" + "stp x9, x10, [%[r], #80]\n\t" + : [table] "+r" (table) + : [r] "r" (r), [idx] "r" (idx) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "w30", "cc" + ); } #endif /* !WC_NO_CACHE_RESISTANT */ static const sp_table_entry_256 p256_table[2405] = { @@ -36894,7 +39387,7 @@ static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* tmp = NULL; #else @@ -36912,7 +39405,7 @@ static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -36973,21 +39466,21 @@ static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) #endif { ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(tmp, heap, DYNAMIC_TYPE_ECC); #endif } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (rt != NULL) XFREE(rt, heap, DYNAMIC_TYPE_ECC); #endif - return MP_OKAY; + return err; } /* Multiply the base point of P256 by the scalar and return the result. 
@@ -37019,7 +39512,7 @@ static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k, */ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -37028,7 +39521,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -37050,7 +39543,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_256_point_to_ecc_point_4(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -37064,7 +39557,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -37074,25 +39567,25 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[4 + 4 * 2 * 5]; + sp_digit k[4 + 4 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (4 + 4 * 2 * 5), + sizeof(sp_digit) * (4 + 4 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -37128,7 +39621,7 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, err = sp_256_point_to_ecc_point_4(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -37140,17 +39633,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_4(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -37170,7 +39652,7 @@ static void sp_256_add_one_4(sp_digit* a) "stp x3, x4, [%[a], 16]\n\t" : : [a] "r" (a) - : "memory", "x1", "x2", "x3", "x4" + : "memory", "x1", "x2", "x3", "x4", "cc" ); } @@ -37183,41 +39665,108 @@ static void sp_256_add_one_4(sp_digit* a) */ static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = a[6]; //fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. 
*/ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } /* Generates a scalar that is in the range 1..order-1. 
@@ -37236,7 +39785,7 @@ static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_256_from_bin(k, 4, buf, (int)sizeof(buf)); - if (sp_256_cmp_4(k, p256_order2) < 0) { + if (sp_256_cmp_4(k, p256_order2) <= 0) { sp_256_add_one_4(k); break; } @@ -37258,7 +39807,7 @@ static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -37273,15 +39822,15 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_256* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -37322,7 +39871,7 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_256_point_to_ecc_point_4(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -37334,6 +39883,84 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_256_ctx { + int state; + sp_256_ecc_mulmod_4_ctx mulmod_ctx; + sp_digit k[4]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 point[2]; +#else + sp_point_256 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_256_ctx; + +int 
sp_ecc_make_key_256_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_256_ctx* ctx = (sp_ecc_key_gen_256_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_256_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_256_ecc_gen_k_4(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_256_ecc_mulmod_base_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p256_order, 1, 1); + if (err == MP_OKAY) { + if (sp_256_iszero_4(ctx->point->x) || + sp_256_iszero_4(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_256_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_4(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 32 @@ -37344,17 +39971,19 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) static void sp_256_to_bin_4(sp_digit* r, byte* a) { int i; - int j; + int j = 0; - for (i = 3, j = 0; i >= 0; i--) { - a[j++] = r[i] >> 56; - a[j++] = r[i] >> 48; - a[j++] = r[i] >> 40; - a[j++] = r[i] >> 32; - a[j++] = r[i] >> 24; - a[j++] = r[i] >> 16; - a[j++] = r[i] >> 8; - a[j++] = r[i] >> 0; + for (i = 3; i >= 0; i--, j += 8) { + __asm__ __volatile__ ( + "ldr x4, [%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); } } @@ -37373,7 +40002,7 @@ static void sp_256_to_bin_4(sp_digit* r, byte* a) int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -37386,7 +40015,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); @@ -37411,7 +40040,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 32; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -37420,8 +40049,60 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_256_ctx { + int state; + union { + sp_256_ecc_mulmod_4_ctx mulmod_ctx; + }; + sp_digit k[4]; + sp_point_256 point; +} sp_ecc_sec_gen_256_ctx; + +int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, 
const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_256_ctx* ctx = (sp_ecc_sec_gen_256_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_256_from_mp(ctx->k, 4, priv); + sp_256_point_from_ecc_point_4(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_256_to_bin_4(ctx->point.x, out); + *outLen = 32; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -37446,7 +40127,7 @@ static sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -37487,42 +40168,45 @@ static void sp_256_mul_d_4(sp_digit* r, const sp_digit* a, "stp x5, x6, [%[r], 24]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. 
* d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. */ static sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -37531,23 +40215,22 @@ static sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); - return r; + return d1; } /* AND m into each word of a and store in r. 
@@ -37581,8 +40264,8 @@ static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[8], t2[5]; sp_digit div, r1; @@ -37592,9 +40275,13 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit div = d[3]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 4); - for (i=3; i>=0; i--) { - sp_digit hi = t1[4 + i] - (t1[4 + i] == div); + r1 = sp_256_cmp_4(&t1[4], d) >= 0; + sp_256_cond_sub_4(&t1[4], &t1[4], d, (sp_digit)0 - r1); + for (i = 3; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[4 + i] == div); + sp_digit hi = t1[4 + i] + mask; r1 = div_256_word_4(hi, t1[4 + i - 1], div); + r1 |= mask; sp_256_mul_d_4(t2, d, r1); t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2); @@ -37625,40 +40312,416 @@ static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) -#ifdef WOLFSSL_SP_SMALL -/* Order-2 for the P256 curve. */ -static const uint64_t p256_order_minus_2[4] = { - 0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU, - 0xffffffff00000000U -}; -#else -/* The low half of the order-2 of the P256 curve. */ -static const sp_int_digit p256_order_low[2] = { - 0xf3b9cac2fc63254fU,0xbce6faada7179e84U -}; -#endif /* WOLFSSL_SP_SMALL */ - /* Multiply two number mod the order of P256 curve. (r = a * b mod order) * * r Result of the multiplication. * a First operand of the multiplication. * b Second operand of the multiplication. 
*/ -static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b) +SP_NOINLINE static void sp_256_mont_mul_order_4(sp_digit* r, + const sp_digit* a, const sp_digit* b) { - sp_256_mul_4(r, a, b); - sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); + __asm__ __volatile__ ( + "ldp x13, x14, [%[a], 0]\n\t" + "ldp x15, x16, [%[a], 16]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "# A[0] * B[0]\n\t" + "umulh x6, x13, x17\n\t" + "mul x5, x13, x17\n\t" + "# A[2] * B[0]\n\t" + "umulh x8, x15, x17\n\t" + "mul x7, x15, x17\n\t" + "# A[1] * B[0]\n\t" + "mul x3, x14, x17\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x7, x7, x4\n\t" + "adc x8, x8, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x3, x13, x20\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x13, x20\n\t" + "adcs x8, x8, x4\n\t" + "# A[1] * B[3]\n\t" + "mul x9, x14, x21\n\t" + "adcs x9, x9, xzr\n\t" + "umulh x10, x14, x21\n\t" + "adc x10, x10, xzr\n\t" + "# A[0] * B[1]\n\t" + "mul x3, x13, x19\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x13, x19\n\t" + "adcs x7, x7, x4\n\t" + "# A[2] * B[1]\n\t" + "mul x3, x15, x19\n\t" + "adcs x8, x8, x3\n\t" + "umulh x4, x15, x19\n\t" + "adcs x9, x9, x4\n\t" + "adc x10, x10, xzr\n\t" + "# A[1] * B[2]\n\t" + "mul x3, x14, x20\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x14, x20\n\t" + "adcs x9, x9, x4\n\t" + "adcs x10, x10, xzr\n\t" + "adc x11, xzr, xzr\n\t" + "# A[1] * B[1]\n\t" + "mul x3, x14, x19\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x14, x19\n\t" + "adcs x8, x8, x4\n\t" + "# A[3] * B[1]\n\t" + "mul x3, x16, x19\n\t" + "adcs x9, x9, x3\n\t" + "umulh x4, x16, x19\n\t" + "adcs x10, x10, x4\n\t" + "adc x11, x11, xzr\n\t" + "# A[2] * B[2]\n\t" + "mul x3, x15, x20\n\t" + "adds x9, x9, x3\n\t" + "umulh x4, x15, x20\n\t" + "adcs x10, x10, x4\n\t" + "# A[3] * B[3]\n\t" + "mul x3, x16, x21\n\t" + "adcs x11, x11, x3\n\t" + "umulh x12, x16, x21\n\t" + "adc x12, x12, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x3, x13, x21\n\t" + "adds 
x8, x8, x3\n\t" + "umulh x4, x13, x21\n\t" + "adcs x9, x9, x4\n\t" + "# A[2] * B[3]\n\t" + "mul x3, x15, x21\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x15, x21\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + "# A[3] * B[0]\n\t" + "mul x3, x16, x17\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x9, x9, x4\n\t" + "# A[3] * B[2]\n\t" + "mul x3, x16, x20\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x16, x20\n\t" + "adcs x11, x11, x4\n\t" + "adc x12, x12, xzr\n\t" + "ldp x13, x14, [%[m], 0]\n\t" + "mov x15, 0xffffffffffffffff\n\t" + "mov x16, 0xffffffff00000000\n\t" + "# mu = a[0] * mp\n\t" + "mul x17, %[mp], x5\n\t" + "# a[0+0] += m[0] * mu\n\t" + "mul x3, x13, x17\n\t" + "adds x5, x5, x3\n\t" + "umulh x4, x13, x17\n\t" + "adcs x6, x6, x4\n\t" + "# a[0+2] += m[2] * mu\n\t" + "mul x3, x15, x17\n\t" + "adcs x7, x7, x3\n\t" + "umulh x4, x15, x17\n\t" + "adcs x8, x8, x4\n\t" + "adcs x9, x9, xzr\n\t" + "adc x19, xzr, xzr\n\t" + "# a[0+1] += m[1] * mu\n\t" + "mul x3, x14, x17\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x7, x7, x4\n\t" + "# a[0+3] += m[3] * mu\n\t" + "mul x3, x16, x17\n\t" + "adcs x8, x8, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x9, x9, x4\n\t" + "# mu = a[1] * mp\n\t" + "mul x17, %[mp], x6\n\t" + "adc x19, x19, xzr\n\t" + "# a[1+0] += m[0] * mu\n\t" + "mul x3, x13, x17\n\t" + "adds x6, x6, x3\n\t" + "umulh x4, x13, x17\n\t" + "adcs x7, x7, x4\n\t" + "# a[1+2] += m[2] * mu\n\t" + "mul x3, x15, x17\n\t" + "adcs x8, x8, x3\n\t" + "umulh x4, x15, x17\n\t" + "adcs x9, x9, x4\n\t" + "adcs x10, x10, x19\n\t" + "adc x19, xzr, xzr\n\t" + "# a[1+1] += m[1] * mu\n\t" + "mul x3, x14, x17\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x8, x8, x4\n\t" + "# a[1+3] += m[3] * mu\n\t" + "mul x3, x16, x17\n\t" + "adcs x9, x9, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x10, x10, x4\n\t" + "# mu = a[2] * mp\n\t" + "mul x17, %[mp], x7\n\t" + "adc x19, x19, xzr\n\t" + "# a[2+0] += m[0] * mu\n\t" + "mul x3, x13, 
x17\n\t" + "adds x7, x7, x3\n\t" + "umulh x4, x13, x17\n\t" + "adcs x8, x8, x4\n\t" + "# a[2+2] += m[2] * mu\n\t" + "mul x3, x15, x17\n\t" + "adcs x9, x9, x3\n\t" + "umulh x4, x15, x17\n\t" + "adcs x10, x10, x4\n\t" + "adcs x11, x11, x19\n\t" + "adc x19, xzr, xzr\n\t" + "# a[2+1] += m[1] * mu\n\t" + "mul x3, x14, x17\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x9, x9, x4\n\t" + "# a[2+3] += m[3] * mu\n\t" + "mul x3, x16, x17\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x11, x11, x4\n\t" + "# mu = a[3] * mp\n\t" + "mul x17, %[mp], x8\n\t" + "adc x19, x19, xzr\n\t" + "# a[3+0] += m[0] * mu\n\t" + "mul x3, x13, x17\n\t" + "adds x8, x8, x3\n\t" + "umulh x4, x13, x17\n\t" + "adcs x9, x9, x4\n\t" + "# a[3+2] += m[2] * mu\n\t" + "mul x3, x15, x17\n\t" + "adcs x10, x10, x3\n\t" + "umulh x4, x15, x17\n\t" + "adcs x11, x11, x4\n\t" + "adcs x12, x12, x19\n\t" + "adc x19, xzr, xzr\n\t" + "# a[3+1] += m[1] * mu\n\t" + "mul x3, x14, x17\n\t" + "adds x9, x9, x3\n\t" + "umulh x4, x14, x17\n\t" + "adcs x10, x10, x4\n\t" + "# a[3+3] += m[3] * mu\n\t" + "mul x3, x16, x17\n\t" + "adcs x11, x11, x3\n\t" + "umulh x4, x16, x17\n\t" + "adcs x12, x12, x4\n\t" + "# x15 == -1\n\t" + "adcs x19, x19, x15\n\t" + "csel x13, x13, xzr, cs\n\t" + "csel x14, x14, xzr, cs\n\t" + "csel x15, x15, xzr, cs\n\t" + "csel x16, x16, xzr, cs\n\t" + "subs x9, x9, x13\n\t" + "sbcs x10, x10, x14\n\t" + "sbcs x11, x11, x15\n\t" + "stp x9, x10, [%[r], 0]\n\t" + "sbc x12, x12, x16\n\t" + "stp x11, x12, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (p256_order), + [mp] "r" (p256_mp_order) + : "memory", "x3", "x4", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "cc" + ); } +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P256 curve. 
*/ +static const uint64_t p256_order_minus_2[4] = { + 0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU, + 0xffffffff00000000U +}; +#endif /* WOLFSSL_SP_SMALL */ + /* Square number mod the order of P256 curve. (r = a * a mod order) * * r Result of the squaring. * a Number to square. */ -static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_256_mont_sqr_order_4(sp_digit* r, + const sp_digit* a) { - sp_256_sqr_4(r, a); - sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); + __asm__ __volatile__ ( + "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "# A[0] * A[1]\n\t" + "umulh x6, x12, x13\n\t" + "mul x5, x12, x13\n\t" + "# A[0] * A[3]\n\t" + "umulh x8, x12, x15\n\t" + "mul x7, x12, x15\n\t" + "# A[0] * A[2]\n\t" + "mul x2, x12, x14\n\t" + "adds x6, x6, x2\n\t" + "umulh x3, x12, x14\n\t" + "adcs x7, x7, x3\n\t" + "# A[1] * A[3]\n\t" + "mul x2, x13, x15\n\t" + "adcs x8, x8, x2\n\t" + "umulh x9, x13, x15\n\t" + "adc x9, x9, xzr\n\t" + "# A[1] * A[2]\n\t" + "mul x2, x13, x14\n\t" + "adds x7, x7, x2\n\t" + "umulh x3, x13, x14\n\t" + "adcs x8, x8, x3\n\t" + "# A[2] * A[3]\n\t" + "mul x2, x14, x15\n\t" + "adcs x9, x9, x2\n\t" + "umulh x10, x14, x15\n\t" + "adc x10, x10, xzr\n\t" + "# Double\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adcs x7, x7, x7\n\t" + "adcs x8, x8, x8\n\t" + "adcs x9, x9, x9\n\t" + "adcs x10, x10, x10\n\t" + "adc x11, xzr, xzr\n\t" + "# A[0] * A[0]\n\t" + "umulh x3, x12, x12\n\t" + "mul x4, x12, x12\n\t" + "# A[1] * A[1]\n\t" + "mul x2, x13, x13\n\t" + "adds x5, x5, x3\n\t" + "umulh x3, x13, x13\n\t" + "adcs x6, x6, x2\n\t" + "# A[2] * A[2]\n\t" + "mul x2, x14, x14\n\t" + "adcs x7, x7, x3\n\t" + "umulh x3, x14, x14\n\t" + "adcs x8, x8, x2\n\t" + "# A[3] * A[3]\n\t" + "mul x2, x15, x15\n\t" + "adcs x9, x9, x3\n\t" + "umulh x3, x15, x15\n\t" + "adcs x10, x10, x2\n\t" + "adc x11, x11, x3\n\t" + "ldp x12, x13, [%[m], 0]\n\t" + "mov x14, 0xffffffffffffffff\n\t" + "mov x15, 
0xffffffff00000000\n\t" + "# mu = a[0] * mp\n\t" + "mul x16, %[mp], x4\n\t" + "# a[0+0] += m[0] * mu\n\t" + "mul x2, x12, x16\n\t" + "adds x4, x4, x2\n\t" + "umulh x3, x12, x16\n\t" + "adcs x5, x5, x3\n\t" + "# a[0+2] += m[2] * mu\n\t" + "mul x2, x14, x16\n\t" + "adcs x6, x6, x2\n\t" + "umulh x3, x14, x16\n\t" + "adcs x7, x7, x3\n\t" + "adcs x8, x8, xzr\n\t" + "adc x17, xzr, xzr\n\t" + "# a[0+1] += m[1] * mu\n\t" + "mul x2, x13, x16\n\t" + "adds x5, x5, x2\n\t" + "umulh x3, x13, x16\n\t" + "adcs x6, x6, x3\n\t" + "# a[0+3] += m[3] * mu\n\t" + "mul x2, x15, x16\n\t" + "adcs x7, x7, x2\n\t" + "umulh x3, x15, x16\n\t" + "adcs x8, x8, x3\n\t" + "# mu = a[1] * mp\n\t" + "mul x16, %[mp], x5\n\t" + "adc x17, x17, xzr\n\t" + "# a[1+0] += m[0] * mu\n\t" + "mul x2, x12, x16\n\t" + "adds x5, x5, x2\n\t" + "umulh x3, x12, x16\n\t" + "adcs x6, x6, x3\n\t" + "# a[1+2] += m[2] * mu\n\t" + "mul x2, x14, x16\n\t" + "adcs x7, x7, x2\n\t" + "umulh x3, x14, x16\n\t" + "adcs x8, x8, x3\n\t" + "adcs x9, x9, x17\n\t" + "adc x17, xzr, xzr\n\t" + "# a[1+1] += m[1] * mu\n\t" + "mul x2, x13, x16\n\t" + "adds x6, x6, x2\n\t" + "umulh x3, x13, x16\n\t" + "adcs x7, x7, x3\n\t" + "# a[1+3] += m[3] * mu\n\t" + "mul x2, x15, x16\n\t" + "adcs x8, x8, x2\n\t" + "umulh x3, x15, x16\n\t" + "adcs x9, x9, x3\n\t" + "# mu = a[2] * mp\n\t" + "mul x16, %[mp], x6\n\t" + "adc x17, x17, xzr\n\t" + "# a[2+0] += m[0] * mu\n\t" + "mul x2, x12, x16\n\t" + "adds x6, x6, x2\n\t" + "umulh x3, x12, x16\n\t" + "adcs x7, x7, x3\n\t" + "# a[2+2] += m[2] * mu\n\t" + "mul x2, x14, x16\n\t" + "adcs x8, x8, x2\n\t" + "umulh x3, x14, x16\n\t" + "adcs x9, x9, x3\n\t" + "adcs x10, x10, x17\n\t" + "adc x17, xzr, xzr\n\t" + "# a[2+1] += m[1] * mu\n\t" + "mul x2, x13, x16\n\t" + "adds x7, x7, x2\n\t" + "umulh x3, x13, x16\n\t" + "adcs x8, x8, x3\n\t" + "# a[2+3] += m[3] * mu\n\t" + "mul x2, x15, x16\n\t" + "adcs x9, x9, x2\n\t" + "umulh x3, x15, x16\n\t" + "adcs x10, x10, x3\n\t" + "# mu = a[3] * mp\n\t" + "mul x16, %[mp], 
x7\n\t" + "adc x17, x17, xzr\n\t" + "# a[3+0] += m[0] * mu\n\t" + "mul x2, x12, x16\n\t" + "adds x7, x7, x2\n\t" + "umulh x3, x12, x16\n\t" + "adcs x8, x8, x3\n\t" + "# a[3+2] += m[2] * mu\n\t" + "mul x2, x14, x16\n\t" + "adcs x9, x9, x2\n\t" + "umulh x3, x14, x16\n\t" + "adcs x10, x10, x3\n\t" + "adcs x11, x11, x17\n\t" + "adc x17, xzr, xzr\n\t" + "# a[3+1] += m[1] * mu\n\t" + "mul x2, x13, x16\n\t" + "adds x8, x8, x2\n\t" + "umulh x3, x13, x16\n\t" + "adcs x9, x9, x3\n\t" + "# a[3+3] += m[3] * mu\n\t" + "mul x2, x15, x16\n\t" + "adcs x10, x10, x2\n\t" + "umulh x3, x15, x16\n\t" + "adcs x11, x11, x3\n\t" + "# x14 == -1\n\t" + "adcs x17, x17, x14\n\t" + "csel x12, x12, xzr, cs\n\t" + "csel x13, x13, xzr, cs\n\t" + "csel x14, x14, xzr, cs\n\t" + "csel x15, x15, xzr, cs\n\t" + "subs x8, x8, x12\n\t" + "sbcs x9, x9, x13\n\t" + "sbcs x10, x10, x14\n\t" + "stp x8, x9, [%[r], 0]\n\t" + "sbc x11, x11, x15\n\t" + "stp x10, x11, [%[r], 16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (p256_order), + [mp] "r" (p256_mp_order) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "cc" + ); } #ifndef WOLFSSL_SP_SMALL @@ -37668,14 +40731,188 @@ static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) * r Result of the squaring. * a Number to square. 
*/ -static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n) +SP_NOINLINE static void sp_256_mont_sqr_n_order_4(sp_digit* r, + const sp_digit* a, int n) { - int i; - sp_256_mont_sqr_order_4(r, a); - for (i=1; i=112; i--) { - sp_256_mont_sqr_order_4(t2, t2); - if ((p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { - sp_256_mont_mul_order_4(t2, t2, a); - } - } - /* t2= a^ffffffff00000000ffffffffffffffffbce6f */ + + /* ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */ + sp_256_mont_sqr_order_4(t2, t2); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t7); sp_256_mont_sqr_n_order_4(t2, t2, 4); sp_256_mont_mul_order_4(t2, t2, t3); - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */ - for (i=107; i>=64; i--) { - sp_256_mont_sqr_order_4(t2, t2); - if ((p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { - sp_256_mont_mul_order_4(t2, t2, a); - } - } - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */ + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 3); + sp_256_mont_mul_order_4(t2, t2, t5); sp_256_mont_sqr_n_order_4(t2, t2, 4); + sp_256_mont_mul_order_4(t2, t2, t5); + sp_256_mont_sqr_n_order_4(t2, t2, 3); sp_256_mont_mul_order_4(t2, t2, t3); - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */ - for (i=59; i>=32; i--) { - sp_256_mont_sqr_order_4(t2, t2); - if ((p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { - sp_256_mont_mul_order_4(t2, t2, a); - } - } - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */ - sp_256_mont_sqr_n_order_4(t2, t2, 4); + sp_256_mont_sqr_n_order_4(t2, t2, 3); sp_256_mont_mul_order_4(t2, t2, t3); - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */ - for (i=27; i>=0; i--) { - 
sp_256_mont_sqr_order_4(t2, t2); - if ((p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { - sp_256_mont_mul_order_4(t2, t2, a); - } - } - /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */ + sp_256_mont_sqr_n_order_4(t2, t2, 2); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t7); sp_256_mont_sqr_n_order_4(t2, t2, 4); - /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */ - sp_256_mont_mul_order_4(r, t2, t3); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 6); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 2); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 6); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t7); + sp_256_mont_sqr_n_order_4(t2, t2, 4); + sp_256_mont_mul_order_4(t2, t2, t7); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t7); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t5); + sp_256_mont_sqr_n_order_4(t2, t2, 3); + sp_256_mont_mul_order_4(t2, t2, t3); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t15); + sp_256_mont_sqr_n_order_4(t2, t2, 2); + sp_256_mont_mul_order_4(t2, t2, t3); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t3); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t3); + sp_256_mont_sqr_n_order_4(t2, t2, 3); + sp_256_mont_mul_order_4(t2, t2, a); + sp_256_mont_sqr_n_order_4(t2, t2, 5); + sp_256_mont_mul_order_4(t2, t2, t5); + sp_256_mont_sqr_n_order_4(t2, t2, 2); + sp_256_mont_mul_order_4(t2, t2, a); 
+ sp_256_mont_sqr_n_order_4(t2, t2, 6); + sp_256_mont_mul_order_4(r, t2, t15); + /* Multiplications: 31 */ #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -37894,6 +41164,128 @@ static int sp_256_calc_s_4(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. */ +int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_256* point = NULL; +#else + sp_digit e[10 * 2 * 4]; + sp_point_256 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10 * 2 * 4, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 4; + k = e + 4 * 4; + r = e + 6 * 4; + tmp = e + 8 * 4; + s = e; + + if (hashLen > 32U) { + hashLen = 32U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_256_ecc_gen_k_4(rng, k); + } + else { + sp_256_from_mp(k, 4, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_4(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 4U); + sp_256_norm_4(r); + c = sp_256_cmp_4(r, p256_order); + sp_256_cond_sub_4(r, r, p256_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_256_norm_4(r); + + if (!sp_256_iszero_4(r)) { + /* x is modified in calculation of s. */ + sp_256_from_mp(x, 4, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_256_from_bin(e, 4, hash, (int)hashLen); + + err = sp_256_calc_s_4(s, r, k, x, e, tmp); + + /* Check that signature is usable. */ + if ((err == MP_OKAY) && (!sp_256_iszero_4(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 10 * 2 * 4); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_256)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_256_ctx { int state; @@ -37905,7 +41297,7 @@ typedef struct sp_ecc_sign_256_ctx { sp_digit x[2*4]; sp_digit k[2*4]; sp_digit r[2*4]; - sp_digit tmp[3 * 2*4]; + sp_digit tmp[6 * 2*4]; sp_point_256 point; sp_digit* s; sp_digit* kInv; @@ -37921,15 +41313,10 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 32U) { - hashLen = 32U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -37964,6 +41351,9 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_4(ctx->r); + if (hashLen > 32U) { + hashLen = 32U; + } sp_256_from_mp(ctx->x, 4, priv); sp_256_from_bin(ctx->e, 4, hash, (int)hashLen); ctx->state = 4; @@ -38052,130 +41442,12 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 4U); XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 4U); XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 4U); - XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U); + XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 6U * 2U * 4U); } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_256* point = NULL; -#else - sp_digit e[7 * 2 * 4]; - sp_point_256 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int64 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 4; - k = e + 4 * 4; - r = e + 6 * 4; - tmp = e + 8 * 4; - s = e; - - if (hashLen > 32U) { - hashLen = 32U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; 
err == MP_OKAY && i > 0; i--) { - /* New random point. */ - if (km == NULL || mp_iszero(km)) { - err = sp_256_ecc_gen_k_4(rng, k); - } - else { - sp_256_from_mp(k, 4, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_256_ecc_mulmod_base_4(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 4U); - sp_256_norm_4(r); - c = sp_256_cmp_4(r, p256_order); - sp_256_cond_sub_4(r, r, p256_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_256_norm_4(r); - - sp_256_from_mp(x, 4, priv); - sp_256_from_bin(e, 4, hash, (int)hashLen); - - err = sp_256_calc_s_4(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_4(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_256_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_256_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 4); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_256)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -38263,12 +41535,9 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "tst x7, 1\n\t" "b.ne 90f\n\t" "\n1:\n\t" - "lsr x7, x7, 1\n\t" - "lsr x26, x8, 1\n\t" - "lsr x27, x9, 1\n\t" - "orr x7, x7, x8, lsl 63\n\t" - "orr x8, x26, x9, lsl 63\n\t" - "orr x9, x27, x10, lsl 63\n\t" + "extr x7, x8, x7, 1\n\t" + "extr x8, x9, x8, 1\n\t" + "extr x9, x10, x9, 1\n\t" "lsr x10, x10, 1\n\t" "sub x24, x24, 
1\n\t" "ands x25, x15, 1\n\t" @@ -38279,14 +41548,10 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "adcs x19, x19, %[m]\n\t" "cset x25, cs\n\t" "\n2:\n\t" - "lsr x15, x15, 1\n\t" - "lsr x26, x16, 1\n\t" - "lsr x27, x17, 1\n\t" - "lsr x28, x19, 1\n\t" - "orr x15, x15, x16, lsl 63\n\t" - "orr x16, x26, x17, lsl 63\n\t" - "orr x17, x27, x19, lsl 63\n\t" - "orr x19, x28, x25, lsl 63\n\t" + "extr x15, x16, x15, 1\n\t" + "extr x16, x17, x16, 1\n\t" + "extr x17, x19, x17, 1\n\t" + "extr x19, x25, x19, 1\n\t" "tst x7, 1\n\t" "b.eq 1b\n\t" "\n90:\n\t" @@ -38349,12 +41614,9 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "sub x23, x25, x23\n\t" "\n43:\n\t" "\n50:\n\t" - "lsr x3, x3, 1\n\t" - "lsr x26, x4, 1\n\t" - "lsr x27, x5, 1\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "orr x4, x26, x5, lsl 63\n\t" - "orr x5, x27, x6, lsl 63\n\t" + "extr x3, x4, x3, 1\n\t" + "extr x4, x5, x4, 1\n\t" + "extr x5, x6, x5, 1\n\t" "lsr x6, x6, 1\n\t" "sub x23, x23, 1\n\t" "ands x25, x11, 1\n\t" @@ -38365,14 +41627,10 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "adcs x14, x14, %[m]\n\t" "cset x25, cs\n\t" "\n51:\n\t" - "lsr x11, x11, 1\n\t" - "lsr x26, x12, 1\n\t" - "lsr x27, x13, 1\n\t" - "lsr x28, x14, 1\n\t" - "orr x11, x11, x12, lsl 63\n\t" - "orr x12, x26, x13, lsl 63\n\t" - "orr x13, x27, x14, lsl 63\n\t" - "orr x14, x28, x25, lsl 63\n\t" + "extr x11, x12, x11, 1\n\t" + "extr x12, x13, x12, 1\n\t" + "extr x13, x14, x13, 1\n\t" + "extr x14, x25, x14, 1\n\t" "tst x3, 1\n\t" "b.eq 50b\n\t" "b 90b\n\t" @@ -38417,12 +41675,9 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "sub x24, x25, x24\n\t" "\n73:\n\t" "\n80:\n\t" - "lsr x7, x7, 1\n\t" - "lsr x26, x8, 1\n\t" - "lsr x27, x9, 1\n\t" - "orr x7, x7, x8, lsl 63\n\t" - "orr x8, x26, x9, lsl 63\n\t" - "orr x9, x27, x10, lsl 63\n\t" + "extr x7, x8, x7, 1\n\t" + "extr x8, x9, x8, 1\n\t" + "extr x9, x10, x9, 1\n\t" "lsr x10, x10, 1\n\t" "sub x24, x24, 1\n\t" "ands x25, x15, 1\n\t" @@ 
-38433,14 +41688,10 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "adcs x19, x19, %[m]\n\t" "cset x25, cs\n\t" "\n81:\n\t" - "lsr x15, x15, 1\n\t" - "lsr x26, x16, 1\n\t" - "lsr x27, x17, 1\n\t" - "lsr x28, x19, 1\n\t" - "orr x15, x15, x16, lsl 63\n\t" - "orr x16, x26, x17, lsl 63\n\t" - "orr x17, x27, x19, lsl 63\n\t" - "orr x19, x28, x25, lsl 63\n\t" + "extr x15, x16, x15, 1\n\t" + "extr x16, x17, x16, 1\n\t" + "extr x17, x19, x17, 1\n\t" + "extr x19, x25, x19, 1\n\t" "tst x7, 1\n\t" "b.eq 80b\n\t" "b 90b\n\t" @@ -38458,7 +41709,7 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a, "\n102:\n\t" : [m] "+r" (m) : [r] "r" (r), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "cc" ); return MP_OKAY; @@ -38498,7 +41749,7 @@ static void sp_256_add_points_4(sp_point_256* p1, const sp_point_256* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -38570,6 +41821,106 @@ static int sp_256_calc_vfy_point_4(sp_point_256* p1, sp_point_256* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_256* p1 = NULL; +#else + sp_digit u1[18 * 4]; + sp_point_256 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 4, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 4; + s = u1 + 4 * 4; + tmp = u1 + 6 * 4; + p2 = p1 + 1; + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 4, hash, (int)hashLen); + sp_256_from_mp(u2, 4, rm); + sp_256_from_mp(s, 4, sm); + sp_256_from_mp(p2->x, 4, pX); + sp_256_from_mp(p2->y, 4, pY); + sp_256_from_mp(p2->z, 4, pZ); + + err = sp_256_calc_vfy_point_4(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_256_from_mp(u2, 4, rm); + err = sp_256_mod_mul_norm_4(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_4(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 4, rm); + carry = sp_256_add_4(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_4(u2); + + /* Compare with mod and if greater or equal then not valid. 
*/ + c = sp_256_cmp_4(u2, p256_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_256_mod_mul_norm_4(u2, u2, p256_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod); + } + *res = (sp_256_cmp_4(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_256_ctx { int state; @@ -38582,7 +41933,7 @@ typedef struct sp_ecc_verify_256_ctx { sp_digit u1[2*4]; sp_digit u2[2*4]; sp_digit s[2*4]; - sp_digit tmp[2*4 * 5]; + sp_digit tmp[2*4 * 6]; sp_point_256 p1; sp_point_256 p2; } sp_ecc_verify_256_ctx; @@ -38719,109 +42070,54 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_256* p1 = NULL; -#else - sp_digit u1[16 * 4]; - sp_point_256 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = NULL; - sp_point_256* p2 = NULL; - sp_digit carry; - sp_int64 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 4; - s = u1 + 4 * 4; - tmp = u1 + 6 * 4; - p2 = p1 + 1; - - if (hashLen > 32U) { - hashLen = 32U; - } - - 
sp_256_from_bin(u1, 4, hash, (int)hashLen); - sp_256_from_mp(u2, 4, rm); - sp_256_from_mp(s, 4, sm); - sp_256_from_mp(p2->x, 4, pX); - sp_256_from_mp(p2->y, 4, pY); - sp_256_from_mp(p2->z, 4, pZ); - - err = sp_256_calc_vfy_point_4(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_256_from_mp(u2, 4, rm); - err = sp_256_mod_mul_norm_4(u2, u2, p256_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod); - *res = (int)(sp_256_cmp_4(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_256_from_mp(u2, 4, rm); - carry = sp_256_add_4(u2, u2, p256_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_256_norm_4(u2); - - /* Compare with mod and if greater or equal then not valid. */ - c = sp_256_cmp_4(u2, p256_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_256_mod_mul_norm_4(u2, u2, p256_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, - p256_mp_mod); - *res = (sp_256_cmp_4(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montgomery form. + * b Second number to add in Montgomery form. + * m Modulus (prime). 
+ */ +SP_NOINLINE static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldp x4, x5, [%[a], 0]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "adds x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "adcs x7, x7, x11\n\t" + "csetm x14, cs\n\t" + "subs x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x5, x5, x12\n\t" + "sub x13, xzr, x12\n\t" + "sbcs x6, x6, xzr\n\t" + "sbcs x7, x7, x13\n\t" + "sbc x13, xzr, xzr\n\t" + "sub x14, x14, x13\n\t" + "subs x4, x4, x14\n\t" + "lsr x12, x14, 32\n\t" + "sbcs x5, x5, x12\n\t" + "sub x13, xzr, x12\n\t" + "sbcs x6, x6, xzr\n\t" + "stp x4, x5, [%[r],0]\n\t" + "sbc x7, x7, x13\n\t" + "stp x6, x7, [%[r],16]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "cc" + ); + + (void)m; +} + +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -38831,7 +42127,7 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_256_ecc_is_point_4(const sp_point_256* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 4]; @@ -38839,7 +42135,7 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -38849,25 +42145,27 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 4; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_4(t1, point->y); (void)sp_256_mod_4(t1, t1, p256_mod); sp_256_sqr_4(t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); sp_256_mul_4(t2, t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); - (void)sp_256_sub_4(t2, p256_mod, t2); - sp_256_mont_add_4(t1, t1, t2, p256_mod); + sp_256_mont_sub_4(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); + if (sp_256_cmp_4(t1, p256_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -38875,7 +42173,7 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -38884,7 +42182,7 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, */ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* pub = NULL; #else sp_point_256 pub[1]; @@ -38892,7 +42190,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -38907,7 +42205,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) err = sp_256_ecc_is_point_4(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -38929,7 +42227,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_256* pub = NULL; #else @@ -38950,7 +42248,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); @@ -39016,7 +42314,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -39045,17 +42343,17 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else - sp_digit tmp[2 * 4 * 5]; + sp_digit tmp[2 * 4 * 6]; sp_point_256 p[2]; #endif sp_point_256* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -39063,7 +42361,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -39098,7 +42396,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -39122,7 +42420,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -39131,7 +42429,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -39166,7 +42464,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if 
(p != NULL) @@ -39186,7 +42484,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -39196,7 +42494,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -39230,7 +42528,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_256_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -39248,7 +42546,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_256_mont_sqrt_4(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 4]; @@ -39256,7 +42554,7 @@ static int sp_256_mont_sqrt_4(sp_digit* y) sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) { err = MEMORY_E; @@ -39299,7 +42597,7 @@ static int sp_256_mont_sqrt_4(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -39317,7 +42615,7 @@ static int sp_256_mont_sqrt_4(sp_digit* y) */ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 4]; @@ -39325,7 +42623,7 @@ 
int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -39365,7 +42663,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) err = sp_256_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -39471,10 +42769,10 @@ static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[12]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 40\n\t" "csel x3, xzr, x3, cc\n\t" @@ -39504,7 +42802,7 @@ static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -39750,7 +43048,7 @@ static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b) "stp x4, x5, [%[r], 80]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc" ); } @@ -39766,10 +43064,10 @@ static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) sp_digit tmp[12]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" 
"\n1:\n\t" "subs x3, x5, 40\n\t" "csel x3, xzr, x3, cc\n\t" @@ -39815,7 +43113,7 @@ static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -39836,87 +43134,87 @@ static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) "ldp x19, x20, [%[a], 16]\n\t" "ldp x21, x22, [%[a], 32]\n\t" "# A[0] * A[1]\n\t" - "mul x6, x16, x17\n\t" - "umulh x7, x16, x17\n\t" + "mul x6, x16, x17\n\t" + "umulh x7, x16, x17\n\t" "# A[0] * A[2]\n\t" - "mul x4, x16, x19\n\t" - "umulh x5, x16, x19\n\t" - "adds x7, x7, x4\n\t" + "mul x4, x16, x19\n\t" + "umulh x5, x16, x19\n\t" + "adds x7, x7, x4\n\t" "# A[0] * A[3]\n\t" - "mul x4, x16, x20\n\t" - "adc x8, xzr, x5\n\t" - "umulh x5, x16, x20\n\t" - "adds x8, x8, x4\n\t" + "mul x4, x16, x20\n\t" + "adc x8, xzr, x5\n\t" + "umulh x5, x16, x20\n\t" + "adds x8, x8, x4\n\t" "# A[1] * A[2]\n\t" - "mul x4, x17, x19\n\t" - "adc x9, xzr, x5\n\t" - "umulh x5, x17, x19\n\t" - "adds x8, x8, x4\n\t" + "mul x4, x17, x19\n\t" + "adc x9, xzr, x5\n\t" + "umulh x5, x17, x19\n\t" + "adds x8, x8, x4\n\t" "# A[0] * A[4]\n\t" - "mul x4, x16, x21\n\t" - "adcs x9, x9, x5\n\t" - "umulh x5, x16, x21\n\t" - "adc x10, xzr, xzr\n\t" - "adds x9, x9, x4\n\t" + "mul x4, x16, x21\n\t" + "adcs x9, x9, x5\n\t" + "umulh x5, x16, x21\n\t" + "adc x10, xzr, xzr\n\t" + "adds x9, x9, x4\n\t" "# A[1] * A[3]\n\t" - "mul x4, x17, x20\n\t" - "adc x10, x10, x5\n\t" - "umulh x5, x17, x20\n\t" - "adds x9, x9, x4\n\t" + "mul x4, x17, x20\n\t" + "adc x10, x10, x5\n\t" + "umulh x5, x17, x20\n\t" + "adds x9, x9, x4\n\t" "# A[0] * A[5]\n\t" - "mul x4, x16, x22\n\t" - "adcs x10, x10, x5\n\t" - "umulh x5, x16, x22\n\t" - "adc x11, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x16, x22\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x16, x22\n\t" + "adc x11, xzr, 
xzr\n\t" + "adds x10, x10, x4\n\t" "# A[1] * A[4]\n\t" - "mul x4, x17, x21\n\t" - "adc x11, x11, x5\n\t" - "umulh x5, x17, x21\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x17, x21\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x17, x21\n\t" + "adds x10, x10, x4\n\t" "# A[2] * A[3]\n\t" - "mul x4, x19, x20\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x19, x20\n\t" - "adc x12, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x19, x20\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x19, x20\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" "# A[1] * A[5]\n\t" - "mul x4, x17, x22\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x17, x22\n\t" - "adc x12, x12, xzr\n\t" - "adds x11, x11, x4\n\t" + "mul x4, x17, x22\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x17, x22\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" "# A[2] * A[4]\n\t" - "mul x4, x19, x21\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x19, x21\n\t" - "adc x13, xzr, xzr\n\t" - "adds x11, x11, x4\n\t" + "mul x4, x19, x21\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x19, x21\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" "# A[2] * A[5]\n\t" - "mul x4, x19, x22\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x19, x22\n\t" - "adc x13, x13, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x19, x22\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x19, x22\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[3] * A[4]\n\t" - "mul x4, x20, x21\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x20, x21\n\t" - "adc x14, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x20, x21\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x20, x21\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[3] * A[5]\n\t" - "mul x4, x20, x22\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x20, x22\n\t" - "adc x14, x14, xzr\n\t" - "adds x13, x13, x4\n\t" + "mul x4, x20, x22\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x20, x22\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" "# A[4] * A[5]\n\t" - "mul x4, x21, x22\n\t" - "adcs x14, 
x14, x5\n\t" - "umulh x5, x21, x22\n\t" - "adc x15, xzr, xzr\n\t" - "adds x14, x14, x4\n\t" - "adc x15, x15, x5\n\t" + "mul x4, x21, x22\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x21, x22\n\t" + "adc x15, xzr, xzr\n\t" + "adds x14, x14, x4\n\t" + "adc x15, x15, x5\n\t" "# Double\n\t" "adds x6, x6, x6\n\t" "adcs x7, x7, x7\n\t" @@ -39928,34 +43226,34 @@ static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) "adcs x13, x13, x13\n\t" "adcs x14, x14, x14\n\t" "# A[0] * A[0]\n\t" - "mul x5, x16, x16\n\t" + "mul x5, x16, x16\n\t" "adcs x15, x15, x15\n\t" - "umulh x2, x16, x16\n\t" + "umulh x2, x16, x16\n\t" "cset x16, cs\n\t" "# A[1] * A[1]\n\t" - "mul x3, x17, x17\n\t" + "mul x3, x17, x17\n\t" "adds x6, x6, x2\n\t" - "umulh x4, x17, x17\n\t" + "umulh x4, x17, x17\n\t" "adcs x7, x7, x3\n\t" "# A[2] * A[2]\n\t" - "mul x2, x19, x19\n\t" + "mul x2, x19, x19\n\t" "adcs x8, x8, x4\n\t" - "umulh x3, x19, x19\n\t" + "umulh x3, x19, x19\n\t" "adcs x9, x9, x2\n\t" "# A[3] * A[3]\n\t" - "mul x4, x20, x20\n\t" + "mul x4, x20, x20\n\t" "adcs x10, x10, x3\n\t" - "umulh x2, x20, x20\n\t" + "umulh x2, x20, x20\n\t" "adcs x11, x11, x4\n\t" "# A[4] * A[4]\n\t" - "mul x3, x21, x21\n\t" + "mul x3, x21, x21\n\t" "adcs x12, x12, x2\n\t" - "umulh x4, x21, x21\n\t" + "umulh x4, x21, x21\n\t" "adcs x13, x13, x3\n\t" "# A[5] * A[5]\n\t" - "mul x2, x22, x22\n\t" + "mul x2, x22, x22\n\t" "adcs x14, x14, x4\n\t" - "umulh x3, x22, x22\n\t" + "umulh x3, x22, x22\n\t" "adcs x15, x15, x2\n\t" "stp x5, x6, [%[r], 0]\n\t" "adc x16, x16, x3\n\t" @@ -39966,7 +43264,7 @@ static void sp_384_sqr_6(sp_digit* r, const sp_digit* a) "stp x15, x16, [%[r], 80]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x16", "x17", "x19", "x20", "x21", "x22" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x16", "x17", "x19", "x20", "x21", "x22", "cc" ); } @@ 
-39991,18 +43289,18 @@ static sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 0]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" - "ldr x3, [%[a], 32]\n\t" - "ldr x4, [%[a], 40]\n\t" - "ldr x7, [%[b], 32]\n\t" - "ldr x8, [%[b], 40]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x7, [%[b], 32]\n\t" + "ldr x8, [%[b], 40]\n\t" "adcs x3, x3, x7\n\t" "adcs x4, x4, x8\n\t" - "str x3, [%[r], 32]\n\t" - "str x4, [%[r], 40]\n\t" - "cset %[r], cs\n\t" + "str x3, [%[r], 32]\n\t" + "str x4, [%[r], 40]\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; @@ -40028,18 +43326,18 @@ static sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 0]\n\t" "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" - "ldr x3, [%[a], 32]\n\t" - "ldr x4, [%[a], 40]\n\t" - "ldr x7, [%[b], 32]\n\t" - "ldr x8, [%[b], 40]\n\t" + "ldr x3, [%[a], 32]\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x7, [%[b], 32]\n\t" + "ldr x8, [%[b], 40]\n\t" "sbcs x3, x3, x7\n\t" "sbcs x4, x4, x8\n\t" - "str x3, [%[r], 32]\n\t" - "str x4, [%[r], 40]\n\t" + "str x3, [%[r], 32]\n\t" + "str x4, [%[r], 40]\n\t" "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; @@ -40054,7 +43352,7 @@ static sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a, */ static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[2 * 12]; @@ -40065,7 +43363,7 @@ static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* (void)m; -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -40148,7 +43446,7 @@ static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* r[5] = (t[11] << 32) | t[10]; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -40165,20 +43463,23 @@ static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -40208,12 +43509,12 @@ static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { r[j] &= 0xffffffffffffffffl; @@ -40387,12 +43688,10 @@ static void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m) "stp x7, x8, [%[r], 32]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "cc" ); } -#define sp_384_mont_reduce_order_6 
sp_384_mont_reduce_6 - /* Reduce the number back to 384 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -40402,99 +43701,223 @@ static void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m) SP_NOINLINE static void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "mov x3, xzr\n\t" - "# i = 6\n\t" - "mov x4, 6\n\t" + "ldp x7, x8, [%[a], #0]\n\t" + "ldp x9, x10, [%[a], #16]\n\t" + "ldp x11, x12, [%[a], #32]\n\t" + "mov x6, xzr\n\t" + "# a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)\n\t" + "ldp x13, x14, [%[a], #48]\n\t" + "extr x2, x8, x7, 32\n\t" + "extr x1, x7, xzr, 32\n\t" + "adds x1, x1, x7\n\t" + "adc x2, x2, x8\n\t" + "add x2, x2, x7\n\t" + "extr x5, xzr, x2, 32\n\t" + "extr x4, x2, x1, 32\n\t" + "extr x3, x1, xzr, 32\n\t" + "adds x7, x7, x3\n\t" + "adcs x8, x8, x4\n\t" + "adcs x9, x9, x5\n\t" + "adcs x10, x10, xzr\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adcs x13, x13, x1\n\t" + "adcs x14, x14, x2\n\t" + "adcs x6, x6, xzr\n\t" + "adds x3, x3, x2\n\t" + "adcs x4, x4, x1\n\t" + "adcs x5, x5, x2\n\t" + "adcs x2, xzr, xzr\n\t" + "subs x9, x9, x4\n\t" + "sbcs x10, x10, x5\n\t" + "sbcs x11, x11, x2\n\t" + "sbcs x12, x12, xzr\n\t" + "sbcs x13, x13, xzr\n\t" + "sbcs x14, x14, xzr\n\t" + "sbc x6, x6, xzr\n\t" + "# a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)\n\t" + "ldp x7, x8, [%[a], #64]\n\t" + "extr x2, x10, x9, 32\n\t" + "extr x1, x9, xzr, 32\n\t" + "adds x1, x1, x9\n\t" + "adc x2, x2, x10\n\t" + "add x2, x2, x9\n\t" + "extr x5, xzr, x2, 32\n\t" + "extr x4, x2, x1, 32\n\t" + "extr x3, x1, xzr, 32\n\t" + "adds x7, x7, x6\n\t" + "adcs x8, x8, xzr\n\t" + "adc x6, xzr, xzr\n\t" + "adds x9, x9, x3\n\t" + "adcs x10, x10, x4\n\t" + "adcs x11, x11, x5\n\t" + "adcs x12, x12, xzr\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adcs 
x7, x7, x1\n\t" + "adcs x8, x8, x2\n\t" + "adcs x6, x6, xzr\n\t" + "adds x3, x3, x2\n\t" + "adcs x4, x4, x1\n\t" + "adcs x5, x5, x2\n\t" + "adcs x2, xzr, xzr\n\t" + "subs x11, x11, x4\n\t" + "sbcs x12, x12, x5\n\t" + "sbcs x13, x13, x2\n\t" + "sbcs x14, x14, xzr\n\t" + "sbcs x7, x7, xzr\n\t" + "sbcs x8, x8, xzr\n\t" + "sbc x6, x6, xzr\n\t" + "# a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)\n\t" + "ldp x9, x10, [%[a], #80]\n\t" + "extr x2, x12, x11, 32\n\t" + "extr x1, x11, xzr, 32\n\t" + "adds x1, x1, x11\n\t" + "adc x2, x2, x12\n\t" + "add x2, x2, x11\n\t" + "extr x5, xzr, x2, 32\n\t" + "extr x4, x2, x1, 32\n\t" + "extr x3, x1, xzr, 32\n\t" + "adds x9, x9, x6\n\t" + "adcs x10, x10, xzr\n\t" + "adc x6, xzr, xzr\n\t" + "adds x11, x11, x3\n\t" + "adcs x12, x12, x4\n\t" + "adcs x13, x13, x5\n\t" + "adcs x14, x14, xzr\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adcs x9, x9, x1\n\t" + "adcs x10, x10, x2\n\t" + "adcs x6, x6, xzr\n\t" + "adds x3, x3, x2\n\t" + "adcs x4, x4, x1\n\t" + "adcs x5, x5, x2\n\t" + "adcs x2, xzr, xzr\n\t" + "subs x13, x13, x4\n\t" + "sbcs x14, x14, x5\n\t" + "sbcs x7, x7, x2\n\t" + "sbcs x8, x8, xzr\n\t" + "sbcs x9, x9, xzr\n\t" + "sbcs x10, x10, xzr\n\t" + "sbc x6, x6, xzr\n\t" + "# Subtract mod if carry\n\t" + "neg x6, x6\n\t" + "mov x5, -2\n\t" + "lsr x3, x6, 32\n\t" + "lsl x4, x6, 32\n\t" + "and x5, x5, x6\n\t" + "subs x13, x13, x3\n\t" + "sbcs x14, x14, x4\n\t" + "sbcs x7, x7, x5\n\t" + "sbcs x8, x8, x6\n\t" + "sbcs x9, x9, x6\n\t" + "sbc x10, x10, x6\n\t" + "stp x13, x14, [%[a], #0]\n\t" + "stp x7, x8, [%[a], #16]\n\t" + "stp x9, x10, [%[a], #32]\n\t" + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) + : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "cc" + ); +} + +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. 
+ * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_384_mont_reduce_order_6(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + __asm__ __volatile__ ( "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "ldp x16, x17, [%[a], 32]\n\t" + "mov x3, xzr\n\t" + "# i = 0..5\n\t" + "mov x4, 6\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" "mul x9, %[mp], x12\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" + "ldp x10, x11, [%[m], 0]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" "adds x12, x12, x7\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" + "mul x7, x11, x9\n\t" + "umulh x8, x11, x9\n\t" "adds x12, x13, x7\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" + "ldp x11, x10, [%[m], 16]\n\t" "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x13, x14, x7\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x14, x15, x7\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" + "ldp x11, x10, [%[m], 32]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" + "adds x14, x14, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x15, x16, x7\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" + "ldr x10, [%[m], 40]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" + "adds x15, x15, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" + "umulh x8, x10, x9\n\t" 
"adds x6, x6, x7\n\t" "adcs x8, x8, x3\n\t" - "str x11, [%[a], 32]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 48]\n\t" - "str x10, [%[a], 40]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 48]\n\t" + "adc x3, xzr, xzr\n\t" + "adds x16, x17, x6\n\t" + "ldr x17, [%[a], 48]\n\t" + "adcs x17, x17, x8\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" "# Create mask\n\t" - "neg x3, x3\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 48\n\t" + "neg x3, x3\n\t" + "mov x9, %[a]\n\t" + "sub %[a], %[a], 48\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "sbcs x13, x13, x20\n\t" - "stp x12, x13, [%[a], 32]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x12, x12, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x13, x13, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x14, x14, x6\n\t" + "stp x12, x13, [%[a], 0]\n\t" + "sbcs x15, x15, x7\n\t" + "stp x14, x15, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x16, x16, x4\n\t" + "sbcs x17, x17, x5\n\t" + "stp x16, x17, [%[a], 32]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "cc" ); } @@ -40506,9 +43929,9 @@ 
SP_NOINLINE static void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_384_mul_6(r, a, b); @@ -40520,9 +43943,9 @@ static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_384_sqr_6(r, a); @@ -40536,10 +43959,10 @@ static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_6(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_6(r, a, m, mp); for (; n > 1; n--) { @@ -40547,7 +43970,7 @@ static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P384 curve. 
*/ static const uint64_t p384_mod_minus_2[6] = { @@ -40652,77 +44075,79 @@ static sp_int64 sp_384_cmp_6(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 40\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #6\n\t" + "add %[a], %[a], #32\n\t" + "add %[b], %[b], #32\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[a], 16]\n\t" - "ldp x9, x10, [%[a], 32]\n\t" - "ldp x11, x12, [%[b], 0]\n\t" - "ldp x13, x14, [%[b], 16]\n\t" - "ldp x15, x16, [%[b], 32]\n\t" - "and x10, x10, x4\n\t" - "and x16, x16, x4\n\t" - "subs x10, x10, x16\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x9, x9, x4\n\t" - "and x15, x15, x4\n\t" - "subs x9, x9, x15\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x8, x8, x4\n\t" - "and x14, x14, x4\n\t" - "subs x8, x8, x14\n\t" - "csel x2, x4, 
x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x7, x7, x4\n\t" - "and x13, x13, x4\n\t" - "subs x7, x7, x13\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x6, x6, x4\n\t" - "and x12, x12, x4\n\t" - "subs x6, x6, x12\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x11, x11, x4\n\t" - "subs x5, x5, x11\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -40772,7 +44197,7 @@ static sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", 
"x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; @@ -40798,27 +44223,24 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); - sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->x, r->x, p384_mod, ~(n >> 63)); sp_384_norm_6(r->x); /* y /= z^3 */ sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); - sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->y, r->y, p384_mod, ~(n >> 63)); sp_384_norm_6(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -40828,8 +44250,8 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -40843,7 +44265,8 @@ static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). 
*/ -static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -40857,7 +44280,8 @@ static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -40867,6 +44291,7 @@ static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) sp_384_cond_sub_6(r, r, m, 0 - o); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -40878,7 +44303,6 @@ static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { -#ifdef WOLFSSL_SP_SMALL sp_digit c = 0; __asm__ __volatile__ ( @@ -40896,43 +44320,57 @@ static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ __asm__ __volatile__ ( - "ldp x5, x7, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "ldp x4, x6, [%[a], 0]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 16]\n\t" - "and x7, x7, %[m]\n\t" - "adds x4, x4, x5\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 0]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 16]\n\t" - "ldp x5, x7, [%[b], 32]\n\t" - "ldp x4, x6, [%[a], 32]\n\t" - "and x5, x5, %[m]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" - "adcs x6, x6, x7\n\t" - "stp x4, x6, [%[r], 32]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "stp x4, x5, [%[r], 32]\n\t" "cset %[r], cs\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return (sp_digit)r; -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * @@ -40941,8 +44379,8 @@ static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -40950,29 +44388,24 @@ static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_384_cond_add_6(r, r, m, o); } -static void sp_384_rshift1_6(sp_digit* r, sp_digit* a) +static void sp_384_rshift1_6(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "ldp x2, x3, [%[a]]\n\t" "ldp x4, x5, [%[a], 16]\n\t" "ldp x6, x7, [%[a], 32]\n\t" - "lsr x11, x6, 1\n\t" - "lsr x10, x5, 1\n\t" - "lsr x9, x4, 1\n\t" - "lsr x8, x3, 1\n\t" - "lsr x2, x2, 1\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "orr x3, x8, x4, lsl 63\n\t" - "orr x4, x9, x5, lsl 63\n\t" - "orr x5, x10, x6, lsl 63\n\t" - "orr x6, x11, x7, lsl 63\n\t" - "lsr x7, x7, 1\n\t" + "extr x2, x3, x2, #1\n\t" + "extr x3, x4, x3, #1\n\t" + "extr x4, x5, x4, #1\n\t" + "extr x5, x6, x5, #1\n\t" + "extr x6, x7, x6, #1\n\t" + "lsr x7, x7, #1\n\t" "stp x2, x3, [%[r]]\n\t" "stp x4, x5, [%[r], 16]\n\t" "stp x6, x7, [%[r], 32]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "cc" ); } @@ -40982,7 +44415,8 @@ static void sp_384_rshift1_6(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_div2_6(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -40997,6 +44431,61 @@ static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. 
*/ +static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_mont_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_6_ctx { int state; @@ -41007,7 +44496,14 @@ typedef struct sp_384_proj_point_dbl_6_ctx { sp_digit* z; } sp_384_proj_point_dbl_6_ctx; -static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_6_ctx* ctx = (sp_384_proj_point_dbl_6_ctx*)sp_ctx->data; @@ -41081,7 +44577,7 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con break; case 11: /* T2 = T2/2 */ - sp_384_div2_6(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_6(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -41131,61 +44627,6 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_6(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_6(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_6(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_6(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_6(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_6(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_6(y, y, x, p384_mod); - /* Y = Y * T1 
*/ - sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_6(y, y, t2, p384_mod); -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -41193,7 +44634,7 @@ static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_d * n Number of times to double * t Temporary ordinate data. */ -static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, +static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_digit* t) { sp_digit* w = t; @@ -41204,6 +44645,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -41214,7 +44656,6 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, /* W = Z^4 */ sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -41232,9 +44673,12 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_6(t2, b, x, p384_mod); + sp_384_mont_dbl_6(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -41244,9 +44688,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_6(y, b, x, p384_mod); - sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); } #ifndef WOLFSSL_SP_SMALL @@ -41261,18 +44703,19 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_384_mont_sqr_6(x, a, 
p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_6(t2, b, x, p384_mod); + sp_384_mont_dbl_6(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_6(y, b, x, p384_mod); - sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(y, y, p384_mod); + sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_6(y, y, p384_mod); + sp_384_mont_div2_6(y, y, p384_mod); } /* Compare two numbers to determine if they are equal. @@ -41288,6 +44731,18 @@ static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_6(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -41295,6 +44750,84 @@ static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. 
*/ +static void sp_384_proj_point_add_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*6; + sp_digit* t2 = t + 4*6; + sp_digit* t3 = t + 6*6; + sp_digit* t4 = t + 8*6; + sp_digit* t5 = t + 10*6; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(t2, t1) & + sp_384_cmp_equal_6(t4, t3)) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t3, y, p384_mod); + sp_384_mont_sub_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - 
(p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_6_ctx { @@ -41307,11 +44840,19 @@ typedef struct sp_384_proj_point_add_6_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_384_proj_point_add_6_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -41330,261 +44871,168 @@ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*6; - ctx->t3 = t + 4*6; - ctx->t4 = t + 6*6; - ctx->t5 = t + 8*6; + ctx->t6 = t; + ctx->t1 = t + 2*6; + ctx->t2 = t + 4*6; + ctx->t3 = t + 6*6; + ctx->t4 = t + 8*6; + ctx->t5 = t + 10*6; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_6(ctx->t1, p384_mod, q->y); - sp_384_norm_6(ctx->t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_384_proj_point_dbl_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - 
ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_384)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<6; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<6; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<6; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_384_mont_sqr_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; + break; + case 2: + sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; + break; + case 3: + sp_384_mont_mul_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_6(ctx->t1, ctx->t1, ctx->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_6(ctx->t4, ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if 
((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(ctx->t2, ctx->t1) & + sp_384_cmp_equal_6(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_6(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(ctx->t3, ctx->t3, ctx->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_6(ctx->z, ctx->z, ctx->t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_mul_6(ctx->t5, ctx->t5, 
ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_dbl_6(ctx->t1, ctx->y, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_6(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t1, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_6(ctx->y, ctx->y, ctx->x, p384_mod); + sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_6(sp_point_384* r, - 
const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* t3 = t + 4*6; - sp_digit* t4 = t + 6*6; - sp_digit* t5 = t + 8*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. */ - if (q == r) { - const sp_point_384* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { - sp_384_proj_point_dbl_6(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<6; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<6; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<6; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_6(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_6(t4, t4, t3, p384_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - 
sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); - sp_384_mont_dbl_6(t1, y, p384_mod); - sp_384_mont_sub_6(x, x, t1, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -41630,30 +45078,30 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, sp_384_mont_sub_6(t1, t1, w, p384_mod); sp_384_mont_tpl_6(a, t1, p384_mod); /* B = X*Y^2 */ - sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(t1, b, p384_mod); - sp_384_mont_sub_6(x, x, t1, p384_mod); + sp_384_mont_dbl_6(t2, b, p384_mod); + sp_384_mont_sub_6(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_6(t2, b, x, p384_mod); + sp_384_mont_dbl_6(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_6(r[j].z, z, y, p384_mod, p384_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod); + /* t1 = Y^4 */ + sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_6(y, b, x, p384_mod); - sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod); - 
sp_384_mont_dbl_6(y, y, p384_mod); - sp_384_mont_sub_6(y, y, t2, p384_mod); - + sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_6(r[j].y, y, p384_mod); + sp_384_mont_div2_6(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -41676,30 +45124,30 @@ static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_digit* t4 = t + 6*6; sp_digit* t5 = t + 8*6; sp_digit* t6 = t + 10*6; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, xa, p384_mod, p384_mp_mod); /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t2, za, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, za, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t3, ya, p384_mod, p384_mp_mod); /* S2 = Y2*Z1^3 */ sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - U1 */ @@ -41710,30 +45158,30 @@ static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_384_mont_sub_6(t4, t4, t3, p384_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + 
sp_384_mont_mul_6(za, za, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(za, za, t2, p384_mod, p384_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(xa, t4, p384_mod, p384_mp_mod); sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod); sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ya, t1, t5, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_sub_6(xa, xa, t5, p384_mod); sp_384_mont_sub_6(xs, xs, t5, p384_mod); - sp_384_mont_dbl_6(t1, y, p384_mod); - sp_384_mont_sub_6(x, x, t1, p384_mod); + sp_384_mont_dbl_6(t1, ya, p384_mod); + sp_384_mont_sub_6(xa, xa, t1, p384_mod); sp_384_mont_sub_6(xs, xs, t1, p384_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_384_mont_sub_6(ys, y, xs, p384_mod); - sp_384_mont_sub_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(ys, ya, xs, p384_mod); + sp_384_mont_sub_6(ya, ya, xa, p384_mod); + sp_384_mont_mul_6(ya, ya, t4, p384_mod, p384_mp_mod); sp_384_sub_6(t6, p384_mod, t6); sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); + sp_384_mont_sub_6(ya, ya, t5, p384_mod); sp_384_mont_sub_6(ys, ys, t5, p384_mod); } @@ -41812,54 +45260,73 @@ static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v) /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ -static void sp_384_get_point_33_6(sp_point_384* r, const sp_point_384* table, - int idx) +SP_NOINLINE static void sp_384_get_point_33_6(sp_point_384* r, + const sp_point_384* table, int idx) { int i; sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; + sp_digit z0 = 0; + sp_digit z1 = 0; + sp_digit z2 = 0; + sp_digit z3 = 0; + sp_digit z4 = 0; + sp_digit z5 = 0; - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->x[4] = 0; - r->x[5] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - r->y[4] = 0; - r->y[5] = 0; - r->z[0] = 0; - r->z[1] = 0; - r->z[2] = 0; - r->z[3] = 0; - r->z[4] = 0; - r->z[5] = 0; for (i = 1; i < 33; i++) { mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->x[4] |= mask & table[i].x[4]; - r->x[5] |= mask & table[i].x[5]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - r->y[4] |= mask & table[i].y[4]; - r->y[5] |= mask & table[i].y[5]; - r->z[0] |= mask & table[i].z[0]; - r->z[1] |= mask & table[i].z[1]; - r->z[2] |= mask & table[i].z[2]; - r->z[3] |= mask & table[i].z[3]; - r->z[4] |= mask & table[i].z[4]; - r->z[5] |= mask & table[i].z[5]; + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; + z0 |= mask & table[i].z[0]; + z1 |= mask & table[i].z[1]; + z2 |= mask & table[i].z[2]; + z3 |= mask & table[i].z[3]; + z4 |= 
mask & table[i].z[4]; + z5 |= mask & table[i].z[5]; } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = x5; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; + r->z[0] = z0; + r->z[1] = z1; + r->z[2] = z2; + r->z[3] = z3; + r->z[4] = z4; + r->z[5] = z5; } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. @@ -41883,7 +45350,7 @@ static void sp_384_get_point_33_6(sp_point_384* r, const sp_point_384* table, static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; sp_digit* tmp = NULL; #else @@ -41901,8 +45368,8 @@ static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -41997,7 +45464,7 @@ static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -42007,15 +45474,12 @@ static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* return err; } -#ifndef WC_NO_CACHE_RESISTANT /* A table entry for pre-computed points. */ typedef struct sp_table_entry_384 { sp_digit x[6]; sp_digit y[6]; } sp_table_entry_384; -#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) -#endif /* FP_ECC | WOLFSSL_SP_SMALL */ /* Add two Montgomery form projective points. The second point has a q value of * one. 
* Only the first point can be the same pointer as the result point. @@ -42025,79 +45489,81 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* t3 = t + 4*6; - sp_digit* t4 = t + 6*6; - sp_digit* t5 = t + 8*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*6; + sp_digit* t6 = t + 4*6; + sp_digit* t1 = t + 6*6; + sp_digit* t4 = t + 8*6; + sp_digit* t5 = t + 10*6; - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(p->x, t2) & + sp_384_cmp_equal_6(p->y, t4)) { sp_384_proj_point_dbl_6(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<6; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<6; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<6; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ - sp_384_mont_sub_6(t2, t2, x, p384_mod); + sp_384_mont_sub_6(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ - sp_384_mont_sub_6(t4, t4, y, p384_mod); + sp_384_mont_sub_6(t4, t4, p->y, p384_mod); /* Z3 = H*Z1 */ - sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, t1, t5, p384_mod); - sp_384_mont_dbl_6(t1, t3, p384_mod); - sp_384_mont_sub_6(x, x, t1, p384_mod); + 
sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, p->x, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(t2, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + sp_384_mont_dbl_6(t5, t3, p384_mod); + sp_384_mont_sub_6(x, t2, t5, p384_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_384_mont_sub_6(t3, t3, x, p384_mod); sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, t3, t5, p384_mod); + sp_384_mont_mul_6(t1, t1, p->y, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, t3, t1, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } +#ifndef WC_NO_CACHE_RESISTANT +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC | WOLFSSL_SP_SMALL */ #ifdef FP_ECC /* Convert the projective point to affine. * Ordinates are in Montgomery form. 
@@ -42135,7 +45601,7 @@ static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t) static int sp_384_gen_stripe_table_6(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -42148,7 +45614,7 @@ static int sp_384_gen_stripe_table_6(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -42203,7 +45669,7 @@ static int sp_384_gen_stripe_table_6(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -42217,7 +45683,7 @@ static int sp_384_gen_stripe_table_6(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_entry_64_6(sp_point_384* r, @@ -42225,34 +45691,47 @@ static void sp_384_get_entry_64_6(sp_point_384* r, { int i; sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->x[4] = 0; - r->x[5] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - r->y[4] = 0; - r->y[5] = 0; for (i = 1; i < 64; i++) { mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->x[4] |= mask & table[i].x[4]; - r->x[5] |= mask & table[i].x[5]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - r->y[4] |= mask & table[i].y[4]; - r->y[5] |= mask & table[i].y[5]; + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = x5; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. 
@@ -42275,7 +45754,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -42295,7 +45774,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -42361,7 +45840,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -42405,7 +45884,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -42476,23 +45955,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 6 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -42513,6 +46005,9 @@ static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -42535,7 +46030,7 @@ static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_ static int sp_384_gen_stripe_table_6(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -42548,7 +46043,7 @@ static int sp_384_gen_stripe_table_6(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -42603,7 +46098,7 @@ static int 
sp_384_gen_stripe_table_6(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -42617,7 +46112,7 @@ static int sp_384_gen_stripe_table_6(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_384_get_entry_256_6(sp_point_384* r, @@ -42625,34 +46120,47 @@ static void sp_384_get_entry_256_6(sp_point_384* r, { int i; sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->x[4] = 0; - r->x[5] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - r->y[4] = 0; - r->y[5] = 0; for (i = 1; i < 256; i++) { mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->x[4] |= mask & table[i].x[4]; - r->x[5] |= mask & table[i].x[5]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - r->y[4] |= mask & table[i].y[4]; - r->y[5] |= mask & table[i].y[5]; + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = 
x5; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; } #endif /* !WC_NO_CACHE_RESISTANT */ /* Multiply the point by the scalar and return the result. @@ -42675,7 +46183,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -42695,7 +46203,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -42761,7 +46269,7 @@ static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -42805,7 +46313,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -42876,23 +46384,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 6 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -42913,6 +46434,9 @@ static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -42931,7 +46455,7 @@ static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -42940,7 +46464,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -42963,7 +46487,7 @@ int 
sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_384_point_to_ecc_point_6(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42978,7 +46502,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -42988,8 +46512,8 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* point = NULL; sp_digit* k = NULL; #else sp_point_384 point[2]; @@ -42999,7 +46523,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -43043,7 +46567,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, err = sp_384_point_to_ecc_point_6(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -44787,7 +48311,7 @@ static void sp_384_ecc_recode_7_6(const sp_digit* k, ecc_recode_384* v) /* Touch each possible entry that could be being copied. * * r Point to copy into. 
- * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. */ static void sp_384_get_entry_65_6(sp_point_384* r, @@ -44795,34 +48319,47 @@ static void sp_384_get_entry_65_6(sp_point_384* r, { int i; sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; - r->x[0] = 0; - r->x[1] = 0; - r->x[2] = 0; - r->x[3] = 0; - r->x[4] = 0; - r->x[5] = 0; - r->y[0] = 0; - r->y[1] = 0; - r->y[2] = 0; - r->y[3] = 0; - r->y[4] = 0; - r->y[5] = 0; for (i = 1; i < 65; i++) { mask = 0 - (i == idx); - r->x[0] |= mask & table[i].x[0]; - r->x[1] |= mask & table[i].x[1]; - r->x[2] |= mask & table[i].x[2]; - r->x[3] |= mask & table[i].x[3]; - r->x[4] |= mask & table[i].x[4]; - r->x[5] |= mask & table[i].x[5]; - r->y[0] |= mask & table[i].y[0]; - r->y[1] |= mask & table[i].y[1]; - r->y[2] |= mask & table[i].y[2]; - r->y[3] |= mask & table[i].y[3]; - r->y[4] |= mask & table[i].y[4]; - r->y[5] |= mask & table[i].y[5]; + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = x5; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; } #endif /* !WC_NO_CACHE_RESISTANT */ static const sp_table_entry_384 p384_table[3575] = { @@ -62613,7 +66150,7 @@ static int sp_384_ecc_mulmod_add_only_6(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { 
-#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* tmp = NULL; #else @@ -62631,7 +66168,7 @@ static int sp_384_ecc_mulmod_add_only_6(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -62692,21 +66229,21 @@ static int sp_384_ecc_mulmod_add_only_6(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) #endif { ForceZero(tmp, sizeof(sp_digit) * 2 * 6 * 6); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(tmp, heap, DYNAMIC_TYPE_ECC); #endif } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (rt != NULL) XFREE(rt, heap, DYNAMIC_TYPE_ECC); #endif - return MP_OKAY; + return err; } /* Multiply the base point of P384 by the scalar and return the result. 
@@ -62738,7 +66275,7 @@ static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k, */ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -62747,7 +66284,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -62769,7 +66306,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_384_point_to_ecc_point_6(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -62783,7 +66320,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -62793,7 +66330,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -62804,8 +66341,8 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; @@ -62847,7 +66384,7 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, err = sp_384_point_to_ecc_point_6(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -62859,17 +66396,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_6(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -62893,7 +66419,7 @@ static void sp_384_add_one_6(sp_digit* a) "stp x1, x2, [%[a], 32]\n\t" : : [a] "r" (a) - : "memory", "x1", "x2", "x3", "x4" + : "memory", "x1", "x2", "x3", "x4", "cc" ); } @@ -62906,41 +66432,108 @@ static void sp_384_add_one_6(sp_digit* a) */ static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = a[6]; //fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. 
*/ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } /* Generates a scalar that is in the range 1..order-1. 
@@ -62959,7 +66552,7 @@ static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_384_from_bin(k, 6, buf, (int)sizeof(buf)); - if (sp_384_cmp_6(k, p384_order2) < 0) { + if (sp_384_cmp_6(k, p384_order2) <= 0) { sp_384_add_one_6(k); break; } @@ -62981,7 +66574,7 @@ static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -62996,15 +66589,15 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_384* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -63045,7 +66638,7 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_384_point_to_ecc_point_6(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -63057,6 +66650,84 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_384_ctx { + int state; + sp_384_ecc_mulmod_6_ctx mulmod_ctx; + sp_digit k[6]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384 point[2]; +#else + sp_point_384 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_384_ctx; + +int 
sp_ecc_make_key_384_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_384_ctx* ctx = (sp_ecc_key_gen_384_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_384_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_384_ecc_gen_k_6(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_384_ecc_mulmod_base_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p384_order, 1, 1); + if (err == MP_OKAY) { + if (sp_384_iszero_6(ctx->point->x) || + sp_384_iszero_6(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_384_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_6(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 48 @@ -63067,17 +66738,19 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) static void sp_384_to_bin_6(sp_digit* r, byte* a) { int i; - int j; + int j = 0; - for (i = 5, j = 0; i >= 0; i--) { - a[j++] = r[i] >> 56; - a[j++] = r[i] >> 48; - a[j++] = r[i] >> 40; - a[j++] = r[i] >> 32; - a[j++] = r[i] >> 24; - a[j++] = r[i] >> 16; - a[j++] = r[i] >> 8; - a[j++] = r[i] >> 0; + for (i = 5; i >= 0; i--, j += 8) { + __asm__ __volatile__ ( + "ldr x4, [%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); } } @@ -63096,7 +66769,7 @@ static void sp_384_to_bin_6(sp_digit* r, byte* a) int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -63109,7 +66782,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); @@ -63134,7 +66807,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 48; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -63143,8 +66816,60 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_384_ctx { + int state; + union { + sp_384_ecc_mulmod_6_ctx mulmod_ctx; + }; + sp_digit k[6]; + sp_point_384 point; +} sp_ecc_sec_gen_384_ctx; + +int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, 
const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_384_ctx* ctx = (sp_ecc_sec_gen_384_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_384_from_mp(ctx->k, 6, priv); + sp_384_point_from_ecc_point_6(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_384_to_bin_6(ctx->point.x, out); + *outLen = 48; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -63166,18 +66891,18 @@ static sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b) "stp x2, x3, [%[a], 0]\n\t" "sbcs x5, x5, x9\n\t" "stp x4, x5, [%[a], 16]\n\t" - "ldr x2, [%[a], 32]\n\t" - "ldr x3, [%[a], 40]\n\t" - "ldr x6, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "ldr x2, [%[a], 32]\n\t" + "ldr x3, [%[a], 40]\n\t" + "ldr x6, [%[b], 32]\n\t" + "ldr x7, [%[b], 40]\n\t" "sbcs x2, x2, x6\n\t" "sbcs x3, x3, x7\n\t" - "str x2, [%[a], 32]\n\t" - "str x3, [%[a], 40]\n\t" + "str x2, [%[a], 32]\n\t" + "str x3, [%[a], 40]\n\t" "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -63198,9 +66923,9 @@ static void sp_384_mul_d_6(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, 
%[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -63219,95 +66944,99 @@ static void sp_384_mul_d_6(sp_digit* r, const sp_digit* a, "str x3, [%[r], 48]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" "str x4, [%[r], 32]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" 
"adc x3, x3, x7\n\t" - "stp x5, x3, [%[r], 40]\n\t" + "str x5, [%[r], 40]\n\t" + "str x3, [%[r], 48]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. */ static sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -63316,23 +67045,22 @@ static sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "udiv x3, x3, x5\n\t" + 
"add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); - return r; + return d1; } /* AND m into each word of a and store in r. @@ -63368,8 +67096,8 @@ static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[12], t2[7]; sp_digit div, r1; @@ -63379,9 +67107,13 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit div = d[5]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 6); - for (i=5; i>=0; i--) { - sp_digit hi = t1[6 + i] - (t1[6 + i] == div); + r1 = sp_384_cmp_6(&t1[6], d) >= 0; + sp_384_cond_sub_6(&t1[6], &t1[6], d, (sp_digit)0 - r1); + for (i = 5; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[6 + i] == div); + sp_digit hi = t1[6 + i] + mask; r1 = div_384_word_6(hi, t1[6 + i - 1], div); + r1 |= mask; sp_384_mul_d_6(t2, d, r1); t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2); @@ -63412,6 +67144,19 @@ static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P384 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. 
+ */ +static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_384_mul_6(r, a, b); + sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P384 curve. */ static const uint64_t p384_order_minus_2[6] = { @@ -63425,18 +67170,6 @@ static const uint64_t p384_order_low[3] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P384 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_384_mul_6(r, a, b); - sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order); -} - /* Square number mod the order of P384 curve. (r = a * a mod order) * * r Result of the squaring. @@ -63578,6 +67311,7 @@ static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -63652,6 +67386,128 @@ static int sp_384_calc_s_6(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. 
*/ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_384* point = NULL; +#else + sp_digit e[7 * 2 * 6]; + sp_point_384 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 6; + k = e + 4 * 6; + r = e + 6 * 6; + tmp = e + 8 * 6; + s = e; + + if (hashLen > 48U) { + hashLen = 48U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_6(rng, k); + } + else { + sp_384_from_mp(k, 6, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_6(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 6U); + sp_384_norm_6(r); + c = sp_384_cmp_6(r, p384_order); + sp_384_cond_sub_6(r, r, p384_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_384_norm_6(r); + + if (!sp_384_iszero_6(r)) { + /* x is modified in calculation of s. */ + sp_384_from_mp(x, 6, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_384_from_bin(e, 6, hash, (int)hashLen); + + err = sp_384_calc_s_6(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_384_iszero_6(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_384)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_384_ctx { int state; @@ -63679,15 +67535,10 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 48U) { - hashLen = 48U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -63722,6 +67573,9 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_6(ctx->r); + if (hashLen > 48U) { + hashLen = 48U; + } sp_384_from_mp(ctx->x, 6, priv); sp_384_from_bin(ctx->e, 6, hash, (int)hashLen); ctx->state = 4; @@ -63816,124 +67670,6 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_384* point = NULL; -#else - sp_digit e[7 * 2 * 6]; - sp_point_384 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = 
NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int64 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 6; - k = e + 4 * 6; - r = e + 6 * 6; - tmp = e + 8 * 6; - s = e; - - if (hashLen > 48U) { - hashLen = 48U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. */ - if (km == NULL || mp_iszero(km)) { - err = sp_384_ecc_gen_k_6(rng, k); - } - else { - sp_384_from_mp(k, 6, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_384_ecc_mulmod_base_6(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 6U); - sp_384_norm_6(r); - c = sp_384_cmp_6(r, p384_order); - sp_384_cond_sub_6(r, r, p384_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_384_norm_6(r); - - sp_384_from_mp(x, 6, priv); - sp_384_from_bin(e, 6, hash, (int)hashLen); - - err = sp_384_calc_s_6(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. 
*/ - if ((err == MP_OKAY) && (sp_384_iszero_6(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_384_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_384_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 6); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_384)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -63947,49 +67683,43 @@ static void sp_384_div2_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "ldr x3, [%[a], 0]\n\t" - "ldr x4, [%[a], 8]\n\t" - "ldr x5, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x7, [%[a], 32]\n\t" - "ldr x8, [%[a], 40]\n\t" - "ldr x9, [%[m], 0]\n\t" - "ldr x10, [%[m], 8]\n\t" - "ldr x11, [%[m], 16]\n\t" - "ldr x12, [%[m], 24]\n\t" - "ldr x13, [%[m], 32]\n\t" - "ldr x14, [%[m], 40]\n\t" - "ands x15, x3, 1\n\t" - "b.eq 1f\n\t" - "adds x3, x3, x9\n\t" - "adcs x4, x4, x10\n\t" - "adcs x5, x5, x11\n\t" - "adcs x6, x6, x12\n\t" - "adcs x7, x7, x13\n\t" - "adcs x8, x8, x14\n\t" - "cset x15, cs\n\t" + "ldr x3, [%[a], 0]\n\t" + "ldr x4, [%[a], 8]\n\t" + "ldr x5, [%[a], 16]\n\t" + "ldr x6, [%[a], 24]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[a], 40]\n\t" + "ldr x9, [%[m], 0]\n\t" + "ldr x10, [%[m], 8]\n\t" + "ldr x11, [%[m], 16]\n\t" + "ldr x12, [%[m], 24]\n\t" + "ldr x13, [%[m], 32]\n\t" + "ldr x14, [%[m], 40]\n\t" + "ands x15, x3, 1\n\t" + "b.eq 1f\n\t" + "adds x3, x3, x9\n\t" + "adcs x4, x4, x10\n\t" + "adcs x5, x5, x11\n\t" + 
"adcs x6, x6, x12\n\t" + "adcs x7, x7, x13\n\t" + "adcs x8, x8, x14\n\t" + "cset x15, cs\n\t" "\n1:\n\t" - "lsr x3, x3, 1\n\t" - "lsr x10, x4, 1\n\t" - "lsr x11, x5, 1\n\t" - "lsr x12, x6, 1\n\t" - "lsr x13, x7, 1\n\t" - "lsr x14, x8, 1\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "orr x4, x10, x5, lsl 63\n\t" - "orr x5, x11, x6, lsl 63\n\t" - "orr x6, x12, x7, lsl 63\n\t" - "orr x7, x13, x8, lsl 63\n\t" - "orr x8, x14, x15, lsl 63\n\t" - "str x3, [%[r], 0]\n\t" - "str x4, [%[r], 8]\n\t" - "str x5, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "str x7, [%[r], 32]\n\t" - "str x8, [%[r], 40]\n\t" + "extr x3, x4, x3, 1\n\t" + "extr x4, x5, x4, 1\n\t" + "extr x5, x6, x5, 1\n\t" + "extr x6, x7, x6, 1\n\t" + "extr x7, x8, x7, 1\n\t" + "extr x8, x15, x8, 1\n\t" + "str x3, [%[r], 0]\n\t" + "str x4, [%[r], 8]\n\t" + "str x5, [%[r], 16]\n\t" + "str x6, [%[r], 24]\n\t" + "str x7, [%[r], 32]\n\t" + "str x8, [%[r], 40]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "cc" ); } @@ -64006,7 +67736,7 @@ static int sp_384_num_bits_64_6(sp_digit n) : "x1" ); - return r + 1; + return (int)(r + 1); } static int sp_384_num_bits_6(const sp_digit* a) @@ -64041,6 +67771,7 @@ static int sp_384_mod_inv_6(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut, vt; sp_digit o; + XMEMCPY(u, m, sizeof(u)); XMEMCPY(v, a, sizeof(v)); @@ -64067,7 +67798,7 @@ static int sp_384_mod_inv_6(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_6(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_6(u, v) >= 0))) { sp_384_sub_6(u, u, v); o = sp_384_sub_6(b, b, d); if (o != 0) @@ -64102,6 +67833,7 @@ static int sp_384_mod_inv_6(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(d)); + return MP_OKAY; } @@ -64141,7 
+67873,7 @@ static void sp_384_add_points_6(sp_point_384* p1, const sp_point_384* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -64213,6 +67945,106 @@ static int sp_384_calc_vfy_point_6(sp_point_384* p1, sp_point_384* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_384* p1 = NULL; +#else + sp_digit u1[18 * 6]; + sp_point_384 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_384* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 6, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 6; + s = u1 + 4 * 6; + tmp = u1 + 6 * 6; + p2 = p1 + 1; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(u1, 6, hash, (int)hashLen); + sp_384_from_mp(u2, 6, rm); + sp_384_from_mp(s, 6, sm); + sp_384_from_mp(p2->x, 6, pX); + sp_384_from_mp(p2->y, 6, pY); + sp_384_from_mp(p2->z, 6, pZ); + + err = sp_384_calc_vfy_point_6(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. 
*/ + sp_384_from_mp(u2, 6, rm); + err = sp_384_mod_mul_norm_6(u2, u2, p384_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod); + *res = (int)(sp_384_cmp_6(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_384_from_mp(u2, 6, rm); + carry = sp_384_add_6(u2, u2, p384_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_384_norm_6(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_384_cmp_6(u2, p384_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_384_mod_mul_norm_6(u2, u2, p384_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod); + } + *res = (sp_384_cmp_6(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_384_ctx { int state; @@ -64225,7 +68057,7 @@ typedef struct sp_ecc_verify_384_ctx { sp_digit u1[2*6]; sp_digit u2[2*6]; sp_digit s[2*6]; - sp_digit tmp[2*6 * 5]; + sp_digit tmp[2*6 * 6]; sp_point_384 p1; sp_point_384 p2; } sp_ecc_verify_384_ctx; @@ -64362,109 +68194,10 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_384* p1 = NULL; -#else - sp_digit u1[16 * 6]; - sp_point_384 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = NULL; - 
sp_point_384* p2 = NULL; - sp_digit carry; - sp_int64 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 6, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 6; - s = u1 + 4 * 6; - tmp = u1 + 6 * 6; - p2 = p1 + 1; - - if (hashLen > 48U) { - hashLen = 48U; - } - - sp_384_from_bin(u1, 6, hash, (int)hashLen); - sp_384_from_mp(u2, 6, rm); - sp_384_from_mp(s, 6, sm); - sp_384_from_mp(p2->x, 6, pX); - sp_384_from_mp(p2->y, 6, pY); - sp_384_from_mp(p2->z, 6, pZ); - - err = sp_384_calc_vfy_point_6(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_384_from_mp(u2, 6, rm); - err = sp_384_mod_mul_norm_6(u2, u2, p384_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod); - *res = (int)(sp_384_cmp_6(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_384_from_mp(u2, 6, rm); - carry = sp_384_add_6(u2, u2, p384_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_384_norm_6(u2); - - /* Compare with mod and if greater or equal then not valid. 
*/ - c = sp_384_cmp_6(u2, p384_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_384_mod_mul_norm_6(u2, u2, p384_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, - p384_mp_mod); - *res = (sp_384_cmp_6(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -64474,7 +68207,7 @@ int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_384_ecc_is_point_6(const sp_point_384* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[6 * 4]; @@ -64482,7 +68215,7 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -64492,25 +68225,27 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 6; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_6(t1, point->y); (void)sp_384_mod_6(t1, t1, p384_mod); sp_384_sqr_6(t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); sp_384_mul_6(t2, t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); - (void)sp_384_sub_6(t2, p384_mod, t2); - sp_384_mont_add_6(t1, t1, t2, p384_mod); + sp_384_mont_sub_6(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ 
sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); + if (sp_384_cmp_6(t1, p384_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -64518,7 +68253,7 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -64527,7 +68262,7 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, */ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* pub = NULL; #else sp_point_384 pub[1]; @@ -64535,7 +68270,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -64550,7 +68285,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) err = sp_384_ecc_is_point_6(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -64572,7 +68307,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_384* pub = NULL; #else @@ -64593,7 +68328,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, err = 
ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); @@ -64659,7 +68394,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -64688,17 +68423,17 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else - sp_digit tmp[2 * 6 * 5]; + sp_digit tmp[2 * 6 * 6]; sp_point_384 p[2]; #endif sp_point_384* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -64706,7 +68441,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -64741,7 +68476,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -64765,7 +68500,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -64774,7 +68509,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -64809,7 +68544,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -64829,7 +68564,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -64839,7 +68574,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -64873,7 +68608,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_384_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -64891,7 +68626,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_384_mont_sqrt_6(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[5 * 2 * 6]; @@ -64902,7 +68637,7 @@ static int sp_384_mont_sqrt_6(sp_digit* y) sp_digit* t5 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -64972,7 +68707,7 @@ static int sp_384_mont_sqrt_6(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -64990,7 +68725,7 @@ static int sp_384_mont_sqrt_6(sp_digit* y) */ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 6]; @@ -64998,7 +68733,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -65038,7 +68773,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) err = sp_384_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -65047,6 +68782,44905 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) } #endif #endif /* WOLFSSL_SP_384 */ +#ifdef WOLFSSL_SP_521 + +/* Point structure to use. */ +typedef struct sp_point_521 { + /* X ordinate of point. */ + sp_digit x[2 * 9]; + /* Y ordinate of point. */ + sp_digit y[2 * 9]; + /* Z ordinate of point. */ + sp_digit z[2 * 9]; + /* Indicates point is at infinity. */ + int infinity; +} sp_point_521; + +/* The modulus (prime) of the curve P521. 
*/ +static const sp_digit p521_mod[9] = { + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL, + 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL, + 0xffffffffffffffffL,0xffffffffffffffffL,0x00000000000001ffL +}; +/* The Montgomery normalizer for modulus of the curve P521. */ +static const sp_digit p521_norm_mod[9] = { + 0x0000000000000001L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +/* The Montgomery multiplier for modulus of the curve P521. */ +static sp_digit p521_mp_mod = 0x0000000000000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P521. */ +static const sp_digit p521_order[9] = { + 0xbb6fb71e91386409L,0x3bb5c9b8899c47aeL,0x7fcc0148f709a5d0L, + 0x51868783bf2f966bL,0xfffffffffffffffaL,0xffffffffffffffffL, + 0xffffffffffffffffL,0xffffffffffffffffL,0x00000000000001ffL +}; +#endif +/* The order of the curve P521 minus 2. */ +static const sp_digit p521_order2[9] = { + 0xbb6fb71e91386407L,0x3bb5c9b8899c47aeL,0x7fcc0148f709a5d0L, + 0x51868783bf2f966bL,0xfffffffffffffffaL,0xffffffffffffffffL, + 0xffffffffffffffffL,0xffffffffffffffffL,0x00000000000001ffL +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery normalizer for order of the curve P521. */ +static const sp_digit p521_norm_order[9] = { + 0x449048e16ec79bf7L,0xc44a36477663b851L,0x8033feb708f65a2fL, + 0xae79787c40d06994L,0x0000000000000005L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery multiplier for order of the curve P521. */ +static sp_digit p521_mp_order = 0x1d2f5ccd79a995c7L; +#endif +#ifdef WOLFSSL_SP_SMALL +/* The base point of curve P521. 
*/ +static const sp_point_521 p521_base = { + /* X ordinate */ + { + 0xf97e7e31c2e5bd66L,0x3348b3c1856a429bL,0xfe1dc127a2ffa8deL, + 0xa14b5e77efe75928L,0xf828af606b4d3dbaL,0x9c648139053fb521L, + 0x9e3ecb662395b442L,0x858e06b70404e9cdL,0x00000000000000c6L, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* Y ordinate */ + { + 0x88be94769fd16650L,0x353c7086a272c240L,0xc550b9013fad0761L, + 0x97ee72995ef42640L,0x17afbd17273e662cL,0x98f54449579b4468L, + 0x5c8a5fb42c7d1bd9L,0x39296a789a3bc004L,0x0000000000000118L, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* Z ordinate */ + { + 0x0000000000000001L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L, + 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* infinity */ + 0 +}; +#endif /* WOLFSSL_SP_SMALL */ +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p521_b[9] = { + 0xef451fd46b503f00L,0x3573df883d2c34f1L,0x1652c0bd3bb1bf07L, + 0x56193951ec7e937bL,0xb8b489918ef109e1L,0xa2da725b99b315f3L, + 0x929a21a0b68540eeL,0x953eb9618e1c9a1fL,0x0000000000000051L +}; +#endif + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_521_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[18]; + + __asm__ __volatile__ ( + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "\n1:\n\t" + "subs x3, x5, 64\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 72\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 128\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_521_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldp x8, x9, [%[a], 0]\n\t" + "ldp x10, x11, [%[a], 16]\n\t" + "ldp x12, x13, [%[a], 32]\n\t" + "ldp x14, x15, [%[a], 48]\n\t" + "ldr x16, [%[a], 64]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "ldp x22, x23, [%[b], 32]\n\t" + "ldp x24, x25, [%[b], 48]\n\t" + "ldr x26, [%[b], 64]\n\t" + "# A[0] * B[0]\n\t" + "mul x3, x8, x17\n\t" + "umulh x4, x8, x17\n\t" + "str x3, [%[r]]\n\t" + "# A[0] * B[1]\n\t" + "mul x6, x8, x19\n\t" + "umulh x7, x8, x19\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[0]\n\t" + "mul x6, x9, x17\n\t" + "adc x5, xzr, x7\n\t" + "umulh x7, x9, x17\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 8]\n\t" + "adc x3, xzr, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x6, x8, x20\n\t" + "umulh x7, x8, x20\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[1]\n\t" + "mul x6, x9, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x19\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[2] * B[0]\n\t" + "mul x6, x10, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 16]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x6, x8, x21\n\t" + "umulh x7, x8, x21\n\t" + "adds x3, x3, x6\n\t" + "# A[1] * B[2]\n\t" + "mul x6, x9, x20\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x9, x20\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[1]\n\t" + "mul x6, x10, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[3] * B[0]\n\t" + "mul x6, x11, x17\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 24]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[4]\n\t" + "mul x6, x8, x22\n\t" + "umulh x7, x8, x22\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[3]\n\t" 
+ "mul x6, x9, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x9, x21\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B[2]\n\t" + "mul x6, x10, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x10, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[1]\n\t" + "mul x6, x11, x19\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[0]\n\t" + "mul x6, x12, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 32]\n\t" + "adc x3, x3, xzr\n\t" + "# A[0] * B[5]\n\t" + "mul x6, x8, x23\n\t" + "umulh x7, x8, x23\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[4]\n\t" + "mul x6, x9, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x22\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[2] * B[3]\n\t" + "mul x6, x10, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B[2]\n\t" + "mul x6, x11, x20\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x11, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[1]\n\t" + "mul x6, x12, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[0]\n\t" + "mul x6, x13, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 40]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[6]\n\t" + "mul x6, x8, x24\n\t" + "umulh x7, x8, x24\n\t" + "adds x3, x3, x6\n\t" + "# A[1] * B[5]\n\t" + "mul x6, x9, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x9, x23\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[4]\n\t" + "mul x6, x10, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[3] * B[3]\n\t" + "mul x6, x11, x21\n\t" + "adcs x4, x4, x7\n\t" + 
"umulh x7, x11, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B[2]\n\t" + "mul x6, x12, x20\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x12, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[5] * B[1]\n\t" + "mul x6, x13, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[0]\n\t" + "mul x6, x14, x17\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 48]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[7]\n\t" + "mul x6, x8, x25\n\t" + "umulh x7, x8, x25\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[6]\n\t" + "mul x6, x9, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x9, x24\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B[5]\n\t" + "mul x6, x10, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x10, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[4]\n\t" + "mul x6, x11, x22\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x22\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[3]\n\t" + "mul x6, x12, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x21\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B[2]\n\t" + "mul x6, x13, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x13, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[1]\n\t" + "mul x6, x14, x19\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[0]\n\t" + "mul x6, x15, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 56]\n\t" + "adc x3, x3, xzr\n\t" + "# A[0] * B[8]\n\t" + "mul x6, x8, x26\n\t" + "umulh x7, x8, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[7]\n\t" + "mul x6, x9, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x25\n\t" + "adc x4, xzr, xzr\n\t" + 
"adds x5, x5, x6\n\t" + "# A[2] * B[6]\n\t" + "mul x6, x10, x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x24\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B[5]\n\t" + "mul x6, x11, x23\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x11, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[4]\n\t" + "mul x6, x12, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[3]\n\t" + "mul x6, x13, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B[2]\n\t" + "mul x6, x14, x20\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x14, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[1]\n\t" + "mul x6, x15, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[0]\n\t" + "mul x6, x16, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 64]\n\t" + "adc x4, x4, xzr\n\t" + "# A[1] * B[8]\n\t" + "mul x6, x9, x26\n\t" + "umulh x7, x9, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[7]\n\t" + "mul x6, x10, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[3] * B[6]\n\t" + "mul x6, x11, x24\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x24\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B[5]\n\t" + "mul x6, x12, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x12, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[5] * B[4]\n\t" + "mul x6, x13, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[3]\n\t" + "mul x6, x14, x21\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B[2]\n\t" + "mul x6, x15, x20\n\t" + 
"adcs x4, x4, x7\n\t" + "umulh x7, x15, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[1]\n\t" + "mul x6, x16, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 72]\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[8]\n\t" + "mul x6, x10, x26\n\t" + "umulh x7, x10, x26\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[7]\n\t" + "mul x6, x11, x25\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x25\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[6]\n\t" + "mul x6, x12, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x24\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B[5]\n\t" + "mul x6, x13, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x13, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[4]\n\t" + "mul x6, x14, x22\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x22\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[3]\n\t" + "mul x6, x15, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, x21\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B[2]\n\t" + "mul x6, x16, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x16, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 80]\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[8]\n\t" + "mul x6, x11, x26\n\t" + "umulh x7, x11, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[7]\n\t" + "mul x6, x12, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x25\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[6]\n\t" + "mul x6, x13, x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x24\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B[5]\n\t" + "mul x6, x14, x23\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x14, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[4]\n\t" + "mul x6, x15, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, 
x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[3]\n\t" + "mul x6, x16, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 88]\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[8]\n\t" + "mul x6, x12, x26\n\t" + "umulh x7, x12, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[5] * B[7]\n\t" + "mul x6, x13, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[6]\n\t" + "mul x6, x14, x24\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x24\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B[5]\n\t" + "mul x6, x15, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x15, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[4]\n\t" + "mul x6, x16, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 96]\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[8]\n\t" + "mul x6, x13, x26\n\t" + "umulh x7, x13, x26\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[7]\n\t" + "mul x6, x14, x25\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x25\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[6]\n\t" + "mul x6, x15, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, x24\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B[5]\n\t" + "mul x6, x16, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x16, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [%[r], 104]\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[8]\n\t" + "mul x6, x14, x26\n\t" + "umulh x7, x14, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[7]\n\t" + "mul x6, x15, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, x25\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[6]\n\t" + "mul x6, x16, x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x24\n\t" + "adc 
x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [%[r], 112]\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[8]\n\t" + "mul x6, x15, x26\n\t" + "umulh x7, x15, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[7]\n\t" + "mul x6, x16, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [%[r], 120]\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[8]\n\t" + "mul x6, x16, x26\n\t" + "umulh x7, x16, x26\n\t" + "adds x4, x4, x6\n\t" + "adc x5, x5, x7\n\t" + "stp x4, x5, [%[r], 128]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_521_sqr_9(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[18]; + + __asm__ __volatile__ ( + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" + "\n1:\n\t" + "subs x3, x5, 64\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "cmp x4, x3\n\t" + "b.eq 4f\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[a], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "b.al 5f\n\t" + "\n4:\n\t" + "ldr x10, [%[a], x3]\n\t" + "mul x9, x10, x10\n\t" + "umulh x10, x10, x10\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "\n5:\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 72\n\t" + "b.eq 3f\n\t" + "cmp x3, x4\n\t" + "b.gt 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov 
x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 128\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_521_sqr_9(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "ldr x19, [%[a], 64]\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x10, x10\n\t" + "umulh x3, x10, x10\n\t" + "str x2, [%[r]]\n\t" + "mov x4, xzr\n\t" + "# A[0] * A[1]\n\t" + "mul x8, x10, x11\n\t" + "umulh x9, x10, x11\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 8]\n\t" + "# A[0] * A[2]\n\t" + "mul x8, x10, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x10, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[1] * A[1]\n\t" + "mul x8, x11, x11\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x11, x11\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 16]\n\t" + "# A[0] * A[3]\n\t" + "mul x8, x10, x13\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x10, x13\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[1] * A[2]\n\t" + "mul x8, x11, x12\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x11, x12\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 24]\n\t" + "# A[0] * A[4]\n\t" + "mul x8, x10, x14\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x10, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, 
xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[1] * A[3]\n\t" + "mul x8, x11, x13\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x11, x13\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[2] * A[2]\n\t" + "mul x8, x12, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x12, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 32]\n\t" + "# A[0] * A[5]\n\t" + "mul x5, x10, x15\n\t" + "adcs x4, x4, x9\n\t" + "umulh x6, x10, x15\n\t" + "adc x2, x2, xzr\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[4]\n\t" + "mul x8, x11, x14\n\t" + "umulh x9, x11, x14\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[3]\n\t" + "mul x8, x12, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 40]\n\t" + "# A[0] * A[6]\n\t" + "mul x5, x10, x16\n\t" + "umulh x6, x10, x16\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[5]\n\t" + "mul x8, x11, x15\n\t" + "umulh x9, x11, x15\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[4]\n\t" + "mul x8, x12, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[3]\n\t" + "mul x8, x13, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 48]\n\t" + "# A[0] * A[7]\n\t" + "mul x5, x10, x17\n\t" + "umulh x6, x10, x17\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[6]\n\t" + "mul x8, x11, x16\n\t" + "umulh x9, x11, x16\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * 
A[5]\n\t" + "mul x8, x12, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[4]\n\t" + "mul x8, x13, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 56]\n\t" + "# A[0] * A[8]\n\t" + "mul x5, x10, x19\n\t" + "umulh x6, x10, x19\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[7]\n\t" + "mul x8, x11, x17\n\t" + "umulh x9, x11, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[6]\n\t" + "mul x8, x12, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[5]\n\t" + "mul x8, x13, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[4]\n\t" + "mul x8, x14, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 64]\n\t" + "# A[1] * A[8]\n\t" + "mul x5, x11, x19\n\t" + "umulh x6, x11, x19\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[2] * A[7]\n\t" + "mul x8, x12, x17\n\t" + "umulh x9, x12, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[6]\n\t" + "mul x8, x13, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[5]\n\t" + "mul x8, x14, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, 
x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 72]\n\t" + "# A[2] * A[8]\n\t" + "mul x5, x12, x19\n\t" + "umulh x6, x12, x19\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[3] * A[7]\n\t" + "mul x8, x13, x17\n\t" + "umulh x9, x13, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[6]\n\t" + "mul x8, x14, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[5]\n\t" + "mul x8, x15, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 80]\n\t" + "# A[3] * A[8]\n\t" + "mul x5, x13, x19\n\t" + "umulh x6, x13, x19\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[4] * A[7]\n\t" + "mul x8, x14, x17\n\t" + "umulh x9, x14, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[6]\n\t" + "mul x8, x15, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 88]\n\t" + "# A[4] * A[8]\n\t" + "mul x8, x14, x19\n\t" + "umulh x9, x14, x19\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[5] * A[7]\n\t" + "mul x8, x15, x17\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x15, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[6] * A[6]\n\t" + "mul x8, x16, x16\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x16, x16\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 96]\n\t" + "# A[5] * A[8]\n\t" + "mul x8, x15, x19\n\t" + "adcs 
x3, x3, x9\n\t" + "umulh x9, x15, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[6] * A[7]\n\t" + "mul x8, x16, x17\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x16, x17\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [%[r], 104]\n\t" + "# A[6] * A[8]\n\t" + "mul x8, x16, x19\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x16, x19\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[7] * A[7]\n\t" + "mul x8, x17, x17\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x17, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [%[r], 112]\n\t" + "# A[7] * A[8]\n\t" + "mul x8, x17, x19\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x17, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [%[r], 120]\n\t" + "# A[8] * A[8]\n\t" + "mul x8, x19, x19\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x19, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adc x4, x4, x9\n\t" + "stp x3, x4, [%[r], 128]\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "cc" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
 */
static sp_digit sp_521_add_9(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Loop variant: add the 9 limbs in 4-limb chunks (8 limbs in the
     * loop, the 9th afterwards).  Return value is the final carry
     * (0 or 1). */
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* x11 = one past the last limb handled by the 4-limb loop. */
        "add x11, %[a], 64\n\t"
        "\n1:\n\t"
        /* Restore the carry flag from c: c is 0 or 1, and c + (-1)
         * wraps (sets carry) exactly when c == 1. */
        "adds %[c], %[c], #-1\n\t"
        "ldp x3, x4, [%[a]], #16\n\t"
        "ldp x5, x6, [%[a]], #16\n\t"
        "ldp x7, x8, [%[b]], #16\n\t"
        "adcs x3, x3, x7\n\t"
        "ldp x9, x10, [%[b]], #16\n\t"
        "adcs x4, x4, x8\n\t"
        "adcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r]], #16\n\t"
        "adcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r]], #16\n\t"
        /* Save the carry flag into c before cmp clobbers the flags. */
        "adc %[c], xzr, xzr\n\t"
        "cmp %[a], x11\n\t"
        "b.ne 1b\n\t"
        /* Final (9th) limb, same carry save/restore idiom. */
        "adds %[c], %[c], #-1\n\t"
        "ldr x3, [%[a]], #8\n\t"
        "ldr x7, [%[b]], #8\n\t"
        "adcs x3, x3, x7\n\t"
        "str x3, [%[r]], #8\n\t"
        "adc %[c], xzr, xzr\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc"
    );

    return c;
}

#else
/* Add b to a into r. (r = a + b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_521_add_9(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Fully unrolled variant: a single uninterrupted adds/adcs chain
     * over all 9 limbs, so the carry flag never needs to be saved. */
    __asm__ __volatile__ (
        "ldp x3, x4, [%[a], 0]\n\t"
        "ldp x7, x8, [%[b], 0]\n\t"
        "adds x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 16]\n\t"
        "adcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 16]\n\t"
        "adcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 0]\n\t"
        "adcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 16]\n\t"
        "ldp x3, x4, [%[a], 32]\n\t"
        "ldp x7, x8, [%[b], 32]\n\t"
        "adcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 48]\n\t"
        "adcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 48]\n\t"
        "adcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 32]\n\t"
        "adcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 48]\n\t"
        "ldr x3, [%[a], 64]\n\t"
        "ldr x7, [%[b], 64]\n\t"
        "adcs x3, x3, x7\n\t"
        "str x3, [%[r], 64]\n\t"
        /* Reuse the r register to materialize the carry out (0 or 1). */
        "adc %[r], xzr, xzr\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );

    return (sp_digit)r;
}
 */
static sp_digit sp_521_sub_9(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Loop variant.  c carries the borrow between chunks as a mask:
     * 0 = no borrow, all-ones = borrow.  Returns that mask. */
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* x11 = one past the last limb handled by the 4-limb loop. */
        "add x11, %[a], 64\n\t"
        "\n1:\n\t"
        /* 0 - c: clears the carry flag (= signals a borrow to sbcs)
         * exactly when c is non-zero. */
        "subs %[c], xzr, %[c]\n\t"
        "ldp x3, x4, [%[a]], #16\n\t"
        "ldp x5, x6, [%[a]], #16\n\t"
        "ldp x7, x8, [%[b]], #16\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x9, x10, [%[b]], #16\n\t"
        "sbcs x4, x4, x8\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r]], #16\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r]], #16\n\t"
        /* c = all-ones when carry clear (a borrow occurred). */
        "csetm %[c], cc\n\t"
        "cmp %[a], x11\n\t"
        "b.ne 1b\n\t"
        /* Final (9th) limb. */
        "subs %[c], xzr, %[c]\n\t"
        "ldr x3, [%[a]], #8\n\t"
        "ldr x7, [%[b]], #8\n\t"
        "sbcs x3, x3, x7\n\t"
        "str x3, [%[r]], #8\n\t"
        "csetm %[c], cc\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc"
    );

    return c;
}

#else
/* Sub b from a into r. (r = a - b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_521_sub_9(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Fully unrolled variant: one uninterrupted subs/sbcs chain over
     * all 9 limbs; returns all-ones on borrow, 0 otherwise. */
    __asm__ __volatile__ (
        "ldp x3, x4, [%[a], 0]\n\t"
        "ldp x7, x8, [%[b], 0]\n\t"
        "subs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 16]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 16]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 0]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 16]\n\t"
        "ldp x3, x4, [%[a], 32]\n\t"
        "ldp x7, x8, [%[b], 32]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 48]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 48]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 32]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 48]\n\t"
        "ldr x3, [%[a], 64]\n\t"
        "ldr x7, [%[b], 64]\n\t"
        "sbcs x3, x3, x7\n\t"
        "str x3, [%[r], 64]\n\t"
        /* Borrow out as a mask in the reused r register. */
        "csetm %[r], cc\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );

    return (sp_digit)r;
}

#endif /* WOLFSSL_SP_SMALL */
/* Shift a (9 limbs) left by n bits into the 10 limbs at r.
 * r[9] receives the bits shifted out of the top limb.
 *
 * The 64-n right shift is done as (x >> 1) >> (63 - n): AArch64
 * register shifts are taken modulo 64, so a single shift by 64 - n
 * would be wrong for n == 0; splitting it keeps both amounts < 64. */
static void sp_521_lshift_9(sp_digit* r, const sp_digit* a, byte n)
{
    word64 n64 = n;
    __asm__ __volatile__ (
        "mov x6, 63\n\t"
        "sub x6, x6, %[n]\n\t"
        /* Walk limbs from most to least significant. */
        "ldr x3, [%[a], 64]\n\t"
        "lsr x4, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x4, x4, x6\n\t"
        "ldr x2, [%[a], 56]\n\t"
        "str x4, [%[r], 72]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 48]\n\t"
        "str x3, [%[r], 64]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 40]\n\t"
        "str x2, [%[r], 56]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 32]\n\t"
        "str x4, [%[r], 48]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 24]\n\t"
        "str x3, [%[r], 40]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 16]\n\t"
        "str x2, [%[r], 32]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 8]\n\t"
        "str x4, [%[r], 24]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 0]\n\t"
        "str x3, [%[r], 16]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "stp x4, x2, [%[r]]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n64)
        : "memory", "x2", "x3", "x4", "x5", "x6", "cc"
    );
}

/* Shift a (18 limbs) left by n bits into the 19 limbs at r.
 * Same structure and shift-splitting idiom as sp_521_lshift_9. */
static void sp_521_lshift_18(sp_digit* r, const sp_digit* a, byte n)
{
    word64 n64 = n;
    __asm__ __volatile__ (
        "mov x6, 63\n\t"
        "sub x6, x6, %[n]\n\t"
        "ldr x3, [%[a], 136]\n\t"
        "lsr x4, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x4, x4, x6\n\t"
        "ldr x2, [%[a], 128]\n\t"
        "str x4, [%[r], 144]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 120]\n\t"
        "str x3, [%[r], 136]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 112]\n\t"
        "str x2, [%[r], 128]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 104]\n\t"
        "str x4, [%[r], 120]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 96]\n\t"
        "str x3, [%[r], 112]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 88]\n\t"
        "str x2, [%[r], 104]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 80]\n\t"
        "str x4, [%[r], 96]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 72]\n\t"
        "str x3, [%[r], 88]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 64]\n\t"
        "str x2, [%[r], 80]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 56]\n\t"
        "str x4, [%[r], 72]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 48]\n\t"
        "str x3, [%[r], 64]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 40]\n\t"
        "str x2, [%[r], 56]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 32]\n\t"
        "str x4, [%[r], 48]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 24]\n\t"
        "str x3, [%[r], 40]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 16]\n\t"
        "str x2, [%[r], 32]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 8]\n\t"
        "str x4, [%[r], 24]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 0]\n\t"
        "str x3, [%[r], 16]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "stp x4, x2, [%[r]]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n64)
        : "memory", "x2", "x3", "x4", "x5", "x6", "cc"
    );
}

/* Shift a (9 limbs) right by n bits into r; bits shifted out of limb 0
 * are discarded and the top limb is zero-filled from above.
 *
 * NOTE(review): uses a single lsl by (64 - n), which on AArch64 is
 * taken mod 64 — only correct for 1 <= n <= 63.  The one caller in
 * this file (sp_521_div_9) uses n == 55, so that holds here. */
static void sp_521_rshift_9(sp_digit* r, const sp_digit* a, byte n)
{
    sp_uint64 nl = n;
    __asm__ __volatile__ (
        "mov x6, 64\n\t"
        "sub x6, x6, %[n]\n\t"
        "ldp x2, x3, [%[a]]\n\t"
        "lsr x2, x2, %[n]\n\t"
        "lsl x5, x3, x6\n\t"
        "lsr x3, x3, %[n]\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x4, [%[a], 16]\n\t"
        "str x2, [%[r], 0]\n\t"
        "lsl x5, x4, x6\n\t"
        "lsr x4, x4, %[n]\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x2, [%[a], 24]\n\t"
        "str x3, [%[r], 8]\n\t"
        "lsl x5, x2, x6\n\t"
        "lsr x2, x2, %[n]\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x3, [%[a], 32]\n\t"
        "str x4, [%[r], 16]\n\t"
        "lsl x5, x3, x6\n\t"
        "lsr x3, x3, %[n]\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x4, [%[a], 40]\n\t"
        "str x2, [%[r], 24]\n\t"
        "lsl x5, x4, x6\n\t"
        "lsr x4, x4, %[n]\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x2, [%[a], 48]\n\t"
        "str x3, [%[r], 32]\n\t"
        "lsl x5, x2, x6\n\t"
        "lsr x2, x2, %[n]\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x3, [%[a], 56]\n\t"
        "str x4, [%[r], 40]\n\t"
        "lsl x5, x3, x6\n\t"
        "lsr x3, x3, %[n]\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x4, [%[a], 64]\n\t"
        "str x2, [%[r], 48]\n\t"
        "lsl x5, x4, x6\n\t"
        "lsr x4, x4, %[n]\n\t"
        "orr x3, x3, x5\n\t"
        "stp x3, x4, [%[r], 56]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (nl)
        : "memory", "x2", "x3", "x4", "x5", "x6", "cc"
    );
}

#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_521_sub_in_place_9(sp_digit* a, const sp_digit* b)
{
    /* Loop variant; c is the borrow mask carried across chunks
     * (0 = no borrow, all-ones = borrow) and is the return value. */
    sp_digit c = 0;

    __asm__ __volatile__ (
        "add x10, %[a], 64\n\t"
        "\n1:\n\t"
        /* 0 - c clears carry (signals borrow) exactly when c != 0. */
        "subs %[c], xzr, %[c]\n\t"
        "ldp x2, x3, [%[a]]\n\t"
        "ldp x4, x5, [%[a], #16]\n\t"
        "ldp x6, x7, [%[b]], #16\n\t"
        "sbcs x2, x2, x6\n\t"
        "ldp x8, x9, [%[b]], #16\n\t"
        "sbcs x3, x3, x7\n\t"
        "sbcs x4, x4, x8\n\t"
        "stp x2, x3, [%[a]], #16\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x4, x5, [%[a]], #16\n\t"
        "csetm %[c], cc\n\t"
        "cmp %[a], x10\n\t"
        "b.ne 1b\n\t"
        /* Final (9th) limb. */
        "subs %[c], xzr, %[c]\n\t"
        "ldr x2, [%[a], 0]\n\t"
        "ldr x6, [%[b]], #8\n\t"
        "sbcs x2, x2, x6\n\t"
        "str x2, [%[a]], #8\n\t"
        "csetm %[c], cc\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );

    return c;
}

#else
/* Sub b from a into a. (a -= b)
 *
 * a A single precision integer and result.
 * b A single precision integer.
 */
static sp_digit sp_521_sub_in_place_9(sp_digit* a, const sp_digit* b)
{
    /* Fully unrolled in-place subtract; one uninterrupted subs/sbcs
     * chain over all 9 limbs.  Returns all-ones on borrow, else 0. */
    __asm__ __volatile__ (
        "ldp x2, x3, [%[a], 0]\n\t"
        "ldp x6, x7, [%[b], 0]\n\t"
        "subs x2, x2, x6\n\t"
        "ldp x4, x5, [%[a], 16]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x8, x9, [%[b], 16]\n\t"
        "sbcs x4, x4, x8\n\t"
        "stp x2, x3, [%[a], 0]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x4, x5, [%[a], 16]\n\t"
        "ldp x2, x3, [%[a], 32]\n\t"
        "ldp x6, x7, [%[b], 32]\n\t"
        "sbcs x2, x2, x6\n\t"
        "ldp x4, x5, [%[a], 48]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x8, x9, [%[b], 48]\n\t"
        "sbcs x4, x4, x8\n\t"
        "stp x2, x3, [%[a], 32]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x4, x5, [%[a], 48]\n\t"
        "ldr x2, [%[a], 64]\n\t"
        "ldr x6, [%[b], 64]\n\t"
        "sbcs x2, x2, x6\n\t"
        "str x2, [%[a], 64]\n\t"
        /* Borrow out as a mask in the reused a register. */
        "csetm %[a], cc\n\t"
        : [a] "+r" (a)
        : [b] "r" (b)
        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc"
    );

    return (sp_digit)a;
}

#endif /* WOLFSSL_SP_SMALL */
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not copying.
 *
 * r A single precision number representing condition subtract result.
 * a A single precision number to subtract from.
 * b A single precision number to subtract.
 * m Mask value to apply.
 */
static sp_digit sp_521_cond_sub_9(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
/* Constant-time: b is masked with m before subtracting, so the same
 * instruction sequence runs whether or not the subtraction happens. */
#ifdef WOLFSSL_SP_SMALL
    sp_digit c = 0;

    __asm__ __volatile__ (
        "mov x8, #0\n\t"
        "1:\n\t"
        /* Restore borrow from the mask in c (see sp_521_sub_9). */
        "subs %[c], xzr, %[c]\n\t"
        "ldr x4, [%[a], x8]\n\t"
        "ldr x5, [%[b], x8]\n\t"
        "and x5, x5, %[m]\n\t"
        "sbcs x4, x4, x5\n\t"
        "csetm %[c], cc\n\t"
        "str x4, [%[r], x8]\n\t"
        "add x8, x8, #8\n\t"
        /* 72 = 9 limbs * 8 bytes. */
        "cmp x8, 72\n\t"
        "b.lt 1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc"
    );

    return c;
#else
    __asm__ __volatile__ (

        "ldp x5, x7, [%[b], 0]\n\t"
        "ldp x11, x12, [%[b], 16]\n\t"
        "ldp x4, x6, [%[a], 0]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 16]\n\t"
        "and x7, x7, %[m]\n\t"
        "subs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "sbcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "sbcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 0]\n\t"
        "sbcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 16]\n\t"
        "ldp x5, x7, [%[b], 32]\n\t"
        "ldp x11, x12, [%[b], 48]\n\t"
        "ldp x4, x6, [%[a], 32]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 48]\n\t"
        "and x7, x7, %[m]\n\t"
        "sbcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "sbcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "sbcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 32]\n\t"
        "sbcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 48]\n\t"
        "ldr x5, [%[b], 64]\n\t"
        "ldr x4, [%[a], 64]\n\t"
        "and x5, x5, %[m]\n\t"
        "sbcs x4, x4, x5\n\t"
        "str x4, [%[r], 64]\n\t"
        /* Borrow out as a mask in the reused r register. */
        "csetm %[r], cc\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc"
    );

    return (sp_digit)r;
#endif /* WOLFSSL_SP_SMALL */
}

/* Mul a by digit b into r. (r = a * b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision digit.
 */
static void sp_521_mul_d_9(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
/* 9x1 limb product into 10 limbs: each 64x64 step produces a
 * (mul, umulh) pair that is accumulated into a 3-limb sliding window. */
#ifdef WOLFSSL_SP_SMALL
    __asm__ __volatile__ (
        "# A[0] * B\n\t"
        "ldr x8, [%[a]]\n\t"
        "mul x5, %[b], x8\n\t"
        "umulh x3, %[b], x8\n\t"
        "mov x4, xzr\n\t"
        "str x5, [%[r]]\n\t"
        "mov x5, xzr\n\t"
        "mov x9, #8\n\t"
        "1:\n\t"
        "ldr x8, [%[a], x9]\n\t"
        "mul x6, %[b], x8\n\t"
        "umulh x7, %[b], x8\n\t"
        "adds x3, x3, x6\n\t"
        "adcs x4, x4, x7\n\t"
        "adc x5, xzr, xzr\n\t"
        "str x3, [%[r], x9]\n\t"
        /* Slide the 3-limb accumulator window down one limb. */
        "mov x3, x4\n\t"
        "mov x4, x5\n\t"
        "mov x5, #0\n\t"
        "add x9, x9, #8\n\t"
        "cmp x9, 72\n\t"
        "b.lt 1b\n\t"
        "str x3, [%[r], 72]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc"
    );
#else
    __asm__ __volatile__ (
        "# A[0] * B\n\t"
        "ldp x9, x10, [%[a]]\n\t"
        "mul x3, %[b], x9\n\t"
        "umulh x4, %[b], x9\n\t"
        "mov x5, xzr\n\t"
        "# A[1] * B\n\t"
        "str x3, [%[r]]\n\t"
        "mul x6, %[b], x10\n\t"
        "mov x3, xzr\n\t"
        "umulh x7, %[b], x10\n\t"
        "adds x4, x4, x6\n\t"
        "# A[2] * B\n\t"
        "ldp x9, x10, [%[a], 16]\n\t"
        "str x4, [%[r], 8]\n\t"
        "adcs x5, x5, x7\n\t"
        "mul x6, %[b], x9\n\t"
        "mov x4, xzr\n\t"
        "umulh x7, %[b], x9\n\t"
        "adc x3, xzr, xzr\n\t"
        "adds x5, x5, x6\n\t"
        "# A[3] * B\n\t"
        "str x5, [%[r], 16]\n\t"
        "adcs x3, x3, x7\n\t"
        "mul x6, %[b], x10\n\t"
        "mov x5, xzr\n\t"
        "umulh x7, %[b], x10\n\t"
        "adc x4, xzr, xzr\n\t"
        "adds x3, x3, x6\n\t"
        "# A[4] * B\n\t"
        "ldp x9, x10, [%[a], 32]\n\t"
        "str x3, [%[r], 24]\n\t"
        "adcs x4, x4, x7\n\t"
        "mul x6, %[b], x9\n\t"
        "mov x3, xzr\n\t"
        "umulh x7, %[b], x9\n\t"
        "adc x5, xzr, xzr\n\t"
        "adds x4, x4, x6\n\t"
        "# A[5] * B\n\t"
        "str x4, [%[r], 32]\n\t"
        "adcs x5, x5, x7\n\t"
        "mul x6, %[b], x10\n\t"
        "mov x4, xzr\n\t"
        "umulh x7, %[b], x10\n\t"
        "adc x3, xzr, xzr\n\t"
        "adds x5, x5, x6\n\t"
        "# A[6] * B\n\t"
        "ldr x9, [%[a], 48]\n\t"
        "str x5, [%[r], 40]\n\t"
        "adcs x3, x3, x7\n\t"
        "mul x6, %[b], x9\n\t"
        "mov x5, xzr\n\t"
        "umulh x7, %[b], x9\n\t"
        "adc x4, xzr, xzr\n\t"
        "adds x3, x3, x6\n\t"
        "# A[7] * B\n\t"
        "ldp x9, x10, [%[a], 56]\n\t"
        "str x3, [%[r], 48]\n\t"
        "adcs x4, x4, x7\n\t"
        "mul x6, %[b], x9\n\t"
        "mov x3, xzr\n\t"
        "umulh x7, %[b], x9\n\t"
        "adc x5, xzr, xzr\n\t"
        "adds x4, x4, x6\n\t"
        "# A[8] * B\n\t"
        "str x4, [%[r], 56]\n\t"
        "mul x6, %[b], x10\n\t"
        "adcs x5, x5, x7\n\t"
        "umulh x7, %[b], x10\n\t"
        "adc x3, xzr, xzr\n\t"
        "adds x5, x5, x6\n\t"
        "adc x3, x3, x7\n\t"
        "str x5, [%[r], 64]\n\t"
        "str x3, [%[r], 72]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );
#endif
}

/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * Assumes divisor has highest bit set.
 *
 * d1 The high order half of the number to divide.
 * d0 The low order half of the number to divide.
 * div The divisor.
 * returns the result of the division.
 */
static sp_digit div_521_word_9(sp_digit d1, sp_digit d0, sp_digit div)
{
    /* 128/64 division via three 32-bit quotient estimates (each
     * computed with udiv against (div >> 32) + 1, so every estimate
     * is an under-estimate), followed by a final exact udiv on the
     * remaining 64-bit remainder. */
    __asm__ __volatile__ (
        "lsr x8, %[div], 32\n\t"
        "add x5, x8, 1\n\t"

        "udiv x3, %[d1], x5\n\t"
        "lsl x7, %[div], 32\n\t"
        "movz x9, #1, lsl 32\n\t"
        "lsl x6, x3, 32\n\t"
        "mul x4, %[div], x6\n\t"
        "umulh x3, %[div], x6\n\t"
        "subs %[d0], %[d0], x4\n\t"
        "sbc %[d1], %[d1], x3\n\t"

        /* One conditional correction step, applied branch-free via
         * masks so the sequence is constant time. */
        "cmp %[d1], x5\n\t"
        "cset x9, ge\n\t"
        "csetm x10, ge\n\t"
        "lsl x9, x9, #32\n\t"
        "and x7, x7, x10\n\t"
        "and x8, x8, x10\n\t"
        "subs %[d0], %[d0], x7\n\t"
        "add x6, x6, x9\n\t"
        "sbc %[d1], %[d1], x8\n\t"

        "extr x3, %[d1], %[d0], 32\n\t"

        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[div], x3\n\t"
        "umulh x3, %[div], x3\n\t"
        "subs %[d0], %[d0], x4\n\t"
        "sbc %[d1], %[d1], x3\n\t"

        "extr x3, %[d1], %[d0], 32\n\t"

        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[div], x3\n\t"
        "sub %[d0], %[d0], x4\n\t"

        /* Final exact step; accumulated quotient returned in d1. */
        "udiv x3, %[d0], %[div]\n\t"
        "add %[d1], x6, x3\n\t"

        : [d1] "+r" (d1), [d0] "+r" (d0)
        : [div] "r" (div)
        : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );

    return d1;
}

/* AND m into each word of a and store in r.
 *
 * r A single precision integer.
 * a A single precision integer.
 * m Mask to AND against each digit.
 */
static void sp_521_mask_9(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int i;

    for (i=0; i<9; i++) {
        r[i] = a[i] & m;
    }
#else
    r[0] = a[0] & m;
    r[1] = a[1] & m;
    r[2] = a[2] & m;
    r[3] = a[3] & m;
    r[4] = a[4] & m;
    r[5] = a[5] & m;
    r[6] = a[6] & m;
    r[7] = a[7] & m;
    r[8] = a[8] & m;
#endif
}

/* Compare a with b in constant time.
 *
 * a A single precision integer.
 * b A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static sp_int64 sp_521_cmp_9(const sp_digit* a, const sp_digit* b)
{
/* Walks limbs from most to least significant, keeping x3 = result so
 * far (-1 once a lower limb-pair decided "less") and x2 = "still
 * equal" mask; later limbs only contribute while x2 is all-ones. */
#ifdef WOLFSSL_SP_SMALL
    __asm__ __volatile__ (
        "mov x3, #0\n\t"
        "mov x2, #-1\n\t"
        "mov x10, #8\n\t"
        "add %[a], %[a], #56\n\t"
        "add %[b], %[b], #56\n\t"
        "1:\n\t"
        "ldp x6, x7, [%[a]], -16\n\t"
        "ldp x8, x9, [%[b]], -16\n\t"
        "subs x7, x7, x9\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x10, x10, #2\n\t"
        "b.ne 1b\n\t"
        /* Lowest (9th) limb, at the now-decremented pointers. */
        "ldr x6, [%[a], 8]\n\t"
        "ldr x8, [%[b], 8]\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        /* Combine: +1 if all limbs equal-or-greater and not all equal,
         * -1 if less, 0 if equal. */
        "cmp x2, #0\n\t"
        "cset %[a], eq\n\t"
        "orr %[a], %[a], x3\n\t"
        : [a] "+r" (a), [b] "+r" (b)
        :
        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
    );
#else
    __asm__ __volatile__ (
        "mov x3, #0\n\t"
        "mov x2, #-1\n\t"
        "ldp x6, x7, [%[a], 56]\n\t"
        "ldp x8, x9, [%[b], 56]\n\t"
        "subs x7, x7, x9\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "ldp x6, x7, [%[a], 40]\n\t"
        "ldp x8, x9, [%[b], 40]\n\t"
        "subs x7, x7, x9\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "ldp x6, x7, [%[a], 24]\n\t"
        "ldp x8, x9, [%[b], 24]\n\t"
        "subs x7, x7, x9\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "ldp x6, x7, [%[a], 8]\n\t"
        "ldp x8, x9, [%[b], 8]\n\t"
        "subs x7, x7, x9\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "ldr x6, [%[a]]\n\t"
        "ldr x8, [%[b]]\n\t"
        "subs x6, x6, x8\n\t"
        "csel x4, x2, xzr, lo\n\t"
        "csetm x5, eq\n\t"
        "orr x3, x3, x4\n\t"
        "and x2, x2, x5\n\t"
        "cmp x2, #0\n\t"
        "cset %[a], eq\n\t"
        "orr %[a], %[a], x3\n\t"
        : [a] "+r" (a)
        : [b] "r" (b)
        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc"
    );
#endif

    return (sp_int64)a;
}

/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * a Number to be divided.
 * d Number to divide with.
 * m Multiplier result.
 * r Remainder from the division.
 * returns MP_OKAY indicating success.
+ */ +static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) +{ + sp_digit t1[19]; + sp_digit t2[10]; + sp_digit sd[10]; + sp_digit div; + sp_digit r1; + int i; + + ASSERT_SAVED_VECTOR_REGISTERS(); + + (void)m; + div = (d[8] << 55) | (d[7] >> 9); + XMEMCPY(t1, a, sizeof(*t1) * 2 * 9); + r1 = sp_521_cmp_9(&t1[9], d) >= 0; + sp_521_cond_sub_9(&t1[9], &t1[9], d, (sp_digit)0 - r1); + sp_521_lshift_9(sd, d, 55); + sp_521_lshift_18(t1, t1, 55); + + for (i = 8; i >= 0; i--) { + sp_digit hi = t1[9 + i] - (t1[9 + i] == div); + r1 = div_521_word_9(hi, t1[9 + i - 1], div); + + sp_521_mul_d_9(t2, sd, r1); + t1[9 + i] += sp_521_sub_in_place_9(&t1[i], t2); + t1[9 + i] -= t2[9]; + sp_521_mask_9(t2, sd, t1[9 + i]); + t1[9 + i] += sp_521_add_9(&t1[i], &t1[i], t2); + sp_521_mask_9(t2, sd, t1[9 + i]); + t1[9 + i] += sp_521_add_9(&t1[i], &t1[i], t2); + } + + r1 = sp_521_cmp_9(t1, sd) >= 0; + sp_521_cond_sub_9(r, t1, sd, (sp_digit)0 - r1); + sp_521_rshift_9(r, r, 55); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_521_mod_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_521_div_9(a, m, NULL, r); +} + +/* Multiply a number by Montgomery normalizer mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. + */ +static int sp_521_mod_mul_norm_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) +{ + sp_521_mul_9(r, a, p521_norm_mod); + return sp_521_mod_9(r, r, m); +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. 
+ * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_521_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; + + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); + } +#elif DIGIT_BIT > 64 + unsigned int i; + int j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < (unsigned int)a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = (sp_digit)0; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + unsigned int i; + int j = 0; + int s = 0; + + r[0] = 0; + for (i = 0; i < (unsigned int)a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_521. + * + * p Point of type sp_point_521 (result). + * pm Point of type ecc_point. 
+ */ +static void sp_521_point_from_ecc_point_9(sp_point_521* p, + const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_521_from_mp(p->x, 9, pm->x); + sp_521_from_mp(p->y, 9, pm->y); + sp_521_from_mp(p->z, 9, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_521_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (521 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 9); + r->used = 9; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i; + int j = 0; + int s = 0; + + r->dp[0] = 0; + for (i = 0; i < 9; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 64 - s; + } + r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i; + int j = 0; + int s = 0; + + r->dp[0] = 0; + for (i = 0; i < 9; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_521 to type ecc_point. + * + * p Point of type sp_point_521. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
+ */ +static int sp_521_point_to_ecc_point_9(const sp_point_521* p, ecc_point* pm) +{ + int err; + + err = sp_521_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pm->z); + } + + return err; +} + +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. + */ +static void sp_521_cond_copy_9(sp_digit* r, const sp_digit* a, sp_digit m) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[r], 0]\n\t" + "ldp x5, x6, [%[r], 16]\n\t" + "ldp x7, x8, [%[r], 32]\n\t" + "ldp x9, x10, [%[r], 48]\n\t" + "ldr x11, [%[r], 64]\n\t" + "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 16]\n\t" + "ldp x16, x17, [%[a], 32]\n\t" + "ldp x19, x20, [%[a], 48]\n\t" + "ldr x21, [%[a], 64]\n\t" + "eor x12, x12, x3\n\t" + "eor x13, x13, x4\n\t" + "eor x14, x14, x5\n\t" + "eor x15, x15, x6\n\t" + "eor x16, x16, x7\n\t" + "eor x17, x17, x8\n\t" + "eor x19, x19, x9\n\t" + "eor x20, x20, x10\n\t" + "eor x21, x21, x11\n\t" + "and x12, x12, %[m]\n\t" + "and x13, x13, %[m]\n\t" + "and x14, x14, %[m]\n\t" + "and x15, x15, %[m]\n\t" + "and x16, x16, %[m]\n\t" + "and x17, x17, %[m]\n\t" + "and x19, x19, %[m]\n\t" + "and x20, x20, %[m]\n\t" + "and x21, x21, %[m]\n\t" + "eor x3, x3, x12\n\t" + "eor x4, x4, x13\n\t" + "eor x5, x5, x14\n\t" + "eor x6, x6, x15\n\t" + "eor x7, x7, x16\n\t" + "eor x8, x8, x17\n\t" + "eor x9, x9, x19\n\t" + "eor x10, x10, x20\n\t" + "eor x11, x11, x21\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "stp x7, x8, [%[r], 32]\n\t" + "stp x9, x10, [%[r], 48]\n\t" + "str x11, [%[r], 64]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "cc" + ); +} + +/* Multiply two Montgomery form numbers mod the modulus 
(prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + (void)m; + (void)mp; + + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-0xa0]!\n\t" + "add x29, sp, #16\n\t" + "ldp x8, x9, [%[a], 0]\n\t" + "ldp x10, x11, [%[a], 16]\n\t" + "ldp x12, x13, [%[a], 32]\n\t" + "ldp x14, x15, [%[a], 48]\n\t" + "ldr x16, [%[a], 64]\n\t" + "ldp x17, x19, [%[b], 0]\n\t" + "ldp x20, x21, [%[b], 16]\n\t" + "ldp x22, x23, [%[b], 32]\n\t" + "ldp x24, x25, [%[b], 48]\n\t" + "ldr x26, [%[b], 64]\n\t" + "# A[0] * B[0]\n\t" + "mul x3, x8, x17\n\t" + "umulh x4, x8, x17\n\t" + "str x3, [x29]\n\t" + "# A[0] * B[1]\n\t" + "mul x6, x8, x19\n\t" + "umulh x7, x8, x19\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[0]\n\t" + "mul x6, x9, x17\n\t" + "adc x5, xzr, x7\n\t" + "umulh x7, x9, x17\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [x29, 8]\n\t" + "adc x3, xzr, xzr\n\t" + "# A[0] * B[2]\n\t" + "mul x6, x8, x20\n\t" + "umulh x7, x8, x20\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[1]\n\t" + "mul x6, x9, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x19\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[2] * B[0]\n\t" + "mul x6, x10, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [x29, 16]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[3]\n\t" + "mul x6, x8, x21\n\t" + "umulh x7, x8, x21\n\t" + "adds x3, x3, x6\n\t" + "# A[1] * B[2]\n\t" + "mul x6, x9, x20\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x9, x20\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[1]\n\t" + "mul x6, x10, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x19\n\t" + "adc x5, x5, xzr\n\t" + 
"adds x3, x3, x6\n\t" + "# A[3] * B[0]\n\t" + "mul x6, x11, x17\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [x29, 24]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[4]\n\t" + "mul x6, x8, x22\n\t" + "umulh x7, x8, x22\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[3]\n\t" + "mul x6, x9, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x9, x21\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B[2]\n\t" + "mul x6, x10, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x10, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[1]\n\t" + "mul x6, x11, x19\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[0]\n\t" + "mul x6, x12, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [x29, 32]\n\t" + "adc x3, x3, xzr\n\t" + "# A[0] * B[5]\n\t" + "mul x6, x8, x23\n\t" + "umulh x7, x8, x23\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[4]\n\t" + "mul x6, x9, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x22\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[2] * B[3]\n\t" + "mul x6, x10, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B[2]\n\t" + "mul x6, x11, x20\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x11, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[1]\n\t" + "mul x6, x12, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[0]\n\t" + "mul x6, x13, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [x29, 40]\n\t" + "adc x4, x4, xzr\n\t" + "# A[0] * B[6]\n\t" + "mul x6, x8, x24\n\t" + "umulh x7, x8, x24\n\t" + "adds x3, x3, x6\n\t" + "# A[1] 
* B[5]\n\t" + "mul x6, x9, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x9, x23\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[4]\n\t" + "mul x6, x10, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[3] * B[3]\n\t" + "mul x6, x11, x21\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B[2]\n\t" + "mul x6, x12, x20\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x12, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[5] * B[1]\n\t" + "mul x6, x13, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[0]\n\t" + "mul x6, x14, x17\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x17\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [x29, 48]\n\t" + "adc x5, x5, xzr\n\t" + "# A[0] * B[7]\n\t" + "mul x6, x8, x25\n\t" + "umulh x7, x8, x25\n\t" + "adds x4, x4, x6\n\t" + "# A[1] * B[6]\n\t" + "mul x6, x9, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x9, x24\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[2] * B[5]\n\t" + "mul x6, x10, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x10, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[4]\n\t" + "mul x6, x11, x22\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x22\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[3]\n\t" + "mul x6, x12, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x21\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B[2]\n\t" + "mul x6, x13, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x13, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[1]\n\t" + "mul x6, x14, x19\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[0]\n\t" + "mul x6, x15, x17\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, 
x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [x29, 56]\n\t" + "adc x3, x3, xzr\n\t" + "# A[0] * B[8]\n\t" + "mul x6, x8, x26\n\t" + "umulh x7, x8, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[1] * B[7]\n\t" + "mul x6, x9, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x9, x25\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[2] * B[6]\n\t" + "mul x6, x10, x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x10, x24\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[3] * B[5]\n\t" + "mul x6, x11, x23\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x11, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[4]\n\t" + "mul x6, x12, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[3]\n\t" + "mul x6, x13, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B[2]\n\t" + "mul x6, x14, x20\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x14, x20\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[1]\n\t" + "mul x6, x15, x19\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[0]\n\t" + "mul x6, x16, x17\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [x29, 64]\n\t" + "adc x4, x4, xzr\n\t" + "# A[1] * B[8]\n\t" + "mul x6, x9, x26\n\t" + "umulh x7, x9, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[2] * B[7]\n\t" + "mul x6, x10, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x10, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[3] * B[6]\n\t" + "mul x6, x11, x24\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x11, x24\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[4] * B[5]\n\t" + "mul x6, x12, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x12, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, 
x6\n\t" + "# A[5] * B[4]\n\t" + "mul x6, x13, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[3]\n\t" + "mul x6, x14, x21\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x21\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B[2]\n\t" + "mul x6, x15, x20\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x15, x20\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[1]\n\t" + "mul x6, x16, x19\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x19\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [x29, 72]\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[8]\n\t" + "mul x6, x10, x26\n\t" + "umulh x7, x10, x26\n\t" + "adds x4, x4, x6\n\t" + "# A[3] * B[7]\n\t" + "mul x6, x11, x25\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x11, x25\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[4] * B[6]\n\t" + "mul x6, x12, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x12, x24\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[5] * B[5]\n\t" + "mul x6, x13, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x13, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[4]\n\t" + "mul x6, x14, x22\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x22\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[3]\n\t" + "mul x6, x15, x21\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, x21\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B[2]\n\t" + "mul x6, x16, x20\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x16, x20\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [x29, 80]\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[8]\n\t" + "mul x6, x11, x26\n\t" + "umulh x7, x11, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[4] * B[7]\n\t" + "mul x6, x12, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x12, x25\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[5] * B[6]\n\t" + "mul x6, x13, 
x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x13, x24\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[6] * B[5]\n\t" + "mul x6, x14, x23\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x14, x23\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[4]\n\t" + "mul x6, x15, x22\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, x22\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[3]\n\t" + "mul x6, x16, x21\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x21\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [x29, 88]\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[8]\n\t" + "mul x6, x12, x26\n\t" + "umulh x7, x12, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[5] * B[7]\n\t" + "mul x6, x13, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x13, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[6] * B[6]\n\t" + "mul x6, x14, x24\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x14, x24\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[7] * B[5]\n\t" + "mul x6, x15, x23\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x15, x23\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[4]\n\t" + "mul x6, x16, x22\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x22\n\t" + "adc x5, x5, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [x29, 96]\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[8]\n\t" + "mul x6, x13, x26\n\t" + "umulh x7, x13, x26\n\t" + "adds x4, x4, x6\n\t" + "# A[6] * B[7]\n\t" + "mul x6, x14, x25\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x14, x25\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[7] * B[6]\n\t" + "mul x6, x15, x24\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x15, x24\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "# A[8] * B[5]\n\t" + "mul x6, x16, x23\n\t" + "adcs x5, x5, x7\n\t" + "umulh x7, x16, x23\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "str x4, [x29, 104]\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] 
* B[8]\n\t" + "mul x6, x14, x26\n\t" + "umulh x7, x14, x26\n\t" + "adds x5, x5, x6\n\t" + "# A[7] * B[7]\n\t" + "mul x6, x15, x25\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x15, x25\n\t" + "adc x4, xzr, xzr\n\t" + "adds x5, x5, x6\n\t" + "# A[8] * B[6]\n\t" + "mul x6, x16, x24\n\t" + "adcs x3, x3, x7\n\t" + "umulh x7, x16, x24\n\t" + "adc x4, x4, xzr\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "str x5, [x29, 112]\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[8]\n\t" + "mul x6, x15, x26\n\t" + "umulh x7, x15, x26\n\t" + "adds x3, x3, x6\n\t" + "# A[8] * B[7]\n\t" + "mul x6, x16, x25\n\t" + "adcs x4, x4, x7\n\t" + "umulh x7, x16, x25\n\t" + "adc x5, xzr, xzr\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "str x3, [x29, 120]\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[8]\n\t" + "mul x6, x16, x26\n\t" + "umulh x7, x16, x26\n\t" + "adds x4, x4, x6\n\t" + "adc x5, x5, x7\n\t" + "stp x4, x5, [x29, 128]\n\t" + "ldp x8, x9, [x29, 0]\n\t" + "ldp x10, x11, [x29, 16]\n\t" + "ldp x12, x13, [x29, 32]\n\t" + "ldp x14, x15, [x29, 48]\n\t" + "ldp x17, x19, [x29, 64]\n\t" + "ldp x20, x21, [x29, 80]\n\t" + "ldp x22, x23, [x29, 96]\n\t" + "ldp x24, x25, [x29, 112]\n\t" + "ldr x26, [x29, 128]\n\t" + "and x16, x17, 0x1ff\n\t" + "lsr x17, x17, 9\n\t" + "orr x17, x17, x19, lsl #55\n\t" + "lsr x19, x19, 9\n\t" + "orr x19, x19, x20, lsl #55\n\t" + "lsr x20, x20, 9\n\t" + "orr x20, x20, x21, lsl #55\n\t" + "lsr x21, x21, 9\n\t" + "orr x21, x21, x22, lsl #55\n\t" + "lsr x22, x22, 9\n\t" + "orr x22, x22, x23, lsl #55\n\t" + "lsr x23, x23, 9\n\t" + "orr x23, x23, x24, lsl #55\n\t" + "lsr x24, x24, 9\n\t" + "orr x24, x24, x25, lsl #55\n\t" + "lsr x25, x25, 9\n\t" + "orr x25, x25, x26, lsl #55\n\t" + "lsr x26, x26, 9\n\t" + "adds x8, x8, x17\n\t" + "adcs x9, x9, x19\n\t" + "adcs x10, x10, x20\n\t" + "adcs x11, x11, x21\n\t" + "adcs x12, x12, x22\n\t" + "adcs x13, x13, x23\n\t" + "adcs x14, x14, x24\n\t" + "adcs x15, x15, x25\n\t" + "adcs x16, x16, x26\n\t" + "lsr x17, x16, 9\n\t" + 
"and x16, x16, 0x1ff\n\t" + "adds x8, x8, x17\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "adcs x11, x11, xzr\n\t" + "adcs x12, x12, xzr\n\t" + "adcs x13, x13, xzr\n\t" + "adcs x14, x14, xzr\n\t" + "adcs x15, x15, xzr\n\t" + "adcs x16, x16, xzr\n\t" + "stp x8, x9, [%[r], 0]\n\t" + "stp x10, x11, [%[r], 16]\n\t" + "stp x12, x13, [%[r], 32]\n\t" + "stp x14, x15, [%[r], 48]\n\t" + "str x16, [%[r], 64]\n\t" + "ldp x29, x30, [sp], #0xa0\n\t" + : [a] "+r" (a), [b] "+r" (b) + : [r] "r" (r) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "cc" + ); +} + +/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_sqr_9(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + (void)m; + (void)mp; + + __asm__ __volatile__ ( + "stp x29, x30, [sp, #-0xa0]!\n\t" + "add x29, sp, #16\n\t" + "ldp x10, x11, [%[a], 0]\n\t" + "ldp x12, x13, [%[a], 16]\n\t" + "ldp x14, x15, [%[a], 32]\n\t" + "ldp x16, x17, [%[a], 48]\n\t" + "ldr x19, [%[a], 64]\n\t" + "# A[0] * A[0]\n\t" + "mul x2, x10, x10\n\t" + "umulh x3, x10, x10\n\t" + "str x2, [x29]\n\t" + "mov x4, xzr\n\t" + "# A[0] * A[1]\n\t" + "mul x8, x10, x11\n\t" + "umulh x9, x10, x11\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [x29, 8]\n\t" + "# A[0] * A[2]\n\t" + "mul x8, x10, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x10, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[1] * A[1]\n\t" + "mul x8, x11, x11\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x11, x11\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [x29, 16]\n\t" + "# A[0] 
* A[3]\n\t" + "mul x8, x10, x13\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x10, x13\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[1] * A[2]\n\t" + "mul x8, x11, x12\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x11, x12\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [x29, 24]\n\t" + "# A[0] * A[4]\n\t" + "mul x8, x10, x14\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x10, x14\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[1] * A[3]\n\t" + "mul x8, x11, x13\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x11, x13\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[2] * A[2]\n\t" + "mul x8, x12, x12\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x12, x12\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [x29, 32]\n\t" + "# A[0] * A[5]\n\t" + "mul x5, x10, x15\n\t" + "adcs x4, x4, x9\n\t" + "umulh x6, x10, x15\n\t" + "adc x2, x2, xzr\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[4]\n\t" + "mul x8, x11, x14\n\t" + "umulh x9, x11, x14\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[3]\n\t" + "mul x8, x12, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [x29, 40]\n\t" + "# A[0] * A[6]\n\t" + "mul x5, x10, x16\n\t" + "umulh x6, x10, x16\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[5]\n\t" + "mul x8, x11, x15\n\t" + "umulh x9, x11, x15\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[4]\n\t" + "mul x8, x12, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x14\n\t" + 
"adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[3]\n\t" + "mul x8, x13, x13\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x13\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [x29, 48]\n\t" + "# A[0] * A[7]\n\t" + "mul x5, x10, x17\n\t" + "umulh x6, x10, x17\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[6]\n\t" + "mul x8, x11, x16\n\t" + "umulh x9, x11, x16\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[5]\n\t" + "mul x8, x12, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[4]\n\t" + "mul x8, x13, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [x29, 56]\n\t" + "# A[0] * A[8]\n\t" + "mul x5, x10, x19\n\t" + "umulh x6, x10, x19\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[1] * A[7]\n\t" + "mul x8, x11, x17\n\t" + "umulh x9, x11, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[2] * A[6]\n\t" + "mul x8, x12, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x12, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[5]\n\t" + "mul x8, x13, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[4]\n\t" + "mul x8, x14, x14\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x14\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [x29, 64]\n\t" + "# 
A[1] * A[8]\n\t" + "mul x5, x11, x19\n\t" + "umulh x6, x11, x19\n\t" + "mov x4, xzr\n\t" + "mov x7, xzr\n\t" + "# A[2] * A[7]\n\t" + "mul x8, x12, x17\n\t" + "umulh x9, x12, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[3] * A[6]\n\t" + "mul x8, x13, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x13, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[5]\n\t" + "mul x8, x14, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [x29, 72]\n\t" + "# A[2] * A[8]\n\t" + "mul x5, x12, x19\n\t" + "umulh x6, x12, x19\n\t" + "mov x2, xzr\n\t" + "mov x7, xzr\n\t" + "# A[3] * A[7]\n\t" + "mul x8, x13, x17\n\t" + "umulh x9, x13, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[4] * A[6]\n\t" + "mul x8, x14, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x14, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[5]\n\t" + "mul x8, x15, x15\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x15\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [x29, 80]\n\t" + "# A[3] * A[8]\n\t" + "mul x5, x13, x19\n\t" + "umulh x6, x13, x19\n\t" + "mov x3, xzr\n\t" + "mov x7, xzr\n\t" + "# A[4] * A[7]\n\t" + "mul x8, x14, x17\n\t" + "umulh x9, x14, x17\n\t" + "adds x5, x5, x8\n\t" + "# A[5] * A[6]\n\t" + "mul x8, x15, x16\n\t" + "adcs x6, x6, x9\n\t" + "umulh x9, x15, x16\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [x29, 
88]\n\t" + "# A[4] * A[8]\n\t" + "mul x8, x14, x19\n\t" + "umulh x9, x14, x19\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[5] * A[7]\n\t" + "mul x8, x15, x17\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x15, x17\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "# A[6] * A[6]\n\t" + "mul x8, x16, x16\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x16, x16\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [x29, 96]\n\t" + "# A[5] * A[8]\n\t" + "mul x8, x15, x19\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x15, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "# A[6] * A[7]\n\t" + "mul x8, x16, x17\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x16, x17\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "str x3, [x29, 104]\n\t" + "# A[6] * A[8]\n\t" + "mul x8, x16, x19\n\t" + "adcs x4, x4, x9\n\t" + "umulh x9, x16, x19\n\t" + "adc x2, x2, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "# A[7] * A[7]\n\t" + "mul x8, x17, x17\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x17, x17\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "str x4, [x29, 112]\n\t" + "# A[7] * A[8]\n\t" + "mul x8, x17, x19\n\t" + "adcs x2, x2, x9\n\t" + "umulh x9, x17, x19\n\t" + "adc x3, x3, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "str x2, [x29, 120]\n\t" + "# A[8] * A[8]\n\t" + "mul x8, x19, x19\n\t" + "adcs x3, x3, x9\n\t" + "umulh x9, x19, x19\n\t" + "adc x4, x4, xzr\n\t" + "adds x3, x3, x8\n\t" + "adc x4, x4, x9\n\t" + "stp x3, x4, [x29, 128]\n\t" + "ldp x2, x3, [x29, 0]\n\t" + "ldp x4, x5, [x29, 16]\n\t" + "ldp x6, x7, [x29, 32]\n\t" + "ldp x8, x9, [x29, 48]\n\t" + "ldp 
x11, x12, [x29, 64]\n\t" + "ldp x13, x14, [x29, 80]\n\t" + "ldp x15, x16, [x29, 96]\n\t" + "ldp x17, x19, [x29, 112]\n\t" + "ldr x20, [x29, 128]\n\t" + "and x10, x11, 0x1ff\n\t" + "lsr x11, x11, 9\n\t" + "orr x11, x11, x12, lsl #55\n\t" + "lsr x12, x12, 9\n\t" + "orr x12, x12, x13, lsl #55\n\t" + "lsr x13, x13, 9\n\t" + "orr x13, x13, x14, lsl #55\n\t" + "lsr x14, x14, 9\n\t" + "orr x14, x14, x15, lsl #55\n\t" + "lsr x15, x15, 9\n\t" + "orr x15, x15, x16, lsl #55\n\t" + "lsr x16, x16, 9\n\t" + "orr x16, x16, x17, lsl #55\n\t" + "lsr x17, x17, 9\n\t" + "orr x17, x17, x19, lsl #55\n\t" + "lsr x19, x19, 9\n\t" + "orr x19, x19, x20, lsl #55\n\t" + "lsr x20, x20, 9\n\t" + "adds x2, x2, x11\n\t" + "adcs x3, x3, x12\n\t" + "adcs x4, x4, x13\n\t" + "adcs x5, x5, x14\n\t" + "adcs x6, x6, x15\n\t" + "adcs x7, x7, x16\n\t" + "adcs x8, x8, x17\n\t" + "adcs x9, x9, x19\n\t" + "adcs x10, x10, x20\n\t" + "lsr x11, x10, 9\n\t" + "and x10, x10, 0x1ff\n\t" + "adds x2, x2, x11\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "adcs x6, x6, xzr\n\t" + "adcs x7, x7, xzr\n\t" + "adcs x8, x8, xzr\n\t" + "adcs x9, x9, xzr\n\t" + "adcs x10, x10, xzr\n\t" + "stp x2, x3, [%[r], 0]\n\t" + "stp x4, x5, [%[r], 16]\n\t" + "stp x6, x7, [%[r], 32]\n\t" + "stp x8, x9, [%[r], 48]\n\t" + "str x10, [%[r], 64]\n\t" + "ldp x29, x30, [sp], #0xa0\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc" + ); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montgomery multiplier. 
 */
SP_NOINLINE static void sp_521_mont_sqr_n_9(sp_digit* r,
    const sp_digit* a, int n, const sp_digit* m, sp_digit mp)
{
    /* First squaring moves a into r; the remaining n-1 run in place. */
    sp_521_mont_sqr_9(r, a, m, mp);
    for (; n > 1; n--) {
        sp_521_mont_sqr_9(r, r, m, mp);
    }
}

#endif /* !WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Mod-2 for the P521 curve. */
static const uint64_t p521_mod_minus_2[9] = {
    0xfffffffffffffffdU,0xffffffffffffffffU,0xffffffffffffffffU,
    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU,
    0xffffffffffffffffU,0xffffffffffffffffU,0x00000000000001ffU
};
#endif /* WOLFSSL_SP_SMALL */

/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
 * P521 curve. (r = 1 / a mod m)
 *
 * Uses Fermat's little theorem: a^(p-2) mod p.  The small build walks the
 * bits of p-2; the fast build uses a fixed addition chain.
 *
 * r Inverse result.
 * a Number to invert.
 * td Temporary data (at least 6 * 9 digits in the non-small build).
 */
static void sp_521_mont_inv_9(sp_digit* r, const sp_digit* a, sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Square-and-multiply over the 520 bits of p521_mod_minus_2. */
    XMEMCPY(t, a, sizeof(sp_digit) * 9);
    for (i=519; i>=0; i--) {
        sp_521_mont_sqr_9(t, t, p521_mod, p521_mp_mod);
        if (p521_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_521_mont_mul_9(t, t, a, p521_mod, p521_mp_mod);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 9);
#else
    sp_digit* t1 = td;
    sp_digit* t2 = td + 2 * 9;
    sp_digit* t3 = td + 4 * 9;

    /* Addition chain for the exponent p-2; each comment shows the exponent
     * accumulated so far. */
    /* 0x2 */
    sp_521_mont_sqr_9(t1, a, p521_mod, p521_mp_mod);
    /* 0x3 */
    sp_521_mont_mul_9(t2, t1, a, p521_mod, p521_mp_mod);
    /* 0x6 */
    sp_521_mont_sqr_9(t1, t2, p521_mod, p521_mp_mod);
    /* 0x7 */
    sp_521_mont_mul_9(t3, t1, a, p521_mod, p521_mp_mod);
    /* 0xc */
    sp_521_mont_sqr_n_9(t1, t2, 2, p521_mod, p521_mp_mod);
    /* 0xf */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0x78 */
    sp_521_mont_sqr_n_9(t1, t2, 3, p521_mod, p521_mp_mod);
    /* 0x7f */
    sp_521_mont_mul_9(t3, t3, t1, p521_mod, p521_mp_mod);
    /* 0xf0 */
    sp_521_mont_sqr_n_9(t1, t2, 4, p521_mod, p521_mp_mod);
    /* 0xff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xff00 */
    sp_521_mont_sqr_n_9(t1, t2, 8, p521_mod, p521_mp_mod);
    /* 0xffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffff0000 */
    sp_521_mont_sqr_n_9(t1, t2, 16, p521_mod, p521_mp_mod);
    /* 0xffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffff00000000 */
    sp_521_mont_sqr_n_9(t1, t2, 32, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffff0000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 64, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffff00000000000000000000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 128, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 256, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80 */
    sp_521_mont_sqr_n_9(t1, t2, 7, p521_mod, p521_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t3, t1, p521_mod, p521_mp_mod);
    /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc */
    sp_521_mont_sqr_n_9(t1, t2, 2, p521_mod, p521_mp_mod);
    /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
    sp_521_mont_mul_9(r, t1, a, p521_mod, p521_mp_mod);

#endif /* WOLFSSL_SP_SMALL */
}

/* Normalize the values in each word to 64.
 *
 * No-op for this 64-bit full-radix representation.
 *
 * a Array of sp_digit to normalize.
 */
#define sp_521_norm_9(a)

#define sp_521_mont_reduce_order_9 sp_521_mont_reduce_9

/* Reduce the number back to 521 bits using Montgomery reduction.
 *
 * Classic word-by-word Montgomery reduction over 9 64-bit digits, followed
 * by a 9-bit shift (the modulus is 2^521 - ...) and a masked conditional
 * subtract of the modulus.  On the last iteration the multiplier mu is
 * masked to 9 bits to match the top word of the modulus.
 *
 * a A single precision number to reduce in place.
 * m The single precision number representing the modulus.
 * mp The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_521_mont_reduce_9(sp_digit* a, const sp_digit* m,
    sp_digit mp)
{
    __asm__ __volatile__ (
        /* x12..x22 hold the running window of a[]; x3 is the carry. */
        "ldp x13, x14, [%[a], 0]\n\t"
        "ldp x15, x16, [%[a], 16]\n\t"
        "ldp x17, x19, [%[a], 32]\n\t"
        "ldp x20, x21, [%[a], 48]\n\t"
        "ldr x22, [%[a], 64]\n\t"
        "mov x3, xzr\n\t"
        "# i = 0..8\n\t"
        "mov x4, 9\n\t"
        "\n1:\n\t"
        "# mu = a[i] * mp\n\t"
        "mul x9, %[mp], x13\n\t"
        "cmp x4, #1\n\t"
        "bne L_521_mont_reduce_9_nomask\n\t"
        "and x9, x9, #0x1ff\n\t"
        "L_521_mont_reduce_9_nomask:\n\t"
        "# a[i+0] += m[0] * mu\n\t"
        "ldp x10, x11, [%[m], 0]\n\t"
        "mul x7, x10, x9\n\t"
        "umulh x8, x10, x9\n\t"
        "adds x12, x13, x7\n\t"
        "# a[i+1] += m[1] * mu\n\t"
        "adc x6, x8, xzr\n\t"
        "mul x7, x11, x9\n\t"
        "umulh x8, x11, x9\n\t"
        "adds x13, x14, x7\n\t"
        "# a[i+2] += m[2] * mu\n\t"
        "ldp x11, x10, [%[m], 16]\n\t"
        "adc x5, x8, xzr\n\t"
        "adds x13, x13, x6\n\t"
        "mul x7, x11, x9\n\t"
        "adc x5, x5, xzr\n\t"
        "umulh x8, x11, x9\n\t"
        "adds x14, x15, x7\n\t"
        "# a[i+3] += m[3] * mu\n\t"
        "adc x6, x8, xzr\n\t"
        "adds x14, x14, x5\n\t"
        "mul x7, x10, x9\n\t"
        "adc x6, x6, xzr\n\t"
        "umulh x8, x10, x9\n\t"
        "adds x15, x16, x7\n\t"
        "# a[i+4] += m[4] * mu\n\t"
        "ldp x11, x10, [%[m], 32]\n\t"
        "adc x5, x8, xzr\n\t"
        "adds x15, x15, x6\n\t"
        "mul x7, x11, x9\n\t"
        "adc x5, x5, xzr\n\t"
        "umulh x8, x11, x9\n\t"
        "adds x16, x17, x7\n\t"
        "# a[i+5] += m[5] * mu\n\t"
        "adc x6, x8, xzr\n\t"
        "adds x16, x16, x5\n\t"
        "mul x7, x10, x9\n\t"
        "adc x6, x6, xzr\n\t"
        "umulh x8, x10, x9\n\t"
        "adds x17, x19, x7\n\t"
        "# a[i+6] += m[6] * mu\n\t"
        "ldp x11, x10, [%[m], 48]\n\t"
        "adc x5, x8, xzr\n\t"
        "adds x17, x17, x6\n\t"
        "mul x7, x11, x9\n\t"
        "adc x5, x5, xzr\n\t"
        "umulh x8, x11, x9\n\t"
        "adds x19, x20, x7\n\t"
        "# a[i+7] += m[7] * mu\n\t"
        "adc x6, x8, xzr\n\t"
        "adds x19, x19, x5\n\t"
        "mul x7, x10, x9\n\t"
        "adc x6, x6, xzr\n\t"
        "umulh x8, x10, x9\n\t"
        "adds x20, x21, x7\n\t"
        "# a[i+8] += m[8] * mu\n\t"
        "ldr x11, [%[m], 64]\n\t"
        "adc x5, x8, xzr\n\t"
        "adds x20, x20, x6\n\t"
        "mul x7, x11, x9\n\t"
        "adc x5, x5, xzr\n\t"
        "umulh x8, x11, x9\n\t"
        "adds x5, x5, x7\n\t"
        "adcs x8, x8, x3\n\t"
        "adc x3, xzr, xzr\n\t"
        "adds x21, x22, x5\n\t"
        "ldr x22, [%[a], 72]\n\t"
        "adcs x22, x22, x8\n\t"
        "adc x3, x3, xzr\n\t"
        "subs x4, x4, 1\n\t"
        "add %[a], %[a], 8\n\t"
        "bne 1b\n\t"
        /* Shift the 9-digit result right by 9 bits (divide by 2^521/2^512). */
        "extr x12, x13, x12, 9\n\t"
        "extr x13, x14, x13, 9\n\t"
        "extr x14, x15, x14, 9\n\t"
        "extr x15, x16, x15, 9\n\t"
        "extr x16, x17, x16, 9\n\t"
        "extr x17, x19, x17, 9\n\t"
        "extr x19, x20, x19, 9\n\t"
        "extr x20, x21, x20, 9\n\t"
        "lsr x21, x21, 9\n\t"
        /* x3 = -(overflow past 521 bits); used as a subtract mask. */
        "lsr x3, x21, 9\n\t"
        "sub %[a], %[a], 72\n\t"
        "neg x3, x3\n\t"
        "# Subtract masked modulus\n\t"
        "ldp x4, x5, [%[m], 0]\n\t"
        "ldp x6, x7, [%[m], 16]\n\t"
        "and x4, x4, x3\n\t"
        "and x5, x5, x3\n\t"
        "subs x12, x12, x4\n\t"
        "and x6, x6, x3\n\t"
        "sbcs x13, x13, x5\n\t"
        "and x7, x7, x3\n\t"
        "sbcs x14, x14, x6\n\t"
        "stp x12, x13, [%[a], 0]\n\t"
        "sbcs x15, x15, x7\n\t"
        "stp x14, x15, [%[a], 16]\n\t"
        "ldp x4, x5, [%[m], 32]\n\t"
        "ldp x6, x7, [%[m], 48]\n\t"
        "and x4, x4, x3\n\t"
        "and x5, x5, x3\n\t"
        "sbcs x16, x16, x4\n\t"
        "and x6, x6, x3\n\t"
        "sbcs x17, x17, x5\n\t"
        "and x7, x7, x3\n\t"
        "sbcs x19, x19, x6\n\t"
        "stp x16, x17, [%[a], 32]\n\t"
        "sbcs x20, x20, x7\n\t"
        "stp x19, x20, [%[a], 48]\n\t"
        "ldr x4, [%[m], 64]\n\t"
        "and x4, x4, x3\n\t"
        "sbcs x21, x21, x4\n\t"
        "str x21, [%[a], 64]\n\t"
        : [a] "+r" (a), [mp] "+r" (mp)
        : [m] "r" (m)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
    );

}

/* Map the Montgomery form projective coordinate point to an affine point.
 *
 * Computes 1/Z, then X/Z^2 and Y/Z^3, converting out of Montgomery form
 * via sp_521_mont_reduce_9 and reducing each coordinate below the modulus.
 *
 * r Resulting affine coordinate point.
 * p Montgomery form projective coordinate point.
 * t Temporary ordinate data.
 */
static void sp_521_map_9(sp_point_521* r, const sp_point_521* p,
    sp_digit* t)
{
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*9;
    sp_int64 n;

    sp_521_mont_inv_9(t1, p->z, t + 2*9);

    sp_521_mont_sqr_9(t2, t1, p521_mod, p521_mp_mod);      /* t2 = 1/Z^2 */
    sp_521_mont_mul_9(t1, t2, t1, p521_mod, p521_mp_mod);  /* t1 = 1/Z^3 */

    /* x /= z^2 */
    sp_521_mont_mul_9(r->x, p->x, t2, p521_mod, p521_mp_mod);
    XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U);
    sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod);
    /* Reduce x to less than modulus */
    n = sp_521_cmp_9(r->x, p521_mod);
    sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 63));
    sp_521_norm_9(r->x);

    /* y /= z^3 */
    sp_521_mont_mul_9(r->y, p->y, t1, p521_mod, p521_mp_mod);
    XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U);
    sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod);
    /* Reduce y to less than modulus */
    n = sp_521_cmp_9(r->y, p521_mod);
    sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 63));
    sp_521_norm_9(r->y);

    /* Affine point: Z = 1. */
    XMEMSET(r->z, 0, sizeof(r->z) / 2);
    r->z[0] = 1;
}

/* Add two Montgomery form numbers (r = a + b % m).
 *
 * r Result of addition.
 * a First number to add in Montgomery form.
 * b Second number to add in Montgomery form.
 * m Modulus (prime).
 */
SP_NOINLINE static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b, const sp_digit* m)
{
    __asm__ __volatile__ (
        /* Full 576-bit add, then fold any bits above 521 back into the
         * bottom (mod p521 = 2^521 - 1 style wrap on the top 9-bit word). */
        "ldp x4, x5, [%[a], 0]\n\t"
        "ldp x6, x7, [%[a], 16]\n\t"
        "ldp x8, x9, [%[a], 32]\n\t"
        "ldp x10, x11, [%[a], 48]\n\t"
        "ldr x12, [%[a], 64]\n\t"
        "ldp x13, x14, [%[b], 0]\n\t"
        "ldp x15, x16, [%[b], 16]\n\t"
        "ldp x17, x19, [%[b], 32]\n\t"
        "ldp x20, x21, [%[b], 48]\n\t"
        "ldr x22, [%[b], 64]\n\t"
        "adds x4, x4, x13\n\t"
        "adcs x5, x5, x14\n\t"
        "adcs x6, x6, x15\n\t"
        "adcs x7, x7, x16\n\t"
        "adcs x8, x8, x17\n\t"
        "adcs x9, x9, x19\n\t"
        "adcs x10, x10, x20\n\t"
        "adcs x11, x11, x21\n\t"
        "adc x12, x12, x22\n\t"
        "lsr x13, x12, 9\n\t"
        "and x12, x12, 0x1ff\n\t"
        "adds x4, x4, x13\n\t"
        "adcs x5, x5, xzr\n\t"
        "adcs x6, x6, xzr\n\t"
        "adcs x7, x7, xzr\n\t"
        "adcs x8, x8, xzr\n\t"
        "adcs x9, x9, xzr\n\t"
        "adcs x10, x10, xzr\n\t"
        "adcs x11, x11, xzr\n\t"
        "adc x12, x12, xzr\n\t"
        "stp x4, x5, [%[r], 0]\n\t"
        "stp x6, x7, [%[r], 16]\n\t"
        "stp x8, x9, [%[r], 32]\n\t"
        "stp x10, x11, [%[r], 48]\n\t"
        "str x12, [%[r], 64]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
    );

    (void)m;
}

/* Double a Montgomery form number (r = a + a % m).
 *
 * r Result of doubling.
 * a Number to double in Montgomery form.
 * m Modulus (prime).
 */
SP_NOINLINE static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    __asm__ __volatile__ (
        /* a + a with the same top-word fold as sp_521_mont_add_9. */
        "ldp x4, x5, [%[a], 0]\n\t"
        "ldp x6, x7, [%[a], 16]\n\t"
        "ldp x8, x9, [%[a], 32]\n\t"
        "ldp x10, x11, [%[a], 48]\n\t"
        "ldr x12, [%[a], 64]\n\t"
        "adds x4, x4, x4\n\t"
        "adcs x5, x5, x5\n\t"
        "adcs x6, x6, x6\n\t"
        "adcs x7, x7, x7\n\t"
        "adcs x8, x8, x8\n\t"
        "adcs x9, x9, x9\n\t"
        "adcs x10, x10, x10\n\t"
        "adcs x11, x11, x11\n\t"
        "adc x12, x12, x12\n\t"
        "lsr x13, x12, 9\n\t"
        "and x12, x12, 0x1ff\n\t"
        "adds x4, x4, x13\n\t"
        "adcs x5, x5, xzr\n\t"
        "adcs x6, x6, xzr\n\t"
        "adcs x7, x7, xzr\n\t"
        "adcs x8, x8, xzr\n\t"
        "adcs x9, x9, xzr\n\t"
        "adcs x10, x10, xzr\n\t"
        "adcs x11, x11, xzr\n\t"
        "adc x12, x12, xzr\n\t"
        "stp x4, x5, [%[r], 0]\n\t"
        "stp x6, x7, [%[r], 16]\n\t"
        "stp x8, x9, [%[r], 32]\n\t"
        "stp x10, x11, [%[r], 48]\n\t"
        "str x12, [%[r], 64]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "cc"
    );

    (void)m;
}

/* Triple a Montgomery form number (r = a + a + a % m).
 *
 * r Result of Tripling.
 * a Number to triple in Montgomery form.
 * m Modulus (prime).
 */
SP_NOINLINE static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    __asm__ __volatile__ (
        /* 2*a into x13..x22, then + a, then fold the top word. */
        "ldp x4, x5, [%[a], 0]\n\t"
        "ldp x6, x7, [%[a], 16]\n\t"
        "ldp x8, x9, [%[a], 32]\n\t"
        "ldp x10, x11, [%[a], 48]\n\t"
        "ldr x12, [%[a], 64]\n\t"
        "adds x13, x4, x4\n\t"
        "adcs x14, x5, x5\n\t"
        "adcs x15, x6, x6\n\t"
        "adcs x16, x7, x7\n\t"
        "adcs x17, x8, x8\n\t"
        "adcs x19, x9, x9\n\t"
        "adcs x20, x10, x10\n\t"
        "adcs x21, x11, x11\n\t"
        "adc x22, x12, x12\n\t"
        "adds x13, x13, x4\n\t"
        "adcs x14, x14, x5\n\t"
        "adcs x15, x15, x6\n\t"
        "adcs x16, x16, x7\n\t"
        "adcs x17, x17, x8\n\t"
        "adcs x19, x19, x9\n\t"
        "adcs x20, x20, x10\n\t"
        "adcs x21, x21, x11\n\t"
        "adc x22, x22, x12\n\t"
        "lsr x4, x22, 9\n\t"
        "and x22, x22, 0x1ff\n\t"
        "adds x13, x13, x4\n\t"
        "adcs x14, x14, xzr\n\t"
        "adcs x15, x15, xzr\n\t"
        "adcs x16, x16, xzr\n\t"
        "adcs x17, x17, xzr\n\t"
        "adcs x19, x19, xzr\n\t"
        "adcs x20, x20, xzr\n\t"
        "adcs x21, x21, xzr\n\t"
        "adc x22, x22, xzr\n\t"
        "stp x13, x14, [%[r], 0]\n\t"
        "stp x15, x16, [%[r], 16]\n\t"
        "stp x17, x19, [%[r], 32]\n\t"
        "stp x20, x21, [%[r], 48]\n\t"
        "str x22, [%[r], 64]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
    );

    (void)m;
}

/* Subtract two Montgomery form numbers (r = a - b % m).
 *
 * r Result of subtraction.
 * a Number to subtract from in Montgomery form.
 * b Number to subtract with in Montgomery form.
 * m Modulus (prime).
 */
SP_NOINLINE static void sp_521_mont_sub_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b, const sp_digit* m)
{
    __asm__ __volatile__ (
        /* Full subtract; a borrow shows up as negative bits in the top
         * word, which are folded back in (mod-p wrap, constant time). */
        "ldp x4, x5, [%[a], 0]\n\t"
        "ldp x6, x7, [%[a], 16]\n\t"
        "ldp x8, x9, [%[a], 32]\n\t"
        "ldp x10, x11, [%[a], 48]\n\t"
        "ldr x12, [%[a], 64]\n\t"
        "ldp x13, x14, [%[b], 0]\n\t"
        "ldp x15, x16, [%[b], 16]\n\t"
        "ldp x17, x19, [%[b], 32]\n\t"
        "ldp x20, x21, [%[b], 48]\n\t"
        "ldr x22, [%[b], 64]\n\t"
        "subs x4, x4, x13\n\t"
        "sbcs x5, x5, x14\n\t"
        "sbcs x6, x6, x15\n\t"
        "sbcs x7, x7, x16\n\t"
        "sbcs x8, x8, x17\n\t"
        "sbcs x9, x9, x19\n\t"
        "sbcs x10, x10, x20\n\t"
        "sbcs x11, x11, x21\n\t"
        "sbc x12, x12, x22\n\t"
        "asr x13, x12, 9\n\t"
        "and x12, x12, 0x1ff\n\t"
        "neg x13, x13\n\t"
        "subs x4, x4, x13\n\t"
        "sbcs x5, x5, xzr\n\t"
        "sbcs x6, x6, xzr\n\t"
        "sbcs x7, x7, xzr\n\t"
        "sbcs x8, x8, xzr\n\t"
        "sbcs x9, x9, xzr\n\t"
        "sbcs x10, x10, xzr\n\t"
        "sbcs x11, x11, xzr\n\t"
        "sbc x12, x12, xzr\n\t"
        "stp x4, x5, [%[r], 0]\n\t"
        "stp x6, x7, [%[r], 16]\n\t"
        "stp x8, x9, [%[r], 32]\n\t"
        "stp x10, x11, [%[r], 48]\n\t"
        "str x12, [%[r], 64]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
    );

    (void)m;
}

#ifdef WOLFSSL_SP_SMALL
/* Conditionally add a and b using the mask m.
 * m is -1 to add and 0 when not.
 *
 * r A single precision number representing conditional add result.
 * a A single precision number to add with.
 * b A single precision number to add.
 * m Mask value to apply.
 */
static sp_digit sp_521_cond_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b,
    sp_digit m)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Loop over the 9 digits; the initial "adds c, c, #-1" re-loads the
         * carry flag saved in c from the previous iteration. */
        "mov x8, #0\n\t"
        "1:\n\t"
        "adds %[c], %[c], #-1\n\t"
        "ldr x4, [%[a], x8]\n\t"
        "ldr x5, [%[b], x8]\n\t"
        "and x5, x5, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "cset %[c], cs\n\t"
        "str x4, [%[r], x8]\n\t"
        "add x8, x8, #8\n\t"
        "cmp x8, 72\n\t"
        "b.lt 1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc"
    );

    return c;
}
#endif /* WOLFSSL_SP_SMALL */

#ifndef WOLFSSL_SP_SMALL
/* Conditionally add a and b using the mask m.
 * m is -1 to add and 0 when not.
 *
 * r A single precision number representing conditional add result.
 * a A single precision number to add with.
 * b A single precision number to add.
 * m Mask value to apply.
 */
static sp_digit sp_521_cond_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b,
    sp_digit m)
{
    __asm__ __volatile__ (

        /* Fully unrolled: the carry flag is live across all 9 additions. */
        "ldp x8, x9, [%[b], 0]\n\t"
        "ldp x10, x11, [%[b], 16]\n\t"
        "ldp x4, x5, [%[a], 0]\n\t"
        "and x8, x8, %[m]\n\t"
        "ldp x6, x7, [%[a], 16]\n\t"
        "and x9, x9, %[m]\n\t"
        "adds x4, x4, x8\n\t"
        "and x10, x10, %[m]\n\t"
        "adcs x5, x5, x9\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x10\n\t"
        "stp x4, x5, [%[r], 0]\n\t"
        "adcs x7, x7, x11\n\t"
        "stp x6, x7, [%[r], 16]\n\t"
        "ldp x8, x9, [%[b], 32]\n\t"
        "ldp x10, x11, [%[b], 48]\n\t"
        "ldp x4, x5, [%[a], 32]\n\t"
        "and x8, x8, %[m]\n\t"
        "ldp x6, x7, [%[a], 48]\n\t"
        "and x9, x9, %[m]\n\t"
        "adcs x4, x4, x8\n\t"
        "and x10, x10, %[m]\n\t"
        "adcs x5, x5, x9\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x10\n\t"
        "stp x4, x5, [%[r], 32]\n\t"
        "adcs x7, x7, x11\n\t"
        "stp x6, x7, [%[r], 48]\n\t"
        "ldr x8, [%[b], 64]\n\t"
        "ldr x4, [%[a], 64]\n\t"
        "and x8, x8, %[m]\n\t"
        "adcs x4, x4, x8\n\t"
        "str x4, [%[r], 64]\n\t"
        "cset %[r], cs\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc"
    );

    /* r register was overwritten with the final carry by cset. */
    return (sp_digit)r;
}
#endif /* !WOLFSSL_SP_SMALL */

/* Shift the 9-digit number right by one bit (r = a >> 1). */
static void sp_521_rshift1_9(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* NOTE(review): x3 is loaded twice (ldp then ldr of [a],8) -
         * redundant but harmless. */
        "ldp x2, x3, [%[a]]\n\t"
        "ldr x3, [%[a], 8]\n\t"
        "extr x2, x3, x2, #1\n\t"
        "ldr x4, [%[a], 16]\n\t"
        "str x2, [%[r], 0]\n\t"
        "extr x3, x4, x3, #1\n\t"
        "ldr x2, [%[a], 24]\n\t"
        "str x3, [%[r], 8]\n\t"
        "extr x4, x2, x4, #1\n\t"
        "ldr x3, [%[a], 32]\n\t"
        "str x4, [%[r], 16]\n\t"
        "extr x2, x3, x2, #1\n\t"
        "ldr x4, [%[a], 40]\n\t"
        "str x2, [%[r], 24]\n\t"
        "extr x3, x4, x3, #1\n\t"
        "ldr x2, [%[a], 48]\n\t"
        "str x3, [%[r], 32]\n\t"
        "extr x4, x2, x4, #1\n\t"
        "ldr x3, [%[a], 56]\n\t"
        "str x4, [%[r], 40]\n\t"
        "extr x2, x3, x2, #1\n\t"
        "ldr x4, [%[a], 64]\n\t"
        "str x2, [%[r], 48]\n\t"
        "extr x3, x4, x3, #1\n\t"
        "lsr x4, x4, #1\n\t"
        "stp x3, x4, [%[r], 56]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "x2", "x3", "x4", "cc"
    );
}

/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
 *
 * If a is odd, add m first (making the sum even), then shift right by one.
 * Constant time: the add is masked on the low bit of a.
 *
 * r Result of division by 2.
 * a Number to divide.
 * m Modulus (prime).
 */
SP_NOINLINE static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    sp_digit o;

    o = sp_521_cond_add_9(r, a, m, 0 - (a[0] & 1));
    sp_521_rshift1_9(r, r);
    /* Fold the conditional-add carry into the top bit after the shift. */
    r[8] |= o << 63;
}

/* Double the Montgomery form projective point p.
 *
 * Standard Jacobian doubling for a = -3 curves (as generated by wolfSSL's
 * SP tool); all arithmetic is in Montgomery form mod p521.
 *
 * r Result of doubling point.
 * p Point to double.
 * t Temporary ordinate data.
 */
static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p,
    sp_digit* t)
{
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*9;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = r->x;
    y = r->y;
    z = r->z;
    /* Put infinity into result. */
    if (r != p) {
        r->infinity = p->infinity;
    }

    /* T1 = Z * Z */
    sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod);
    /* Z = Y * Z */
    sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod);
    /* Z = 2Z */
    sp_521_mont_dbl_9(z, z, p521_mod);
    /* T2 = X - T1 */
    sp_521_mont_sub_9(t2, p->x, t1, p521_mod);
    /* T1 = X + T1 */
    sp_521_mont_add_9(t1, p->x, t1, p521_mod);
    /* T2 = T1 * T2 */
    sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod);
    /* T1 = 3T2 */
    sp_521_mont_tpl_9(t1, t2, p521_mod);
    /* Y = 2Y */
    sp_521_mont_dbl_9(y, p->y, p521_mod);
    /* Y = Y * Y */
    sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod);
    /* T2 = Y * Y */
    sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod);
    /* T2 = T2/2 */
    sp_521_mont_div2_9(t2, t2, p521_mod);
    /* Y = Y * X */
    sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod);
    /* X = T1 * T1 */
    sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod);
    /* X = X - Y */
    sp_521_mont_sub_9(x, x, y, p521_mod);
    /* X = X - Y */
    sp_521_mont_sub_9(x, x, y, p521_mod);
    /* Y = Y - X */
    sp_521_mont_sub_9(y, y, x, p521_mod);
    /* Y = Y * T1 */
    sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod);
    /* Y = Y - T2 */
    sp_521_mont_sub_9(y, y, t2, p521_mod);
}

#ifdef WOLFSSL_SP_NONBLOCK
/* Resumable state for the non-blocking point double. */
typedef struct sp_521_proj_point_dbl_9_ctx {
    int state;      /* Next step (0..19) of the doubling sequence. */
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_521_proj_point_dbl_9_ctx;

/* Double the Montgomery form projective point p.
 *
 * Non-blocking variant: performs one step of sp_521_proj_point_dbl_9 per
 * call and returns FP_WOULDBLOCK until the final state returns MP_OKAY.
 *
 * r Result of doubling point.
 * p Point to double.
 * t Temporary ordinate data.
 */
static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r,
    const sp_point_521* p, sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_521_proj_point_dbl_9_ctx* ctx = (sp_521_proj_point_dbl_9_ctx*)sp_ctx->data;

    /* Compile-time check that the ctx fits in the generic sp_ctx data. */
    typedef char ctx_size_test[sizeof(sp_521_proj_point_dbl_9_ctx) >= sizeof(*sp_ctx) ?
        -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        ctx->t1 = t;
        ctx->t2 = t + 2*9;
        ctx->x = r->x;
        ctx->y = r->y;
        ctx->z = r->z;

        /* Put infinity into result. */
        if (r != p) {
            r->infinity = p->infinity;
        }
        ctx->state = 1;
        break;
    case 1:
        /* T1 = Z * Z */
        sp_521_mont_sqr_9(ctx->t1, p->z, p521_mod, p521_mp_mod);
        ctx->state = 2;
        break;
    case 2:
        /* Z = Y * Z */
        sp_521_mont_mul_9(ctx->z, p->y, p->z, p521_mod, p521_mp_mod);
        ctx->state = 3;
        break;
    case 3:
        /* Z = 2Z */
        sp_521_mont_dbl_9(ctx->z, ctx->z, p521_mod);
        ctx->state = 4;
        break;
    case 4:
        /* T2 = X - T1 */
        sp_521_mont_sub_9(ctx->t2, p->x, ctx->t1, p521_mod);
        ctx->state = 5;
        break;
    case 5:
        /* T1 = X + T1 */
        sp_521_mont_add_9(ctx->t1, p->x, ctx->t1, p521_mod);
        ctx->state = 6;
        break;
    case 6:
        /* T2 = T1 * T2 */
        sp_521_mont_mul_9(ctx->t2, ctx->t1, ctx->t2, p521_mod, p521_mp_mod);
        ctx->state = 7;
        break;
    case 7:
        /* T1 = 3T2 */
        sp_521_mont_tpl_9(ctx->t1, ctx->t2, p521_mod);
        ctx->state = 8;
        break;
    case 8:
        /* Y = 2Y */
        sp_521_mont_dbl_9(ctx->y, p->y, p521_mod);
        ctx->state = 9;
        break;
    case 9:
        /* Y = Y * Y */
        sp_521_mont_sqr_9(ctx->y, ctx->y, p521_mod, p521_mp_mod);
        ctx->state = 10;
        break;
    case 10:
        /* T2 = Y * Y */
        sp_521_mont_sqr_9(ctx->t2, ctx->y, p521_mod, p521_mp_mod);
        ctx->state = 11;
        break;
    case 11:
        /* T2 = T2/2 */
        sp_521_mont_div2_9(ctx->t2, ctx->t2, p521_mod);
        ctx->state = 12;
        break;
    case 12:
        /* Y = Y * X */
        sp_521_mont_mul_9(ctx->y, ctx->y, p->x, p521_mod, p521_mp_mod);
        ctx->state = 13;
        break;
    case 13:
        /* X = T1 * T1 */
        sp_521_mont_sqr_9(ctx->x, ctx->t1, p521_mod, p521_mp_mod);
        ctx->state = 14;
        break;
    case 14:
        /* X = X - Y */
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->y, p521_mod);
        ctx->state = 15;
        break;
    case 15:
        /* X = X - Y */
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->y, p521_mod);
        ctx->state = 16;
        break;
    case 16:
        /* Y = Y - X */
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->x, p521_mod);
        ctx->state = 17;
        break;
    case 17:
        /* Y = Y * T1 */
        sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t1, p521_mod, p521_mp_mod);
        ctx->state = 18;
        break;
    case 18:
        /* Y = Y - T2 */
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t2, p521_mod);
        ctx->state = 19;
        /* fall-through */
    case 19:
        err = MP_OKAY;
        break;
    }

    if (err == MP_OKAY && ctx->state != 19) {
        err = FP_WOULDBLOCK;
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */
/* Double the Montgomery form projective point p a number of times.
 *
 * Shares W = Z^4 across iterations (co-Z style repeated doubling).  The
 * non-small build peels the final iteration out of the loop so the last
 * W update can be skipped.
 *
 * r Result of repeated doubling of point.
 * p Point to double.
 * n Number of times to double
 * t Temporary ordinate data.
 */
static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i,
    sp_digit* t)
{
    sp_digit* w = t;
    sp_digit* a = t + 2*9;
    sp_digit* b = t + 4*9;
    sp_digit* t1 = t + 6*9;
    sp_digit* t2 = t + 8*9;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
    /* volatile: keep the loop count from being optimized in a way that
     * changes the (timing-sensitive) iteration structure. */
    volatile int n = i;

    x = p->x;
    y = p->y;
    z = p->z;

    /* Y = 2*Y */
    sp_521_mont_dbl_9(y, y, p521_mod);
    /* W = Z^4 */
    sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod);
    sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod);
#ifndef WOLFSSL_SP_SMALL
    while (--n > 0)
#else
    while (--n >= 0)
#endif
    {
        /* A = 3*(X^2 - W) */
        sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod);
        sp_521_mont_sub_9(t1, t1, w, p521_mod);
        sp_521_mont_tpl_9(a, t1, p521_mod);
        /* B = X*Y^2 */
        sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod);
        sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod);
        /* X = A^2 - 2B */
        sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod);
        sp_521_mont_dbl_9(t2, b, p521_mod);
        sp_521_mont_sub_9(x, x, t2, p521_mod);
        /* B = 2.(B - X) */
        sp_521_mont_sub_9(t2, b, x, p521_mod);
        sp_521_mont_dbl_9(b, t2, p521_mod);
        /* Z = Z*Y */
        sp_521_mont_mul_9(z, z, y, p521_mod, p521_mp_mod);
        /* t1 = Y^4 */
        sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod);
#ifdef WOLFSSL_SP_SMALL
        if (n != 0)
#endif
        {
            /* W = W*Y^4 */
            sp_521_mont_mul_9(w, w, t1, p521_mod, p521_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod);
        sp_521_mont_sub_9(y, y, t1, p521_mod);
    }
#ifndef WOLFSSL_SP_SMALL
    /* Final (peeled) iteration: identical except W is not updated. */
    /* A = 3*(X^2 - W) */
    sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod);
    sp_521_mont_sub_9(t1, t1, w, p521_mod);
    sp_521_mont_tpl_9(a, t1, p521_mod);
    /* B = X*Y^2 */
    sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod);
    /* X = A^2 - 2B */
    sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod);
    sp_521_mont_dbl_9(t2, b, p521_mod);
    sp_521_mont_sub_9(x, x, t2, p521_mod);
    /* B = 2.(B - X) */
    sp_521_mont_sub_9(t2, b, x, p521_mod);
    sp_521_mont_dbl_9(b, t2, p521_mod);
    /* Z = Z*Y */
    sp_521_mont_mul_9(z, z, y, p521_mod, p521_mp_mod);
    /* t1 = Y^4 */
    sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod);
    /* y = 2*A*(B - X) - Y^4 */
    sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod);
    sp_521_mont_sub_9(y, y, t1, p521_mod);
#endif /* WOLFSSL_SP_SMALL */
    /* Y = Y/2 */
    sp_521_mont_div2_9(y, y, p521_mod);
}

/* Compare two numbers to determine if they are equal.
 * Constant time implementation.
 *
 * a First number to compare.
 * b Second number to compare.
 * returns 1 when equal and 0 otherwise.
 */
static int sp_521_cmp_equal_9(const sp_digit* a, const sp_digit* b)
{
    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
            (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
            (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0;
}

/* Returns 1 if the number of zero.
 * Implementation is constant time.
 *
 * a Number to check.
 * returns 1 if the number is zero and 0 otherwise.
 */
static int sp_521_iszero_9(const sp_digit* a)
{
    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
            a[8]) == 0;
}


/* Add two Montgomery form projective points.
 *
 * r Result of addition.
 * p First point to add.
 * q Second point to add.
 * t Temporary ordinate data.
 */
static void sp_521_proj_point_add_9(sp_point_521* r,
    const sp_point_521* p, const sp_point_521* q, sp_digit* t)
{
    sp_digit* t6 = t;
    sp_digit* t1 = t + 2*9;
    sp_digit* t2 = t + 4*9;
    sp_digit* t3 = t + 6*9;
    sp_digit* t4 = t + 8*9;
    sp_digit* t5 = t + 10*9;

    /* U1 = X1*Z2^2 */
    sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod);
    /* U2 = X2*Z1^2 */
    sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod);
    /* S1 = Y1*Z2^3 */
    sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod);
    /* S2 = Y2*Z1^3 */
    sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod);

    /* Check double: p == q (same affine point) requires the doubling
     * formula; the addition formula degenerates to H = 0. */
    if ((~p->infinity) & (~q->infinity) &
            sp_521_cmp_equal_9(t2, t1) &
            sp_521_cmp_equal_9(t4, t3)) {
        sp_521_proj_point_dbl_9(r, p, t);
    }
    else {
        sp_digit* x = t6;
        sp_digit* y = t1;
        sp_digit* z = t2;

        /* H = U2 - U1 */
        sp_521_mont_sub_9(t2, t2, t1, p521_mod);
        /* R = S2 - S1 */
        sp_521_mont_sub_9(t4, t4, t3, p521_mod);
        /* X3 = R^2 - H^3 - 2*U1*H^2 */
        sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod);
        sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod);
        sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod);
        /* Z3 = H*Z1*Z2 */
        sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod);
        sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod);
        sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod);
        sp_521_mont_sub_9(x, x, t5, p521_mod);
        sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod);
        sp_521_mont_dbl_9(t3, y, p521_mod);
        sp_521_mont_sub_9(x, x, t3, p521_mod);
        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_521_mont_sub_9(y, y, x, p521_mod);
        sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod);
        sp_521_mont_sub_9(y, y, t5, p521_mod);
        {
            /* Constant-time select: if exactly one input is infinity copy
             * the other input; if both are infinity produce infinity. */
            int i;
            sp_digit maskp = 0 - (q->infinity & (!p->infinity));
            sp_digit maskq = 0 - (p->infinity & (!q->infinity));
            sp_digit maskt = ~(maskp | maskq);
            sp_digit inf = (sp_digit)(p->infinity & q->infinity);

            for (i = 0; i < 9; i++) {
                r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) |
                          (x[i] & maskt);
            }
            for (i = 0; i < 9; i++) {
                r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) |
                          (y[i] & maskt);
            }
            for (i = 0; i < 9; i++) {
                r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) |
                          (z[i] & maskt);
            }
            r->z[0] |= inf;
            r->infinity = (word32)inf;
        }
    }
}

#ifdef WOLFSSL_SP_NONBLOCK
/* Resumable state for the non-blocking point add. */
typedef struct sp_521_proj_point_add_9_ctx {
    int state;      /* Next step (0..25) of the addition sequence. */
    sp_521_proj_point_dbl_9_ctx dbl_ctx;
    const sp_point_521* ap[2];
    sp_point_521* rp[2];
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* t3;
    sp_digit* t4;
    sp_digit* t5;
    sp_digit* t6;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_521_proj_point_add_9_ctx;

/* Add two Montgomery form projective points.
 *
 * Non-blocking variant: performs one step of sp_521_proj_point_add_9 per
 * call and returns FP_WOULDBLOCK until the final state returns MP_OKAY.
 *
 * r Result of addition.
 * p First point to add.
 * q Second point to add.
 * t Temporary ordinate data.
 */
static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r,
    const sp_point_521* p, const sp_point_521* q, sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_521_proj_point_add_9_ctx* ctx = (sp_521_proj_point_add_9_ctx*)sp_ctx->data;

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_521* a = p;
        p = q;
        q = a;
    }

    /* Compile-time check that the ctx fits in the generic sp_ctx data. */
    typedef char ctx_size_test[sizeof(sp_521_proj_point_add_9_ctx) >= sizeof(*sp_ctx) ?
        -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT */
        ctx->t6 = t;
        ctx->t1 = t + 2*9;
        ctx->t2 = t + 4*9;
        ctx->t3 = t + 6*9;
        ctx->t4 = t + 8*9;
        ctx->t5 = t + 10*9;
        ctx->x = ctx->t6;
        ctx->y = ctx->t1;
        ctx->z = ctx->t2;

        ctx->state = 1;
        break;
    case 1:
        /* U1 = X1*Z2^2 */
        sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod);
        ctx->state = 2;
        break;
    case 2:
        sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod);
        ctx->state = 3;
        break;
    case 3:
        sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod);
        ctx->state = 4;
        break;
    case 4:
        /* U2 = X2*Z1^2 */
        sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod);
        ctx->state = 5;
        break;
    case 5:
        sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod);
        ctx->state = 6;
        break;
    case 6:
        sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod);
        ctx->state = 7;
        break;
    case 7:
        /* S1 = Y1*Z2^3 */
        sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod);
        ctx->state = 8;
        break;
    case 8:
        /* S2 = Y2*Z1^3 */
        sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod);
        ctx->state = 9;
        break;
    case 9:
        /* Check double */
        if ((~p->infinity) & (~q->infinity) &
                sp_521_cmp_equal_9(ctx->t2, ctx->t1) &
                sp_521_cmp_equal_9(ctx->t4, ctx->t3)) {
            XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
            sp_521_proj_point_dbl_9(r, p, t);
            ctx->state = 25;
        }
        else {
            ctx->state = 10;
        }
        break;
    case 10:
        /* H = U2 - U1 */
        sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod);
        ctx->state = 11;
        break;
    case 11:
        /* R = S2 - S1 */
        sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod);
        ctx->state = 12;
        break;
    case 12:
        /* X3 = R^2 - H^3 - 2*U1*H^2 */
        sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod);
        ctx->state = 13;
        break;
    case 13:
        sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod);
        ctx->state = 14;
        break;
    case 14:
        sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod);
        ctx->state = 15;
        break;
    case 15:
        /* Z3 = H*Z1*Z2 */
        sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod);
        ctx->state = 16;
        break;
    case 16:
        sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod);
        ctx->state = 17;
        break;
    case 17:
        sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod);
        ctx->state = 18;
        break;
    case 18:
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod);
        ctx->state = 19;
        break;
    case 19:
        sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod);
        ctx->state = 20;
        break;
    case 20:
        sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod);
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod);
        ctx->state = 21;
        break;
    case 21:
        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->x, p521_mod);
        ctx->state = 22;
        break;
    case 22:
        sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod);
        ctx->state = 23;
        break;
    case 23:
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod);
        ctx->state = 24;
        break;
    case 24:
    {
        {
            /* Constant-time infinity handling; same masking as the
             * blocking sp_521_proj_point_add_9. */
            int i;
            sp_digit maskp = 0 - (q->infinity & (!p->infinity));
            sp_digit maskq = 0 - (p->infinity & (!q->infinity));
            sp_digit maskt = ~(maskp | maskq);
            sp_digit inf = (sp_digit)(p->infinity & q->infinity);

            for (i = 0; i < 9; i++) {
                r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) |
                          (ctx->x[i] & maskt);
            }
            for (i = 0; i < 9; i++) {
                r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) |
                          (ctx->y[i] & maskt);
            }
            for (i = 0; i < 9; i++) {
                r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) |
                          (ctx->z[i] & maskt);
            }
            r->z[0] |= inf;
            r->infinity = (word32)inf;
        }
        ctx->state = 25;
        break;
    }
    case 25:
        err = MP_OKAY;
        break;
    }

    if (err == MP_OKAY && ctx->state != 25) {
        err = FP_WOULDBLOCK;
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */

/* Double the Montgomery form projective point p a number of times.
+ * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, + const sp_point_521* p, int n, int m, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*9; + sp_digit* b = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t2 = t + 8*9; + sp_digit* x = r[2*m].x; + sp_digit* y = r[(1<x[i]; + } + for (i=0; i<9; i++) { + y[i] = p->y[i]; + } + for (i=0; i<9; i++) { + z[i] = p->z[i]; + } + + /* Y = 2*Y */ + sp_521_mont_dbl_9(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); + j = m; + for (i=1; i<=n; i++) { + j *= 2; + + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t1, t1, w, p521_mod); + sp_521_mont_tpl_9(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod); + x = r[j].x; + /* X = A^2 - 2B */ + sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t2, b, p521_mod); + sp_521_mont_sub_9(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_9(t2, b, x, p521_mod); + sp_521_mont_dbl_9(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_9(r[j].z, z, y, p521_mod, p521_mp_mod); + z = r[j].z; + /* t1 = Y^4 */ + sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod); + if (i != n) { + /* W = W*Y^4 */ + sp_521_mont_mul_9(w, w, t1, p521_mod, p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t1, p521_mod); + /* Y = Y/2 */ + sp_521_mont_div2_9(r[j].y, y, p521_mod); + r[j].infinity = 0; + } +} + +/* Add two Montgomery form projective points. + * + * ra Result of addition. + * rs Result of subtraction. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
 */
static void sp_521_proj_point_add_sub_9(sp_point_521* ra,
        sp_point_521* rs, const sp_point_521* p, const sp_point_521* q,
        sp_digit* t)
{
    /* Six temporaries carved out of the caller-supplied scratch area;
     * each is 2*9 digits wide (double width for Montgomery products). */
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*9;
    sp_digit* t3 = t + 4*9;
    sp_digit* t4 = t + 6*9;
    sp_digit* t5 = t + 8*9;
    sp_digit* t6 = t + 10*9;
    sp_digit* xa = ra->x;
    sp_digit* ya = ra->y;
    sp_digit* za = ra->z;
    sp_digit* xs = rs->x;
    sp_digit* ys = rs->y;
    sp_digit* zs = rs->z;


    /* Ordinate arrays are double width; only the low half holds a value,
     * hence the sizeof(...)/2 copies. */
    XMEMCPY(xa, p->x, sizeof(p->x) / 2);
    XMEMCPY(ya, p->y, sizeof(p->y) / 2);
    XMEMCPY(za, p->z, sizeof(p->z) / 2);
    ra->infinity = 0;
    rs->infinity = 0;

    /* U1 = X1*Z2^2 */
    sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t1, t1, xa, p521_mod, p521_mp_mod);
    /* U2 = X2*Z1^2 */
    sp_521_mont_sqr_9(t2, za, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t4, t2, za, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod);
    /* S1 = Y1*Z2^3 */
    sp_521_mont_mul_9(t3, t3, ya, p521_mod, p521_mp_mod);
    /* S2 = Y2*Z1^3 */
    sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod);
    /* H = U2 - U1 */
    sp_521_mont_sub_9(t2, t2, t1, p521_mod);
    /* RS = S2 + S1 */
    sp_521_mont_add_9(t6, t4, t3, p521_mod);
    /* R = S2 - S1 */
    sp_521_mont_sub_9(t4, t4, t3, p521_mod);
    /* Z3 = H*Z1*Z2 */
    /* ZS = H*Z1*Z2 */
    sp_521_mont_mul_9(za, za, q->z, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(za, za, t2, p521_mod, p521_mp_mod);
    XMEMCPY(zs, za, sizeof(p->z)/2);
    /* X3 = R^2 - H^3 - 2*U1*H^2 */
    /* XS = RS^2 - H^3 - 2*U1*H^2 */
    sp_521_mont_sqr_9(xa, t4, p521_mod, p521_mp_mod);
    sp_521_mont_sqr_9(xs, t6, p521_mod, p521_mp_mod);
    sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(ya, t1, t5, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod);
    sp_521_mont_sub_9(xa, xa, t5, p521_mod);
    sp_521_mont_sub_9(xs, xs, t5, p521_mod);
    sp_521_mont_dbl_9(t1, ya, p521_mod);
    sp_521_mont_sub_9(xa, xa, t1, p521_mod);
    sp_521_mont_sub_9(xs, xs, t1, p521_mod);
    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
    sp_521_mont_sub_9(ys, ya, xs, p521_mod);
    sp_521_mont_sub_9(ya, ya, xa, p521_mod);
    sp_521_mont_mul_9(ya, ya, t4, p521_mod, p521_mp_mod);
    /* Negate RS: t6 = mod - RS. */
    sp_521_sub_9(t6, p521_mod, t6);
    sp_521_mont_mul_9(ys, ys, t6, p521_mod, p521_mp_mod);
    sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod);
    sp_521_mont_sub_9(ya, ya, t5, p521_mod);
    sp_521_mont_sub_9(ys, ys, t5, p521_mod);
}

/* Structure used to describe recoding of scalar multiplication. */
typedef struct ecc_recode_521 {
    /* Index into pre-computation table. */
    uint8_t i;
    /* Use the negative of the point. */
    uint8_t neg;
} ecc_recode_521;

/* The index into pre-computation table to use. */
static const uint8_t recode_index_9_6[66] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
     0,  1,
};

/* Whether to negate y-ordinate. */
static const uint8_t recode_neg_9_6[66] = {
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     0,  0,
};

/* Recode the scalar for multiplication using pre-computed values and
 * subtraction.
 *
 * k  Scalar to multiply by.
 * v  Vector of operations to perform.
 */
static void sp_521_ecc_recode_6_9(const sp_digit* k, ecc_recode_521* v)
{
    int i;
    int j;
    uint8_t y;
    int carry = 0;
    int o;
    sp_digit n;

    j = 0;
    n = k[j];
    o = 0;
    /* 87 signed windows of 6 bits cover the 521-bit scalar (87*6 = 522). */
    for (i=0; i<87; i++) {
        y = (int8_t)n;
        if (o + 6 < 64) {
            /* Window lies entirely within the current 64-bit word. */
            y &= 0x3f;
            n >>= 6;
            o += 6;
        }
        else if (o + 6 == 64) {
            /* Window ends exactly on a word boundary; load the next word. */
            n >>= 6;
            if (++j < 9)
                n = k[j];
            o = 0;
        }
        else if (++j < 9) {
            /* Window straddles two words; merge bits from both. */
            n = k[j];
            y |= (uint8_t)((n << (64 - o)) & 0x3f);
            o -= 58;
            n >>= o;
        }

        /* Fold in the carry from the previous window; values above 32 map
         * back down via the tables and set a borrow into the next window. */
        y += (uint8_t)carry;
        v[i].i = recode_index_9_6[y];
        v[i].neg = recode_neg_9_6[y];
        carry = (y >> 6) + v[i].neg;
    }
}

#ifndef WC_NO_CACHE_RESISTANT
/* Touch each possible point that could be being copied.
 *
 * r      Point to copy into.
 * table  Table - start of the entries to access
 * idx    Index of entry to retrieve.
 */
SP_NOINLINE static void sp_521_get_point_33_9(sp_point_521* r,
    const sp_point_521* table, int idx)
{
    int i;
    sp_digit mask;
    /* Accumulate the selected entry in locals; every table entry is read
     * so the memory access pattern is independent of idx. */
    sp_digit x0 = 0;
    sp_digit x1 = 0;
    sp_digit x2 = 0;
    sp_digit x3 = 0;
    sp_digit x4 = 0;
    sp_digit x5 = 0;
    sp_digit x6 = 0;
    sp_digit x7 = 0;
    sp_digit x8 = 0;
    sp_digit y0 = 0;
    sp_digit y1 = 0;
    sp_digit y2 = 0;
    sp_digit y3 = 0;
    sp_digit y4 = 0;
    sp_digit y5 = 0;
    sp_digit y6 = 0;
    sp_digit y7 = 0;
    sp_digit y8 = 0;
    sp_digit z0 = 0;
    sp_digit z1 = 0;
    sp_digit z2 = 0;
    sp_digit z3 = 0;
    sp_digit z4 = 0;
    sp_digit z5 = 0;
    sp_digit z6 = 0;
    sp_digit z7 = 0;
    sp_digit z8 = 0;

    for (i = 1; i < 33; i++) {
        /* mask is all ones when i == idx, otherwise all zeros. */
        mask = 0 - (i == idx);
        x0 |= mask & table[i].x[0];
        x1 |= mask & table[i].x[1];
        x2 |= mask & table[i].x[2];
        x3 |= mask & table[i].x[3];
        x4 |= mask & table[i].x[4];
        x5 |= mask & table[i].x[5];
        x6 |= mask & table[i].x[6];
        x7 |= mask & table[i].x[7];
        x8 |= mask & table[i].x[8];
        y0 |= mask & table[i].y[0];
        y1 |= mask & table[i].y[1];
        y2 |= mask & table[i].y[2];
        y3 |= mask & table[i].y[3];
        y4 |= mask & table[i].y[4];
        y5 |= mask & table[i].y[5];
        y6 |= mask & table[i].y[6];
        y7 |= mask & table[i].y[7];
        y8 |= mask & table[i].y[8];
        z0 |= mask & table[i].z[0];
        z1 |= mask & table[i].z[1];
        z2 |= mask & table[i].z[2];
        z3 |= mask & table[i].z[3];
        z4 |= mask & table[i].z[4];
        z5 |= mask & table[i].z[5];
        z6 |= mask & table[i].z[6];
        z7 |= mask & table[i].z[7];
        z8 |= mask & table[i].z[8];
    }

    r->x[0] = x0;
    r->x[1] = x1;
    r->x[2] = x2;
    r->x[3] = x3;
    r->x[4] = x4;
    r->x[5] = x5;
    r->x[6] = x6;
    r->x[7] = x7;
    r->x[8] = x8;
    r->y[0] = y0;
    r->y[1] = y1;
    r->y[2] = y2;
    r->y[3] = y3;
    r->y[4] = y4;
    r->y[5] = y5;
    r->y[6] = y6;
    r->y[7] = y7;
    r->y[8] = y8;
    r->z[0] = z0;
    r->z[1] = z1;
    r->z[2] = z2;
    r->z[3] = z3;
    r->z[4] = z4;
    r->z[5] = z5;
    r->z[6] = z6;
    r->z[7] = z7;
    r->z[8] = z8;
}
#endif /* !WC_NO_CACHE_RESISTANT */
/* Multiply the point by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * Window technique of 6 bits. (Add-Sub variation.)
 * Calculate 0..32 times the point. Use function that adds and
 * subtracts the same two points.
 * Recode to add or subtract one of the computed points.
 * Double to push up.
 * NOT a sliding window.
 *
 * r     Resulting point.
 * g     Point to multiply.
 * k     Scalar to multiply by.
 * map   Indicates whether to convert result to affine.
 * ct    Constant time required.
 * heap  Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
static int sp_521_ecc_mulmod_win_add_sub_9(sp_point_521* r, const sp_point_521* g,
        const sp_digit* k, int map, int ct, void* heap)
{
#ifdef WOLFSSL_SP_SMALL_STACK
    sp_point_521* t = NULL;
    sp_digit* tmp = NULL;
#else
    sp_point_521 t[33+2];
    sp_digit tmp[2 * 9 * 6];
#endif
    sp_point_521* rt = NULL;
    sp_point_521* p = NULL;
    sp_digit* negy;
    int i;
    ecc_recode_521 v[87];
    int err = MP_OKAY;

    /* Constant time used for cache attack resistance implementation.
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * + (33+2), heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, + heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + rt = t + 33; + p = t + 33+1; + + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_9(t[1].x, g->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t[1].y, g->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t[1].z, g->z, p521_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... t[32] */ + sp_521_proj_point_dbl_n_store_9(t, &t[ 1], 5, 1, tmp); + sp_521_proj_point_add_9(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[ 6], &t[ 3], tmp); + sp_521_proj_point_add_sub_9(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[10], &t[ 5], tmp); + sp_521_proj_point_add_sub_9(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[12], &t[ 6], tmp); + sp_521_proj_point_dbl_9(&t[14], &t[ 7], tmp); + sp_521_proj_point_add_sub_9(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[18], &t[ 9], tmp); + sp_521_proj_point_add_sub_9(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[20], &t[10], tmp); + sp_521_proj_point_dbl_9(&t[22], &t[11], tmp); + sp_521_proj_point_add_sub_9(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[24], &t[12], tmp); + sp_521_proj_point_dbl_9(&t[26], &t[13], tmp); + sp_521_proj_point_add_sub_9(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[28], &t[14], tmp); + sp_521_proj_point_dbl_9(&t[30], &t[15], tmp); + sp_521_proj_point_add_sub_9(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + sp_521_ecc_recode_6_9(k, v); + + i 
= 86; + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_9(rt, t, v[i].i); + rt->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_521)); + } + for (--i; i>=0; i--) { + sp_521_proj_point_dbl_n_9(rt, 6, tmp); + + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_9(p, t, v[i].i); + p->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_521)); + } + sp_521_sub_9(negy, p521_mod, p->y); + sp_521_norm_9(negy); + sp_521_cond_copy_9(p->y, negy, (sp_digit)0 - v[i].neg); + sp_521_proj_point_add_9(rt, rt, p, tmp); + } + + if (map != 0) { + sp_521_map_9(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_521 { + sp_digit x[9]; + sp_digit y[9]; +} sp_table_entry_521; + +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_add_qz1_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t2 = t; + sp_digit* t3 = t + 2*9; + sp_digit* t6 = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t4 = t + 8*9; + sp_digit* t5 = t + 10*9; + + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; + + /* H = U2 - X1 */ + sp_521_mont_sub_9(t2, t2, p->x, p521_mod); + /* R = S2 - Y1 */ + sp_521_mont_sub_9(t4, t4, p->y, p521_mod); + /* Z3 = H*Z1 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_521_mont_sqr_9(t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, p->x, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(t2, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + sp_521_mont_dbl_9(t5, t3, p521_mod); + sp_521_mont_sub_9(x, t2, t5, p521_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_521_mont_sub_9(t3, t3, x, p521_mod); + sp_521_mont_mul_9(t3, t3, t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->y, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, t3, t1, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifndef WC_NO_CACHE_RESISTANT +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC | 
WOLFSSL_SP_SMALL */ +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_521_proj_to_affine_9(sp_point_521* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 9; + sp_digit* tmp = t + 4 * 9; + + sp_521_mont_inv_9(t1, a->z, tmp); + + sp_521_mont_sqr_9(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t2, t1, p521_mod, p521_mp_mod); + + sp_521_mont_mul_9(a->x, a->x, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(a->y, a->y, t1, p521_mod, p521_mp_mod); + XMEMCPY(a->z, p521_norm_mod, sizeof(p521_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * width = 6 + * 64 entries + * 86 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. + */ +static int sp_521_gen_stripe_table_9(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_9(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_9(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] 
= Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<6; i++) { + sp_521_proj_point_dbl_n_9(t, 87, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[1<x, sizeof(table->x)); + XMEMCPY(table[1<y, sizeof(table->y)); + } + + for (i=1; i<6; i++) { + XMEMCPY(s1->x, table[1<x)); + XMEMCPY(s1->y, table[1<y)); + for (j=(1<x, table[j-(1<x)); + XMEMCPY(s2->y, table[j-(1<y)); + sp_521_proj_point_add_qz1_9(t, s1, s2, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. 
+ */ +static void sp_521_get_entry_64_9(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit x6 = 0; + sp_digit x7 = 0; + sp_digit x8 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; + sp_digit y6 = 0; + sp_digit y7 = 0; + sp_digit y8 = 0; + + for (i = 1; i < 64; i++) { + mask = 0 - (i == idx); + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + x6 |= mask & table[i].x[6]; + x7 |= mask & table[i].x[7]; + x8 |= mask & table[i].x[8]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; + y6 |= mask & table[i].y[6]; + y7 |= mask & table[i].y[7]; + y8 |= mask & table[i].y[8]; + } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = x5; + r->x[6] = x6; + r->x[7] = x7; + r->x[8] = x8; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; + r->y[6] = y6; + r->y[7] = y7; + r->y[8] = y8; +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^86, ... + * Pre-generated: products of all combinations of above. + * 6 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_stripe_9(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 9 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. */ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 86; + for (j=0; j<6 && x<521; j++) { + y |= (int)(((k[x / 64] >> (x % 64)) & 1) << j); + x += 87; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_64_9(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=85; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<6 && x<521; j++) { + y |= (int)(((k[x / 64] >> (x % 64)) & 1) << j); + x += 87; + } + + sp_521_proj_point_dbl_9(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_64_9(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_9(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_9(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + XFREE(rt, 
heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC | WOLFSSL_SP_SMALL */ +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[9]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[9]; + /* Precomputation table for point. */ + sp_table_entry_521 table[64]; + /* Count of entries in table. */ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_9(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. 
*/ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 9 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_9(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); + } + else { + err = sp_521_ecc_mulmod_stripe_9(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif 
+ return err; +#endif +} + +#else +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#endif /* FP_ECC | WOLFSSL_SP_SMALL */ +#ifdef FP_ECC +/* Generate the pre-computed table of points for the base point. + * + * width = 8 + * 256 entries + * 65 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. + */ +static int sp_521_gen_stripe_table_9(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_9(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_9(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] = Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<8; i++) { + sp_521_proj_point_dbl_n_9(t, 66, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[1<x, sizeof(table->x)); + XMEMCPY(table[1<y, sizeof(table->y)); + } + + for (i=1; i<8; i++) { + XMEMCPY(s1->x, table[1<x)); + XMEMCPY(s1->y, table[1<y)); + for (j=(1<x, table[j-(1<x)); + XMEMCPY(s2->y, table[j-(1<y)); + sp_521_proj_point_add_qz1_9(t, 
s1, s2, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL) +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_entry_256_9(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + sp_digit x0 = 0; + sp_digit x1 = 0; + sp_digit x2 = 0; + sp_digit x3 = 0; + sp_digit x4 = 0; + sp_digit x5 = 0; + sp_digit x6 = 0; + sp_digit x7 = 0; + sp_digit x8 = 0; + sp_digit y0 = 0; + sp_digit y1 = 0; + sp_digit y2 = 0; + sp_digit y3 = 0; + sp_digit y4 = 0; + sp_digit y5 = 0; + sp_digit y6 = 0; + sp_digit y7 = 0; + sp_digit y8 = 0; + + for (i = 1; i < 256; i++) { + mask = 0 - (i == idx); + x0 |= mask & table[i].x[0]; + x1 |= mask & table[i].x[1]; + x2 |= mask & table[i].x[2]; + x3 |= mask & table[i].x[3]; + x4 |= mask & table[i].x[4]; + x5 |= mask & table[i].x[5]; + x6 |= mask & table[i].x[6]; + x7 |= mask & table[i].x[7]; + x8 |= mask & table[i].x[8]; + y0 |= mask & table[i].y[0]; + y1 |= mask & table[i].y[1]; + y2 |= mask & table[i].y[2]; + y3 |= mask & table[i].y[3]; + y4 |= mask & table[i].y[4]; + y5 |= mask & table[i].y[5]; + y6 |= mask & table[i].y[6]; + y7 |= mask & table[i].y[7]; + y8 |= mask & table[i].y[8]; + } + + r->x[0] = x0; + r->x[1] = x1; + r->x[2] = x2; + r->x[3] = x3; + r->x[4] = x4; + r->x[5] = x5; + r->x[6] = x6; + r->x[7] = x7; + r->x[8] = x8; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; + r->y[6] = y6; + r->y[7] = y7; + r->y[8] = y8; +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return 
the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_stripe_9(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 9 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. */ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 65; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 64] >> (x % 64)) & 1) << j); + x += 66; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_9(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=64; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 64] >> (x % 64)) & 1) << j); + x += 66; + } + + 
sp_521_proj_point_dbl_9(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_9(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_9(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_9(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC | WOLFSSL_SP_SMALL */ +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[9]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[9]; + /* Precomputation table for point. */ + sp_table_entry_521 table[256]; + /* Count of entries in table. */ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_9(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. 
*/ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 9 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_9(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); + 
} + else { + err = sp_521_ecc_mulmod_stripe_9(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + return err; +#endif +} + +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_521(const mp_int* km, const ecc_point* gm, ecc_point* r, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[9]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 9, km); + sp_521_point_from_ecc_point_9(point, gm); + + err = sp_521_ecc_mulmod_9(point, point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the point by the scalar, add point a and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. 
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, + const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[9 + 9 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (9 + 9 * 2 * 6), heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 9; + + sp_521_from_mp(k, 9, km); + sp_521_point_from_ecc_point_9(point, gm); + sp_521_point_from_ecc_point_9(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_9(point, point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_9(point, point, addP, tmp); + + if (map) { + sp_521_map_9(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +#ifndef WC_NO_CACHE_RESISTANT +/* Striping precomputation table. + * 6 points combined into a table of 64 points. + * Distance of 87 between points. 
+ */ +static const sp_table_entry_521 p521_table[64] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0xf97e7e31c2e5bd66L,0x3348b3c1856a429bL,0xfe1dc127a2ffa8deL, + 0xa14b5e77efe75928L,0xf828af606b4d3dbaL,0x9c648139053fb521L, + 0x9e3ecb662395b442L,0x858e06b70404e9cdL,0x00000000000000c6L }, + { 0x88be94769fd16650L,0x353c7086a272c240L,0xc550b9013fad0761L, + 0x97ee72995ef42640L,0x17afbd17273e662cL,0x98f54449579b4468L, + 0x5c8a5fb42c7d1bd9L,0x39296a789a3bc004L,0x0000000000000118L } }, + /* 2 */ + { { 0xb4c52f11f3d7286aL,0xec47cb5cbbf67a41L,0x89d3382eaafb10a4L, + 0x4afd876806222b90L,0xfa16b295feb5416dL,0x0946f9d45d095b7cL, + 0x18526b28ff975e3dL,0x259cec2fb2f1c609L,0x0000000000000104L }, + { 0x545642a062e36127L,0xc3e4c6f71c38e6f5L,0x1d0e02c41bf9d6c4L, + 0x8a7a6fb6483ba43dL,0x7104aee9cf390b9cL,0x4acdccdf2578e2fbL, + 0xc17af6df73b3feb4L,0x2fb789d77977fa95L,0x00000000000001d6L } }, + /* 3 */ + { { 0xdae82f27eb2db12fL,0x675f5cdb25444b98L,0x3234752e78288513L, + 0x53e37a78730f5769L,0xabad04dadcecd88dL,0x29d87958f3098a34L, + 0x00567f1a50cb0363L,0x400b67853878a6f6L,0x0000000000000183L }, + { 0xba3b00c422d2d58aL,0x06c351857b0e93d5L,0x78810249545fa63dL, + 0x1d72e7923a6be922L,0xe38827919e285f6fL,0x3212ca84b33128efL, + 0x41640a1022acf93dL,0xbf1819691f33c6e9L,0x00000000000001beL } }, + /* 4 */ + { { 0x0cfa67a640b0a066L,0xe97c179e8057a710L,0xb69d285227cce600L, + 0xf7dde8c5419687d1L,0xdbcb15185d5b039eL,0x3e47772fd99abbe0L, + 0x71c9c23ab2a9c6a0L,0x5af10b9b94496951L,0x0000000000000152L }, + { 0x0f96137f2c91f83aL,0x1f743fa449f15b33L,0xd43b3ec0ccb4c1b1L, + 0x895a3be3cfeea9b0L,0x27c3b4959eefee4dL,0xf1b4212712e344eeL, + 0x023afa76ff929c1eL,0x3078097686d07325L,0x0000000000000166L } }, + /* 5 */ + { { 0x36005a37c7b848a0L,0xbac203eee032cc07L,0x8d20246ee4c229c4L, + 0x13b8b0f97b7f6b06L,0x0223a119e13c7b1dL,0xa91307c053c2e32aL, + 0xce595e9f3536fe07L,0xc2a2efbc8489551dL,0x0000000000000019L }, + { 
0xed22e7b467081afdL,0xda1c5f3c81df2676L,0x48f406480ad04de0L, + 0x8d7720d1f7391873L,0x111178cb13448f78L,0x39087943ea49e233L, + 0x692f712c73d8d101L,0xca20ab46f4e7e521L,0x000000000000005aL } }, + /* 6 */ + { { 0xfd0c67177af706c0L,0xc500416552ebdf4aL,0x332675708ff0bf03L, + 0x3f282dddd2cdcf58L,0x93b60398bf766c2fL,0x124f1fd970748a8cL, + 0x05422ecb20ef5badL,0x9fef00a63eafb2dcL,0x00000000000001d8L }, + { 0xcd2d1f04e4d1ae24L,0x099afd68e44bae9aL,0x669b9ba3a4880b2aL, + 0x5608536cab513fcbL,0xfd7de6a330d8ace2L,0xebc44b8deb95e59eL, + 0x2302405e64969883L,0xbea96ff09b276f35L,0x00000000000001b6L } }, + /* 7 */ + { { 0xa7b7271a71b90b8cL,0xbdc1bcdad12ca22aL,0x52a5f2901f10a310L, + 0xf5c070c0f420fb28L,0xdd09d3a5f099a78fL,0xaf60cbbe3914e826L, + 0xeac320c9a914d69fL,0x78bc962dfd5ba85dL,0x00000000000001c0L }, + { 0xf69575cebe45049dL,0x418d09354958daccL,0x142da6f236c0f076L, + 0x51d23cfba83dfce7L,0x2df0421809714d01L,0x1260496a9ecec18dL, + 0xb4317aa1f700b1adL,0x86174a20cd86c341L,0x000000000000016cL } }, + /* 8 */ + { { 0xc2453d609102b570L,0x70840e5c142c3e23L,0x7294b1f1c30037d9L, + 0xa2500f121dbf5ab6L,0x03eceb67c9c8ee6eL,0xa19c319330820abbL, + 0x7aad59e87da6b820L,0xe97823f8c38d842fL,0x000000000000006dL }, + { 0xce5bb75ea7b6bb9eL,0x9090358d569c9eddL,0xa8b5ec369cd0c065L, + 0xb2b5ac1cf81c82b8L,0x8feb364c3ffceb86L,0x355ed9ecdf4f9f7eL, + 0x6f2efe425ce12ff4L,0xb23168780155070dL,0x0000000000000035L } }, + /* 9 */ + { { 0x5ba6aa8d478bab98L,0x849ddc2dbcf097c4L,0x67f6f4b274b08601L, + 0xb15c438fe3acbdacL,0x0bcd49256ee2c337L,0xd15facb9b3085e64L, + 0xc8e919dcf4d1b708L,0x6eed3e36e1fae449L,0x000000000000000aL }, + { 0x5103486e84926700L,0x1c6b82b28fef959eL,0xb309fe2355b9fa8dL, + 0xdb35d40e4b8579efL,0xad5288e08145279aL,0x92b15dbc4ba26317L, + 0xda42d33ed9838c28L,0xd13ae9ba313a0399L,0x0000000000000047L } }, + /* 10 */ + { { 0xe2fa0b39d0851f6aL,0x2f7f00e5263539c6L,0x46a066b81f107130L, + 0xe7cc14f87dbe9579L,0x61f1e5c2d0607b55L,0x539a9c4d07a4c0b8L, + 0x9b6893829fdc8d6fL,0x684725a9847245fcL,0x0000000000000020L }, + 
{ 0x98dd1588d9de1e07L,0xbe9ed1154f6dc491L,0xd4605724f2b85d2bL, + 0xf94da3ba93aa2d3fL,0x81229b618d489608L,0xb7e6a31ed9eaf52cL, + 0x787d526dab03e098L,0xc16ee426e5b06c1bL,0x0000000000000171L } }, + /* 11 */ + { { 0xa732fe002dcd1035L,0x034d6ff8da1f6ee4L,0x2103d3e5097d7909L, + 0xcc496507dfbec358L,0x27044faeef335708L,0xf685e6bffe9be99aL, + 0x57b8c6c406aababdL,0xb8e751f6a901830cL,0x000000000000011bL }, + { 0x87101fab8e15a6b9L,0xf5ad21a11a270874L,0x5d9a90506e900902L, + 0x15a2f5a1d7fe9a86L,0x7ba4fb21dce37de8L,0x7751df45ff07bfb9L, + 0x12089b6202345c0bL,0x46f675ec6a9ace86L,0x000000000000006dL } }, + /* 12 */ + { { 0x689a234ba08e3299L,0x941f6f05c4cf87aaL,0x3a98121b9ffdcb65L, + 0x3252b8481623b72cL,0x65f531b5c6bae311L,0x4c5005a1c383df2cL, + 0x13c011795826c457L,0x90ea82a2ee3b03deL,0x00000000000001d3L }, + { 0x582b79258c0a1261L,0x859c8a11788ba98eL,0x693c1e149a3ad4f7L, + 0xd3f4b8032d62063cL,0x6affccfa0ddc5607L,0x63a3cd821eb6c797L, + 0xa94ce4ae6fb0c290L,0xce9479c1612cf5d1L,0x0000000000000190L } }, + /* 13 */ + { { 0x4ab1603c78f459e6L,0x7d9af2dc6a84373eL,0x2d2be036159a9a40L, + 0x82371e3d509c38ecL,0x82188bf292f079b7L,0xd74b82801e82e017L, + 0x4041778a6b59bbfaL,0x40cf824ab8d4f9a7L,0x000000000000005dL }, + { 0xf9f79e729bc88c1eL,0x8459190a77e9ceb8L,0xdcb44b726a41a19dL, + 0xe30dbe8ce243350bL,0xa168a6a9f3adebc1L,0x421fdb0c48e22f81L, + 0x3cc1d22a9b601ed1L,0x226ef6f6d2b564c0L,0x0000000000000170L } }, + /* 14 */ + { { 0x0eedf7150a80f0b9L,0x7f78ee5beb322905L,0x15d5375783498544L, + 0xff081204b66d249aL,0xb95434862bd0d1fcL,0x6404f1321899b952L, + 0x1de3aa0ccdd8789cL,0x8b6f01381a7153a4L,0x00000000000001beL }, + { 0x12d645e1fe536313L,0x14f4969e373af74bL,0xa56f032a30d498ccL, + 0xf30e4fe50a666b8cL,0xfb42284e3fb31a5eL,0x065215a7efba6b77L, + 0x217ca76c6fa31b46L,0x13a6f6f91fc38bfdL,0x0000000000000007L } }, + /* 15 */ + { { 0x5141181078303dc6L,0x93d1836de01c1d96L,0x98046c2ff50b037bL, + 0xeb3a5b31fcd53e6cL,0x3976f8fefb2f8250L,0xd193f5369cda2685L, + 0xc609817dd26ee74cL,0xeea2244858f903d3L,0x00000000000000e9L 
}, + { 0xc4f3956b8e0c791bL,0x68cf3903875c10f4L,0x3ff4a407c1f7f0adL, + 0x3672c4658902e832L,0xdf5d0b6796ea88b4L,0x68df74bda86075a8L, + 0xe6d2d0675071e56eL,0x8c7fb077c7e5fcd5L,0x00000000000001f1L } }, + /* 16 */ + { { 0x1e2df23635210df1L,0x387b5c569aace566L,0xdd8152c9ee401323L, + 0x56fe9ec99cac0076L,0x88ed7fc1e1a9f782L,0x6551487d681f0428L, + 0xeaacc10197689006L,0x626bcad13b7e7fe6L,0x000000000000015eL }, + { 0xe1b3574a7acb23caL,0xadda7c9e3636a1c2L,0xfc998cf89f3d947cL, + 0x38ee4df41ba0511bL,0x1f40cc1403e4694bL,0xccecf4e0800fb6d9L, + 0x021f708ae1665d06L,0x2bcd7975c492d329L,0x0000000000000063L } }, + /* 17 */ + { { 0x66bd9a0dcd698697L,0x0e7d8559bbbc1758L,0xe072a980ac76a88dL, + 0x3cf9af4de2c17874L,0x85b24bc4748e97adL,0xe5522faff3f6d8eaL, + 0x60ed43eecef45795L,0x1859fd20de38467dL,0x00000000000001d7L }, + { 0xe62c76b958e8181cL,0x3731bcc7fee454edL,0x2cb0b570ee89f8f5L, + 0x0a38b9539a262283L,0x46887e748c19a774L,0x1dbea5dae80adab4L, + 0x09fb720cb1f75c90L,0x91e0b746e723e278L,0x00000000000001c5L } }, + /* 18 */ + { { 0x6fa722771bac59f3L,0x72358865520791adL,0x8da1c8c4e32463d8L, + 0xe0e75863d41001a5L,0xcea164f748e602dbL,0x93cb5f21cc294cfeL, + 0x28a3168a941cbefaL,0x018fff3bab807177L,0x00000000000001e0L }, + { 0x3da709346a145377L,0x1fb17f7fd16e2b7cL,0xef6432fcc0469077L, + 0x62a59a97d6660f5eL,0x8ddc75505e2a0275L,0x2e65b1f75b88dce7L, + 0x0f4283da066f2c44L,0x561b9e15ce4b84a2L,0x00000000000000ceL } }, + /* 19 */ + { { 0x728e1148bdbc363aL,0xdfbf9e0bf7a1bbabL,0xd6fcfc8fa2e12868L, + 0x3969e7e50a19fe81L,0xf104d3bef512cd5eL,0x9879c459810da8f5L, + 0xad31c365924bf3a3L,0xb2e2d4914d4fb575L,0x0000000000000151L }, + { 0x330d4d1d79410972L,0x7e6ef2a0ba1831bbL,0xa18cc9af4f4d5b93L, + 0x29d8de68069c9948L,0xb8bc009db5e104a0L,0x4237d3728d929561L, + 0xa126a4e8a2274babL,0x85ba9fcba99c454fL,0x00000000000000cbL } }, + /* 20 */ + { { 0x954376cee18bd1d3L,0x350613fc9c8e4d95L,0xa60d595673323968L, + 0x4f6ac836a044cdbcL,0xf38d747b881b8e40L,0x79f7faf00426afe2L, + 
0xf6e7403d2ae12787L,0x721445a6726ae667L,0x0000000000000141L }, + { 0x9e0d2f66306787cbL,0xe2e3c834fce146f5L,0x4b62143f2fdbe2b4L, + 0x19dab7745acb5238L,0xee46bfcd7e138359L,0x29dbfa6675966b0fL, + 0xcde19c032d601936L,0xf7daf7b4f2aa4f6fL,0x000000000000019cL } }, + /* 21 */ + { { 0x55a5e8f4641b7e08L,0xe1e589f3caa920f7L,0x07b90ad9abfced83L, + 0x05ff78cd7c466f08L,0x2020ab945a71ab69L,0xd3ad1f5787ee2ec2L, + 0x078ba17efb3d4c91L,0xcf3753ba8607cf69L,0x0000000000000138L }, + { 0xb5e391e2b9b22600L,0xd20b3d088454c979L,0x4c9945642f77a85dL, + 0x972896db1f169687L,0xd3554fa2ff8d2b83L,0x57123d99bd48e671L, + 0x1266b6b9da59843fL,0x633ff4df9241786dL,0x000000000000011fL } }, + /* 22 */ + { { 0x071492bba1e06f89L,0x6f3f72de734cc62aL,0xb50a11309d477637L, + 0x5f6b0546a5ce54f0L,0xc1cde769698626edL,0xe43af79564ade3d0L, + 0x3067161797a20d2bL,0xdf5821678c48495bL,0x0000000000000129L }, + { 0x544a65284790d91fL,0xf71c90983f2924bdL,0xd5adb1d63e5c39e1L, + 0xeba20528d8cacd81L,0xac72518ac728e735L,0x789a0bdf565cc9a0L, + 0x4b320c80c922bce2L,0x99b26c84e458f8adL,0x0000000000000138L } }, + /* 23 */ + { { 0x2fdf67bf4f6f867eL,0x56f6ea005f1e2bf2L,0xa7d7c0d8896ab990L, + 0x589102a023329acbL,0xbb8b6a64156a7f42L,0xfbb556a95882578aL, + 0xa82481cc609250ddL,0x573e51d9daad3d36L,0x000000000000005dL }, + { 0x7e2ede0bb08ffeecL,0xa37176acefbed279L,0x5acafeac0b72e855L, + 0x4c7c3ce3fcc75badL,0x2fc0939bb9a704a1L,0x6bf0c5328e4b4841L, + 0x5b72f1f2c10742b0L,0xaa62d2889c54371eL,0x00000000000001c1L } }, + /* 24 */ + { { 0xd3a8c547c7d40b83L,0xd9fd110285002a83L,0x4bd9516c97f61840L, + 0x8c628537448c4851L,0x5cae5aa1c90925adL,0x356195c572319740L, + 0x8cb6e19ba2b5be38L,0x68626a4b6d9c8c33L,0x00000000000001c5L }, + { 0xa0c846a9ed58b48cL,0xebe6013ae2590d05L,0x60c5d677da2fc481L, + 0xb6a0897b9252e895L,0xb84595be218bf72dL,0xf1c527960dd1b9b1L, + 0x3f1583a86d38442aL,0xcda593dec4de5fdfL,0x000000000000010aL } }, + /* 25 */ + { { 0xdb0fa258898a19c2L,0x26346630974ccc01L,0x135ec9db1f1e6a83L, + 
0x9bac9c9bff494d1bL,0x78d21ad4f24be4d7L,0x3249c89f61e8dacdL, + 0x528df6852df2cb51L,0x4f0ec6792ccdc5b7L,0x00000000000001dbL }, + { 0x789f22f80f551d73L,0x42ec3257e7ba5676L,0x3f52c4094cc69a65L, + 0xafdd277df46f1eb6L,0xaead8379ec161194L,0xdc657d191f924e46L, + 0xe43263451578ff09L,0x4750da5878d091bfL,0x0000000000000173L } }, + /* 26 */ + { { 0xdf886f2283fe19b9L,0x740586885dba4f8eL,0x3c04af37a13f8dc7L, + 0x21771e2b3b99dcbcL,0x1e77950d55c5b6a7L,0x20a6975b1b1338e1L, + 0x4956e06bd276d114L,0x56686808b0d3b14cL,0x000000000000016cL }, + { 0x08f3f2a25182063fL,0xdea422b775992149L,0x845260b0d5cd158cL, + 0x6e0768e07c353c5aL,0x5718a1c23afa81caL,0x522f094d3ca2a17cL, + 0x96a361f77d93ebfdL,0x75c32db595c76942L,0x0000000000000095L } }, + /* 27 */ + { { 0x953bb441f640de23L,0x5d35f0a9e1e08dc6L,0x323a1bcc7bbad4e6L, + 0x4dc31447742e5ad0L,0xca0e8e15068bf07dL,0xfc317d5097ebd2b5L, + 0xf5447997db4c9342L,0xf98a3ac80ba0a29cL,0x0000000000000061L }, + { 0x4ccb1f249e925079L,0x97af5a3dc5c916d3L,0x7ef650a8e497e318L, + 0x0d49927842975745L,0xcf328ffb2462eba5L,0x8ee8e88f6e360cf3L, + 0xe15c3f8dc77cd3c2L,0x5ab87910736a646aL,0x00000000000001cbL } }, + /* 28 */ + { { 0xa2d5fdb981e3d610L,0x0127b05c456ddf1cL,0x29e247d9bd5bd8a4L, + 0x5758d126f5959d68L,0x9b0fb7fb2b28eb19L,0x1d8aa23a1d312140L, + 0xf8b9e467281b4d2cL,0xea57464cda959b8fL,0x0000000000000028L }, + { 0x8e118540a87d4633L,0x3af88a0815200b8cL,0xca1806b488f790e1L, + 0x605032a919cd9554L,0xf671627867d0cefdL,0xb58e8671d1676085L, + 0x51098dc308decf4cL,0x5a6cb932a50e312dL,0x00000000000001fbL } }, + /* 29 */ + { { 0xa15c4a20a5eff951L,0x1baa735b49f4f32fL,0xab53ad000feaf2c6L, + 0x354c2bc4ed66b803L,0x587fd2379ac12f3aL,0x3837c9643881a729L, + 0x1d8bd95b9f6c3bbeL,0x237ed2d808d914b6L,0x000000000000001cL }, + { 0xcde7b0d13de67115L,0xe645422dec23815bL,0x286703896b445b51L, + 0x11be335a80ae2784L,0x9185d3df1a64b6e3L,0xa0ccbdd0ba228c67L, + 0x6bc79dff3a013cf7L,0x229f4e59b464c70cL,0x0000000000000185L } }, + /* 30 */ + { { 
0x3f621de51cf1a11fL,0x18b004fd0658152dL,0x1bc8a9455df479b5L, + 0x901719473da586acL,0x06fe325baa2f052fL,0x4a2fc982b6323a69L, + 0xa975e8bcd4f0b34cL,0xfe3f271914ed1d56L,0x000000000000001fL }, + { 0xeb6045d192f826d9L,0xbd7c962e5facf02fL,0x87a5cfbd35fa1278L, + 0xfd1ff1cbdeda7159L,0x1b0f6638a1287202L,0x861e7cbae1fcd5deL, + 0x33709b6bb9822a98L,0x171b15ba8b2ae55dL,0x0000000000000130L } }, + /* 31 */ + { { 0xb7f2a894410a34a1L,0xca6d4597f0e4d4f6L,0x84c4a0fe6d4d1f8fL, + 0x42d465bb405c281dL,0x44893a203587a898L,0xc0d0d12dab4ffdd6L, + 0xd5f85269fc00e886L,0xd93f9cdce1999ae1L,0x0000000000000199L }, + { 0xa9d573cad9980812L,0x87d7913ea7e02cb9L,0xb772f40954b00468L, + 0x207850e8cc28eec8L,0x772468db7603910dL,0xb5d3125db256e07aL, + 0xbeb567df8b59420dL,0xb3467a5331e5ccd1L,0x00000000000001daL } }, + /* 32 */ + { { 0x3a44dde085f45eddL,0xc543bb6498309156L,0x82d1ce827597d0d0L, + 0xb2dc0f228106ca3eL,0x66f394472c3a313aL,0xd8f6fcdaa6cf2e03L, + 0xed6e0e279e6bf40bL,0x7dcd3a305b9a41afL,0x0000000000000095L }, + { 0x0a870cec12be5b78L,0x912c21c51406e140L,0x977417fdd5e13ca1L, + 0x9edbae8ab9e21e28L,0x9fce45ea13b2f069L,0x25bd760b84f3cbf2L, + 0x29e0c3ce2489fd01L,0xe16d31c1ec62a766L,0x0000000000000061L } }, + /* 33 */ + { { 0xaff40f1bb5598dccL,0xef04d8e02cd12fd9L,0xc97ea3b0bad7f4acL, + 0x336776a6565108cdL,0x71d9d056fe63178cL,0x31a7c54714c3f6aaL, + 0x5da757a2f6804c72L,0x7b3ee12ff87bb311L,0x00000000000001faL }, + { 0x30df907b6bf9f426L,0xca534ca162ef821fL,0x540f8dba017699e6L, + 0x7cddfe567358f4afL,0x5bc334a1e8f9cef6L,0x427fd83d6adc5f09L, + 0x2fd7c1b8b851f4c2L,0x234d1bd4d1587daeL,0x00000000000000d3L } }, + /* 34 */ + { { 0xffef7d4f0399f8ccL,0x1e2708d99da4c3faL,0x5792ebd0a9474303L, + 0x21aa73f56812a436L,0xf61f60fbc052973dL,0x7061be89fc874833L, + 0xe65c7ebe2638329dL,0xe728d78b032852d9L,0x00000000000001e5L }, + { 0xb633df4e27c9f6a6L,0xdea08843cc406457L,0x75f688183cb011bcL, + 0x234d1b4ad5603044L,0x2a156ae5b6a4d089L,0x9021af3b95e9f0f1L, + 0x90e987df968be7d3L,0x0821a4d3781ef8abL,0x0000000000000027L } }, + /* 35 */ 
+ { { 0x94de21d800df95b9L,0x5d5301b2c33c47f7L,0xf271cabd2343171eL, + 0xbe5441dc4f0fba70L,0x11183253702639d6L,0x01e0573a97074632L, + 0x268025dfce4f92d6L,0x47d14b05039a6f5bL,0x000000000000015dL }, + { 0x2051179030720125L,0xee2db45c6541fb74L,0x07bbcc41aacdcd66L, + 0xffa2e9630a08ced4L,0x819b5b7a7f21a83dL,0x5f857f0a44896b10L, + 0x6126655c197cde7dL,0x81d0f14a2962acd7L,0x0000000000000138L } }, + /* 36 */ + { { 0x0b7a0b9ebb82fc5dL,0x7f2e7af14ff667a2L,0x5cc3df3907a17aecL, + 0x91465af2398fb92aL,0x785b749098cf002eL,0xfe22218f7380cfb0L, + 0x32bd17997f4f03a8L,0x1c78bd791209f39aL,0x00000000000000e4L }, + { 0x90389734e8b69225L,0x26dc02b5fc69c2f9L,0x37eeea93ff349339L, + 0x642556bfc05ec9e4L,0xaa5744b64bd93ef1L,0xc29f3c91bfe9d602L, + 0x399cbf01aea4f871L,0xc02b2b1cf485b047L,0x00000000000000beL } }, + /* 37 */ + { { 0x5658a9eb87400032L,0xb3513556ff0b51afL,0x99ecda719408ac7bL, + 0x5e8d8b317bc54d5fL,0x8f178f9e3ef10515L,0xfc49ad94624b9cf0L, + 0x344433fd8fc6c90fL,0x0df7be427f17965fL,0x00000000000000d4L }, + { 0x47918d08133929fbL,0xd6cf0aa92ff979f6L,0xad55ab19a37a87abL, + 0x9e9d1956c5010be8L,0x8c0bd3ea3b82de19L,0x6c87820ff3c76b65L, + 0x70499d39f2232ed8L,0x70dab0ee7aafde31L,0x0000000000000163L } }, + /* 38 */ + { { 0x5ba4b10081d6751dL,0xeb0319dcb96a5495L,0x56407c25654729d3L, + 0x3de520199330d3fcL,0xb3f8a00e28998bc4L,0xeeea220977ddd518L, + 0xc1429a49cb758f5eL,0xa86982a271834361L,0x0000000000000040L }, + { 0x7472fd7ac36a0150L,0xfb61ab940138d406L,0x0cef5dd5b30829f5L, + 0xa8ea3257aef599f4L,0x593011df7fa063bcL,0xea4c19ced774cb2cL, + 0xad0eeee6b85e8883L,0xb5ef126b2aba018bL,0x000000000000014dL } }, + /* 39 */ + { { 0xebc08efd502a4e76L,0x4589d863767c0f52L,0xcf85eef44eb92decL, + 0x0b59aa5d965c95c1L,0x60945a8e68d2dbcaL,0xc94cb99f77bf02d9L, + 0x6cc41e81cfc0dc2dL,0x7264e2dc8637cc2bL,0x00000000000000d5L }, + { 0x28989d2059b1302cL,0xba90dc20f89d0c77L,0xa29e9c3ee7f4a0b1L, + 0x332ecd54b0a225b5L,0x38a3b080b042d9eaL,0x6d27ff408d9dfe9fL, + 0x146f8b3a88a69d3aL,0x634fb13f1fee39b6L,0x0000000000000031L } }, + /* 
40 */ + { { 0x2cd49af1f76d4173L,0x269b00cea89acef1L,0xb60e2977a015990bL, + 0xbe86499685f72cefL,0x4e23b9db4d5b0156L,0xfa92c71c6bb926baL, + 0xf4a8ff2fe2f815b2L,0x543eee4aa5cd2e72L,0x000000000000003aL }, + { 0x9cf75fb5ecb67b4bL,0x0132019091a51dbcL,0x0557fcd377225c2bL, + 0xe2e69807eec94860L,0x58ccae9fe6c35624L,0xeca667a6e8c2ff7cL, + 0xf6939f23a37b64a3L,0x2eaabbb70d225b40L,0x00000000000001e7L } }, + /* 41 */ + { { 0x036735d2ff9ed76dL,0x8653e8e35d154fcaL,0x6811cba29824f792L, + 0x96a3c594e1252e06L,0xb02543e7ba6e82e0L,0x5435187349b46518L, + 0xb7c14c8662bc5b59L,0x6535a2bba295f29bL,0x00000000000000bfL }, + { 0xd8b41e4bb0625972L,0x1063ba69b0ce550dL,0x64687d34db86696eL, + 0xba2b364455ca06daL,0xa4699ecd3dc4d46cL,0x9f40e631bfe4443fL, + 0x0e97fbe375fa4f7eL,0xd026b0481cbdc008L,0x00000000000001d2L } }, + /* 42 */ + { { 0x042191fad75cf083L,0xaffc08409bd72c49L,0x8f5b9e3f67faf055L, + 0xa99fa2dfc9794f77L,0x1ccbe885a9ef6764L,0x5fd1e3d66880eb3cL, + 0xbef622be06fd7bfaL,0x80d418fe9b10c7f8L,0x00000000000001a9L }, + { 0x6637732018148fceL,0x9655f8b345c2a46fL,0xdc2952408caf763dL, + 0x4313d4b72816a721L,0x6ff4aa0a50da95edL,0x66cbcad2d4f9e551L, + 0x0ff11d35e6993438L,0x93dab445b9a4f5f1L,0x00000000000000b4L } }, + /* 43 */ + { { 0x8e846df2fa327b1cL,0x6755220d1d0321a0L,0xbacbc29c0342f918L, + 0xbe6f119dd996a1f8L,0x11c02d40eb5740ffL,0x28a8232f4fbb962fL, + 0x066250a52185293aL,0xf9220c87cf927dc9L,0x00000000000001c9L }, + { 0xd59cdf5b32a50e5aL,0xa13035882c379627L,0x0894c6ebff69b440L, + 0x881458aaad6055c9L,0x27d9a7d5b2f2ff3cL,0x15f33160d254ae2dL, + 0x6e8342f1e38392bbL,0x7d7f4576b7c8d249L,0x0000000000000104L } }, + /* 44 */ + { { 0xd63c86d1774512bfL,0x1524c2d8624719e4L,0x2f7568d486d00e48L, + 0x02140d74cb9cf033L,0x3fff92dd0cae0cceL,0x7ec6068613941889L, + 0x791633ac5b8bf2b0L,0x43c98854ca54ea3dL,0x0000000000000027L }, + { 0x15a79b14e40eb5e3L,0xccbbb5c75eeb2285L,0xc67c8dff5686232dL, + 0x8b45a2c1a83196efL,0xa274bf939b2acfb3L,0x35c2b5486e56b741L, + 0xc93963b24c1f5672L,0xbe9d6f8edf0e741bL,0x00000000000001caL } }, 
+ /* 45 */ + { { 0xc8bc017afba4114aL,0x8e2791028d7327cdL,0xb95bfa6bddf662a4L, + 0xd828f02c44a2caefL,0xc6504c42a27af13cL,0x67f79cab503e832dL, + 0x42da6976cb78ed1dL,0xda6e4727431e01dbL,0x0000000000000094L }, + { 0x44d5bd3eb9544fd3L,0x329d747f694035c9L,0x104d2b68676f065aL, + 0xc8e81e7cfc4165c6L,0x4cdad2fdaf3779f2L,0xa3e4a4625669a833L, + 0xa281178a21737142L,0x86f7afb87b8f5514L,0x00000000000001aaL } }, + /* 46 */ + { { 0x407a70134deacc7cL,0xa2540e3c94faf272L,0x4246beb934b43fa8L, + 0x157b03f43ba9f184L,0xf7898d60b939ba99L,0x0f7e245e3a5f62b4L, + 0x2db5a341eaf7a575L,0xd3adea091278c0f7L,0x00000000000000d2L }, + { 0x2cba440ada687094L,0x3b478947767e75d2L,0x07c7de48078f23a0L, + 0xca6d86d8d7252108L,0x5fd954fc6a7fbe1aL,0x4d552bbec9a2a901L, + 0x71ab270159e75804L,0x294298fa5413091cL,0x0000000000000149L } }, + /* 47 */ + { { 0x6e60bc8cbebdd14bL,0xfe195cae8c57a662L,0x41e77bd8d9e4437aL, + 0x20fc2e7c85e4b9edL,0x97be566d2a012e02L,0x433786ae9b250c6aL, + 0x9eb76699cbb3224aL,0xf727091bf090f756L,0x00000000000000c8L }, + { 0x31496439ef08161cL,0x265407a997e3a008L,0x2088210700e2ffcdL, + 0x27426c055359fd16L,0x03e836825c39f78fL,0x380743a8e2a8832eL, + 0x8739d8ed792bafe6L,0x6cef07c47a912015L,0x0000000000000128L } }, + /* 48 */ + { { 0xf29780090fd83accL,0x247593df630645dbL,0x171e6b9f9e2db252L, + 0x3a6342a3c30aec58L,0x23524040fdd7151cL,0x2589b6988b15fee7L, + 0xca588f0a17dca917L,0xb2cf2fe7677cad49L,0x00000000000001dcL }, + { 0x14332d38c8573626L,0xd4fe66fd4adedd4dL,0xc5d8f397bb83b706L, + 0x00aacee43aa8a4c9L,0xe90383ff107c81a9L,0x6cf1fb2f18abdf47L, + 0x9bf5444df17c55baL,0x8251ec253659322aL,0x00000000000001deL } }, + /* 49 */ + { { 0x5a0e940caa2f106aL,0x7c55eb23418781b7L,0xcc6cebb679cc53b8L, + 0x7f6f64fc9d7377f4L,0x1571161c6de3403cL,0xd7a52f1cff1bf45aL, + 0xaf938df4110b0e06L,0x9bf219246f64c558L,0x000000000000001cL }, + { 0xe15e4f0ea5afb352L,0x9ff6c56df2ca770fL,0xe0e86c68338e1890L, + 0x96907f1990808bfcL,0xd0d47de2686b805bL,0x2fbfcb72b367c12aL, + 0x800a58141dd3d54fL,0x2f4b07b398657a79L,0x00000000000001f3L 
} }, + /* 50 */ + { { 0x3340fb54f31cb1daL,0xc1e88317e1752360L,0x183ae7f8121667b1L, + 0x1f271fa2bfd7bae9L,0xb31175b92b745d3dL,0x22250cc540b13e3eL, + 0x63aabbb70d01026fL,0xab63c0f1b428cd91L,0x0000000000000018L }, + { 0x7435eaf9988d425aL,0xf9f323c7fcc441f4L,0x8de16b8c3e4de08eL, + 0x2e603853e495b0f0L,0x204602204b3f0024L,0x8aff3f40b43cdb09L, + 0x409df7af4d00e185L,0xc681d091f1637f16L,0x000000000000019fL } }, + /* 51 */ + { { 0x0801a2cf5f0f872aL,0x67a587356ec8c90cL,0xf21e24aba0913e94L, + 0x985fc1703502bcb9L,0x8552800450a05926L,0x65918c8f426e56f8L, + 0x382414dea5cba2acL,0xc3e7a7e62874cb00L,0x000000000000005fL }, + { 0x96630e9e8fa87335L,0xdcf938c1c7771bc8L,0xc4f3d77aa8cf3cd8L, + 0x931ae9adc99a5fddL,0xa89581d55e2bcffcL,0xd0c7c71f29758819L, + 0x4ef995634d5aa9d8L,0xc6de91cd8f04cdebL,0x0000000000000171L } }, + /* 52 */ + { { 0x24b42a1c35c6448eL,0x129a536e1f4e7b86L,0x7ca12db87e48d8f6L, + 0xa827acf3587d6577L,0xfc62bf4bb1cce77eL,0x702ee5800f45a823L, + 0xb4989843cc7a73a3L,0x3d49a2326c0afb5bL,0x00000000000000a6L }, + { 0x7055c61255086dc5L,0x0622af5a3cf49868L,0x113dc4cbe695b064L, + 0x65b33365479ff727L,0x156ab542336bc622L,0xf5a2ef703014770dL, + 0x486f74556ded88eeL,0xbb4619488c8a7452L,0x000000000000005aL } }, + /* 53 */ + { { 0xea6e991125fdd488L,0x4ea556fb3d0183b0L,0xe8cfd7c274dc3ac2L, + 0xe20307a3ddf1bd8fL,0xfc6684582ee9be7bL,0x64a60b7937782071L, + 0x612fc9c865acf2e3L,0xcbb8c60c0de59326L,0x000000000000009cL }, + { 0x250e6112195cad69L,0x81cd7e491f0b4f39L,0x9b6808db61302e46L, + 0xe569c108b3af7841L,0x34d86f57c15d5fc4L,0xb3e586fdf76ad338L, + 0x6de8477b70bd63a8L,0xa52b119d7ec86218L,0x00000000000000b6L } }, + /* 54 */ + { { 0xe51f8aca27806255L,0xcb4da51a101b2dbcL,0x27b8a0dfe08b3553L, + 0x6e2a0d066ad56ac8L,0xa751cbedec44486dL,0xf574538ec5b2ae67L, + 0x64e1e3c3827536e2L,0x652fa060707a8b39L,0x0000000000000170L }, + { 0xe17a85d7684c6e53L,0xc957477272d0d747L,0xf892866558d12edbL, + 0x7fb212c8c815db67L,0x44b676eff3e66186L,0x15b57bb3998ef4d3L, + 
0xb71f3b8e815c5036L,0x3fe9796573bcb190L,0x0000000000000162L } }, + /* 55 */ + { { 0x5c7f569ad5fd84e2L,0xf84d6e126fa4bb7cL,0x7b74629c82a1081dL, + 0x7935151d0522589eL,0x94fb78055ccf889aL,0xec677309792f6bacL, + 0x42fbece8d880a0d8L,0xd97b4881b70be7f5L,0x00000000000000e3L }, + { 0x85d79261eda953e8L,0x35894e9b5dd94926L,0x95c1cbc99aeb12b8L, + 0x017029fe1695dc35L,0x36e2b978d6b13364L,0xb86b7afdc388ce34L, + 0x2d25b1af1e501e40L,0xf9127abdd5d7e7c1L,0x0000000000000031L } }, + /* 56 */ + { { 0xa18a458f1bb9a390L,0x916fc4b62062e5d6L,0x7040176be16db823L, + 0x25de966301177efeL,0x2b679ddeb778335cL,0x6ecf1689862622ebL, + 0xa191ffb5cb8a74ebL,0x4cc26680019ff743L,0x0000000000000047L }, + { 0xfa5b847e047d9854L,0x6151153b6527d4d7L,0xf72721185bdda4c9L, + 0xdde383c5f17c7e63L,0xc44040392a82edb4L,0xd1355d9781853b97L, + 0xcb503386b211c3b0L,0x0f9dbd602c17c154L,0x0000000000000107L } }, + /* 57 */ + { { 0xc8cd8f5d80a2aa22L,0xb73c67a164793948L,0x55abac7fff49c73dL, + 0xb5bc3df0833e5481L,0xf6ab8a42384a9287L,0xbea5929eee4f0be7L, + 0x2e6bf0f29619ddbaL,0x224ce58ab2ddfcbeL,0x00000000000001d4L }, + { 0x12f87e99ac4906b7L,0xe752f6ea5101c876L,0xa32a9b1f2b668943L, + 0x9e6e6c2b07950fdfL,0x72a31f91bbb4c324L,0x2e799c7b28546e5fL, + 0x7183a2daa6ff4503L,0xe2cb8237178fdeebL,0x0000000000000030L } }, + /* 58 */ + { { 0x140767d0bcd89552L,0x31548634f10fdf7fL,0x2c4cdccc210355fcL, + 0x4929888955704a76L,0x7102e8820f1d3e5eL,0x4afc588292691f55L, + 0x4cff09ede9e34260L,0x49d3a32932cd76ccL,0x0000000000000063L }, + { 0x6adbd32efbdc0c1eL,0xe3fe998b0ebefe86L,0x78099d79bc2a7085L, + 0x333c30095adeab44L,0x45e4d96f07ae4b5fL,0x4bd1f0d29de88636L, + 0x8c7e69d7bf2020a9L,0x126a5886c3c5a340L,0x0000000000000000L } }, + /* 59 */ + { { 0xaed91dc9263f94d8L,0x40f3c1cd20289709L,0x74dc1e9b60b5c995L, + 0xf08cb2a07d5bb95bL,0xbb6d3339e642d133L,0x3c3e5d14d2fcc05dL, + 0xcf159316403e4a78L,0xd84be52727c124faL,0x00000000000000d4L }, + { 0xbb2246c1bcd8501dL,0x86d450b3a1600eadL,0x6f5a476db626b180L, + 
0x01f4b93e3511ac45L,0xd813396bbbfa9758L,0xc69b8e7512347b15L, + 0x108defb629cd9e2cL,0x4778bb2725e9ac47L,0x000000000000012fL } }, + /* 60 */ + { { 0x140ce283ea9f1213L,0x2326ce38af8d7bc8L,0x90d21012914ac8b5L, + 0x0b07cea5e255c0e1L,0x93cc5e972dcccb43L,0xb89b14f2ee9885efL, + 0xa35afc40dc56967cL,0x8125d749ee2e3671L,0x0000000000000101L }, + { 0x1855e97a55902052L,0x8a572e69e6f0db6cL,0xb1bb6014fcfa3f3cL, + 0xe77eb435f5b2440bL,0x9045dd302df3f74aL,0x7853068b590ee6e0L, + 0x5714e1f03b9ddec5L,0x9810d77586086f6bL,0x0000000000000008L } }, + /* 61 */ + { { 0x66e566e3d3a87a30L,0x325081762cc44c2fL,0xdcaf7c52b80871eeL, + 0xdad02ffbdd245945L,0xb9e691ace956cdb7L,0xdfefe49fc62064e5L, + 0xc322f6978c8f2c6dL,0x68a5d7a8aa88f4acL,0x00000000000000deL }, + { 0xee5555b183ae6c21L,0x895c837c478467b0L,0xe6b06030dd4d105bL, + 0x9d9f32247636a759L,0x68132ea4fb8142ddL,0x6109a099ff919487L, + 0x775c37af76a1835dL,0x4279a9c48604a81eL,0x0000000000000093L } }, + /* 62 */ + { { 0xb72859988df31c84L,0x0489bf6fcf37c1c3L,0x4ccd18d9e4c102d7L, + 0x8b1d6c45ac3e23c9L,0x7b7d259a5744d781L,0xa0033649b4778129L, + 0x5c7414d99fbf3e2cL,0xfa78a7fe20774c25L,0x0000000000000043L }, + { 0x9b46685f8e69fd50L,0x2632cf3c3e47d4deL,0x44fd8f9f0238fed0L, + 0x2fbc3f42e03f169dL,0x0c9df28eb7dcd132L,0x5deae7cc3d18592aL, + 0x3034fb93406adf0bL,0xbf5a3a75ae059280L,0x000000000000010aL } }, + /* 63 */ + { { 0x272883fb34c28cb6L,0x22b00e9e5e7d03a4L,0x23bbacb97f4602a2L, + 0x27564d96c248ed06L,0x9b7e1ce6d1c5b544L,0xb3d77b2d71182e92L, + 0xb18e78aade9d46abL,0xde48d9e12e69d74dL,0x00000000000001fbL }, + { 0x8de62222099effafL,0x2212621b1328146cL,0x05f3c0b003677fccL, + 0xf43e4825fb0fc3c0L,0x94d3b33698536e0bL,0x22c1cca4225481ebL, + 0x2b8668dfa9fcbaf5L,0x51e858f2c30e9271L,0x00000000000001e9L } }, +}; + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^86, ... + * Pre-generated: products of all combinations of above. 
+ * 6 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_9(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + return sp_521_ecc_mulmod_stripe_9(r, &p521_base, p521_table, + k, map, ct, heap); +} + +#else +/* Striping precomputation table. + * 8 points combined into a table of 256 points. + * Distance of 66 between points. + */ +static const sp_table_entry_521 p521_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0xf97e7e31c2e5bd66L,0x3348b3c1856a429bL,0xfe1dc127a2ffa8deL, + 0xa14b5e77efe75928L,0xf828af606b4d3dbaL,0x9c648139053fb521L, + 0x9e3ecb662395b442L,0x858e06b70404e9cdL,0x00000000000000c6L }, + { 0x88be94769fd16650L,0x353c7086a272c240L,0xc550b9013fad0761L, + 0x97ee72995ef42640L,0x17afbd17273e662cL,0x98f54449579b4468L, + 0x5c8a5fb42c7d1bd9L,0x39296a789a3bc004L,0x0000000000000118L } }, + /* 2 */ + { { 0x803986670f0ccb51L,0xb87e1d013654974aL,0x7f58cf21b2b29ed9L, + 0x06c0e9aaa3add337L,0xf13b35d0e9d08ffbL,0xdd8bf44c96761627L, + 0xa4a18c14758a3ef4L,0x96a576dda0043adbL,0x000000000000013eL }, + { 0x2bde24f8632d95a3L,0x79f15ef14c524829L,0xaadd863e9bdaba19L, + 0xdde053f4a962b707L,0xc598a2de14258d98L,0x9fa5a19d061c235cL, + 0x0ed46510e8ffd32cL,0x2aea9dd1ef78ceacL,0x0000000000000185L } }, + /* 3 */ + { { 0xd0a91dd8eaaf1fe3L,0x0db386624400b52bL,0xff6a06a921abf0d2L, + 0x9412879aa768c940L,0xf3791abc9a1eec37L,0xc913fbe62738343cL, + 0x728b42abe222abc1L,0x874c0a862b9ef313L,0x0000000000000157L }, + { 0x0ac8f184e6f03d49L,0xa9c357e41e48be03L,0x02ce5ef3815cbdefL, + 0x7a41c7ab5fd8dc3cL,0x4bef67c9faeb109dL,0x2f98cca1a84f4d38L, + 
0x7e03d47d672f0aaeL,0x24b1ab581d58968bL,0x0000000000000007L } }, + /* 4 */ + { { 0x904f2d4bdf9314e0L,0xdaae850de7a00aacL,0x79231083582efb03L, + 0x80f1c283ec7fe6d2L,0x2d5b3996199d74a8L,0x5f120b9b395007e7L, + 0x30d237734773f03eL,0xf4c192733b78b686L,0x0000000000000121L }, + { 0xf103ff6dfa8b51f0L,0xae7afb5140e2bdf0L,0x1130380e83254171L, + 0xe83501b8cda10d95L,0x1057771e4f3a8c01L,0x8f52196aac807069L, + 0x3609b0aaa5623821L,0x8c25790694a0a7f1L,0x00000000000001dbL } }, + /* 5 */ + { { 0x300370ccb2c0958dL,0x89aef16669a7b387L,0x2792f3cf480c9b38L, + 0x0b2984f2fab3e149L,0x9751e43650748967L,0x9cab99d5ad33db2aL, + 0x4d945d32b44a4daaL,0xa26cca5216c77325L,0x000000000000000aL }, + { 0xcdbe1d41f9e66d18L,0x80aeef96aa117e7aL,0x053214a2ddb0d24bL, + 0x6dcfb2275c98b7bfL,0x613e7436dfd3c848L,0x6e703fa13ca4d52cL, + 0x0c8e297718551e64L,0xf5e90eacbfa8527dL,0x00000000000001c6L } }, + /* 6 */ + { { 0xa2c2f1e74ab2d58fL,0xc1bbf82c2a097802L,0x6583eb24770bb76aL, + 0x8e4ed9ed5667f7bdL,0xd8c01d86fd96897eL,0x66395a133fbe0f15L, + 0x51e4f39dd99cdcb1L,0xde08424a720deb25L,0x0000000000000082L }, + { 0x97aa53b260ea91afL,0xa4384af77a31dfddL,0xcd82f2395cd09bbeL, + 0x997c19daf30058e1L,0x443b60c6e5c78e97L,0xfaae9b5f575b1845L, + 0x5ce86f3308c2ce16L,0x983ce58f4f63fa86L,0x0000000000000073L } }, + /* 7 */ + { { 0xaee931318217609dL,0x7f8a9dd42412fc00L,0xe117e64c286c6329L, + 0xcc3782d67bf1c65eL,0xe8c144db8d03eee5L,0x01acacb29ab93799L, + 0x215eb1b5b07784c7L,0x2c409fa81affcd87L,0x00000000000000f8L }, + { 0x007d3766378139a4L,0xc6d969ebb55bea93L,0xc7c60d6f68c8bc9dL, + 0x844e84615f93f242L,0x8461ca2a741717d9L,0x8e930e79f0bf120eL, + 0xe1554a026b5699d7L,0xe69c77026a4fb6deL,0x000000000000007dL } }, + /* 8 */ + { { 0x61b51bb04bee80d7L,0x0e1f6a1f7692de69L,0x8379e46ca0ebc3bdL, + 0x1c0bffa7930644f0L,0x97c67b87390db077L,0x095c33e1fada1ce9L, + 0x3c500addac54b512L,0xc231d360d3118656L,0x00000000000000b0L }, + { 0x0628929839bcab2fL,0xc0c0678064dd220aL,0x062f6084763dc2a0L, + 0x88e9da731938c3e3L,0x69be8f2d52e46eb9L,0xe55c8d2d6a5de0fdL, + 
0xf3a3fd63db2c0e26L,0x899c6d9f1e4bff57L,0x000000000000014aL } }, + /* 9 */ + { { 0x9ff6e3a1ec05ce88L,0xf8fc2496b6afd202L,0x0b9d20776fbeb007L, + 0xb50ec0bdeebded40L,0xaef97742693700f7L,0x806e37a13f7b030eL, + 0x5cf17d171b901f77L,0x9036e5dfca95ae0fL,0x0000000000000159L }, + { 0x00af64b5000e8e0cL,0xd3f2ae0406fb4df9L,0x5f61da67449f23baL, + 0x0ca91842255b25a9L,0xfa6af3e68e33c650L,0x14373c00c2c027c1L, + 0x99f3cda1972840a5L,0x98c62b79d0e84240L,0x00000000000000e7L } }, + /* 10 */ + { { 0xe8c7c4a8ae4d0f28L,0x3a8a55ef566d006eL,0x37985f65066e4023L, + 0x8deccab55d321b76L,0x38b966d6b8351b07L,0x2e889e5357d548abL, + 0x7a9e8e2fe631ab0bL,0x45c60f95e75c537bL,0x0000000000000059L }, + { 0xbca27d347867d79cL,0x7f460b1581c81980L,0x7ec2d9ab976b8c51L, + 0xfcd0448661b91ed9L,0x730a7a25d9c1d15fL,0x8a2cf259f94c9db9L, + 0x8e784b875dec5a3bL,0x062526073e5131eeL,0x0000000000000004L } }, + /* 11 */ + { { 0xdee04e5cf1631bbaL,0x40e6c1df156f4524L,0x06603f30e4c30990L, + 0xdb649a436b6abec7L,0x354f509cf6b94f6eL,0x7fecf46936b7e0b5L, + 0xa7a7107eba1e6dd2L,0x889edac5689450caL,0x0000000000000022L }, + { 0x9012916ed05596f2L,0xe3901dacb023cb8bL,0x2501d3ece7d4abe1L, + 0xb2815040a9c90313L,0x9dbcd3f1c6d146d0L,0x6fa1d5b174ee1896L, + 0x49aea161a91226fbL,0x754ceedfb8a80984L,0x0000000000000154L } }, + /* 12 */ + { { 0xb64e27b04270b2f0L,0x84b34e48bf4d74d7L,0xb186be8b0c2722baL, + 0xf54a589d9ff9b71cL,0x9887e4df34fd6bc4L,0xb7c669fd7412f49dL, + 0x4008d9bb77f89d16L,0xafb9426bc902e074L,0x00000000000001cfL }, + { 0xcca4f2d1662935caL,0x2847c703997dcc46L,0xc089e9e5353c79f8L, + 0x9ed8d9895215f0f4L,0x59cf08bc80911b9dL,0x4b03540e6de27aa3L, + 0x52f4d63ef69e320dL,0xa0217fd694ef193bL,0x00000000000000e6L } }, + /* 13 */ + { { 0xb77de62774214780L,0xca066817207459eaL,0xf78579b7e9c7fb01L, + 0xe55548c1d6d4b7c7L,0x45756190a66caa39L,0xf8141b0398505a4fL, + 0xa5ca0d7c4c8864ebL,0xbf8af9509e129d3fL,0x0000000000000053L }, + { 0xbc9b29d885285092L,0x82f31daa8eed5e5fL,0x9c33690ef618aab9L, + 
0x0eee14f4d2626ed1L,0x4229570b07ed8e09L,0x1977920e8736d040L, + 0x47ee25ffede7d01dL,0x3c921c3abc7ab73bL,0x00000000000001b9L } }, + /* 14 */ + { { 0x0b6a07cca08b2b14L,0xaa978debbf174c7fL,0x291cb828c40cb2a4L, + 0x95c7827290adc838L,0x08da8b2a8c1edde6L,0x741ceb2f90fbd220L, + 0x5f89c9e5322db94eL,0x18266085b73c548eL,0x000000000000007dL }, + { 0x69ebf82a2defd012L,0x01ecb0945a1537efL,0x3c5575353ef0811dL, + 0x59c882a7b2bd4deaL,0x00a1f9727bf969c8L,0x063adf5e0b25ad1bL, + 0x4c1ff306f2536005L,0x8e515bec4112fe18L,0x0000000000000117L } }, + /* 15 */ + { { 0x9314787fefe3d3d5L,0x29e76f659d897227L,0x15c77ed1e0b6acf5L, + 0x9c2b7b201c5e8dd9L,0x788038f15f5667afL,0xf38c766ff3576ef4L, + 0x9f0623c80040154aL,0x47d3c44bde883b53L,0x0000000000000096L }, + { 0x32075638de1b21a4L,0xbb6399c1571081c1L,0x322e606775c03599L, + 0x5c7fde7fade60cf5L,0x1b195440efc19059L,0x7e70ac8cdd7b3960L, + 0x4aa5a83d6a6fa73eL,0x34f8cfac63080764L,0x0000000000000042L } }, + /* 16 */ + { { 0xee31e71a286492adL,0x08f3de4465f86ac4L,0xe89700d4da713cb4L, + 0x7ad0f5e9a86b7104L,0xd9a62e4f2572c161L,0x77d223ef25cc1c99L, + 0xedff69613b962e0cL,0x818d28f381d8b205L,0x000000000000008eL }, + { 0x721231cf8cdf1f60L,0x8b640f2b6717760fL,0xbe726f8ce045a403L, + 0x422285dc0370689fL,0x7196bf8f72ea0dcbL,0xa16f7855c8086623L, + 0xd4e19fc7c326fe48L,0xfdbc856e8f68bf44L,0x000000000000013eL } }, + /* 17 */ + { { 0xde34d04fe6a3ace5L,0x0dbb603e896191c1L,0xb4dc0007f75ed0f4L, + 0x15e0e6bc95b259b5L,0xdfbcba662615f020L,0xb2ec5433d31ea3f8L, + 0x42b0b0e4103ff824L,0x19315060c480332eL,0x0000000000000111L }, + { 0x9997ea28045452f1L,0x80b678cf71f3f73bL,0x4a52bddc41e9328eL, + 0xb7f2656ee6af1c23L,0xc43805b9b44215e7L,0x3aa734f2f0a4028bL, + 0xe3c72479422476e2L,0x6dc2e8b068c60cf7L,0x00000000000001f1L } }, + /* 18 */ + { { 0xbcdfae6ffffc0de5L,0xa801814fab4a5f24L,0x19013658ea2aa8ddL, + 0xf3b1caf5da4f0441L,0xf24b9cdb34100611L,0x48c324ed96e0cf88L, + 0x4b7ea33423055c82L,0x6e835b6489092e29L,0x00000000000001d3L }, + { 
0x7eb77ae707372f27L,0x4779b4fa83bae19aL,0xa175dae165429ebbL, + 0x942ec266fc03ef3fL,0x0e5fc6a96991c7c4L,0xa0f61e4f56253d3cL, + 0x7a11ff58de74e738L,0x60524cd4624de919L,0x0000000000000002L } }, + /* 19 */ + { { 0x45b5d0ca01342e08L,0x509ed4f0b749f0afL,0xeb5502d96529d804L, + 0x5eb087db6d80359cL,0xeaa66a874c384800L,0xe972c7a0c75a8784L, + 0x8c169e216874317eL,0x81c556e0e5c9fbf4L,0x000000000000014fL }, + { 0x26b0b12be120674dL,0xc6bf09b9219f00acL,0x1e1e732dd658caa6L, + 0xc771c5af8292d99eL,0x5d81352925fdbf80L,0xe61bd7983666c37dL, + 0x8dac946a1d0df680L,0x58dcf684c39f0983L,0x000000000000009fL } }, + /* 20 */ + { { 0x141691027b7dc837L,0x2d719754b50eb1c4L,0x04f4092ad7e6741bL, + 0x1d0a7f1dbc824a38L,0x570b2056c8e20bcfL,0x6732e3b9da181db0L, + 0x7880636e0a7b508aL,0x11af502cc9f70492L,0x0000000000000045L }, + { 0x0b820d94c56f4ffaL,0x1c6205a2c4f0c0faL,0x99f33d4ea1a0606aL, + 0x1bab646679b316fbL,0x05aa0852e4f240fcL,0x22539b7892d7dc43L, + 0x03657f1206e3c073L,0x28405280cedb6633L,0x0000000000000059L } }, + /* 21 */ + { { 0x90d087114397760cL,0xb9020b761c9fcd06L,0xc7fec7fa987e24f7L, + 0x0e33b8a0522335a0L,0x73dbeafdae21ca10L,0x458c060a3b032220L, + 0x9b9c73b8ee145da6L,0x31c661e527ff62efL,0x00000000000000aaL }, + { 0xaf518eb081430b5eL,0xb32f9cea50ee0d69L,0x0ecdb0b5aa6ebe8bL, + 0x1f15f7f29fe1d689L,0xce5d68f31a59cc9aL,0xf4d6799408ab2a63L, + 0xe85b1cef4347ce54L,0x8ff423c0286d0776L,0x0000000000000176L } }, + /* 22 */ + { { 0x8564104c33dcec23L,0xbaf0d61bcdd07519L,0x486daf514c4f309aL, + 0xf01bc8f5de488715L,0xddd6baf1d3539ba3L,0xbb7e665d3a3be8ecL, + 0xf919dac3cb5d865fL,0xfe203da3f12149a0L,0x0000000000000173L }, + { 0x043ae9a178d4a3d1L,0xa4d5cf58865316d8L,0xeaf026c041176463L, + 0x316c638ff84afa44L,0x512f2397ffea422dL,0x691eaa046622b613L, + 0x48856ea397e7068dL,0x42d1b2e3f4a1b33cL,0x00000000000001b5L } }, + /* 23 */ + { { 0xf51b2d5e1f487402L,0xe36016e67aaf1dd5L,0x1eb3f1f56da9c20aL, + 0x25b7d361ece45bfdL,0x42db0633027a9e18L,0xbf228777e8411649L, + 0xf5fce0c4458773d0L,0xb2b3151d2dd7a5f0L,0x000000000000001fL 
}, + { 0x102773e8fbaa096aL,0x152726ebe093a878L,0x5c53cd072c7f1781L, + 0x38d3dfd0ab5dca76L,0xbb4a7d8587ef2d4aL,0x5c9c2013b7eb11c2L, + 0x5e353c340b6da22fL,0x846d50a5a325ecadL,0x0000000000000039L } }, + /* 24 */ + { { 0x76da77361677df58L,0x364bd5671cb50d6cL,0x0443c7d70a080ff2L, + 0xa0a8542986532430L,0x82002dd2c35101e7L,0xbebc614348c5cd76L, + 0xff1591aeca6cf13fL,0x91c7c2e698bf8dc0L,0x00000000000000fbL }, + { 0x6a7c5cad12de14d5L,0xbc448c5f6561c822L,0x9f8de4307cdbb3daL, + 0x9c58f011c76811d7L,0x1e89806e75462049L,0xe52ad0a2c9a74e49L, + 0x2034685cb2be37c3L,0x7a8632450a0bc72dL,0x00000000000000ecL } }, + /* 25 */ + { { 0x33818c218a86786eL,0xed537f742137e2c8L,0x5d9690d1a7e6eb20L, + 0x9790ec705cdc4803L,0x469162c824f7bd75L,0x09e7ef9d4e1f0f14L, + 0xd30c128bce9915caL,0x810145f66c71226fL,0x000000000000002dL }, + { 0x312749f5b71d87e5L,0x25f3b1417b02cedaL,0x02456d2ee0baff16L, + 0x97f7b3a9fcae6627L,0x0d6ebf8f37bd985fL,0x20aa81b97fa6d0c1L, + 0xb29f1a0121f2f137L,0xe326a2f85cc0ddb1L,0x000000000000003dL } }, + /* 26 */ + { { 0x26f3398b38c2ee78L,0x40c3d101a75a0beeL,0x35a31706565a7f8eL, + 0xd12985e304019e5dL,0x21e2a642b8174b6eL,0x25a15ee8af80a52aL, + 0x5d1e0fe68518d80eL,0x8cbbc13804f6ea9aL,0x0000000000000084L }, + { 0x76828690dfd45169L,0x38d7e09859d3e8d0L,0x23758811cdb8bfc2L, + 0x8499547a162cf648L,0x494bab3bb4d15b8cL,0x822cbc57c60499a6L, + 0xac43224ea8a1cfedL,0x4356346957c6598bL,0x00000000000000d9L } }, + /* 27 */ + { { 0x2b06925368271323L,0x24d9e0a849cd04d7L,0xaae35fbf2b31cc7dL, + 0x44f64b4f57a3e361L,0x149046860294e856L,0xddc82ee743ced4aeL, + 0xcb92a6a57e2cda47L,0x989c42efbfc1f968L,0x000000000000013fL }, + { 0xbed98bdfb8651600L,0x8c3634347a3cfaeeL,0x93a1254335b1a226L, + 0x558da7ddd5825507L,0xa5173b23852eb1e9L,0xdf5ae5852295f545L, + 0xe546e2ef6646d101L,0xf7e16a2c5d89f862L,0x00000000000001faL } }, + /* 28 */ + { { 0x0d746c8ec7ec136dL,0xf8e1d827cd11351bL,0x764a3ad3f187a116L, + 0x2f1b968f136e8465L,0xd41aa294850983c2L,0x2123ecc4be717259L, + 
0xdcdcab52763c149cL,0xa7f50b181022b82dL,0x000000000000016dL }, + { 0xf99e532d0ca5e258L,0xa148ad1797b62a7bL,0x8d0a242ec77fddefL, + 0x58518bcd74f9b6c4L,0xc53b30b87fd122d4L,0xbb8cd193fb50b2d7L, + 0x1a169aeebc01aae9L,0x7e49b10a1de26e09L,0x00000000000001c5L } }, + /* 29 */ + { { 0x2cabe67521210716L,0x81a296a307e02400L,0x94afc11d8c83795bL, + 0x68f20334dd9efa6aL,0x5be2f9eb677d686fL,0x6a13f277bf5ce275L, + 0xf7d92241b9757c5cL,0x70c3d2f4c74f4b8cL,0x0000000000000132L }, + { 0xf9c8609c8d209aa4L,0x46f413a2db2b5436L,0x96b72d1a2992345dL, + 0x186f2aeb9487c34fL,0x4fa72176b440a375L,0x3a4209367da5358eL, + 0xf11eade3ff25b310L,0x9a570153505d60b8L,0x00000000000001a9L } }, + /* 30 */ + { { 0xae1513936e7495bbL,0xebd2fd28490879d1L,0x9c232b0b29fd76fcL, + 0xa1a0d49bc60e721cL,0x9f582b83517a09e2L,0xac37809e9d8badf8L, + 0x4aa4de9e0ad48bb4L,0xfd041312cb6cc487L,0x0000000000000027L }, + { 0xc05502eeead4fb6dL,0x760c25ed0a602cbeL,0x58ba6841bd7f4a07L, + 0xc28b603254edce14L,0xb9d41e390397614cL,0x4221b71d181eed93L, + 0xd010e3c2332d4b0bL,0xdfe58a27dab0e419L,0x0000000000000096L } }, + /* 31 */ + { { 0x4cd6fcd67debd24eL,0xbe3fca609ae2b075L,0xa7d8c22ef217c26cL, + 0xd42d03e0b9620e3fL,0x634bf216c7f9f87dL,0x22b1ec538972ffeeL, + 0x83a957c1d60d3e77L,0xedfe5f860f6a537eL,0x0000000000000162L }, + { 0x40a05400f0ea20b8L,0x2872ac7e1d796900L,0x7765a5c90edb0cacL, + 0x9df5b930b62939a7L,0xf78a676eaf2cb708L,0x030732bf52febc12L, + 0x3a6640deba190ad3L,0x36eae15f93e7e341L,0x00000000000000d5L } }, + /* 32 */ + { { 0x6c6119f6a1c88f3cL,0x924e5fec2ec6944aL,0x4c8aac605742ff2aL, + 0x60adde1eddb22c7cL,0x9728938cfa5d25bbL,0xfa5ac4f7ec117de0L, + 0x41f35ab7482929c1L,0xd1c4e8f90afd95f5L,0x0000000000000180L }, + { 0x2fc4e73da7cd8358L,0x39361a57f2a1c920L,0xf6f2f130ad94d288L, + 0xe37e24662b6a78e2L,0x0babff8b79c262cdL,0x6cae01ef61b597b9L, + 0x9c1e33f0a60d4e64L,0x52a42280dd01f845L,0x000000000000000eL } }, + /* 33 */ + { { 0x72d640a40f013755L,0x0b6dce77fb8380e9L,0x2789ce797eb64b31L, + 
0x8e704b0b93ca5a36L,0x18c360ff58bdffc9L,0x53b1f323b230c372L, + 0xd6b390885a7385d1L,0x071130f556b93bf7L,0x000000000000004aL }, + { 0x29a2096bfeef3f88L,0x22eba869b82b3945L,0x7fe2184a872664a7L, + 0xa0dc0ba1858ff942L,0x33799eb57490c9daL,0x1d356f6281588ce8L, + 0x7dd9bc7fa7b2cee2L,0x1e61a4e8a3cfaee9L,0x00000000000000d2L } }, + /* 34 */ + { { 0xec5db629e9068656L,0x623bd70c9fede4dfL,0xc78ad5bdfcd45546L, + 0xf7981dd26291a741L,0x3ac53d92761e688eL,0x6a96892a55b9272fL, + 0x4217e7b806546fecL,0x793c03cbab9e2f56L,0x000000000000015eL }, + { 0x08fd95436eff39beL,0x5a1af07edbff4f68L,0x83d47abdb0241616L, + 0x37c5d2fdd4798029L,0x9d86d97860b2e6fbL,0xe3e3284ece8db998L, + 0x9f049eb5d868b9bbL,0x3b3e8a789dad18b3L,0x000000000000018eL } }, + /* 35 */ + { { 0x57026c56e51e61f0L,0xdddbcaa3307f2757L,0x92a026ebb1aeaf41L, + 0xa33e937ce2d7f5baL,0x1f7cc01ebc5ead91L,0x90ab665d2e46807dL, + 0xc2a44f5553419519L,0x099c1ca679664049L,0x00000000000000aaL }, + { 0xb561a9098f97e387L,0xf605177845e1dd69L,0x1ffa512b7ff1d6abL, + 0x42da55a4d09a9c89L,0x5e5a7c71d2282e2bL,0xdfa5a203e74185adL, + 0x19b1369dea0baeffL,0xa5eef9141ecc0a16L,0x00000000000001a3L } }, + /* 36 */ + { { 0x2af20d0a7a573b81L,0x7eac1ca866194cefL,0xef0d2d8d0b711c34L, + 0x6aea016cba099d42L,0xa6609d285067a8caL,0x6a52c6007a1351efL, + 0xdab85818b11c2634L,0xf17fa45dbb1c033cL,0x0000000000000121L }, + { 0x9fb8b87afc3279d6L,0xe30e76abc201f1e1L,0x02af6a83806c21ddL, + 0xeafd7e2bc63f824fL,0x7b074e2646bd1a53L,0xcd6f4931a2139164L, + 0xab2cfd39c172d9bfL,0x62f3eb4b4db59cf1L,0x000000000000010aL } }, + /* 37 */ + { { 0xe402de36e0689a1bL,0x9dccc9fd7dcafe72L,0xe4dead7e255d0bfbL, + 0xd7ee87ee4ada04d9L,0x5a85039ebfd2e774L,0x282c6657770b2b9bL, + 0xa7aca826ba103bbaL,0xac7028bac7cd5071L,0x000000000000011aL }, + { 0x2e61d39c680c8f04L,0x2f09c4ccb48b3b5eL,0x131609bd95744f3cL, + 0x6d72e4b4aaccb593L,0xdb7060ca5adfb209L,0xc67d9e431fd3eccfL, + 0x1487a26fe1752a73L,0x3d95366364d0857cL,0x00000000000001e3L } }, + /* 38 */ + { { 
0xe664506b4cec9e7fL,0xa44564b430aab98fL,0x5e1b501f173fa284L, + 0xe7b7bd7e15c97472L,0xd6cc67a882dec033L,0x1fe2e9340a63b762L, + 0x3a084e1b3f8e2fcdL,0xccce4da89ae6e752L,0x00000000000000fdL }, + { 0x0797f8eec12fd820L,0x325f892a96da4733L,0x597d241d55997bf4L, + 0x3aef35ac02b753cfL,0x8a73f95df677cebaL,0x5b2892b7d1bbac6cL, + 0x90751583cc5278b0L,0x2f5ed53fa47f45f6L,0x000000000000001cL } }, + /* 39 */ + { { 0x3914165eab40b79cL,0xbfb6eed825b489a8L,0xda136b7d8a6c107fL, + 0xd431db8b8e01f28bL,0x84e5d0dda4d79907L,0x69a91472a471e685L, + 0x58d0696998376ff8L,0xce369b74c46311fdL,0x0000000000000006L }, + { 0x6c0773d11add1452L,0x2e4e9c95ed8e9a2aL,0xe8ff8e32ca15a40cL, + 0x3fcb7d36af62f18fL,0x2ca336eeeec9484bL,0xa4d6e7a93b20405bL, + 0x6d90d031956d8352L,0xdd375603d9ca03e7L,0x00000000000000e5L } }, + /* 40 */ + { { 0xcc5f297d8b481bf7L,0x06a2a3e42a13383cL,0x9e14528cdc40b96cL, + 0x9a2bf35f1189da3cL,0xb8adb9896cd57fa7L,0xc1a4935c9357d32bL, + 0x51fb2580c2d76fadL,0x98721eb424f23de1L,0x00000000000001baL }, + { 0x8c02daaf52a4b397L,0xc3c5f4cc0d0b4e54L,0x29be4db37b7e79cdL, + 0xf34336ecb33970b6L,0xed3dcb7c92808c7fL,0xec290eff02288db1L, + 0x2a479d51e96ed59aL,0x9d7ed87076d8fa5fL,0x0000000000000092L } }, + /* 41 */ + { { 0xd8edaf0be660043cL,0x84aa2ccb016e074dL,0x9d2368e7e2cc3b3dL, + 0x47b501305c269fc4L,0xd0194ee13de33e36L,0xdb3361b9789ca504L, + 0x8cd51833984db11dL,0xd5b801ecc8ec92f0L,0x00000000000000c6L }, + { 0x33f91c1547ab9887L,0x2f285e2a6b5ab011L,0x9b734e5a133fc818L, + 0x5c435a7438d8692cL,0x3c92b47c43282e81L,0x191231f59c7bcdaaL, + 0x3ae425c34d158c86L,0x7f568febc5a23ccaL,0x0000000000000011L } }, + /* 42 */ + { { 0x8ccbd9d5bf5caa87L,0x17bfc60f68dd8c9dL,0x63eb4dbbc7d4dedeL, + 0xbf6e59458270b5bfL,0x887137a5cc098fe7L,0xca5eb68705d7b8f5L, + 0x4b7deeee4b25a533L,0x8e045c324a700a6cL,0x00000000000000efL }, + { 0x160c1c9270cf52bcL,0x4bf3f63a90cc6298L,0x5fff421cbf3028fbL, + 0x0a8102d7523beff1L,0xff3309a38b9ce105L,0x8e9da4d006621b1eL, + 0x9775f89fcc0a7807L,0x5904486500178612L,0x00000000000000ebL } }, + /* 43 */ 
+ { { 0x8a6664fdebbd33ecL,0x0cf9a660ce5ad579L,0xecd06c0550fb56edL, + 0xb4ca5fad1d5aaa6eL,0x36daee5b948a7f07L,0xd2e37887efe1c11aL, + 0x41f61ac491d2544bL,0x49df70712bffd8eaL,0x00000000000000beL }, + { 0x60e2f1f565acdb56L,0xf2f13c845e5e5bdeL,0xb97fd354e17a0412L, + 0x8a2867cfd9c93befL,0x9ca9d16b25a957e4L,0x1f55c19b4a18635fL, + 0x9b3868f58d26ae71L,0xac4480414c94541dL,0x0000000000000000L } }, + /* 44 */ + { { 0x6c1bcf89d4ad38dbL,0x1180f3813d714511L,0x5b4c2759cb70243aL, + 0x5dd64d63163a716cL,0xbbd2efea13648bdbL,0xa47187f9e4de9969L, + 0x65de6912e2de8c45L,0xe075f29c4bdad0a7L,0x0000000000000048L }, + { 0x003354745e4dd88dL,0x1828363880577afcL,0xe4b35c01227288f7L, + 0xd008fd91e68989deL,0x42142315cd3f71baL,0x5cb023ff3e4da1e2L, + 0x7e6b9c35b5662bb1L,0x143f41657fb04fe5L,0x0000000000000072L } }, + /* 45 */ + { { 0xb06b046c26f40f2cL,0xbd5d246c6cd7c31dL,0xaaa562701953a9b7L, + 0x5ac929b88f00436fL,0x1937392c21d0660dL,0xd279ed159bd6dbe6L, + 0x377c4d5ad17c43f9L,0x800eda50b8fcd025L,0x0000000000000179L }, + { 0xb88ddc0b36132f31L,0x6f8f4f012ade73a3L,0x38859ec3203de2b9L, + 0xedb03814231b6533L,0xad08cd20a14093caL,0xb9f86d445c2be2f9L, + 0xfd3d9532f6ebc09fL,0x757b58991aef478dL,0x000000000000013dL } }, + /* 46 */ + { { 0x7d9ad100580f894bL,0xb612488ad925e46fL,0x45497e142e5a6865L, + 0xc86e105317f9a813L,0xd8aa820af8a33541L,0xa67906607a66d578L, + 0x47df60ae5f758e23L,0xcadd4c90a7f8ab5cL,0x0000000000000107L }, + { 0x356b044f6764ad0eL,0xf69fe0e1250189b3L,0x2deaca625f14db6aL, + 0xe9f2779f1bd77d54L,0x979911f25cfa895cL,0xd4e94cedb6f19ac3L, + 0xc353341701af44b1L,0xcac43fff50c727f5L,0x000000000000003bL } }, + /* 47 */ + { { 0x1742951c83c1d4cfL,0xe03791d0b245c34fL,0xea8f8ef69c2dcc71L, + 0x2ea57a292a310767L,0x255b46bbb12948bdL,0x2adc1e090feaeb83L, + 0xa0d2d18c449abf59L,0x9e8c9ff5c4a8a689L,0x0000000000000019L }, + { 0xc9f7b9cdeb28171aL,0xefd78403d576987bL,0x58b4f3bf22ff824cL, + 0xee09b393bf333cc5L,0xebff83a2b01ceb72L,0x5bb34c45220299cdL, + 0xa3c3e8a066ebf751L,0x5dee07bb49d05cf3L,0x00000000000001a6L } }, + /* 
48 */ + { { 0x09a958d6b114257bL,0x729afd41d4975e30L,0x072879b53aae7b11L, + 0x0791b093edd1ac83L,0xcfefc7d11eb67975L,0x0e54bd37e2675b4aL, + 0x89a62d7e8d69517fL,0x96f805d8202109a3L,0x000000000000006bL }, + { 0x4815d51757b5f9f4L,0xe5c9e436405b44d1L,0x3442dde0e4870160L, + 0x953fef951ef6b3f8L,0x919e4cf5f7497fafL,0x24e3cc4d016ef0b7L, + 0xfc5caa872512eeedL,0xf1ba4029a3bd1703L,0x00000000000001b6L } }, + /* 49 */ + { { 0x2a668435529252acL,0x3da626c074e7b0d8L,0x55080cc1e0be86abL, + 0x534a53f74ed5dc53L,0xa9eff1400cd41fd0L,0x0e7c945c5674891cL, + 0xdea4b895ec53b5adL,0xefc67bef15150988L,0x00000000000001ffL }, + { 0x988dc109306033fdL,0x1b287979f36875d9L,0x4d39af26e3c335c5L, + 0xa47259fd124e29d6L,0x5d60c570c41dbdfcL,0x06224b610cc0d895L, + 0xa041d4e5eea8ff86L,0x2920e15cae4d8707L,0x00000000000001fdL } }, + /* 50 */ + { { 0x66d15f0ccd67da85L,0xae98b6f45ac54a15L,0x2f05e021f1ac71c3L, + 0x1feb222647559224L,0x2a2f156166e856dcL,0x65eb14566fb4ba47L, + 0x34688bd2a29d920bL,0x943ce86ef9d4cb9bL,0x0000000000000061L }, + { 0xb4696218aac91174L,0x85b519ec41dd9234L,0xb7efadf29f0763a4L, + 0x98517f27712c8b33L,0xa02e7ec3b0538630L,0x46bc45bb1ff3e3e4L, + 0x46ae896f29496486L,0x2aeb1649ebd2b93fL,0x0000000000000146L } }, + /* 51 */ + { { 0x1f34f41fe8e4d3c3L,0xc80d87ff5bb7e9dbL,0xf0216c0ad910b579L, + 0x2a24b761b87349aeL,0x054bc5282b0a6cc0L,0x3b4c7029af2d1957L, + 0x0e4b90e2adbe6cddL,0x8e774f8126060a34L,0x00000000000000cfL }, + { 0x3c7f9dbc2e229950L,0xab11f846d9f82b70L,0x2b7ad9a3f10c05f3L, + 0x203ead4f0f1820caL,0x51dbcbc8ccbfb332L,0x3bd9caf0066706f1L, + 0x5a39be2506059d5eL,0x984387c8dcafe64eL,0x000000000000014cL } }, + /* 52 */ + { { 0x708a757f8e011531L,0x7f45b172c3dcd57cL,0xa8eac9fdc2d99e29L, + 0x9d4ee81fb93b6415L,0xa5833b54a5488e86L,0xddd561c30bb7ab70L, + 0xb5bda384b3bdf3a9L,0xf909f8e01ddf332bL,0x0000000000000124L }, + { 0xc5b8aa84ab41e782L,0x1de20126851ddb87L,0xf49baa7d99482bd2L, + 0x05963debf4b6413bL,0xed369fbb7cd1e224L,0xdcf495dd1bad60eeL, + 0xeb475693892e30edL,0xaaf11bd8af0a212dL,0x000000000000010bL } }, 
+ /* 53 */ + { { 0x7146017416ec64e2L,0xbfd14acf7d7c6ebeL,0x1e3504a3668b7176L, + 0x72e3f3f3741b041cL,0x651fa54a2d3b67b0L,0x623edca3e57d928dL, + 0x29b74e8b72c8f419L,0x3d99cb47327abaefL,0x0000000000000038L }, + { 0x808dd0b3da342a3fL,0x12002462def4a954L,0x1b1c642eeab5a860L, + 0x5e1e2a0506e54b6dL,0x9ba1710f10c6cf1aL,0x334fc3660f903cd0L, + 0x969e0001134166f5L,0xfaa26074155c4353L,0x00000000000000faL } }, + /* 54 */ + { { 0xc85cd0e6712de285L,0xcd2ff8b0869f5dc5L,0x372a2b92df4ed389L, + 0x63524d3055b99c84L,0x46fef5a2e07a0033L,0x0a2c82dad6e09493L, + 0xb362662172a8952bL,0x9afcb188af217eb6L,0x000000000000002cL }, + { 0xd3b9d4769a64c5b5L,0xa0d8d5de44c4cfe1L,0x560858ef11c6dbffL, + 0xce1d978f41c14aedL,0x251f9e7235efe854L,0xf9d0c14c0474575dL, + 0x0d2c838ebda89c03L,0xa25f040b36cc9dc0L,0x000000000000016fL } }, + /* 55 */ + { { 0xb23d9dea9cad682dL,0x87acb1b346369391L,0x9f5c19885c0f24d7L, + 0xdff62fc7d41883ceL,0xd1ab29df53555e46L,0x569b1cb2891cda05L, + 0xdb14dbc452c633edL,0x1acbb86c2a345428L,0x0000000000000194L }, + { 0xd86a70c824db8127L,0x84a6563f41b7cf5bL,0x8d84dabeb908d9b4L, + 0xaaeaae63899c260aL,0x13ed6b2b44436957L,0x3bc94f99d0a92c8dL, + 0x978f2e2bd04bcb97L,0x56a388ef716a565fL,0x0000000000000074L } }, + /* 56 */ + { { 0x6082dfe496fc1f77L,0xb04c435f1347ad6aL,0xf42694dc25ebe457L, + 0x64a17069b6f764aaL,0xe03873d504d83da1L,0xb0b9db52e0c82330L, + 0x9886b34ed4239b3eL,0x76587f2a598814daL,0x000000000000016aL }, + { 0x6918f8e8ebc71a5dL,0x49141a4285405233L,0xd63f09ccc182cbccL, + 0x4afe59d3e09057a7L,0xe633db0de239d8ebL,0xbac8582dfd9494b2L, + 0x8b915a414704fd61L,0xe0866a9dfceaefd9L,0x000000000000010eL } }, + /* 57 */ + { { 0x2b50c47052e07a4dL,0x7f6d38b8e5d745d0L,0xb414c47ce1af1226L, + 0x03e4b44b39c505f7L,0x59f3d79586f739beL,0xca19bca7e7c2f1bcL, + 0x1c51c01ec063fad4L,0xda3937a57f428afbL,0x0000000000000080L }, + { 0xe9d8ca9d102369faL,0xe009bffb706c0e35L,0x2e0a19a796b55d80L, + 0xda0e42deac0d094cL,0x6c1be2c5787c187aL,0x6d4ae2cc9cfa04b6L, + 0x5b0cea6076577340L,0x2d525245c7c96285L,0x00000000000000d8L 
} }, + /* 58 */ + { { 0x6dcb238cae93de69L,0x4963c8333bfdae9bL,0x33c81f4de8b79836L, + 0xe13a2244ae8bf8aeL,0x0bc6e7864c3ebaccL,0xa837a53c555a5ad6L, + 0x875d8d35bc7e9459L,0xb3705534f9f46fcdL,0x000000000000001fL }, + { 0x78e9270c7fb974a1L,0x23448fa0e9ed2481L,0x14166c3d64bffbd4L, + 0xa05aa443d79f4b3cL,0xd855a4f13b9f32a0L,0x4bebcf8dac90235eL, + 0x658499878db52b48L,0xaa4d59f1e48d09d1L,0x0000000000000183L } }, + /* 59 */ + { { 0xee585d75dbffad9fL,0x64df6174f419d8fcL,0x6f73bf59e6c69345L, + 0xb80793d183d59b0cL,0x6baf4fc3929c8950L,0xbd445a9529962babL, + 0x52b61945eaa91273L,0x4fccdfff3d1c785bL,0x00000000000001beL }, + { 0x05c384d97cb2857fL,0x4cf8305806b7abf4L,0xf528dd1743ace6b2L, + 0x2c7b8fa2bc43d6b6L,0x8f0e28bf14e564b9L,0x1b69bc73d2b9f01aL, + 0xab8beb403dd383e6L,0xaccea0c59791946bL,0x00000000000000aeL } }, + /* 60 */ + { { 0x9a68baee0163c2deL,0xc42d0b2beb2768a4L,0x5686f124ffdae767L, + 0x926da5d50aaca943L,0x699c34cee01091cfL,0x3d2545405324becdL, + 0x1b6b58f14193a0a9L,0xf144925ed611cc9dL,0x000000000000014fL }, + { 0x7f61a60cc1ed9259L,0x1be37aa32f1d5a7fL,0x0384713d07aef431L, + 0x99f33d494e6fa7baL,0x43928c168bd3730cL,0x73cf8ccf5b9557dcL, + 0x0bc6d460d1a2bee5L,0x27cd194383b15610L,0x0000000000000145L } }, + /* 61 */ + { { 0x4be651353427af4eL,0x2e6c0bb1310d937dL,0xbd8ea76acaa671c3L, + 0x9d7b3fd4d3a9c376L,0x124ce863471709aaL,0x225ce41d018051c0L, + 0x5489284ff9e8ee1cL,0x22d829c9535c4ec8L,0x000000000000013dL }, + { 0x6b01ed9da1b15e02L,0x1d092bac301e5868L,0xbfa7a1835764135bL, + 0xc0ee59b76f7159a4L,0x9171a05118090d0dL,0x5c1531bbb8052196L, + 0x740930fc20927904L,0x963b48cc76337685L,0x0000000000000008L } }, + /* 62 */ + { { 0x0fe8b620f4aaaed5L,0x1068de7dfe871ee8L,0x2b22030ffebfcb4bL, + 0xd4dfbee7c3a2155bL,0xa7a26a8c2769b805L,0x377de7706d39eaf0L, + 0xf1a92447f615f032L,0xa1b81a8442d9b731L,0x000000000000012aL }, + { 0x299e67d0b1152e8fL,0x2e773d9792b5e14cL,0xe0d81073f1cb57a2L, + 0x03af0a9cbf1da4a2L,0x169b160ec22b449aL,0xb82c1ac8dd2d7d1dL, + 
0x7508aca6bfc98ee4L,0x54992440e3cbea15L,0x0000000000000150L } }, + /* 63 */ + { { 0x70004a0aa13a4602L,0x505c71a3d0d2c60eL,0xa4fe2463a6d79bc5L, + 0xe878eb3ad54d9df4L,0x7ecca90773d3c7b8L,0x5b3bb278244ecfa5L, + 0x8a30f61fb124d179L,0x5b7e50014f632af0L,0x0000000000000115L }, + { 0x62c42ecc9ef0021aL,0x58017fd7f856c9d4L,0x10e243b82e6478bcL, + 0xaf0746691505a4dbL,0xd9bb0a1c4cd7eea5L,0xe8ba39a2d52aed0aL, + 0x0747449ab549f09dL,0xd5c8f7bd9e57fa64L,0x000000000000013fL } }, + /* 64 */ + { { 0x1bd8ce7b5a53c22bL,0x78733fcd7cab446aL,0xc44ca4e248acb394L, + 0xa9888b1ea38c790fL,0x36afb6eb15c34237L,0xb913b8a8fb702063L, + 0x34b77cc5917508faL,0xa931d7a7f9e4732bL,0x0000000000000050L }, + { 0xa90a429056d21d18L,0x8266630755b410a1L,0xb4684a8b894a6b05L, + 0x8a1ade63828cf75cL,0x4fb2f85a127702a3L,0x83ff7d05adf7b709L, + 0x1d3f5a92a68d1db6L,0x243ce1dbc093cd5cL,0x00000000000000f5L } }, + /* 65 */ + { { 0x8fc183c3d37d7891L,0x17b50149fd865ecaL,0x0f6e43d68f218441L, + 0xaf51ec255a07f658L,0x8fe5a6cbad303202L,0x95de68f310676ef5L, + 0x7508e31fca4e000cL,0x783e5a9577735254L,0x0000000000000159L }, + { 0xbc1db5712e537ad9L,0x5e87112d35be9cf7L,0xbb522b48d57f9bcbL, + 0x1eff7890a8b3cbc7L,0x4f306e11e5ecdb5cL,0x30da83923387e7edL, + 0x4d91fcf472321e3dL,0x8487bb62e412a67cL,0x000000000000009fL } }, + /* 66 */ + { { 0x86f5f80f8cb8e08eL,0x7cfd2c412496fed6L,0x0061b74360b7dcdfL, + 0x4dbaffdf57f4d05fL,0xb1993c2a458061f2L,0x6c6ca8d09de994c4L, + 0xef70d24d2747e062L,0xd4e5d4e3b9995cbcL,0x00000000000000ffL }, + { 0x3171e245c6f40077L,0x1592e0450723e506L,0x35c86f7e6a6bfd88L, + 0xba0959d16d9d9ce0L,0x2e7f8fe83eb5770cL,0x58eb0881c40d63ddL, + 0x56333bdaeb9e4419L,0xfb0397df3afd1f4dL,0x0000000000000034L } }, + /* 67 */ + { { 0x7b84e05eb358815cL,0x3abcb2d4e41087d9L,0x87a7588907f05d7aL, + 0x350778d57a9d481cL,0x9d34cff842d64cbdL,0x0859cd5accf289feL, + 0x8372d591dd2b2c6eL,0xc06d482e18b40b62L,0x000000000000006bL }, + { 0xd10695a0da4ed375L,0x51baf588298daaeaL,0xb028a1b4f4b7092cL, + 
0x8ab87dae7a335b35L,0xa73593620567efd8L,0x7a49fc103320c374L, + 0x737acac4a3558b30L,0xd30696a34c0fce9bL,0x000000000000001eL } }, + /* 68 */ + { { 0xd9550ab0bd3902feL,0x9bba4b4b86a9d3b3L,0x3a59e0a9975cac37L, + 0x045e8731333605dcL,0xf2c598c21afc2c58L,0x81ff8d6feef9cbf1L, + 0x82bed5d09bf83c42L,0x9d1d9d5b528131d5L,0x0000000000000157L }, + { 0x687da3055519258eL,0x73f539f9027de2a8L,0x69fa9747d6a230d6L, + 0xab1aeb235f5d1684L,0x5bbfe9475f7e41f5L,0xbd546abb16a7feb3L, + 0x2afbd4e8e16d5187L,0x7437be13bcc953ddL,0x0000000000000160L } }, + /* 69 */ + { { 0x55f165a9ee9755a3L,0x0c8d5a1ab82c9ab1L,0x65a1e45aab6b97e6L, + 0x3004cdb0ab05e271L,0x9e0c3b526db0830fL,0xaae1ec1a75acbdebL, + 0x413d4484761e8498L,0x589e09bbb1b9c62eL,0x00000000000001e9L }, + { 0x675120819c72258dL,0x61dcd7345c1593d4L,0x6c627a7b91c11fdbL, + 0xd1d3e9bf8857908eL,0x9aac06fe530bc68eL,0x125c16bb6b5b44ffL, + 0x38860bb6db90edd5L,0x96fe8b08fbbedb5cL,0x00000000000001aaL } }, + /* 70 */ + { { 0x323a5dd8f257c0f8L,0x4884dc92dd3a10d9L,0x03f379cebbb8ce03L, + 0x6217ad53a47262a9L,0xa1df201752e06c6dL,0xf5b723e0c32428cdL, + 0x1e5d38892c30c62cL,0xd9a90f1f477f82ccL,0x00000000000001fdL }, + { 0x830d27ba1763ab59L,0xcf27d93e723783e9L,0x81558264945968aaL, + 0x63251a321700d5d5L,0xcf6bbe7303146d9fL,0x6cdcf455e65bf0f2L, + 0x80aa00ce632323fbL,0x6e49e62cd96a4744L,0x0000000000000149L } }, + /* 71 */ + { { 0xbeff0b7e40574c09L,0xb76f26433fe80e96L,0x0b3bd352eb237d91L, + 0x3c0c62b77edc3102L,0xf989394b424a36dcL,0xe9ea64c27c6c435eL, + 0x2dfc21c4e388d076L,0xcc3852f6a4e69e4bL,0x0000000000000139L }, + { 0x5238a3ffbb096b91L,0xee72c9e573d8d43eL,0xc116db118c577558L, + 0x54ec89d2dc47d4b4L,0x2006dd3542e1955fL,0x004aed6a7437475cL, + 0xc1ddc32a2bee9041L,0x597417a2ed9332c9L,0x00000000000001fbL } }, + /* 72 */ + { { 0x3c0f1981859bae66L,0xab48e9b1845d7c1bL,0xc6ce9c03452a3c1eL, + 0x2384a00cff810339L,0xcd7ede115f98d6feL,0xf7a00e3d38a0dd5bL, + 0x56dd948a3c7e1c06L,0x9d21a7d18e53a61fL,0x00000000000000d0L }, + { 
0xf9cfdbaf880eb3fbL,0x64cfd2975e83f7c9L,0x61ba7d6fa28a74b4L, + 0xb8200d5fdfb13e03L,0x03bc8f4b232a6128L,0xd1fb92c281a8d86eL, + 0x68675fae706d6ea7L,0x9b08608aefab18c2L,0x000000000000011dL } }, + /* 73 */ + { { 0x17cf6146bbd2f539L,0x96052fc076e26ba2L,0x36821d18d4be4a67L, + 0x8f8234229f3f39a8L,0x68b846b9433f873aL,0x7a1d3f36716f4568L, + 0xdf603e282fd47750L,0x77cb02c56975e226L,0x0000000000000003L }, + { 0xf275add38c01dd59L,0x9c213a9eb9c1a37aL,0x690ad1044dfc5403L, + 0x202ee20607ee0d86L,0x896ede95661fc40eL,0x6b4d7398d0b02f56L, + 0xccb96991e5af1a24L,0xd5c281afc13f7125L,0x000000000000009fL } }, + /* 74 */ + { { 0xc858c54bd7073a5aL,0x87c81a5c861eac7dL,0x51f84a39e720201aL, + 0x952a9f8e40e003ceL,0x76bdc4ab58f199deL,0x1cf12322d56cc02bL, + 0xb6634e6383f162f3L,0x84c017ee8f969e11L,0x0000000000000169L }, + { 0xf1f433625c89f1faL,0x4a02a630b697b078L,0x33311e5c4b05b7f4L, + 0xa7ccae514fede4ccL,0x0d26e8744b025aa4L,0x7d5b77bbf84db7adL, + 0x39ef1aa8f571c1feL,0x65eba928418ccd20L,0x000000000000018dL } }, + /* 75 */ + { { 0xa37866ab8abb2537L,0x14ac4cbb65b3096fL,0x827fa7ed2a428ad3L, + 0x95d19f6210e9e196L,0x31eb97a089801b4eL,0xaae77a62aae8b823L, + 0x9693d62a5f5c9642L,0xff5bfe973e368b84L,0x00000000000000adL }, + { 0xa3efae21492b0deeL,0x2143e9ee9602c2ceL,0x21367c996f3b99e5L, + 0xdd78b2b0e93b8f59L,0x8d541c381064c13eL,0xe6b970daf5738e7aL, + 0xaf6ecc168373b1a4L,0xdbfa3f4f74ae208fL,0x0000000000000180L } }, + /* 76 */ + { { 0xb024621a907a6aa0L,0xef56cb68407879f6L,0x44c38b688168a934L, + 0x70d638d39b9a9048L,0x6968caa082541f20L,0x0c5970531fc88b50L, + 0x5564ded5af635784L,0xe7e898c7c4d494cfL,0x0000000000000097L }, + { 0xe1dc98d96b6ebb2fL,0x292a17fc7aa9e126L,0xb60f0fdbfa2a2c68L, + 0x9c63270cb2e1851bL,0x898db26581ca4cfeL,0x94082638b11959d5L, + 0xe44f308ea54b8d19L,0x96399eb844e63094L,0x00000000000000d6L } }, + /* 77 */ + { { 0xfa00f362b83769eeL,0x72d040ac3efc4cb3L,0xc393388957abd687L, + 0x62264425940a7128L,0x909c4c8fec242a31L,0xd1e48f1e65a1a551L, + 0x68bd70f1049c2172L,0xc8692d2b709b7fd4L,0x0000000000000041L 
}, + { 0x4e388aa1df816784L,0x4a58c8a501be75ceL,0x9b49dffb02a67812L, + 0xa73299e0eda721e0L,0x8a0bd1f5e67a65ecL,0xd81e91e8856c71b6L, + 0x37aee2f4c005aa30L,0xd94007500595bbf2L,0x0000000000000073L } }, + /* 78 */ + { { 0xa912ac4a010c0ef3L,0x0e654bd84e81b1a0L,0x8f0563dc4f353509L, + 0x10dc41f3b47d189aL,0x122edd06f238c09cL,0x224c16afc41acf67L, + 0x1ccb933483758520L,0x1a4b5f292275ae6fL,0x0000000000000127L }, + { 0x792fd4733ce688b5L,0x14566d37dca9c68bL,0xfce9326e541711d0L, + 0xe3ba14ee3cc341a8L,0x6b8ab4cc2122c11fL,0xc0fa763bf5d379b5L, + 0x95e2d2aef1522f91L,0xd4e21b3d31cf95a5L,0x00000000000000acL } }, + /* 79 */ + { { 0x4013a7791d8e061aL,0x62707e70acc84a30L,0x6ac08266eb2f636aL, + 0xe917ea2177b25c9dL,0xddb78bbd70ff35cfL,0x5008db2b041898beL, + 0x0f58a4fcce0ae445L,0xed0923972257d0e7L,0x0000000000000043L }, + { 0x2cad77b3e2e129e6L,0xfb8c4a870f1be4d7L,0xaee50dff20056333L, + 0xbc2658c12a691543L,0x95dc0ccab8fe2640L,0x694eb5841965a0afL, + 0x7d3baa53edd1d99eL,0x2df13b208a1edc87L,0x0000000000000083L } }, + /* 80 */ + { { 0xfead2247d181c3f2L,0x915d35bef337b23fL,0xdb4cfcba74890672L, + 0xe4f70d8ffda7a3a1L,0x226b641979275686L,0xe80408636ff1f79eL, + 0x98e84b39cf5fa4e8L,0x57aa0be9d8a09f60L,0x00000000000000daL }, + { 0xd40cecf54efcea66L,0x98df2aecafc76faeL,0x63f19a48c91585a8L, + 0xb111bda713f00aa5L,0x6687afab44b5cb9fL,0xc6d5fb12652620d1L, + 0xaf953f1bbacb35abL,0x99709370ff94c4d2L,0x00000000000000edL } }, + /* 81 */ + { { 0xac9f56e068b54c89L,0x08ecc17dce737c22L,0x208ee83fab089b53L, + 0xb0f3a129543fbd1bL,0x1b204cf8844dd706L,0x80975c89dec2e40dL, + 0x08b011ae9399914aL,0x6b4ba17074674df7L,0x0000000000000017L }, + { 0x71216ea98fdfc175L,0x77b7fc637e0f5b0cL,0x88d0285fceb33a34L, + 0xb679814f0223eab7L,0x9078720b51c6d922L,0x5859d5a49c13f51dL, + 0xe69f850bfaed60b5L,0x2499a8446d0ccab2L,0x000000000000005cL } }, + /* 82 */ + { { 0x41d581fb73e7bcf1L,0x16dde61cdd3c17beL,0xc62997ecfa199fd9L, + 0x1a758873c159db97L,0x4ed7789664132830L,0x9672ce892942a918L, + 
0xf3ee4587816ba4bbL,0x4fb7a148ce54dd7fL,0x0000000000000123L }, + { 0xf05d80aff009be8cL,0x62e938d778df1ba1L,0xa7e22e84312de620L, + 0x48d29e7f6070c4b9L,0x5cd9c3eba1b5da37L,0x1e51bd2fa4717453L, + 0x94098ab056ab9e67L,0xbb584abc49f7c6a1L,0x0000000000000049L } }, + /* 83 */ + { { 0xa9f255301ea470f7L,0xa01bf808e9254e30L,0x098569ea71a0038dL, + 0x0d2b2ee15913ca87L,0xae17004bb8281fdbL,0xdb5c6eb0118e5c2aL, + 0xa56ac64c1fa943abL,0x1aaf64771a92d501L,0x0000000000000053L }, + { 0x9679ef4906345730L,0x946aaa4e846f37c2L,0xf81726b01a7c3aabL, + 0xcb808da28166df4eL,0xe9fb3fc24e04dc3eL,0x9e0b61db76ec19b4L, + 0x6e7f665eeed6d13eL,0x70ed8c0786a75384L,0x00000000000000e5L } }, + /* 84 */ + { { 0x66456e58108ce13fL,0xb5bfc58d0e397813L,0x04b6a84bea3949e9L, + 0xea9b66bc75af667dL,0x7cb4d6dca891566bL,0x1b3cecf0bf61595aL, + 0x4312c73d002e2520L,0x81d768986135a5faL,0x000000000000014bL }, + { 0x4047bc25841078ecL,0x75aa9c96179c454dL,0x6a1606094851f8fcL, + 0x998d4e3ece34091fL,0x9a9f670488e54102L,0xbf280f885da8ac5eL, + 0xc64caca08fec230cL,0x0ac864b05094b775L,0x000000000000002bL } }, + /* 85 */ + { { 0x6b606e398f5daf7fL,0x4838548910927506L,0xa2255c5c08c58a72L, + 0x2f362fd0c90f3ee3L,0xc9633af408795f02L,0x71710bd10425f5aaL, + 0xc2017e05ec06dbfbL,0xd9c7dc82c1b8bbcdL,0x00000000000001c8L }, + { 0x7db41fdf18b8bed9L,0xe9483308e3a23125L,0xbcf91de77291c4bbL, + 0x9b0b972b41448aafL,0x95dfc633c44da462L,0x90b9c46301bf50a2L, + 0x18b66f77869e3131L,0xa8a4e2fa121baad9L,0x00000000000000f5L } }, + /* 86 */ + { { 0x8ca55109ca0251eaL,0xf2aeed8b27a6c9b0L,0x901a8beb5620f528L, + 0x9a8421e8ae13fc56L,0x1349f1c485993c07L,0x29e083590d1ab0d7L, + 0x96e2929baeb5d909L,0x96c2f1f8f599a66fL,0x00000000000000ceL }, + { 0xe4bc4b5112be8bd7L,0xf4846a0f3c67e99bL,0xd89cc7d34d3a3864L, + 0x1f64711273f43981L,0xc32bc32426dce567L,0xf7134ebff02b096bL, + 0x5604f00b0d0682b7L,0xfd23d7eae3ce8b59L,0x000000000000011cL } }, + /* 87 */ + { { 0xf89646cca27689a6L,0xd6a7dc435564172bL,0x30bda48eb57cbfccL, + 
0x9b11fffb5b1adfe5L,0x9f2d80db711d8bf4L,0xe879fdf0b70e5a5bL, + 0x975341836bd18a1dL,0xc8c526bd8cbfd504L,0x0000000000000114L }, + { 0xd5fe725bef7388bdL,0xf1c3dbdfe7ffaea7L,0x78395b897e6de2acL, + 0x81a72c9a9ebf1bfbL,0x6526570769785146L,0x3925ecd9f52670afL, + 0x437bcdd283d57d48L,0xb5d732a7c80ecb02L,0x00000000000001ceL } }, + /* 88 */ + { { 0xa7f9fccecfd376d7L,0x6b4eab3ea66b084dL,0x6ac90d08d5b91bd8L, + 0xaa3d5b7e8aa304d8L,0x27f3d42b7f866a4fL,0x95d19fa8bb813ae1L, + 0xd38798d7e34a9206L,0xdf7c0a69a32c1cddL,0x0000000000000073L }, + { 0xbe2c01bb38315b16L,0x1daa7c899e18c8f9L,0xa3d43fb408b6b853L, + 0xb159e48c68092a81L,0x77e93d9e836faad4L,0xd4ed6361a4699730L, + 0x569cb3f66297e476L,0xb69d8183e7811fa6L,0x0000000000000185L } }, + /* 89 */ + { { 0x18f27eb3ab9cb764L,0xbbbefc218ebc1d6dL,0x47760ddb0479aa79L, + 0xb4d16d2409e542f5L,0xe35c38d1bc699b96L,0x13b2ae258c8d8c8aL, + 0x8579c15267a3a45dL,0x773b73576c554c04L,0x00000000000000d9L }, + { 0x9620a4730218c299L,0x69be29b399f78a33L,0x4684a009484f414fL, + 0xb2c749379a2ca4d4L,0x09c0773e68db7ab3L,0x6181f059935c357fL, + 0x0931303d8b7de3f2L,0xf3effcd0e0fb6e08L,0x0000000000000060L } }, + /* 90 */ + { { 0x723c14beb25d6530L,0x5e015b399a97d40fL,0x209c3c4bfbf7f622L, + 0x83d8c59c14b4f0f1L,0xcf002fde3f7e8ecfL,0x35d353c91eb1ef0fL, + 0x394c42a5201f0c60L,0x787128ab7be8ee34L,0x00000000000001b5L }, + { 0xa0937d3ab70110cdL,0xe0fa4efc477911b5L,0xc6acaf5bc53a4c19L, + 0xbd3010f338d509f2L,0x3ee2a82be54ac1c6L,0x31ea67c3e4f2a3bfL, + 0x7a4ca66ef089c7b9L,0x5bda2c4f34a2362fL,0x00000000000000b0L } }, + /* 91 */ + { { 0xb424a071d1f575cdL,0x15693b01a5237182L,0x141336029a2c9d40L, + 0x50c4348b9c914a60L,0x9024573d095b31c1L,0x6f975fd222fd4962L, + 0xa1704886e210b277L,0xac29b8136dba937bL,0x00000000000001f6L }, + { 0x09edef55775da491L,0x25953f9e2b6aad82L,0x6696a1061bb40d5bL, + 0xcfc453114d5127d8L,0x2f21dca981ead062L,0x3f3e4f07af3b7123L, + 0x12cd06b89646f20dL,0x241363696910f5bbL,0x000000000000015eL } }, + /* 92 */ + { { 
0x0c844fd03ecfc44eL,0x4095f2c85043b3d5L,0x9a5fe7dbc9bd059aL, + 0x239328faf65becdfL,0xe3102471a67961cdL,0xea9e39bfbbb5dfddL, + 0x8022b6d0133dc5baL,0xbed7aa9b5f12c379L,0x0000000000000141L }, + { 0x096f0059fd94d941L,0xfc6e9f007d4ff018L,0xe63af598779f05e3L, + 0x4c40f0b300483c99L,0x04d2feef72a19870L,0xdb773b5b464a4a71L, + 0x00b6770f49367f1eL,0x4f7e03012a9fbd2aL,0x0000000000000169L } }, + /* 93 */ + { { 0x0df5dd738a9095fdL,0xc4b7a021d3ce857aL,0x90aa796be5edc767L, + 0x56497eff180a0808L,0xb9856e1f66f10aabL,0x3129882439879766L, + 0x61748cf73ba80601L,0x07d9076c555da929L,0x0000000000000012L }, + { 0x0b049a011c44394dL,0xf5f25ef70ce49e45L,0x1e3a09f0b1694265L, + 0x2c5bd9fe109b33f8L,0x07f2a43fa30932e4L,0x736abfcac6cf8af2L, + 0xadf7fa04f3366722L,0x2f1e92fbfa9d26b0L,0x00000000000000e0L } }, + /* 94 */ + { { 0x9524e4a663be4d4aL,0x1fa57bed66f3cc91L,0xdd7c93fa7e7a7ccdL, + 0x70e8cf6a88c5d1d3L,0xb257997a3f251f1eL,0x0a5ec58ee3554cf5L, + 0x68d268d7065a7109L,0x7c23d4d2085089eaL,0x000000000000004cL }, + { 0x63ae575bbd52d132L,0x0fb8daa738c81cc5L,0x096a6e51e4e63b99L, + 0x51d6b366b239d387L,0xed5f8874a5d49fedL,0x025091d943a8c07aL, + 0x100f845ae4686ae2L,0x1af59d747eb4ef5aL,0x00000000000001c2L } }, + /* 95 */ + { { 0x5f7bc01edd441308L,0x0dc3494486308890L,0x2af38a74759611cdL, + 0x11a712614c23ce66L,0x37f317b5f8bafed2L,0x4efbb9ff4c93e079L, + 0x880f0edd8ecc52cfL,0x480cdd2cddc9d82aL,0x0000000000000028L }, + { 0xe8f1ca0dc3f807acL,0x6a3e4fc2bd070549L,0xad3d0a1491f8bb6cL, + 0xe3ee1cfd3d6dfacdL,0xee46b1b95fb46ffbL,0x5207b3ac7dd5cfbcL, + 0xd580c0d9b1b8e8b7L,0x52c669f4c7bdd11aL,0x0000000000000084L } }, + /* 96 */ + { { 0xa42b4747c0ace6d5L,0xd5acb64bbe7287adL,0xf330489989bc2614L, + 0x817fe836ff05c71eL,0x772eb246d35ac450L,0x7f5fc216375a9c3cL, + 0xfb6f9e1acbc0d6fdL,0x7643c315720e9733L,0x000000000000009aL }, + { 0x4b2216b4f3845ccfL,0x9c174e8090bc05bdL,0x7a550c74d6049037L, + 0xbd7220a16358c806L,0x838f9c41aa677b6dL,0x37332c1966e2e08eL, + 0xb032875e496f6da5L,0x52b274cf9c30630dL,0x000000000000000cL } }, + /* 97 */ 
+ { { 0x6ec2e7828ea58bebL,0x2b404c1d3665fa48L,0x546d5fad20b40ff0L, + 0xfb5df7b629d3e6a5L,0xf186846d66c81991L,0xbe690bde6e2cfe3eL, + 0x97aeb9a01410d16bL,0x59d81548bacc8e92L,0x00000000000000cbL }, + { 0xd905d3adbaf66a23L,0xc333738740dfb081L,0x6d5535de4b00f432L, + 0xe17fe8e807d3a03eL,0x29544ff7066bca80L,0x60c2b96cbadffa55L, + 0x9f018d9445a26ea4L,0xd543816724a34ffcL,0x000000000000011eL } }, + /* 98 */ + { { 0x62a873fbbd7f8a61L,0x5e18cd71bbe580bbL,0xfd5c9eb3667f6980L, + 0xab8d4f61571d3dc0L,0xe2e45215783f9bc8L,0x36c3774b24398b14L, + 0x2db4a36374d811b5L,0x9f7f12972debe3c3L,0x0000000000000138L }, + { 0xbb97f21c798fefb2L,0x9c76fcb5107baa72L,0x12fbf760fadbb568L, + 0x1a648be7d33ea6c5L,0x412a2993236134a5L,0x4a3d81698985893bL, + 0x6144958f3e66ada4L,0xb4dfc79b7687b457L,0x0000000000000140L } }, + /* 99 */ + { { 0x83b145707abe5bb9L,0xae0cbfd8e51d81beL,0x20dadf49c9827affL, + 0xc3a72548a687b554L,0x080263fbeeb41733L,0x7014fdc3d3827c63L, + 0x7d018f84b5e3b70eL,0x1d483e00fbcf7168L,0x0000000000000015L }, + { 0x154e3c7c6b578aa3L,0x511ce9b5d3043daeL,0x55f89e9bb6008101L, + 0x4ec31112f405ac6fL,0x7e66a4d82008ac7bL,0x73c00d3925c52fa6L, + 0xee1b99988acac2ebL,0xdfa31d9560b57453L,0x000000000000008fL } }, + /* 100 */ + { { 0xcc74a0e0251cf8d8L,0xd4d8949d041f2bd2L,0x0b734a4933ebce52L, + 0xe1ac5f515c5bcdaeL,0xd3ecdfcc16200b93L,0x2506a266a793736eL, + 0x585a1c8bea6e6940L,0x081cdd539190f935L,0x000000000000000eL }, + { 0x055f995653e28412L,0x0d1526f2db27164bL,0xcd5625eb1df3adc7L, + 0xd2c453cadd35deddL,0xed442849a838ffe2L,0xad20c1375c0ce589L, + 0x2d5fba81bd99b609L,0x5be41dcc622efb07L,0x00000000000001adL } }, + /* 101 */ + { { 0x563af6678f850756L,0x86d37aae52f3b597L,0x10d38a53796842f5L, + 0xcdaaf99ff743f997L,0x2fa755e593f1a8baL,0x1af04e15409f7cd9L, + 0x63bf9a0ad6d0650bL,0x67b1cead55abfd9aL,0x000000000000000eL }, + { 0x3660a8e0b5f43178L,0x56bd412d9cc35b33L,0x3d7bfa63880f6808L, + 0x7f372d662e622c71L,0xad7b7be76ff82445L,0x0f2bde808db04e51L, + 0xe1e781fe4bd15c8dL,0x1f475bfbb8e502f2L,0x0000000000000194L } }, + 
/* 102 */ + { { 0x79482bf9d63543ecL,0x985cb67ca117ef3eL,0x8ac50638160ccc63L, + 0x556cbed5729bdc1eL,0xd62ed97da22686dfL,0xb124cb5fc81eb77cL, + 0x4d7b4f6672fa2ed9L,0x60b29aa778335b96L,0x0000000000000172L }, + { 0x21bfc7b6a43df7c6L,0x85acac23bc20706cL,0xeb6f37bc345d9580L, + 0x9d8f20d2a32a08bcL,0xf08924f6d1953c5eL,0x7d25d7c6c4f680d0L, + 0x64e6a2372de9912cL,0xda1c06c452ce644cL,0x00000000000000ebL } }, + /* 103 */ + { { 0x26677c5c411dd110L,0x0d6787aa2c991c4aL,0x53be6a41a45666d6L, + 0x73e716aac15f9f15L,0xa93b863f0e0cc7b2L,0xa40571172a624ab0L, + 0xe5e7656e1a39c260L,0xaf8d78b52ef6f130L,0x0000000000000046L }, + { 0x796214b170f38dffL,0x3e35d828123a1105L,0x046a44d4957ed812L, + 0x618fa9ba0da60161L,0xe7cdd2a554f84413L,0xf1c2563e19ea95abL, + 0xc4459e14cb2a30b4L,0xc748add661ff9aa9L,0x0000000000000183L } }, + /* 104 */ + { { 0x32981f399de58cafL,0x05bb80fd8753ea64L,0xc83f9f242d119486L, + 0xf490cf0603eeb00aL,0x4037f2517c73d79cL,0x844209fd724d461bL, + 0x6b03f6d2272420cfL,0x6f4bd29eb3438fa2L,0x0000000000000152L }, + { 0x964d034ac389e51cL,0xacda55e96db7d98eL,0xb2ae97dee913c583L, + 0x0793077bfeb03440L,0xaa16e3789d461e29L,0xb0a67533043bf8beL, + 0x9d749a42ba7d8c3fL,0x7c41e6d66bb925dcL,0x00000000000000ecL } }, + /* 105 */ + { { 0x2e9b345dc5da8398L,0xbc66841fbb38c430L,0xce3ac5627c3bb47aL, + 0x8fbeb12b738d2cddL,0xd4bc2ad768731185L,0x9521db1cbbd4f4f4L, + 0x2a690caefe4e1b0eL,0x375215eb7bfebe3eL,0x0000000000000194L }, + { 0x4cb234f12edfd661L,0x0149984eed52c1f4L,0x32d27260d8f8f98cL, + 0xfe76e4e47be38590L,0x5435873d95e8b672L,0x916c397ff2b00e82L, + 0x3b9bf705bad61eb8L,0x7ee90182ae131bbeL,0x0000000000000000L } }, + /* 106 */ + { { 0xd36fea9e93fbcb5cL,0x382be5839fa8529bL,0x0b243125fd611ba0L, + 0xa59ae37fcd8a2637L,0xab78c60e3d8d4704L,0x1bac243d44c41b79L, + 0xc4001feaeda49cc5L,0x988ea44a83dc7e9fL,0x00000000000000f6L }, + { 0x4d90caa4f077f79eL,0xf4d17601d9e2590dL,0x11debbb3d21b4b77L, + 0x031b3f609037e1b6L,0xf113ed82135becf0L,0xf6c01379f2903ddaL, + 
0x36bde7caa6f19296L,0x57d3b6849dbbad85L,0x000000000000006cL } }, + /* 107 */ + { { 0x963fee389abfccb0L,0x6c6e2a24b9676e63L,0xf8768f0284ba6d27L, + 0xc38ba3ba465853d1L,0x6e3ab36d1b8ab9b6L,0x01fc974247a07331L, + 0xfdd4171825233f32L,0x4dacfa81ac61de7aL,0x0000000000000021L }, + { 0x365a9f37eaa3198cL,0xcbe8a345fc8b99d5L,0xa427f12ad4f5ecbcL, + 0xe841ff600c237514L,0x5d9e8c5a28a27b05L,0x2d37744462859ff3L, + 0x1c0460ffea8bde37L,0x0a0e49a129cf5bf8L,0x0000000000000181L } }, + /* 108 */ + { { 0x688203af45843c3eL,0x4601e303aabebae7L,0x397b08f3624df62bL, + 0x5687348ad21e5aa8L,0x2cf12c739a242b0eL,0xc848ed0132a76c6dL, + 0xb72aa1c2f52751a2L,0xb63296c392c02d05L,0x00000000000000f3L }, + { 0xce4b42adc6f3d1f0L,0x2f0dcc532f532b94L,0x5781333583443d9cL, + 0xb50118eedc8dd9cbL,0x3039e1a5ee87192fL,0x9977267d557419c2L, + 0x462efa4c30f96b0cL,0x454fb7963cd3c35aL,0x00000000000001f7L } }, + /* 109 */ + { { 0x10f281949d153926L,0x42e28c9182b57548L,0x4b423b30509e94c9L, + 0xc5acc52ade9d6b57L,0xaa746c398b3ca314L,0x0f4ea307c63d5bc5L, + 0x425553a2e1ccc989L,0x271198bff76d9194L,0x000000000000008eL }, + { 0xc7900e463c8e672bL,0x703675cd3f2dfc27L,0x704951f7af2163c9L, + 0x74d699087aceaab0L,0x482f21a97e8d2369L,0xdcfbc1dc813dc115L, + 0x0ce2bc8004f6cd13L,0x2a54662c82bfaff2L,0x000000000000003fL } }, + /* 110 */ + { { 0x0dcf41e61588a8bcL,0x6f48cd0e210c52cbL,0x338562bd758e7a45L, + 0x1600d54b48b9b957L,0x461df80ba6b89b9eL,0xf7fd4f17098cc82fL, + 0x167f01cd14977147L,0xb13385116116c5f9L,0x0000000000000048L }, + { 0xdeb763335d2617f0L,0x3f9a57726ecb8606L,0xa93c032d1b91fce9L, + 0xf7a4388b6c84b997L,0xbfe80225823ca5beL,0x6f19c02835a32f6bL, + 0xf26cd5ade3cb5c58L,0x7f5ddc776d0c1dd9L,0x00000000000001e7L } }, + /* 111 */ + { { 0x3c9feec86ee764c9L,0xd1bec836b07c82ccL,0x6bf1b2e6a005b142L, + 0x70ef51a329e8a5eaL,0x517d298e3ffe241cL,0xbb389e2872966c28L, + 0x3a2da8a92c7acc76L,0x902c9126732a21b5L,0x000000000000004aL }, + { 0x96c51b9c8f7ce110L,0xdcc33a87aeb036f1L,0x826950980a6a59e2L, + 
0xceaf26a7e78db500L,0x82f3c384c95bb030L,0x6dd6e9f724c42f42L, + 0x768dde2970ac4a0aL,0x4aedce4b03d22efcL,0x000000000000016fL } }, + /* 112 */ + { { 0x077f032aeded03c0L,0x2684a052588ddd4dL,0x6d09bc4f9a85be0fL, + 0xbdda0c7fe0b9b6bbL,0x19689c7ef2fb5887L,0xf8a96960ec3cce7eL, + 0xb043d9d5768d2ae5L,0x29c8081bdb21219aL,0x0000000000000068L }, + { 0x6bf872fade59f006L,0xc2b9ffc6cb97ef5aL,0x371915db58ae7ef8L, + 0xc2e23ca1f4ccaa1fL,0x1af8c60e89c27cc4L,0xeee5d7e7c86bdcc6L, + 0x9225b47f9bd8de43L,0x53e7f4634b24f08bL,0x00000000000000b4L } }, + /* 113 */ + { { 0x54c496d0e3048bdaL,0xe2b6749943c3de4eL,0xac2049f74c2d509eL, + 0xb01f691e543c5089L,0xcd9960a3105a365bL,0x34d93ffe78b17049L, + 0x029f99b3f82c9467L,0x785c5ea20161a755L,0x0000000000000091L }, + { 0xb455f978953dbdb6L,0xea9e84d997eca19fL,0x473bd02936d4d75aL, + 0xa9c17ca8c15276faL,0x9cf6613347c76356L,0x4a68360b039738d2L, + 0xd3e430a869733609L,0x0ae532dee2b27f21L,0x00000000000001b4L } }, + /* 114 */ + { { 0x68110e825164cb8bL,0x6979af4f2552a67dL,0xe10d6d0e8d185527L, + 0xcf6c5787fb64eac4L,0x8408163bac424592L,0x5d8fff37fce0d810L, + 0x8b284e49da84c15cL,0xed80556732663ec9L,0x0000000000000010L }, + { 0x106f403051f3ee9eL,0x2e8e3ee9b38adf1eL,0xd3c87a6ea13d6449L, + 0x27b49f4580e1abb1L,0xc283d1790bfd7298L,0x8fe50fa5afc7a35fL, + 0x773da545ade3ad4fL,0x78bfaae4d9a21df2L,0x00000000000001f8L } }, + /* 115 */ + { { 0xae60d8e8abad5678L,0x0afa72cee600c25bL,0xb9d4e0b44c288e21L, + 0x64447f76d254cf9fL,0x1fb36bc4959e2ba5L,0x393c44d72961132cL, + 0xd7a8881ffc140f19L,0x27a861288d096648L,0x0000000000000091L }, + { 0xb536c0218a9e690cL,0x85dcc521eab4fa15L,0x09af4423b00ee54cL, + 0xb3793525af3a8e48L,0xe1f36308b7731d85L,0xb5361d78141cfb55L, + 0xea41f29eeffc4529L,0xcf5755b19f7d2634L,0x00000000000000e8L } }, + /* 116 */ + { { 0x01edb80dd212b398L,0xd0396181d53dd373L,0x0e0860478a52fa95L, + 0xad1e6432a7825e6dL,0xe0185bc5330ece4fL,0x508f7313b078936fL, + 0x1dc982fd9e7f6ea3L,0xdbf3a602d5556b60L,0x00000000000000e8L }, + { 
0xc3763234279e05bcL,0x7f5f40ecf44453d3L,0x310c5f4d7fa30793L, + 0x5cffad36108d7e22L,0xf2f01ef3c2a98bbcL,0x30ab1719d7d47f80L, + 0x7bc9f918a9b22e1cL,0xf53dc52ae834df94L,0x00000000000001f9L } }, + /* 117 */ + { { 0xf266b49ec183f89bL,0xd3fb5f025f5806d4L,0xd30a42b594ec3080L, + 0x4b6b1940371cd917L,0xf7541aabb7f7e26dL,0xe55269eb2d5b7b64L, + 0x0e1a85c17f8036c5L,0xa0ff0f22da5f2675L,0x00000000000001ceL }, + { 0x602bd56a3a8e11f8L,0x29864021f5f9ab54L,0xc6742c5a0ccc92d7L, + 0xd64569e6523f650bL,0xc8e4681bf7fabfb4L,0xb4275947c3c9e6cbL, + 0x2b3952d538f5ff20L,0x818f8e381f04aea2L,0x00000000000001b0L } }, + /* 118 */ + { { 0x3be5bffae50d90f0L,0x4cb3b11bf5011cdcL,0xe10ca711a691dfacL, + 0x62ec211d4ea1a773L,0x5a979ebbe586eeb6L,0x4df16ab1a0c2f1fdL, + 0xfe9e3f7ec57bbfeaL,0x1b05960e5ae526f6L,0x000000000000015eL }, + { 0x1c8e04a58630e62eL,0x3d00310e6447e1b7L,0xcf1e6b6143b4447aL, + 0x92abb8517462e7a3L,0x8309ea080002724dL,0x1d805d70e45296dfL, + 0x0f3849b33d4ed812L,0x2d6bffbc6834d44eL,0x0000000000000096L } }, + /* 119 */ + { { 0xd13fe58d48e07711L,0x70f83648d270a3b2L,0x1517892d8cdff04cL, + 0x15bb657851411f14L,0x6c31cd903e4f8a55L,0x73f871520413362fL, + 0x2fe025eeeca06d4dL,0x32a6e417954e317fL,0x00000000000000adL }, + { 0x7e38c63f69d147dfL,0xb69bb06e710bf37bL,0xb94debef28d514deL, + 0x4b2307fb8d11c3d9L,0x3b369df90385c604L,0x68ea2f49e7800e83L, + 0xf028b2587d501c1cL,0x970782215cef7818L,0x0000000000000055L } }, + /* 120 */ + { { 0x10c351db54c1d751L,0x81445301ba0f9512L,0xa77eb34fbfdc8bedL, + 0x498d8138cf23680aL,0x928c14a4e04f2860L,0x96192dba16a5b6daL, + 0x49dea95b5f9a9103L,0x80dd457801724102L,0x0000000000000085L }, + { 0xe90725000e09221cL,0x62e05b21f21de056L,0x448cafa1e0e60950L, + 0x657fb97b6f775129L,0x5d2991bdf1f34acaL,0xa66cd5ac49ff15d6L, + 0xdc1d6897d049ec79L,0x388fca84e72baea8L,0x0000000000000067L } }, + /* 121 */ + { { 0x6520b49da6ef1dd3L,0x391a045e3ba6cd76L,0x9c84980af33d5f48L, + 0xe53cf5b2ef07474aL,0xa35b2e9a78bfb1eaL,0xeca97fd6eda906faL, + 
0xf1a937891b9f2cf4L,0x667533693ab28589L,0x000000000000010dL }, + { 0x5b51049673691fafL,0xdc73d3a9d57ec618L,0x7e2921bb930a8525L, + 0x094f571e40b05b69L,0x5e96a017413bedcaL,0x9e7d4f728d1a6b98L, + 0x55143fda3eade8b7L,0x859b8444d16e454dL,0x00000000000000fbL } }, + /* 122 */ + { { 0x7c22083e7c667aafL,0x33545cb94a91ccbaL,0xca1e99318ca0e94aL, + 0xc3afff23e4eaa0c7L,0xa21ac43642f56844L,0xfcc68a8b60d52d0bL, + 0x401a585b6a9301d4L,0x547f762c907abce1L,0x00000000000000a3L }, + { 0x63dd3ed3fbe260ceL,0x2717752d80dc01faL,0xd5fab75d6f1da3e4L, + 0x5f16864a5261f10eL,0xbe7b1f63d20cd6bbL,0x9d638c10221ac656L, + 0x3137b8f6673b918eL,0x23eb44384ada2fb8L,0x0000000000000174L } }, + /* 123 */ + { { 0x194e27c42a1fbcf4L,0x4c0d285b5facd5eeL,0x75c2ebdd915e6607L, + 0x1e696510ef0a6a9aL,0x13c5afa1067cf458L,0x2be013c17bee1fbaL, + 0x85a406d6dad279e7L,0x0042951d5142cf59L,0x0000000000000031L }, + { 0x6a735ec1a22bbc45L,0x4ee5391a7f56f4d8L,0x305af9d0236001deL, + 0xa8b21851aa2f8d25L,0x0e2c36d8187db78aL,0xcfcc083fa1a888c3L, + 0xb91dab7fbd3e7d5bL,0x62d85460f4fdd023L,0x00000000000000f4L } }, + /* 124 */ + { { 0xf568ba024972d703L,0xfc44ca1d39098a03L,0xe9b8e542ae28c855L, + 0x4fd4f3605b1b4536L,0x2e08b07b4c7f7e48L,0x042f3b982230823dL, + 0xc9ffd3131889fd13L,0x56af0652c6c68359L,0x00000000000001bbL }, + { 0xedbf05e206e0f16aL,0xfc1ac2fad74644a5L,0xe59a0a980f92c71aL, + 0x13ae37d736c800a1L,0x5f20efc6236178dcL,0x443a58b82b46ef10L, + 0xc9517dcf442509e4L,0x7d0bb415640ed9b0L,0x0000000000000166L } }, + /* 125 */ + { { 0x3aa30a613d22842dL,0x8c6e00f5b3c4ece0L,0x8764cf876df82b79L, + 0xda92d86d78d208c5L,0x0a52d391e788854aL,0x499b26fba59b0994L, + 0x5dc133ad04c5fc9aL,0xa5c0926934e3f134L,0x00000000000001ddL }, + { 0x6f0dcac2fad6d673L,0x6d8fdf0500f3b3feL,0xece71941631756e9L, + 0x3990f4930a4d80e3L,0xf2aca93631d13001L,0xee91966c75581638L, + 0x6df0f574e6dd5679L,0xbe124868ccd71cdaL,0x0000000000000111L } }, + /* 126 */ + { { 0xf644c726475cc1b4L,0x915fc2f92b73978cL,0x65a7e6d10e3d7eb7L, + 
0xbb44e21af40c38e0L,0x988662b9e1ad24fcL,0x270ba4ddc35606e5L, + 0xc3834a2c1a4f93f7L,0x93d0c9a23362a4d7L,0x0000000000000021L }, + { 0xe2cb7b8cf769fd7fL,0x1815da9789a213b9L,0x7b4f8c566b910fefL, + 0x2088b30926931438L,0x477b71bd925b37c0L,0xa049a92126a640e5L, + 0xd3ddf1bdfd21c6efL,0x9b5f9d7d232a56b2L,0x0000000000000064L } }, + /* 127 */ + { { 0xd640adf8679a9c35L,0xcdad98e3cb74d796L,0x464b8ebb5f8e9dafL, + 0x4738614ead4a073cL,0xbd86c0ee2edde557L,0x77331738576ce0b9L, + 0x9b5d33274095fb96L,0x72f0aeb3ee09aeadL,0x0000000000000136L }, + { 0xa388c76d64e54ba5L,0x63fe7af1dc474d21L,0x7fa3e9d1b2a77081L, + 0x0447b49ede1240adL,0xd9f64b66c720303aL,0xb1c78029e6bd0213L, + 0x1caf1c700aa03ea5L,0x179180eb3bb85d2bL,0x0000000000000103L } }, + /* 128 */ + { { 0xadbf4f9faf2ed12fL,0xce1d19e4f380fd8aL,0x0957bdb5a39e81aeL, + 0xf9833321626ef6bcL,0x110ae5ea0cf5b28dL,0xab15945020392cd4L, + 0x67c498876bc67855L,0xce7e5938a3fd61c6L,0x000000000000004aL }, + { 0x59c5b9ef28c7dea9L,0xd02f95ba0a6a7184L,0x034dc2578202769cL, + 0x213b0b0894dd6896L,0x03730b7fb5dea95aL,0xfe243ed0617ca889L, + 0x16cf4d17fb1ba052L,0xd8691d6b226f96daL,0x00000000000001c0L } }, + /* 129 */ + { { 0xaa2edf3fbf8015c2L,0xe7f8236dc49502d8L,0xe890f6e0a6a43157L, + 0x318ef325a2d04b0cL,0x9cc0668da809dbabL,0xdd26937ada67ca21L, + 0x8f27c12c83febc49L,0x87b3db2f3c9b9844L,0x0000000000000029L }, + { 0x37e7aed0fd2e3dc7L,0x498e8bdb7415fd55L,0xfc0d6c9a58a45f25L, + 0x83d5baba209c85d0L,0x31ec8dc6d579e1eeL,0x1f4cad0ba502bfedL, + 0xc432e6ce1f41bef1L,0x3b10afaabbffca65L,0x0000000000000191L } }, + /* 130 */ + { { 0xbd9f7df053053af7L,0x60304765b28a1cf4L,0x441778fc7ce90438L, + 0x8fbed36eac8c5dddL,0x27b1313bfb59ec61L,0x9d2656ffa1b1becfL, + 0x334e1345945973a9L,0x3261888cc362b595L,0x000000000000018cL }, + { 0xf413a414aa7f6ff8L,0x092aeb883fab7c7aL,0xfa1d886b7cc307baL, + 0xdc81c1252346100eL,0x93d4d27302140c93L,0xa1ed7e3ce6104835L, + 0xe2b91ecfdf1795f3L,0x160dc11a369ed416L,0x0000000000000191L } }, + /* 131 */ + { { 
0x9a72f46e8b57d7ccL,0x3140b0e54bf02386L,0x886c396e05b3a91dL, + 0x1b9ab3a9a4ec26e0L,0x742feaebc50f58e9L,0x1592c60855e26af0L, + 0x943cd476bb1cd9f7L,0x3ed97fd4c7f02c89L,0x000000000000017cL }, + { 0x53b02503e6d54964L,0xd9bd1162c6a318c0L,0x18ff6cf49cc28c22L, + 0xa45c784003534640L,0x8ea3335eb4cc0668L,0x7ad727f8f42dbe03L, + 0xb157e911fdf6c3cdL,0xa7f894c9ec992d76L,0x00000000000001b3L } }, + /* 132 */ + { { 0x91e6e397af09ea77L,0x26a760b975dc25c5L,0x8c040c08b94a197bL, + 0x041baca8b68ce619L,0xa19a0d155bd23564L,0x86ca5b94d977b33fL, + 0xf31f87f8e5fbd029L,0xf76c55a6b1901f99L,0x00000000000000b8L }, + { 0x175bf8c33846ec9fL,0xf462205c9deaca46L,0x92cb5ec0a3108df0L, + 0x879db283cfaed928L,0x477dc00465049fb2L,0x48d24bac96ee5031L, + 0xa7db6b1656adce45L,0x0110cdabab1c684fL,0x00000000000000fcL } }, + /* 133 */ + { { 0x151b66d84d308bf2L,0x99013c9fd6638004L,0x6892df92fd383bf9L, + 0xa10efd843ffc8efcL,0x527e316c313ea287L,0x8ef6e3cd3a0df740L, + 0xcb96e430f6ebd2a1L,0xc1ebecf2a70ee4ceL,0x000000000000018cL }, + { 0x80d14ad71a70404cL,0x6ad21dd0f9ce2a30L,0xb94cbcde3aa3e072L, + 0x0ab596116363a690L,0xe70bff45c6b1e2b4L,0x1296dd0b66ceec5bL, + 0xd4cb2a74747757c0L,0x08988ca63d7d91e8L,0x00000000000000aaL } }, + /* 134 */ + { { 0xaa2dcfcaf8db0396L,0xe8ae8f37b422da76L,0x652f834996485724L, + 0xf647c3c47bf1493fL,0x8b600b46b0247a4eL,0xabf3e4397aebda8eL, + 0x2e1d231fa7958df0L,0x38e692b1f881bab2L,0x00000000000000efL }, + { 0x1f3c168926cf3047L,0xdad14f9459539858L,0xfde85d1c293f20b6L, + 0x2ea5436ef57abb17L,0x0d1a8ffc1794de38L,0x9ba508e22bfecd2fL, + 0x110f0a7fdb786042L,0x2ade6f647cde31f8L,0x0000000000000196L } }, + /* 135 */ + { { 0xc996a537fec78898L,0x0b39de72de0fa77fL,0xf6d076acd34cb08fL, + 0xacd8bb82da78d353L,0x5fe804d3a0392cc1L,0xab7adedee581549dL, + 0x883901a0c067c6d9L,0x5855ffa24ed93f37L,0x0000000000000191L }, + { 0x29570e36bf9ebef3L,0xe21046a5df4b3177L,0xf9b89a95a6816b5cL, + 0xadf39281288d0e11L,0xd6baabe53979159aL,0x411afee05c8fabb2L, + 0xf192c3afe5c7af10L,0xaa72e81cd7dce37bL,0x00000000000000f7L } }, + /* 
136 */ + { { 0x20fa3c0f16c386eeL,0xb33b0469d4c09839L,0x79e0d722876a3136L, + 0x343c0a923c406c06L,0xef220e3e4debe27dL,0x09d7b1e1196f00eaL, + 0x4a0f5dd824a9dcffL,0x53582ec599c1d085L,0x00000000000001e2L }, + { 0xcc8ef2625138c7edL,0xdec431946547f88dL,0x2b6e53addd0a9488L, + 0xeb9f1efa8257ebdcL,0xc583c6eb1f08c989L,0xf173691140163768L, + 0x6282ff8bdbc20e3dL,0x26b810059cbd514eL,0x00000000000000d5L } }, + /* 137 */ + { { 0x2449522fa0025949L,0xb26d888f0bbd8945L,0x33442f5fe637216fL, + 0xd8ec3b64472827f6L,0x91d8a1a399fc2681L,0x6d232ead68c7710dL, + 0x8e5bfe2fe51b2762L,0x0f9f4fedfd109fa7L,0x0000000000000004L }, + { 0x1952ea516b4a05e0L,0xcb0d48eef21c78ebL,0x64d366191997dfdbL, + 0x0d11b2048b4c21fdL,0xa6f569b6be92303aL,0x2b8f609678c5e809L, + 0x7226b5ab36805d8eL,0xd6cff180db349ca2L,0x00000000000001bdL } }, + /* 138 */ + { { 0xa49f8576943cc612L,0xc914319e832b31c7L,0x9225e297cccadebdL, + 0x4918fb42b0619821L,0xaccb308425b1cc7cL,0x751d3347a646e5f0L, + 0xeafb4aae590e3e22L,0x821460382c4a0008L,0x0000000000000151L }, + { 0x3c2481dbbf96a461L,0x51c122e9b52a3ba4L,0x21c2858e464db08bL, + 0xb1014b786d6a081dL,0x167d3ed4f533cef7L,0x6cfb329481545f7cL, + 0xea46d31c449b7b9fL,0xcfad76139621c299L,0x0000000000000081L } }, + /* 139 */ + { { 0xef796327478a7f0eL,0x914183e2de17705dL,0xd24a26df572117e8L, + 0x3cdb1b09b7cd52cfL,0x9e42b9fbad83c160L,0x6971d2ea709ef8c9L, + 0x1894fc5b8ee54ccdL,0xf757b4e534a520fcL,0x00000000000000fcL }, + { 0x5a5518cc86b62347L,0xec51c9d27bc2a928L,0x2eea2b052966727fL, + 0xbc8a8e3a0ae43e6fL,0x80535b5e05ca066bL,0x91ffcdb18833986dL, + 0x2f4a5bba32374cddL,0x08763a490d202243L,0x0000000000000124L } }, + /* 140 */ + { { 0xe498b9724efac14dL,0xb6f4bf8da79a9d3cL,0x0f1e8dbdd6e07c29L, + 0xfac30cfd71771538L,0x4c91ed2271b03263L,0xbf93833519b455f5L, + 0x76a5e789127092bfL,0xa97674e1b4813bd9L,0x0000000000000128L }, + { 0x29b63c41583e5924L,0x61f9aff18f171d06L,0x2b45b3cdab227a28L, + 0x939d5dda8a11ab70L,0x2bfb47b0e8db6971L,0x562379df0ec10805L, + 0xaf5a648124ce1801L,0x8d98c43434f94abaL,0x0000000000000150L 
} }, + /* 141 */ + { { 0xdea9fe73cfffc80fL,0xe23e2e9bd43473f6L,0x27fb3ed3c9d37ba7L, + 0x733766d27a3fc357L,0xd0db4cf38e04a03dL,0x8ce017522bbe0f43L, + 0xd87eb719da986f4fL,0x6d1b50ae2fe6b037L,0x0000000000000153L }, + { 0x371f5defda40bab1L,0x07d6a8af9b2bda63L,0x5e8a5c890d4aca87L, + 0x4d72f0ff643ff8abL,0x9c4c10d94bf8ec2fL,0x36b0eaba0eb93e22L, + 0xbc4b0e8f1d2dfd01L,0x9f252e5a9d34a082L,0x0000000000000142L } }, + /* 142 */ + { { 0x4affd4c17d0e7020L,0x9b169aaab5482168L,0xdbe01708588f348fL, + 0xdaebf6ff885986bbL,0xb33987f515f9c381L,0x7e455f2c04a94a7bL, + 0x39a41442a0ed6849L,0x1c1ad4a61ef7798cL,0x0000000000000154L }, + { 0x7647b628072709c4L,0xb330d68b8810e5feL,0xd1bd8874e92e0f63L, + 0x144e4fb9f8bea9baL,0xc15afc188318981aL,0xe19c5c82b68c6a07L, + 0x858c57a236e00b66L,0x9b25511007cb7aecL,0x0000000000000011L } }, + /* 143 */ + { { 0x121ced27c887027dL,0x6050f3352bfab286L,0x6e373c1c19d511e2L, + 0x02d4c3a97f4c69f5L,0xe6f356af25226bb4L,0x3b9011c383e7ac30L, + 0x43b0c23d33d8fdfbL,0xa8c390f7af2ea363L,0x000000000000000bL }, + { 0xc430c3d67e851bacL,0x8991c389a5f544fcL,0x006bbc6467fba061L, + 0xd49d024e97cbdbf4L,0x4539b7dd7734adadL,0x90ba8f9f28cb6d2aL, + 0x7a9218304de4b3adL,0xb28732efa7b96928L,0x000000000000006aL } }, + /* 144 */ + { { 0x71dab52d22ed5986L,0xdeee627a58533e06L,0xe8fee37acf155fe3L, + 0xcd61490d7ae8b132L,0x2706e18534a08b94L,0xa85ffd52f9c15c30L, + 0xd5a224f351a5ad46L,0x44d1b6d554d700bbL,0x00000000000001e6L }, + { 0x96830686862e4e9cL,0xfe5cd76c48763fe4L,0x60309679c0839caaL, + 0xc0e4cbeb8d83d62dL,0x911e254e11bc4ae2L,0x96a0d7c864fca062L, + 0xf5785dd5e9a27045L,0x2f4677d0f3e0412cL,0x00000000000001beL } }, + /* 145 */ + { { 0x4c0012ddab01a6dcL,0x391bd6c1ae1adb69L,0x3ae7daecb9b05079L, + 0xc2714f9e62a1061fL,0x71978ee7a96536b7L,0xeec11bd05e17654bL, + 0xc71166e0efab3dd4L,0x0f7aa57287edbf61L,0x00000000000001d7L }, + { 0x26ea6f7d51eb5932L,0x354ea0aa5f882ca4L,0x175b60977739f7dcL, + 0xd335192a9be57934L,0x9801f42378545eccL,0x32b8e2567b643c9dL, + 
0xb9411dd723e3abecL,0x656dea68cf1c6509L,0x00000000000000eeL } }, + /* 146 */ + { { 0x4d38e140a0890debL,0xbf7bd87dbceb84bdL,0x51f0ff72ba041decL, + 0xafeec70aa6820be9L,0x755190a38c486298L,0xecdba558e7010ec4L, + 0xced91db88c7879b1L,0x08de3e4cef5e215cL,0x000000000000014cL }, + { 0x9c1534ed16266da2L,0x9ce322eb7b4c9009L,0x37decaef69927688L, + 0x6525097f05c2844dL,0xd23b7e131ac519abL,0x682ebb7265a3cc86L, + 0x0c531db9628c4575L,0x2e00e8b873805373L,0x00000000000000beL } }, + /* 147 */ + { { 0x3807c80057ed32e9L,0x427e40cf7c024997L,0x58506abbabb54830L, + 0x5649776fce820bf4L,0xb5353293b2c43e81L,0x671e8353cfef6648L, + 0x27217d3f903bdca5L,0x40a9c109a813fd79L,0x00000000000001dcL }, + { 0x6beaa6c33db21a38L,0xcae222e1d73ef7e4L,0x1ff684e7bd1d507fL, + 0xf5bac664587a77abL,0x58c74f620c64a4d6L,0x4ca837d96a7c378aL, + 0xf43df5313e42e409L,0x8a9a4347fb49e14fL,0x000000000000013fL } }, + /* 148 */ + { { 0x85ab4edf992f8923L,0xe24aa5e06fd209f3L,0x27be9b871b1340eeL, + 0x2957d11f91e0bb40L,0x425afad2f3d4c62cL,0x2d231286c7ff7aafL, + 0x96412b2b0114cbe9L,0x6706a231c3e23529L,0x000000000000019fL }, + { 0x06b3bbd2225c02afL,0x53ebc1663fa3e98dL,0xa6df2b75b84f482eL, + 0x912b45212bfc55dfL,0x30bdbd40512a73daL,0xac0f43d93d53eaa4L, + 0xfc358fe40c27fd53L,0x2cb183be919424b4L,0x00000000000000a3L } }, + /* 149 */ + { { 0xe39b0c2d3fa6a746L,0xe84a79221d5a24a8L,0x70a5891478cdf2b5L, + 0x8a88067d30666cb3L,0xb09a709ef6d71d06L,0x50007a3e0065d184L, + 0x7046af4bb8dc9448L,0x2b6a3129c65493acL,0x00000000000001fdL }, + { 0xd3d5d5bde45f2771L,0x8542b08af432ed95L,0x2ecd40fbf232a6bbL, + 0x0fcb6143e8beccb2L,0xcecc513abf8e247fL,0x955d56f78da3039bL, + 0x9157c61956c2a0dfL,0xa6d35cbf3031fe2aL,0x000000000000018cL } }, + /* 150 */ + { { 0xdd800b1bbe0c4923L,0x046ae7406902907bL,0x2398b37f957bd0c7L, + 0xaa8e1a9d9655f8b8L,0xcd2927fa500f4150L,0x826a9c6d202e7aeeL, + 0xb4cf58b39f29692eL,0x3093868cbf41577cL,0x000000000000011fL }, + { 0xadcb5e7a333ed442L,0xae5c8e2f906fef7bL,0x2d9b01233d98f228L, + 
0x4632f2da7ffe125cL,0x59487731ba231835L,0xa0caae5b12d2c512L, + 0xbf00e6589857d9c4L,0xc5d1008654f200f6L,0x0000000000000172L } }, + /* 151 */ + { { 0x589540462fc283e0L,0xf76339847ee0880eL,0xfaf1b40eb7fd1622L, + 0xecf5151ef598c5edL,0x6b4d92f77e00d9bbL,0x7543e3b3a8c43fd4L, + 0x3994e12c6511d1d2L,0xdd841a1daf05b6d3L,0x00000000000000c6L }, + { 0x23da17e023b991adL,0xaab2b21371fba514L,0xb417ec5a0ddc1879L, + 0x173bc8ad5f63acdcL,0x2fcf52101e2a7d50L,0x63373fd06106d008L, + 0x1e8211de7db012cfL,0xa07766d9576545efL,0x000000000000018cL } }, + /* 152 */ + { { 0x8e4347b9af80dfafL,0xa80b631f9c4667f3L,0x6ff1db266ddbc238L, + 0x6161e365aa8718a0L,0xe7f7ac90af31c35fL,0xc03831d1fc6846e8L, + 0x1e669d10684175b4L,0x6da9d620934b731aL,0x00000000000000c7L }, + { 0x981f597ba3e4e78bL,0x2c14dedc55099f9aL,0xbf37399593088c61L, + 0x7c5683079b207458L,0xc4440c47a2276900L,0xb6df23c8f7e6daf3L, + 0x4f662c2542929103L,0xf4ea6db18b3b7963L,0x00000000000000f9L } }, + /* 153 */ + { { 0xc669eb88ced36049L,0x87a4ffe1f41b99f8L,0x690b75636a72e108L, + 0x67dd6a8c65a0bb8aL,0x42cf8c5896e42955L,0x5286b5f31aabffadL, + 0x1f7dfaf28f6f26a4L,0xc5d9e0ac0e1ae503L,0x0000000000000120L }, + { 0xafbee3ffacc10da7L,0x67e2d5f9944946e5L,0x8ec17e863c4220ffL, + 0xfe6f7414bd6f632eL,0x4a9e3c0fc3fc9ef4L,0x25ff3cba03bfb870L, + 0x18fd3600bb03342dL,0x1e63e7530050cd2eL,0x00000000000001acL } }, + /* 154 */ + { { 0xdd83d07c8f3d6a02L,0x71fc143c7ef4d0d1L,0xca994bf0d4c7af61L, + 0xc8a93e98827c5cf0L,0x4a102c7b2b697882L,0x633c87d58a55e8baL, + 0x1ae8822fcc2d64f0L,0x2ce9b53f986d01fcL,0x00000000000001c1L }, + { 0x859639fd95dc1b79L,0x2728f7543f4e616aL,0x6e703c4cede2fb9fL, + 0x042f7680d50fae9eL,0x0546bc3bc2d530edL,0x00a4006bcdd598acL, + 0x3f3286c9e1294910L,0x77782255b6bf9629L,0x0000000000000146L } }, + /* 155 */ + { { 0xaf81421ee30c98feL,0xdeb0feb0fc2cd705L,0x9b2c4ca614df6ad2L, + 0xd38134de9ba314e8L,0xa443deb84f04b16dL,0xfc556ee0f07f8ca8L, + 0x3c1c83bb3a4f3917L,0x8397dd24b1adcd41L,0x0000000000000199L }, + { 
0xca01e17edf4781e6L,0x32d7c31946f1f901L,0xa227a613b53090daL, + 0x2495b1dca7c8c607L,0x1cf2fbeeddc69709L,0x1d3d82bb45608098L, + 0xcfcddda3085134d7L,0x3dd171b596798c41L,0x00000000000000d2L } }, + /* 156 */ + { { 0x97a40f84d4dd7e96L,0x7114c8ea8409fc0cL,0xc56f29e6a9d11393L, + 0x3b6066218fd8c6d6L,0xad3baa8600269e7cL,0x1413c6b005929d5fL, + 0xc1ad7e40222e365bL,0x6a82621a4798aaecL,0x00000000000001d3L }, + { 0xaeac45c4c1003c81L,0x9ef9ef5af43d8602L,0x36a65f5e60f77469L, + 0xf312e7abbf5d2858L,0x2f53ec81c84acef1L,0x63e32ca29d248b52L, + 0xfe9aa7c581e65c60L,0xe3686c9a52841973L,0x0000000000000017L } }, + /* 157 */ + { { 0x0b2efe659e90de99L,0xbe4485bcad05ab63L,0xc48a6a52e14e4892L, + 0x2ad8543022628687L,0x261f0e955eb3db54L,0x48e81863d45e5841L, + 0xcfe1ce0f8ed75739L,0xbd6f1ff57d84ade4L,0x000000000000003fL }, + { 0xd43711ddd1bf968cL,0xd558d7cd48dfa472L,0x49f09223e425a566L, + 0x0cf833385c26d041L,0xbe7b81f17c2c1743L,0xe3bdc33e5143d9d9L, + 0xf385ac3594fd3faeL,0x7551cf429fd1811aL,0x0000000000000113L } }, + /* 158 */ + { { 0x4928f55b20193bb2L,0x96e579d07310b872L,0x5ee06309d345d276L, + 0x9a43e432a871868aL,0x28c113e111038683L,0x8286ecf3a332f108L, + 0x3348aa370385cbb4L,0x698ffcaaef158dafL,0x00000000000000c6L }, + { 0xa044c54af6908745L,0xa6b336e46a3353fbL,0x694c2852d561e821L, + 0x1b2979703634917fL,0x6e1023b981f61315L,0x6817dc2bef46a5efL, + 0x93dea0af8e114f7fL,0xc3cf3cd5ed72c5bfL,0x0000000000000136L } }, + /* 159 */ + { { 0xbb8799ab7b080de4L,0x3b8f781dd69d8396L,0x76b42aaa986f8f63L, + 0x5d74c038a54bc5caL,0x76fcb605a9c2fbb9L,0x8451b44080178930L, + 0x40f00c389d286f0dL,0x3038e9520c543263L,0x000000000000014cL }, + { 0xc94bc3816977aad9L,0xadbfd082d7087be3L,0x06d0820c875fed08L, + 0xe1ce84d4345656fcL,0x71c4d8e00fd6dd4eL,0x23338b226a5fab40L, + 0xd477eac10baeeb6fL,0xe4db08bb5f80c26cL,0x0000000000000078L } }, + /* 160 */ + { { 0x0111d12a1078342aL,0x0534725e559a1064L,0xea459d590fd3ffddL, + 0xcf694a9f06f0ac1fL,0xf6d24adb3e19bc69L,0x3ce38f5eb9ddcd00L, + 
0x38400f66b632dd4eL,0xcab8fdfbe15e1c55L,0x0000000000000085L }, + { 0x0a943f6b8d09422fL,0x17d297560f988c3bL,0x55a441fa2ef2e4d9L, + 0x6743523b35f7c13fL,0x274d3407edaad3ffL,0x594114359347242dL, + 0x1cb273013bb8615dL,0xa0437004bd7794cdL,0x000000000000007dL } }, + /* 161 */ + { { 0x824b99a62d712c44L,0x148368f8a6962577L,0x8ed68432d65e2287L, + 0x140283066f5bc5f8L,0xe6cf31214ec3479dL,0x96db6f449326db70L, + 0xca5ac098ca32936bL,0x69e248c72fea21afL,0x000000000000004dL }, + { 0x0aa89092a71269fbL,0x2f6bdba818650b60L,0x1d9cc2a39fb55db2L, + 0x0fceb0df6311e9d0L,0x6faeb79c90ac2c1dL,0x2393b222cb1f372aL, + 0x62a6f3dfbc8c4193L,0x9dea30b22fe8e674L,0x0000000000000001L } }, + /* 162 */ + { { 0x7df689ac12b3118bL,0xd06ee39d6cb6ea56L,0xcfcc22c2187cd978L, + 0xb985b6818d537d87L,0x75845152e9f56db2L,0x0f8398715e098c15L, + 0xbe96a5c83b212cd2L,0x3dda0338d9ac1c47L,0x00000000000001fbL }, + { 0xf06b7fe0cfa0a9b8L,0x9478bac7e22dcf75L,0xf3815e04136887c8L, + 0xed811dde914c54bcL,0xc8c241600f51ea64L,0x63914d834c870577L, + 0xed24e552a8abbcb4L,0x9e5eb9e82644f52eL,0x0000000000000001L } }, + /* 163 */ + { { 0x1f65a04e66d52313L,0xfd6945454d3f72bdL,0x2bc0ddafa6b7ae11L, + 0x921f79d8571ab247L,0xd4c5f966ae5a8d68L,0xfde17716aec5ce13L, + 0x70e6eda4b764bd39L,0xffe94085990d6783L,0x00000000000001efL }, + { 0xf3fa0e27d88f92e8L,0xa21ef0fd9c77123cL,0x6259974c89274dbaL, + 0xd4cfa4a5b9ba2762L,0x10c909d246ebcaf6L,0x0317a10d8f8e2870L, + 0xb0771de1453aeea2L,0xdf0c479168c6b0a3L,0x00000000000000eaL } }, + /* 164 */ + { { 0x11bc1e484c854477L,0x2bec25b48638e47cL,0x43d4e02b869c54d9L, + 0xe318de32be1e7ed2L,0xf5471eb06b460c4aL,0x38ae7bf3aa426afeL, + 0x23ae26ddd8452dc1L,0x9d3fc1d55782de9dL,0x0000000000000164L }, + { 0xd87cae310ade1979L,0xa847041d3b4bc728L,0x38923c4056c3c9beL, + 0x36fe182ad74ae467L,0x92bff6f4ecbe49aeL,0x6680db80dc41f9f5L, + 0x35bac06fe4630715L,0x6d68b4c7d6d07307L,0x00000000000000c0L } }, + /* 165 */ + { { 0xdbe22be7854dfcf2L,0xee21a7dfa6ae3bd0L,0xf4633ad1a521ec46L, + 
0xee94527a41a9484cL,0x1145eb9b2aa123f3L,0x5634a82acae3ca92L, + 0xe176aca0fc85d925L,0x504cf7fc19082d8cL,0x0000000000000078L }, + { 0xd74ce7c43799793cL,0x74ddd618b5519fb5L,0x2cf6df9395ff9808L, + 0x00ea45d1b8bf61e6L,0x26863613dcfcf54fL,0x67423b76030035b0L, + 0x9fbc75344028a9cbL,0x7b52ce37051a077eL,0x00000000000000f4L } }, + /* 166 */ + { { 0xebf7d8ad96bec962L,0xd1cc81f617e0107aL,0x64c44509214e1058L, + 0x6c298c4342394c9fL,0xd910052d1a660513L,0xc364375490df8243L, + 0x2313be1efe5cdea4L,0x249a60f7d27fb7b1L,0x0000000000000076L }, + { 0x749758381cf593a0L,0x0c9ceefb8364c59eL,0x2f5a1333e05c9991L, + 0x30ea5e1f421808e3L,0x56fb3a4f4f5e8f4fL,0x2cae6e2eb6c0cb47L, + 0x60b307fd08bdcc6aL,0xee17901c0ff8c117L,0x000000000000001aL } }, + /* 167 */ + { { 0xc048336b89aa9e14L,0x66634271f676700fL,0x4daa0433906b6980L, + 0x30247ee1ebb7ab23L,0x969b4aa7eb59a053L,0xd78ef8258000f4d5L, + 0xe5db38eb46026b5bL,0x06a43e5d7d6856c4L,0x000000000000003bL }, + { 0xaa0ae838ed2a0ee7L,0x04bbe528f16e8813L,0x8ab6df5c4ea64137L, + 0x5be80cb606e29867L,0xf19b1b72f459ed2bL,0x7a9cce4d1761521aL, + 0x39aff994aa516f3bL,0x97d92e86b3416925L,0x0000000000000007L } }, + /* 168 */ + { { 0x25aeede15af3a8caL,0x33924782a5c351ecL,0x41e7a3fbf93ec080L, + 0xb04f93c4e6f425b4L,0xe4ec12ec81e76009L,0x797366d45180ffc6L, + 0xd293cbb50e0aef3aL,0xa149694468d71d91L,0x0000000000000061L }, + { 0xf52c541c675a67a1L,0x67d38d308f5fe906L,0x2a70bcccf6be988eL, + 0xae03ecbe18589886L,0xecd026167067045bL,0x1facdd9910ca8d96L, + 0x7aa10a8230c0735dL,0x2a27e5543328f21cL,0x0000000000000015L } }, + /* 169 */ + { { 0x3dd609e0e6057e27L,0x87e8b6a7c7a454daL,0xff5991451f32dd5bL, + 0xea397a88d0ef51e2L,0xc49866a125567546L,0xea45c8b13228b480L, + 0x3dbe0e77dd01997aL,0x0e2ea28fc51867d2L,0x00000000000001f8L }, + { 0x6295412d69d0820bL,0x031731271ea65a18L,0xc27c8221eb06380dL, + 0x7ffd4efc75fe9706L,0x7b396a575a71d250L,0x61c80051c7cb7543L, + 0xe07db4d7ad4dbee3L,0x1c7481f49b192d45L,0x0000000000000143L } }, + /* 170 */ + { { 
0x5eab2d0408e1cc4dL,0xe93758d3ad2dc1eeL,0x0ceb7dfe5c9c7393L, + 0x530d86a9d3379683L,0xef5283cae24f86d7L,0xab5d1a64f0b1bb0bL, + 0x96aabc1f54db4e3cL,0x3e3d87cc3bc00c59L,0x0000000000000144L }, + { 0xe50a82131d60e7b0L,0xfc9b629b5d33d018L,0xc54aee42fd05338dL, + 0x0678f2c0e821c6eaL,0xe5c9d75f06ac09cbL,0x8335751353018df6L, + 0x81ca6fac0bf8c667L,0x7fc8020e9d0ae2ddL,0x00000000000000e1L } }, + /* 171 */ + { { 0x8add47411baaa5ebL,0x02cbb75979bd8036L,0xd8680c40cdffed22L, + 0x1c23a8f04e091141L,0x65d141ed20748b87L,0x586a1575659e9289L, + 0x7c68d7cd5006dbfeL,0xda0ad0df22569a74L,0x0000000000000148L }, + { 0xc8fcc5db7f9069d7L,0x2487d2455c0531a4L,0xc5ab4899e9a2db3aL, + 0x52bfd538b4fe9720L,0x73a04ca4d27f35e4L,0x7cbbc549ee2dac93L, + 0x0287229dff3ee7e2L,0x3179878d28da9360L,0x00000000000000d0L } }, + /* 172 */ + { { 0x89b7e9bb3b66c047L,0x22e65869602a3e1dL,0x44f82297c8db9c00L, + 0x0e76aca3d08a74a3L,0xfbf1a71dfcd398deL,0x2fbb6eaa8320e66aL, + 0xa82d0ebc179c9fc5L,0x4e00cf6f4e7ab2b4L,0x000000000000000fL }, + { 0x424c0e9a4890c439L,0x37564a2bbc35a6b2L,0x95a4479dd9b7497dL, + 0xa1ff3f0d612de942L,0x358627fce60d0033L,0x815da8c0522417daL, + 0x506104d4ef6b8385L,0x800728d2f16e96aaL,0x0000000000000120L } }, + /* 173 */ + { { 0x976f2372ab039042L,0x10e6978c9fa084edL,0xd03fdd2f58bec143L, + 0x3200c101fe2045c3L,0xe6868f7ab0a5a928L,0x26c95d1de61faff8L, + 0xa1e20127b7b12265L,0x8e63dd78c2a5ed17L,0x0000000000000089L }, + { 0xbb6533da22bba4eeL,0x3eff6397f496a574L,0x409329f714f2a6b9L, + 0xa08248bd1dfdd73fL,0x62f33f2e69bca1b1L,0x9a177e64ba2e0327L, + 0xbc50e99375ddf741L,0xb87a979f4a56bd1cL,0x0000000000000095L } }, + /* 174 */ + { { 0xe83736a967c1f177L,0x1b6d3508600133c9L,0x9424bb926eac9a5bL, + 0x7a9c01a6c27ef31cL,0xad93bba5122b4870L,0x9eb94e2a9d1ac985L, + 0x511c0206d53f175bL,0xd13eb2525102d914L,0x00000000000000b1L }, + { 0xcfe7dbeb675a1171L,0xb228295c16c0d2b1L,0x8db25b5a057c88caL, + 0x73ea9e96d300e9cfL,0xb0e0037f269552ebL,0xea9d035c9e0f98dfL, + 0x860e49b8d290480fL,0xa35e9512c036b319L,0x0000000000000037L } }, + /* 
175 */ + { { 0xc56729ee8f00df48L,0xb89ca7b611ac8304L,0x497a57f98b3a8123L, + 0xe0431b19c21ca3eaL,0x45a73debe2bb3ce7L,0x2f86cc2badc77819L, + 0x5ff005e4e5eb3df1L,0xf955dd7add27dcf0L,0x000000000000005eL }, + { 0xe0c22ffa00ee402fL,0x5b335e2a3b30bb4cL,0x542551d0643cb101L, + 0xc6183f453cd19688L,0xc6664f22f0be54b4L,0xa5f4cfee4c20cde4L, + 0xdcaa972f80a4c475L,0xde4af20059111ed9L,0x000000000000019cL } }, + /* 176 */ + { { 0x9e9d0bc8d771f428L,0x3ac1ecd9e43ca382L,0x8d5ee480eb93acf0L, + 0x16232f81065a2a3fL,0x1fc04faa2f0b8a73L,0x4a8df7e7025474a2L, + 0x51ac4ff23bb15f6fL,0x66e21b73e0950e52L,0x000000000000006bL }, + { 0x59c9848067a41deeL,0x2cfa95ae7b3e2b3fL,0x54d98386891454e1L, + 0xf0dddbdfeefca6a4L,0x5f691b2411e9cb75L,0xa9b9e766fef208c3L, + 0xe8df100018b33cf6L,0xb8a55ac9d1c174a9L,0x00000000000001c4L } }, + /* 177 */ + { { 0xa99f58625c4cccb8L,0x70bf52092ef4d3efL,0x28f4e57689efc878L, + 0xa2366f96da14206eL,0x90331a007c52107dL,0x478d4cead4a0f0f0L, + 0xb2899ee2472a47b0L,0xae96534e64207549L,0x0000000000000110L }, + { 0x2cc1d655cced05b0L,0xabac3f0901759543L,0xbaeb70a48e577cd7L, + 0x84b0089340e98d6dL,0x26983653603d24f1L,0x6e1458832572173dL, + 0x1d348b26611141deL,0xe52257dcefa27f34L,0x000000000000006bL } }, + /* 178 */ + { { 0x92678f33c947e655L,0xff0fb76a08923795L,0xb2dfe745790239d1L, + 0xea0874923cdbb7ceL,0x21326db905f6d41cL,0x5b1ae9ae79dc5588L, + 0xe145340ce9c31702L,0x07502c29a2c38a9cL,0x00000000000000c3L }, + { 0x0c124f11c156ace2L,0x2c170fe779ff2529L,0x60df9a816e1171b2L, + 0xa19bca8355de2797L,0x1ad927ea7c6cc79dL,0x285901121d61f770L, + 0xfe80c826261c06bbL,0x4050d338aa2642bbL,0x000000000000015eL } }, + /* 179 */ + { { 0xc9397829eaad87bcL,0xe0ac936781e84cbdL,0xb579c24d6ade4fdeL, + 0x50b9aba5690d7f56L,0xf09b29d3d14fb0b9L,0xd0684f2325a0e7b6L, + 0x0514e9d3606f4ff3L,0xe63bdd26e8ad733bL,0x0000000000000077L }, + { 0x0afd06ece0d25c6dL,0xdd90021a00ba2dcfL,0x1b0257708c5bb398L, + 0x077f06d8198ff8fcL,0x87d50ff1b7e2cd68L,0xef75e057263a3572L, + 0xbf257892fa925a9aL,0x847d3df0739d0e95L,0x0000000000000111L 
} }, + /* 180 */ + { { 0x52ab9cc7fec82924L,0x1c76dd69a7220d69L,0xa63527dea06ef0e2L, + 0xab3e51c227183904L,0xf4db35ea716807c8L,0x8f3ede0a748f1246L, + 0xf149364441156095L,0x5f6583d1874b38deL,0x00000000000000f7L }, + { 0xa39189e10b927eb7L,0xa87c6359c2e2f127L,0x0b72c2337fe966f4L, + 0x102b8382105e5585L,0x63fee006e58c39f9L,0x3f052ee3991b5329L, + 0x7f5b854ccbaff97bL,0x935e5f6c5f805060L,0x000000000000016aL } }, + /* 181 */ + { { 0xf19a0355dfd88d38L,0x555cd8e3c549df40L,0x322729e304d006e1L, + 0xf16b706cfd0b0ce6L,0xf156dc0935f2ad31L,0xb30c5213f7a3df9fL, + 0x9f29cc92a55e5fb5L,0xa0ecfdd42b858da2L,0x0000000000000144L }, + { 0xb5c115df52658a92L,0xbce3ed17c4281616L,0xa5595f707fd92a91L, + 0x663c8bfd9cd5d896L,0x0776343f5a9472b1L,0x14e44ca8b033e1bdL, + 0x27a1c9861e5c02fbL,0xece0f2c4cc4ffb32L,0x00000000000001b5L } }, + /* 182 */ + { { 0x17127bab31211943L,0x44a8cac65684325cL,0xd2fe0b88d855fc3eL, + 0x47abab0cce91eea5L,0x5d23ddc478ec7d12L,0xa3986de70cd9fefaL, + 0x32c7b86782655766L,0x3e54018beeaec7faL,0x0000000000000087L }, + { 0xc96e86f2b38d17c1L,0x9cbfbd0c71fa040dL,0xe111ab79f88499cbL, + 0x1d47c5cef71ec80bL,0xacaa3bc146c89692L,0x5f921c0e3d316331L, + 0x31fa081ee768765bL,0xd5dafd5f41eff270L,0x00000000000000feL } }, + /* 183 */ + { { 0x8af10b9d4cda1348L,0xb0769fd225c3013aL,0x450aa5b18957c22bL, + 0x5cafd6c7f5acf1c4L,0xcf71a1409fef8029L,0xe12029f5ee089f5dL, + 0x9752a8fb0fbd2ba8L,0x61e2275f6f70cb58L,0x0000000000000090L }, + { 0xb70a4ac51fbda16aL,0x79910e79f1dfa2a2L,0xba2ce132d9945f6fL, + 0x450d59aeeb4ba4efL,0x6a8e09b34bf2d53dL,0x76010204e620c7a8L, + 0x63f8943d0a53c6f4L,0x14c91d1987eaf56aL,0x0000000000000132L } }, + /* 184 */ + { { 0xe54fb120490d66c3L,0xeaed7328a0dc8204L,0xba014c3804b4294dL, + 0x3f2fa2ab31ddc467L,0x70ff55ea8342ed11L,0xb18da72f23034e0eL, + 0xadc30dbebd8ae3c1L,0x179bdf6f3e945a02L,0x000000000000009cL }, + { 0x46c928ef7484c26fL,0x206b7db1ef2adbb1L,0x0887f5483f58dda7L, + 0xfde4e20c4bc7edb6L,0x484d121d975cafdcL,0xc5b5967086beec20L, + 
0xb579aa88a6d6db67L,0x22c6d87e41187488L,0x0000000000000015L } }, + /* 185 */ + { { 0x0a890757c471d4aeL,0xfef4b1a543a1da76L,0xb892b1826aa701a1L, + 0xbf4d4e5259c65f93L,0x923af929d789df35L,0x3ccb46c60b79c3f2L, + 0x95582ce7cf4cf130L,0x7da081b4257f0ec4L,0x000000000000011cL }, + { 0xf92c6ae59aeef274L,0xe6c5bf4f1437c083L,0xaa74b023e13c86afL, + 0xd21dace62a225360L,0xb3d572b822589fa5L,0x3d4a3916dfa74b0fL, + 0xe76cd8dcb12891a9L,0xa0391a3f59f4cfbdL,0x000000000000019aL } }, + /* 186 */ + { { 0x054ba69e203fc3f1L,0x09168ccb62106a29L,0xb0818540aad5fa9fL, + 0xecb8f20ebff7ed6fL,0x2c80a618bef94afdL,0xe25d8ca0b0abd1dbL, + 0x75e67a41028e0a7cL,0xdd7662ddd6e95b9aL,0x00000000000001b2L }, + { 0x87dff279f289d7eeL,0x4d755d59eea2205cL,0xaeb0fd54c18adac6L, + 0x3a8c46cf7ec01019L,0x6fc90e7eb48d70a4L,0x965c53c110b39ef8L, + 0x455777cc38545a20L,0xa33430f757dd023eL,0x000000000000016eL } }, + /* 187 */ + { { 0xfa9f39490ff53d2cL,0x8dc91596b00349b9L,0xf10a5014d5997967L, + 0x4dd72daba8a6b78aL,0xef5de5408b517b10L,0x142b90bca6d39be0L, + 0xcaeaa3e9eda17f70L,0xa01689d606b31118L,0x000000000000016dL }, + { 0xea6ca563f46afff7L,0x3945c7ba34a5e5f3L,0xc1ffe4c8aa998fd8L, + 0x42a60146b63f535eL,0x50816888d1f509e5L,0xd1918daa9f8cd0dbL, + 0x6505e6bb78a36772L,0x4ab03a819cc6dc66L,0x00000000000001efL } }, + /* 188 */ + { { 0x06089d14d376d986L,0xd0f4e077a2dc35b0L,0x1c11709a53ff2c86L, + 0xfef4ba45123c3fc8L,0x852cd5a71b656fc2L,0xb57c74891fefa8bbL, + 0x8f05383e48110b77L,0x4b55d3ad52c5a129L,0x000000000000004cL }, + { 0x5110cff3f3827633L,0x086784d5e00afe96L,0xcb3878823ead32faL, + 0x3dcf4d162b91cd86L,0x078b6a58e6f3638aL,0x33792112e8b7fd42L, + 0x6964044dee5683e7L,0x3b84210f28e28433L,0x0000000000000122L } }, + /* 189 */ + { { 0x6c28a9a9c3ebeb27L,0xd7bcdcb53ef590f8L,0xe88a2e114dae7f37L, + 0x033522e4726ea7c9L,0x99d503868c141388L,0x59b1aeca61621575L, + 0x719fcfebfcc564d8L,0x3a577af11aeb8e36L,0x0000000000000043L }, + { 0xc3f26ce06feba922L,0x5f6c83ee475a5693L,0x7f79674028bf378eL, + 
0xd2a5e368bdc3f6f1L,0x3d034a0aa6ed90aeL,0x3b1c3a4c4a47cbd5L, + 0xa4f0aa6e4dce2bc8L,0x97c7af4374ca00ebL,0x00000000000001c0L } }, + /* 190 */ + { { 0x0037717879c28de7L,0x617aa2aaab9c330cL,0x4308182666bc61ebL, + 0xe0b5b5cf4d78b504L,0xd76a752d9870fc72L,0x3b4689f5d40b7bc5L, + 0xa97fd86787f2d03aL,0x6ab7b5eefd6060a9L,0x00000000000001c0L }, + { 0xe99eadb1ffb71704L,0x436e58bb390fe3b1L,0xeecab82cab4f19aaL, + 0xda492dfae0f3d9dcL,0x2a0f54bd6e20ad12L,0xaf89fa0f7dbbd262L, + 0xdcc50a1ae8d2eb54L,0x9799f816ef7d0758L,0x00000000000000b7L } }, + /* 191 */ + { { 0x9ec46462104f98ccL,0x4511592272aedeaeL,0x7ae93dd07e62186fL, + 0xd17ce0268d6d69b6L,0xb5347608fd43a8f3L,0xe87f1c137c0ab797L, + 0x3bf597a8139f991dL,0xe293a85be547e0d6L,0x000000000000008dL }, + { 0x0982add38ef668b1L,0xc54e6b2d611c9764L,0x3ce76b121c1d4263L, + 0x3134b28eeff64e73L,0xaf71a9ac2871612aL,0x31c88af2ba093594L, + 0x0b649112ba9108e8L,0x8febc5c55cf437daL,0x0000000000000113L } }, + /* 192 */ + { { 0x7e9ca589c4a2daa2L,0x18ea703c400f608cL,0x6f8cd058d5175103L, + 0x264934724abb6f29L,0x0be553e194296ab4L,0x9af9398fac51657dL, + 0xe232deec4f880ea8L,0x2f81761e67b1e1b1L,0x0000000000000137L }, + { 0x51014bc73a20f662L,0x1fb7e77c49ed9502L,0x89f5096fb62b9652L, + 0x3a659c67a2e8d37eL,0x0f2b2a265804170eL,0x1674fce69ed50a34L, + 0xaaa4537efdc3c00fL,0xf3c3bfda4ce99d93L,0x0000000000000198L } }, + /* 193 */ + { { 0xbab1f5cd81614189L,0xc7d56c4524b259f7L,0xc7baa4b245fb415eL, + 0x302bc8dc7af6bef9L,0x91b770e074b48e82L,0x4a1336e09b6d1b1fL, + 0x285c1357e6680c97L,0x59bcb813c7ccb625L,0x000000000000012dL }, + { 0xddad83b47c019927L,0xe10f2667630dfd5bL,0x15dbec5a31e05d23L, + 0x2aa6e5fa456ac460L,0x46956529243cac82L,0xc69c9c7f4dc8c9e9L, + 0xadb27e09e24a4065L,0xdfa7a34eae41301bL,0x00000000000001ccL } }, + /* 194 */ + { { 0x176a864d59cb1a7dL,0x4d864ca36aefb8eeL,0x0ee83acb1c22b0d8L, + 0x7e80a6ebd980df1dL,0xf582acc47f94ced9L,0xa29cd1233a72c115L, + 0xce12a2a8c7107bb7L,0x0229ca564ed80a30L,0x0000000000000150L }, + { 
0x9774bad52f1c180bL,0xd08be998d749aa10L,0x978c48ab56dbd1baL, + 0x6ed3e3e40afbea9aL,0x8a8be97b153dc5fcL,0xadc7f0959be93ed0L, + 0x8d2429082cee23bdL,0x417523c6dc2729deL,0x0000000000000016L } }, + /* 195 */ + { { 0x74eeccf16c14a31eL,0x488e2534b2de3c2dL,0xf9bb35997cec43c3L, + 0x4210459d916ac936L,0x71d15c029f7e4400L,0x8c9c7c1244553583L, + 0xcc97548dec94a467L,0x4ca678183167bad9L,0x000000000000014eL }, + { 0x033af0558d0312bfL,0xbd1bf4f554161e66L,0x259945a7fa41781dL, + 0x33494da800eef1d5L,0x6c505ec079c3b8d0L,0x70ae1ade1c9f6e69L, + 0x0288f0c176830aaaL,0x7f4cfe3ba62a060cL,0x000000000000000cL } }, + /* 196 */ + { { 0x0d8b447d057d6006L,0x38b976e6fd71c8b0L,0x5e77e029abcf40f5L, + 0x13bee386f103a783L,0x20a6ac205e472c4bL,0x43b045f631fcb194L, + 0xe5dc1d9fc00abf49L,0x28c0bc70a5556b79L,0x00000000000001b5L }, + { 0xba9d07ee8a8640b8L,0x25611023d0e34012L,0xc7ce655bbe24ae89L, + 0xe358e524fa579dcdL,0x57ce2715377bbfe5L,0x64651c6c3c0947e4L, + 0x5fbd8d50f4a97826L,0x6fcdd28fe2e1c15aL,0x000000000000008dL } }, + /* 197 */ + { { 0xb564a2f65c7202c8L,0x7d6340525a54b0d8L,0x8414d6721434fbf5L, + 0x8114215e1d9830a3L,0xc7a758d55ef0fbe1L,0x5705dcf8e6f57f9fL, + 0x5dd49a56d92269d3L,0x8f015d7abdb49f97L,0x00000000000000f1L }, + { 0x07131110b4799ce6L,0x35bbfb992cbcb7dbL,0xc1f00c9ff7ba21e2L, + 0x009d6913b18f49feL,0x8da61951abcf959bL,0x0e6872130d42146eL, + 0x55832817ae5f23f1L,0xc9b5bb689ae7386bL,0x0000000000000143L } }, + /* 198 */ + { { 0x423328db48c74424L,0x32616e11d19cb2ebL,0xe534192a40d6e217L, + 0xdd83a94c0cbdc752L,0x5c623050d733bb01L,0xcd0d631a5b7a4520L, + 0xccdc0a259a4011c8L,0x22f112cc646e7cd5L,0x00000000000001e6L }, + { 0x47d6e29a3e1e4c4bL,0xd5f825389fb1548aL,0x7e3705b54fd3e319L, + 0x8c4ce59a0a08b966L,0xbca749e7d8cbe8dbL,0xcc4496eaaeec3d75L, + 0x17dc723a8a1a313dL,0x250ff77a8ceb9360L,0x00000000000001a5L } }, + /* 199 */ + { { 0xa55a0726fe29bd79L,0x6574a8104f990b34L,0x6906946daad56983L, + 0x0e580ab950d41fefL,0xbc75b5146e6f7f45L,0x508cc97bf0f3718aL, + 
0x51ba2ca4a5634087L,0x75c39077e64d8910L,0x0000000000000172L }, + { 0xf37cccaff77ca6bdL,0xe0a0df41bdb18df5L,0x9f46cff8019e01f7L, + 0xbe4f3d44aa65d72bL,0x7822d8ac6e3663e9L,0x5f37f9223ef9db6dL, + 0x7f0ad39dabe4a9aaL,0xa0a57c70f69cc8baL,0x0000000000000098L } }, + /* 200 */ + { { 0xd9c50cf400fd5286L,0x1ea5b9d572a4b03cL,0xf5e60f9e051ae73eL, + 0xfe9b5142951b3824L,0xb034b2d09fb4d667L,0x4b537a80edc50856L, + 0x69ee10128cb0022eL,0x7c8b9e5c6a548aeeL,0x00000000000000edL }, + { 0x746007bcd933619bL,0x0ce7668e2b9dfe19L,0xa9eed5d3cc6e2a2eL, + 0x35a14f5f7eebf32fL,0x75cb898d67cc4f64L,0xcb2185fc7850c16cL, + 0x09874a7645f79c96L,0x7468f8ae27db4744L,0x0000000000000139L } }, + /* 201 */ + { { 0xc5de68adc88684f6L,0x7c1edaab619a7dbfL,0x258d1735b27a18f5L, + 0xb27e7b658ecd89ebL,0x3d8889c6d879f7eaL,0xa8fdc96d67d5befbL, + 0xc84d86ae37bad73cL,0xc7e91976ce8e56d7L,0x00000000000001a4L }, + { 0x5001a5406319ffa6L,0x0cae64ec134ec04cL,0x1f69a96cd541242cL, + 0x9da259eebf2caeeeL,0x88e7978c28bee805L,0xe9484bebb8e890e4L, + 0x0e5246d0fb227fd9L,0x8be2a54a625d6318L,0x00000000000001b7L } }, + /* 202 */ + { { 0xa223554af472f13aL,0xfac993b75733e91cL,0x26afe9f096c168a2L, + 0x7cfe761d4b127535L,0x84301873e77070caL,0x66b6aaadc7e7cdf6L, + 0xda2dd5eaa1562ed4L,0xa81a2e0039faf8d8L,0x0000000000000016L }, + { 0xa880759c4e3de3bfL,0x0c1e2e1152f3088aL,0xcb2ded9aaa7eba5bL, + 0x4c65d5539f9c11caL,0x0ab9bd87b0dc5c19L,0xd32f8c96ca3f4b61L, + 0x49842fcc28cb5f9fL,0x31ae27ccb90e21dfL,0x00000000000000f4L } }, + /* 203 */ + { { 0x3b2a0a0d6a0ccd0aL,0xa3eeec825993b555L,0xb13486fd9de672a6L, + 0x8d9c51480da05dcfL,0xc4aa444d6739874dL,0xd9cf35b2e29a35c9L, + 0xd6bd9b5f89177eadL,0x9af0f59d2a0470a1L,0x00000000000001d6L }, + { 0xb2f844c7ba7535fdL,0x45bd4c3da842ff39L,0x5fe149ede951974bL, + 0x6982e997fd4453ecL,0xa63f705de6c37c0eL,0x09b0f6a9d2c3ef6aL, + 0xbedd35861776a8d6L,0x4048a46aede11b78L,0x0000000000000176L } }, + /* 204 */ + { { 0xa47c6ee551a251d1L,0xbef4bf120d279dfdL,0x4c2d538cec518a28L, + 
0x1b2b78873880be6eL,0xc69ccf8e1be9b20bL,0xe41dfeae3796a19eL, + 0x25676fc9fb50bdeaL,0x8b815a0503e180c0L,0x000000000000016aL }, + { 0x2ca085f653f5ef65L,0x61dfbbf977b25105L,0x88ea87e3a3346fe6L, + 0x25ddfdee1b95f7efL,0x22074e695b65eaecL,0x11869a154c2e023bL, + 0x8601b57742e83bb5L,0x1464652cfa877e7dL,0x0000000000000015L } }, + /* 205 */ + { { 0x250853c857fa58f1L,0xb58a4e684ca4c670L,0x07b96d0a1b81f40dL, + 0xa4651e10558e8cbdL,0x1a64046e42e388cfL,0x51b0d53944436088L, + 0xc2bf35b3e26b8fd0L,0x4ae787095702cfceL,0x00000000000000fdL }, + { 0x3c79bc29df53d498L,0x4cf31c4e1137f624L,0x93b6856c17a3cedfL, + 0x2461131c6cd9115dL,0xab30a4539228cddbL,0xe97757b68d202bf1L, + 0xa666de7ce6108612L,0xc200fe654f6026b4L,0x0000000000000051L } }, + /* 206 */ + { { 0xea96103cb1a2b4b5L,0x98dccbfe843c0968L,0x6a37072d986ffb5bL, + 0x2fa07af2169d3ac2L,0x8bb85b9a771371f1L,0xeae10d34e7c299efL, + 0x3d4bdc69e2372efcL,0x378df75d8dd856f1L,0x0000000000000039L }, + { 0x31e902ffde7ff5d9L,0x0e9a85d2325a09caL,0xd71b93a6f4192fcdL, + 0xf52a573715b076b3L,0xd726aa866e711d1cL,0x0b61b1df2c292819L, + 0x224e575cc8015de6L,0x68e893e118b79e47L,0x00000000000001dcL } }, + /* 207 */ + { { 0xb7924ff96ffeda73L,0xe709f406a0da2018L,0x368e20eaf89584dfL, + 0x0095112e8355a040L,0x259d4528fd777d7cL,0xb0c495652bf8f2c8L, + 0x7f63192844c5311bL,0x698d0e4f8466d9d5L,0x00000000000000d5L }, + { 0xe10d64fa015d204dL,0x7b626bfa6dd10c53L,0x087f8e63a7698c94L, + 0x525a654705337a56L,0x558e2244df5c782fL,0x48aa1e41855fbaffL, + 0x48f2218e47ee3830L,0xf2523959138463d3L,0x000000000000004dL } }, + /* 208 */ + { { 0x76f4fd69d8695310L,0xe28eb09f7e8768eaL,0x039c1812e0d532a8L, + 0xdda67744c572ac79L,0x1f9800e0785d6293L,0x2bfe2a5a3da76bb2L, + 0x6ed15b90a2bc7217L,0xd80e61bfd1788a8eL,0x000000000000004cL }, + { 0xb9f4037016730056L,0x46f45fefdced3d43L,0x0afd763c1aa50742L, + 0x21e5c652ff92ae73L,0x6ef0830d1bb2063fL,0x18306ecc12d22540L, + 0x4edd9b3a1f15001cL,0xe4eb25b8c0cc5424L,0x00000000000001f4L } }, + /* 209 */ + { { 
0xed61a714a1db5c18L,0x9454e61e7677074cL,0xe970fbe57bf685deL, + 0x221b0c53d2145be5L,0xb931881bee49a5f2L,0x00b91afa14b11d03L, + 0xc6aefe493ec22137L,0x50554e94526200afL,0x000000000000013cL }, + { 0xd42c45e77364c92eL,0xe0500265735218e8L,0xd281da0284d3f3c5L, + 0x312f8424dbf7646bL,0xe1a88f2a485f304fL,0x583f56311127a513L, + 0xed7950c71a60e0bcL,0x92855e104b7b70a4L,0x00000000000000c6L } }, + /* 210 */ + { { 0x8d06185c644614e7L,0x2e906cae4749a424L,0x585412ea2587e528L, + 0x3763990ad12857cdL,0x770c7f70ba5593b5L,0xc2cf6dc4dd5d2a46L, + 0x564da4563b69a1baL,0x639f7e14187895daL,0x00000000000001c8L }, + { 0x05c96b02f8589620L,0x2fe468a341e44054L,0xbf22da11096ad09cL, + 0xbc73c2989c652aeeL,0xcdef9f8b547e1b8fL,0x7073785a977dbf73L, + 0x0a92a1aa7e13552dL,0x227611403a393d3fL,0x000000000000015bL } }, + /* 211 */ + { { 0x89a5a7b01fbfaf32L,0x5c5a62d0be661d21L,0x47970f5ef5e3b44dL, + 0x3ea001edf43bbf62L,0xa8e74285260ae5a0L,0xeb899ebd2697c62cL, + 0x36a003e6751a7643L,0xef178c51ba0725a6L,0x00000000000000eaL }, + { 0xaacf8e9f9bd51f28L,0x39febbdba8712044L,0x8780ad3a5bfc8365L, + 0x408a34cd10e6f08fL,0x8104ca108241ab0eL,0x843e71ce98a662a1L, + 0x9dce8514232048d6L,0x5cba23be1cf3d187L,0x00000000000001faL } }, + /* 212 */ + { { 0x2fe8c9d22973a15cL,0x66fec8ddd42979f3L,0x39af4a390b6afb3eL, + 0x0bb1e436ab65ef22L,0x8f26201e66c5fcdbL,0x3cffe8a35af4870bL, + 0x65ae286f2bb44e24L,0xda2e283a51dd1722L,0x0000000000000114L }, + { 0x4a9c9a56c1e3d708L,0x4fe62d3f1cb0efa6L,0xf070298497e87540L, + 0x138b7d6b3cea46faL,0x0780634e83886263L,0x27e8428071c30909L, + 0xf0af79d7e5838647L,0xc1b86582b236a267L,0x0000000000000104L } }, + /* 213 */ + { { 0x32ff09eda526c894L,0x95abf12014ac7d23L,0xb6f94dcd3cd92934L, + 0xffaaeb1292e6b556L,0x193796ea1036c31bL,0xa9d237e7707ff32eL, + 0xd65a5b0d829d67b8L,0x48edb556db29248bL,0x00000000000001b3L }, + { 0x6ee9f9b2ded46575L,0x496ca08affa69acfL,0xd5aeb3a1f16d37d1L, + 0x4a507db1789e5d01L,0x05e2ce29c827cc45L,0x29b6e4a52964e677L, + 0x0563b0ba4c0e46f2L,0xe75c24484bc46485L,0x00000000000000a3L } }, + /* 
214 */ + { { 0x0fcb476fd2f6615dL,0x4b7f9b78d98da9a9L,0xe2fddf1cd2bdf107L, + 0x2bda30869b956f31L,0xf3cca2f7b596eadfL,0x91c09f8b355b2538L, + 0x46f3f6f3c6c846dbL,0x9bb9398e2a14642eL,0x00000000000001ffL }, + { 0x5118d4f5a17bd645L,0x57033eabdbd6d552L,0x007e86fc734d0957L, + 0x98ca065f5f53c435L,0x9949d9bffd27dd19L,0xddc4e3046952d1caL, + 0x84cab4fb81ac101cL,0x46d079f94a56b007L,0x0000000000000003L } }, + /* 215 */ + { { 0x95eb8e4fa6bfdeddL,0x993a285e7a74c6f9L,0x8bd5d4d13d09a252L, + 0xeaa10be619a5f767L,0xd3db083e0cebb340L,0xc633a78b1dbf7a83L, + 0x2664bc3ec30f23e1L,0x6630f8f107a08379L,0x00000000000001c9L }, + { 0xbbf4cb4bdef86a80L,0x1fa4ec783f8259abL,0xa4bf7604609532c8L, + 0x71bb7acc8b909e92L,0xca1d731717884160L,0x7f7f14beca1ab928L, + 0xbfea016e5f8455a5L,0x7b8c76b9bf21e899L,0x000000000000002dL } }, + /* 216 */ + { { 0x468605634b9f8e7dL,0x201176b763fc58a8L,0xe7a5da7e2feed68aL, + 0xcc67763e65183190L,0x7d7d0102e9377ad6L,0xccfc472077032321L, + 0x573ee031534bb505L,0x1bf1ef8c0f1a2769L,0x00000000000000f3L }, + { 0x635f5c4b0c935667L,0x74152c39060d2b8bL,0xeffaac2e37c3a574L, + 0xfd5fcc4c0b72e0cdL,0xb743f9b9f4f60247L,0x05c2e35479e16f33L, + 0xa2234c473074ef9cL,0x4092f279495aace3L,0x0000000000000124L } }, + /* 217 */ + { { 0x5bfd7851b30f9170L,0x715aa1e937fce5b1L,0xcffd55e0928437b9L, + 0x88acd259c32f1273L,0x5a145cf248be1e34L,0x3a3408607a5bc62bL, + 0x6296eb1518156f46L,0x397fad192774e1c3L,0x00000000000001e7L }, + { 0x362f99f49c8225b5L,0x33efce4946b77c4dL,0x451df5308541e91bL, + 0x0bd2d93438f3d693L,0x0b5de2d6e727b54eL,0x42d929c27622d940L, + 0x36ace72356f6a94bL,0x64a18cd5fccaf205L,0x0000000000000044L } }, + /* 218 */ + { { 0x8dbe0aababa95d63L,0x92780c617b4b346dL,0x6430f8630e0d8142L, + 0x875be02ab56ef04cL,0xc28feb95785e3633L,0xd5401795c12c93e4L, + 0x89ff51c1e36f82a3L,0x3c48c89510eeafd6L,0x000000000000016bL }, + { 0x79287ebad4f064beL,0x1a77d55554ebda99L,0x46745ef2623727eaL, + 0xa911f59189f366c6L,0x7e5435cdc59d6ebdL,0x3a84daea7524d213L, + 0xc7b1dd1c4395b38dL,0xca13e7041a823c49L,0x000000000000001cL 
} }, + /* 219 */ + { { 0x6399860c874d64b0L,0x3375b0921653ce0cL,0x16700000eaa11986L, + 0x62c67909621cd15dL,0xbe1d7dd677d70dcdL,0xeff0f270305bd4cdL, + 0x076ec621362f8f30L,0x812048167e445b78L,0x00000000000001d8L }, + { 0x81749a0e161f9758L,0xe60915fea3c4fce2L,0xf537ce41911dd8afL, + 0xfe36a8ac79a51a09L,0x67fb54b42ca5cf8eL,0x1bdcae07e49057f5L, + 0xb71ff0c5a4244b64L,0x4815a5364b606583L,0x0000000000000106L } }, + /* 220 */ + { { 0x78c69c3eef39cc39L,0x98304564fa6356d1L,0xbd3c3542412fb990L, + 0xa1d531d379dbb2a5L,0x4865f188e7e75e3dL,0x2dac4e220b0147b1L, + 0xf59e51ca33d29ab0L,0xc964f7fe37b074efL,0x00000000000000f1L }, + { 0x7080c0a60e301262L,0x9a4580605390a22dL,0xda677f9acc8a9029L, + 0xdfae905714c0f1c2L,0x3665ff166e66d9f7L,0xc866dd8c47846924L, + 0xc5afe98fc4cc307cL,0x60e3ba63e0bf50e4L,0x0000000000000039L } }, + /* 221 */ + { { 0x1a785136959ecdb3L,0x289af617f9e959beL,0x5145b2b8cde0dc88L, + 0xfe9070b07c079e15L,0xf77f04d350e22415L,0xb3ab7372358d6d42L, + 0x14fd41b9ba7b629aL,0x7b32d80e7400fd25L,0x0000000000000193L }, + { 0xe5d80d4d7147886fL,0xe08ced61576c81caL,0xe14e8692642717bbL, + 0x9dcdf198abb4bd21L,0x658be6466530308bL,0xfbf192dad99d19c7L, + 0x55a3d1b3304ab126L,0x943f4be5fa24de31L,0x000000000000000eL } }, + /* 222 */ + { { 0xc54240587fe9ea48L,0xaf24f82561b57486L,0x9d2c413c78719740L, + 0x27a9be7970eb874dL,0x43fef8e0b62ba3aaL,0x0a23f2862c1bf0acL, + 0x51c276f34af130e1L,0xf6cd1e9aae55cebfL,0x0000000000000185L }, + { 0x24defa7f40369093L,0x11f1d9d658581e0aL,0x9900bf33e512ed9eL, + 0xbf8a8459ed120896L,0x8324555e8b73c399L,0x54a305698f6f54feL, + 0x2a9d6da53c252355L,0xe6a6f9042a093b31L,0x000000000000016aL } }, + /* 223 */ + { { 0xb2e123c9152cdd35L,0xae6e43a886402ef1L,0x892bf0dfb9ce5bd5L, + 0xb4acb84a75804914L,0x8c7f55fff502eec2L,0x9c8a7b93aa33ef4eL, + 0x06b10357fd9d2001L,0x3e319ff00ba3bcebL,0x0000000000000027L }, + { 0x182c2f77abe360a3L,0x57ef5c84adfefca6L,0x9a4f0ca6650b6fccL, + 0x3f4f8e56aaf0b202L,0x5c8508a0a24ef156L,0xd8f62fd91ea45f13L, + 
0xf2c923a028036dbeL,0x4a9ca4c01a4d103bL,0x000000000000018aL } }, + /* 224 */ + { { 0x2a3fb7985448e339L,0xde8770cf18a39976L,0x1160574d7a69170cL, + 0x4bb05c592b6067acL,0xde0d2db0848138abL,0x149dab924909e794L, + 0x83a336b6790315f7L,0xcd9074d9a335a258L,0x000000000000013cL }, + { 0xe839c5e0ac1b784dL,0xab65c8c6ee527ae1L,0xd3c86146a1c88ec0L, + 0x2201f79046c1bf58L,0x71cec6273fda502aL,0xff3f88eb225b9065L, + 0x6c1f0c98c556dfcdL,0xaa3222aa484fa5ccL,0x00000000000000acL } }, + /* 225 */ + { { 0x17e74bc3c9b4dfd6L,0x25ba8053f8e76293L,0x0307dc059d8c3520L, + 0x1c9036ccb85a20b4L,0xf2c63f0a23871359L,0x1a99d9d8ca95fb4eL, + 0x3d7c4f399850c6c6L,0x162969c968299668L,0x0000000000000169L }, + { 0x7d13c267cb63ee53L,0x67b12e6175eac353L,0xb3369a11191abfcaL, + 0x5ad0649dee1af69fL,0x4d7a6f0011dc11e7L,0x80f030b8db9f9765L, + 0xa20001a3f0ab1332L,0xe17c98d239d8cc62L,0x0000000000000194L } }, + /* 226 */ + { { 0x720d80b41d8fe898L,0x8d7a28b732184534L,0xf1f3c38504f21740L, + 0x5d381cd5166aa6afL,0x9cde6084cc560e35L,0xcb041f0a5e61e2cdL, + 0x621116f5d9b4951aL,0x509e16d37ee2ac2cL,0x00000000000000c4L }, + { 0xb82a20c42c6fd79eL,0x95b7ee4e3af78b0eL,0x3d9b63c1bad819caL, + 0x10d674de98552569L,0x17de64b2f9c19d0fL,0xa03fabaf47c5e6a9L, + 0x858bc4ad2ce2db6fL,0x76c2380a1fc9d18eL,0x00000000000000c9L } }, + /* 227 */ + { { 0x91171ef8b064f114L,0x83cb15654f2f0f4cL,0x3052585457b262b7L, + 0x468c67010f34936cL,0xef26d2fe99a41fedL,0xf6da2267a7f7f6a9L, + 0x2563b8dba01bfc1bL,0x14b36c85c340ed40L,0x000000000000000eL }, + { 0x5e57e26425db67e6L,0x85df4e897f2e905fL,0x7832e514026c4268L, + 0x312be2623e875093L,0x856b5bd83c538691L,0x5b1cae5595734f9dL, + 0x5a07bfe2d5aa4861L,0x7a4c96f0ce8abb58L,0x00000000000001d0L } }, + /* 228 */ + { { 0x7bf54d05523aa2e9L,0xc8841e0ced3d0860L,0x5683f6e27f9bfb69L, + 0xdcb07f44162bdf85L,0x62d1783907b0dcc9L,0xa2cbb8ab657a536eL, + 0x98b9a0d27cf47d3cL,0xff154d685eea6370L,0x00000000000001f2L }, + { 0x568b768a56b232acL,0x4e8d6e363f2a52abL,0xbae87a168837fc60L, + 
0xebc58a83d10a7691L,0xad5e4af0f9455fbeL,0x1a20d6c37d654e2eL, + 0x8c40fcb9da7c8255L,0x6d7b3cd760d9b931L,0x00000000000000b2L } }, + /* 229 */ + { { 0x7b090c3ebb2eaf45L,0xed24d91c62ffb92fL,0xbf2a3ea4a736f23dL, + 0xb5b99ebd6ff0fde3L,0xbca2b55dca1102f5L,0xf6203cd807e032a8L, + 0x5410b448a8bf17a8L,0xb86660a7e1dc55b1L,0x0000000000000109L }, + { 0xb148b1da02a2fbd8L,0xfed85e8b3b22e8a5L,0x1378a0e48712b509L, + 0x68560148c6a3e516L,0x7100921c1633b503L,0x9392514325512711L, + 0x7b4931d207d31047L,0x623e722b8542e0bbL,0x00000000000000eaL } }, + /* 230 */ + { { 0x084823d324972688L,0x58b83c12003f5762L,0x194d66906d0d4528L, + 0x842195842c6f747eL,0xc8f8a2e90146d89aL,0x29ec1de77451bbc2L, + 0xf622b6b8f7f284faL,0x83f1dbe97b71e44fL,0x0000000000000060L }, + { 0x99649333999dd56bL,0x2cfac0ba97a47de9L,0x6660d8aebbe8fb20L, + 0x47c29dd8f61d7bcaL,0x6f5fb51d85adc14dL,0xe65ac7884f9fd41cL, + 0x1ce69dd4ff513e6cL,0x1ace591effe59d3eL,0x0000000000000023L } }, + /* 231 */ + { { 0x2e67a438a9fda771L,0x626f652c8663100eL,0xdfb19e48e133f23bL, + 0x599f88f2035d2d1fL,0x1723a1128d13e878L,0x890aa292fb51ce07L, + 0xe5f3a70ebbd9ba82L,0xdde82673374514b4L,0x0000000000000155L }, + { 0x08b2b77ed6f59a95L,0x93f853e302020420L,0x52252ac1ebac7797L, + 0x6ecdcb99b56b6676L,0x4abdb9f99722a500L,0x26210f3f04e2bad0L, + 0x0ca5a0ff3034dd4dL,0x333d8080dac0b80dL,0x0000000000000041L } }, + /* 232 */ + { { 0xe851070935a85a06L,0x4e166e7642ef1b44L,0x84a90b71a07b3a6dL, + 0xd6dd6c0030329e6aL,0x20c4ba653d555259L,0xee3b26af6f8ad05eL, + 0x20e3d5412ab4cccdL,0x79798934a9406424L,0x00000000000001bdL }, + { 0xf2a1d1848e0c7ff0L,0xbae85efc9543b340L,0xe96431aef51d318bL, + 0xe5d3ed4e75878fa6L,0x4d2a29dbc2895f52L,0x3af278771f11067cL, + 0x6ccde9649e7f4ee5L,0x35188da1a56d74daL,0x0000000000000192L } }, + /* 233 */ + { { 0xb083212003d310edL,0xd20ee8cc987b0311L,0x9e549d2684c558a8L, + 0x5e25f3ceb7167ec8L,0xacf114f44bf55bb5L,0x819edc77061c9017L, + 0x759a44e6deb343c0L,0x58df9f7e04c9b5edL,0x0000000000000078L }, + { 
0x4fa47ebb3bf13222L,0x1e451dcdea07da11L,0x1be9fac3c0d8242fL, + 0x93257d4d36eb871eL,0xf49e775abea3190dL,0x406d191f4ebe2b33L, + 0x67aac53c0c110096L,0x5215cf8bd381ac78L,0x00000000000001f4L } }, + /* 234 */ + { { 0x387e8a8efa493b79L,0xb20e270b4eb1c2acL,0x9f393fa09ff22320L, + 0x5ee1baaea91c393dL,0xdeda961a138a8d96L,0x69ab238c97bd50e4L, + 0xff68d48a2363c8e0L,0xaf8e00e5ce4c4c16L,0x0000000000000158L }, + { 0x6ccdcf06cfc509a1L,0x60f411efc26cc075L,0x6d0cdfd64d9c57f0L, + 0xa951485332e99cacL,0x58f9ab3d8b8e9510L,0xa7e98709b10dc3fdL, + 0x8390843d75ef3509L,0x28ccc9d05a9312c7L,0x00000000000001b6L } }, + /* 235 */ + { { 0x1d934f00e341463fL,0x14c8a6ce150da7a0L,0xdb4860fc4109553fL, + 0xc23bde5aa93f4a91L,0x9f47c7872cd58067L,0x1d3300548433dc80L, + 0x0c0be7f975a32a7dL,0x08b777d588c75da9L,0x000000000000012eL }, + { 0xdfc1281761a10d37L,0xed7b61815c50f5a5L,0x28af95db79477c60L, + 0xa0aa2b7733c5310bL,0x905faab853118267L,0xf40e98166b41959fL, + 0x9ccb425216b37784L,0x6835d77c69866accL,0x00000000000000c5L } }, + /* 236 */ + { { 0xe9d714cb2b450a66L,0x1318885c7dbfdc14L,0x655a8d85b466a0c0L, + 0x02a21e995bdfc1a6L,0x7a0d7c98e67792d1L,0x2a01bb57b550a797L, + 0x42c462335d74d337L,0x7be4e1c088dad495L,0x000000000000008bL }, + { 0x1873b03f95812273L,0x2e26ed32ee3f757fL,0x2c710eae6da6217aL, + 0x9b50b574261d9f4fL,0x43971fa9b7c1da2dL,0x22c4fb87c4a85de7L, + 0xf72c3451ec22137bL,0x1345668c77ba1926L,0x0000000000000173L } }, + /* 237 */ + { { 0x3e3e8c7a8a3ba183L,0x4e8cebbbfe389fa7L,0x8ea446870f9ba60fL, + 0x55176e35cb601a83L,0xf90bdc2612e52db4L,0x95f9e4598f712bf1L, + 0x9bd3200fbea054cdL,0x2cf19bf6dd5fd40bL,0x000000000000017aL }, + { 0x71cf6ca266736febL,0xbde86f49de7cfe2fL,0xc60abce8fc290563L, + 0xaae8a3ce726b6e4fL,0xd23824453f29235bL,0xa4b557f5650ffa5eL, + 0xa1453e54113ef744L,0x7c676a533e426dd2L,0x00000000000001b0L } }, + /* 238 */ + { { 0xf5e603f235d96872L,0xab1a23cc3fa5b8caL,0x5459871be988dc5fL, + 0xe32e8489d430c0bdL,0x7ec269e0764d9cc3L,0xf7238212f2c0c40dL, + 
0x2d946183887b83b4L,0x281fa6712f18a411L,0x0000000000000010L }, + { 0x8028048f64858b37L,0xe0e149af357de5d9L,0xb2218791619ebb18L, + 0x210200b39f2b0ba0L,0x5a87eae61039cbaeL,0x4efdcddb39579d1dL, + 0x1b388eaa2788515eL,0x1a552c3cc81878aaL,0x000000000000002cL } }, + /* 239 */ + { { 0x7ac7f5000ea723dcL,0x0a5f04f442b15231L,0x63d49445be885c86L, + 0x61f9993fff119702L,0xc3fba45cc4c58ceaL,0xe6d151e6b9cd6036L, + 0x75a3ab1557b923bbL,0x4ec07c52ceb2fd46L,0x0000000000000147L }, + { 0xc46a3d32ed88239dL,0x0d1b8ae6835ae694L,0xf4fde3259feeb2e7L, + 0x223bf71c43bc0bb5L,0x3cd220b78f62a705L,0x2224860e9fe799a5L, + 0xd855870324ab7f93L,0x8e0f7330b594958bL,0x000000000000010fL } }, + /* 240 */ + { { 0xaf35c7bb3c67d520L,0xd8f4958b23fca9ecL,0x0778f1948bbaa808L, + 0x418c30ce2135e8aeL,0xcdd8d9a9c888eff7L,0x72075df0f73144abL, + 0xb549c8954506a534L,0x4ef389795fbb7fc5L,0x000000000000011cL }, + { 0x3fe2c9ac43f5e698L,0xce77fcbce38a5e3aL,0x6d05c90e3089c2e1L, + 0x5a74f3ffac1d5801L,0xaeeda220381b9d2aL,0xd958b143f5f3960bL, + 0x65ffd0510db7abbeL,0x8e97e6807a05b718L,0x00000000000000ceL } }, + /* 241 */ + { { 0x2251e61b8ce86a83L,0x8604159fbf7e7160L,0xfc127dd748f03377L, + 0x87cb2c3745052242L,0xbd4950f4934ea09bL,0x5146c403c4679441L, + 0xe8ad471023ba416aL,0x89b81a60af638eb1L,0x00000000000001b3L }, + { 0xe699934ee8150c69L,0x74f75908e27c14bcL,0x5dc0a8916a0194ffL, + 0x38f49d321bd51b76L,0x6bc3305e18779630L,0xfe2f3fbffd3b4a68L, + 0x1409b377d7caf189L,0x029ea13b9b8f109bL,0x00000000000001b3L } }, + /* 242 */ + { { 0xef7938d225a2fd88L,0x890f2f7cceba0603L,0x4c3e1c80d7a6dff4L, + 0x00c78f362883f986L,0xed92b592998e5305L,0x018a8f1b325ddc73L, + 0x6dffd987d5d3708aL,0xdcd3554f0d1f28bbL,0x0000000000000059L }, + { 0x17c6e41d23a74e7dL,0x94b61ebe5db32df6L,0x3c2fffa79e7ffa0bL, + 0x473662b72ebb7a0dL,0xa86415ee01adf9c3L,0x1502c32654679264L, + 0x169113492fa09c57L,0x897f34aa24749086L,0x0000000000000195L } }, + /* 243 */ + { { 0x4845d359abadc253L,0xe054b92cc797c95eL,0x22a9b5bd9a218212L, + 
0x9bb80a5ea52b8827L,0xea38e78e2e61c676L,0xfb274b1a08b0f8b3L, + 0xb6aa42e3db9d854cL,0x8ba2252356012d73L,0x0000000000000163L }, + { 0x7cec0e6f75c8c576L,0xabb20e7ce4bc7dd2L,0x0958a0c869d80726L, + 0xa908c66a8a023eb7L,0xca9f50ea76110b15L,0x668c9994186f61a6L, + 0x9ddf22ed2a0a69d8L,0xbfee1897bbf8a10fL,0x00000000000001e0L } }, + /* 244 */ + { { 0x26d8681848319e4fL,0x6be6f6b55a586fa0L,0xbef5d88626713265L, + 0xac252ac598529cfaL,0xe7cc45f162b29cfbL,0xee050609a2a6358dL, + 0xf7cb9ca42940ac70L,0xfb44aaeca885b1f0L,0x00000000000001adL }, + { 0x66b7a936e798678eL,0xca01e10399540438L,0xf2491e37816860b7L, + 0xeeffd483b745d857L,0x5dbb3628a4705ed6L,0x57d68d49b2a5d0f7L, + 0xd1a8529a2389fee3L,0xdbbc25491a7fd686L,0x00000000000001adL } }, + /* 245 */ + { { 0xe10cba20969686a3L,0x308b1c55e3c053f5L,0x1712b13426f47102L, + 0x1f9165b149033038L,0x45b720172d01527bL,0x6fcf6647aa9a34e2L, + 0x51f54b94b0be35c8L,0xfccb22a55a15e382L,0x00000000000000e3L }, + { 0xaa71e4ec5b4dc0beL,0xbb136248db1cd5c4L,0xf36bff43046e1007L, + 0x5a6806d7da9c99a3L,0x9cbfc6ee8349bc50L,0x26871e73e13e0850L, + 0x5e6aa22767f448c1L,0xba77787c2da7baf9L,0x00000000000001b9L } }, + /* 246 */ + { { 0x1abe58eec5a73375L,0x175df69d7a8ac438L,0x2cf3150aceca835aL, + 0xb87b0609f507d30fL,0x9ae53a2bc60b0424L,0x4931e182410f90ecL, + 0x452c7d0fadd689bbL,0xab45349147631a8eL,0x0000000000000013L }, + { 0xaf2dd8568c84f3afL,0x829dc0921baae33eL,0x46542a858b96b070L, + 0x42260d40e8a82516L,0xb9e5edac5c35322bL,0xbca7956039eda0d2L, + 0x86bd07c6b962b90aL,0x2e22dac7b1ec5302L,0x000000000000010aL } }, + /* 247 */ + { { 0x665fc09d239d8f0aL,0x92b2e03cab8a1021L,0xe43697680173477bL, + 0xab38ed9f8e361604L,0x79b0091d9eb061beL,0xcd4226543e845670L, + 0xa0f77ec72fe1a2e0L,0x1d242162760a030fL,0x0000000000000093L }, + { 0xfa9f834cf8646bc3L,0x7df94a5240ae96f9L,0x901c3890379177d1L, + 0x9dfd0644ffeb66cdL,0x81aec2ec77b92465L,0x2df3b7f2cd981d4fL, + 0xc9bc3f69f377b093L,0xdaef34f3dd859d8bL,0x0000000000000125L } }, + /* 248 */ + { { 
0xac08451ba2c123bcL,0xd1e83a680818fa54L,0x56dd570298957b8aL, + 0xcc7f2e34f0f12f16L,0x1f6a9c330a9fa14dL,0xefc9a2bbb2fe782cL, + 0xd319c697709f54ddL,0x0b8238cbd6460a53L,0x00000000000001dcL }, + { 0xf649290144dfb6f6L,0x270d7cb46e401d26L,0x1a70a40e48537ad8L, + 0x84d661b570d8dbd9L,0xca27223af170d58bL,0xeeb4cf146344e1d2L, + 0x2255fc95ab9de1faL,0xcd6e110adbdc5ea7L,0x00000000000001f8L } }, + /* 249 */ + { { 0x2a57c6b978b8a0a7L,0x24b4aeb6e833edeaL,0x9e4617c14bd13fe7L, + 0xc4186888fc2e8ee4L,0x8d398a49fb147eefL,0xe9f191f12e662cfbL, + 0x61872289958ba2ecL,0xbd6d0f1b00b8d50dL,0x000000000000002dL }, + { 0x895cfdfe24c93cc9L,0x29ed7780b9e718e7L,0x01c8ba5838baf7ebL, + 0x0225387e4ddcbf69L,0x64b250bfa180d6bbL,0x6d68e548c947c7c2L, + 0x82a7b6329923f3cdL,0xb8f036132d103cd2L,0x000000000000000fL } }, + /* 250 */ + { { 0x8198b3f08cd9d494L,0x9b2065b994f4f9f3L,0x7664a2203c738fa9L, + 0x199f4c14d8d229cbL,0xddad75c4c51c54b3L,0x9a32ce0bd213a332L, + 0xf3a21085888c7b2fL,0x6defa3625b1ff20aL,0x00000000000000d1L }, + { 0x44e0054819a296ebL,0x1d94ff15d1a91313L,0xd7dead2bfeaa454fL, + 0xae65a8034d40bd7fL,0x604f147e1801a4afL,0x983048f9a5e0de77L, + 0xa3b19ca5ff572ca0L,0xa237dba71821d117L,0x00000000000000e0L } }, + /* 251 */ + { { 0xedbabf8491630ee8L,0xde6589c205eb5301L,0x9f7d2b2da051f47bL, + 0xaeaa9f96212bbe81L,0xdced3d5e94292124L,0x691f5b89f4435e5bL, + 0x19604c339411f66fL,0xb7fc09ca4356f0daL,0x00000000000000e6L }, + { 0x1294e413f74f811cL,0x1a42d831df8d8ddbL,0x27f57217963418c9L, + 0x5fde521888ebcdecL,0xfdd5e06eea305bc9L,0xed1e6088ac668b61L, + 0x333af016eb811861L,0x5ecb192d15ddcebcL,0x00000000000001c9L } }, + /* 252 */ + { { 0x927b37a3e0bde442L,0xe0543fe866f7a73eL,0xd30d9d208ed10c2eL, + 0xaf79c341a6617a32L,0xe7367870d1d5cf8bL,0x02d0dce9e3abcf8bL, + 0xfe23d2dd772b5e7bL,0x29fceea01ffc70c5L,0x000000000000010bL }, + { 0x31bcae4d62d803ffL,0x93ee913fdbc306a9L,0xaf1de7abd8c10662L, + 0xd485782ae7a6d658L,0x9126592e102f4e06L,0x91a3127f136fafe6L, + 0x46b9344088371213L,0x53bb4380a31e1634L,0x00000000000000baL } }, + /* 
253 */ + { { 0x62e517fcca5636b0L,0x4296e0216aba15c7L,0x5aa8fd7c212e7b2dL, + 0x9517ce6d5717ad84L,0xe762b85b98b2f357L,0x42f996b5df59b07cL, + 0xf3732abbf37ef6f0L,0xa5d145ea4542b489L,0x000000000000015dL }, + { 0x1e77c55eaa7f6e3fL,0x3f4d99a7aa4a05bcL,0xa56d7d7745828227L, + 0xdb0895fb77b748fbL,0x1c484cce0629f5d1L,0xf5b1c90a359803fbL, + 0x43ac4f291720b8d0L,0x8c10bfe872ac13f2L,0x00000000000000e9L } }, + /* 254 */ + { { 0x9d1c4785c06c4fd6L,0xbf4b9025d25c2b9dL,0x04135eb1d4982f24L, + 0x3ab3edc2ba4fef2bL,0x55a5239f98de07abL,0xd5fc49ab096f4b7dL, + 0xc50a29603844c815L,0xdb1148d015676b2bL,0x0000000000000047L }, + { 0xc49f9cc510f3bad9L,0x490888fc022901d4L,0x917a55ebc47b44dfL, + 0x20b2ebc6f39f2b68L,0x0c58e3af04e9962aL,0x52ab7c1b573dd5b7L, + 0x2b54add6a329f76cL,0x59dad1eb82f4ca3bL,0x0000000000000108L } }, + /* 255 */ + { { 0x662c4128a182d1adL,0x7751796e20916c45L,0xa7704272ba681647L, + 0xfac8b0fab92c85c1L,0x207ab2dfaefb2e07L,0xc73530a07861b32dL, + 0x63dbed6588aed145L,0x547bcdca0a53a49dL,0x00000000000000bdL }, + { 0xa7c1382f87056b51L,0xc3d91edb130f9912L,0xf7c7de46d3805b42L, + 0x456101ebfd31a995L,0x1efd22b4cd3fb8aaL,0xfe391df79eb17bceL, + 0xb4d4c0c6616c0c32L,0x19f023be711beef4L,0x0000000000000112L } }, +}; + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+static int sp_521_ecc_mulmod_base_9(sp_point_521* r, const sp_digit* k,
+        int map, int ct, void* heap)
+{
+    /* Fixed-base multiply: delegate to the stripe implementation using the
+     * pre-generated table of multiples of the P521 base point. */
+    return sp_521_ecc_mulmod_stripe_9(r, &p521_base, p521_table,
+        k, map, ct, heap);
+}
+
+#endif /* WC_NO_CACHE_RESISTANT */
+#else
+/* The index into pre-computation table to use.
+ * Maps a recoded window value y (0..129) onto one of 65 table entries:
+ * values 0..64 map directly, values above 64 fold back down (the entry for
+ * 128 - y is used and the y-ordinate is negated - see recode_neg_9_7), so
+ * only 65 pre-computed points are needed per window. */
+static const uint8_t recode_index_9_7[130] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+    48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate.
+ * 1 for the folded window values 64..127, 0 otherwise. */
+static const uint8_t recode_neg_9_7[130] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * The scalar is split into 75 windows of 7 bits each (75 * 7 >= 521 bits).
+ * A window with its top bit effectively set is replaced by a negative
+ * value plus a carry of one into the next window, which is what allows the
+ * folded 65-entry table above to cover all 128 raw window values.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_521_ecc_recode_7_9(const sp_digit* k, ecc_recode_521* v)
+{
+    int i;
+    int j;          /* Index of the 64-bit word of k being consumed. */
+    uint8_t y;      /* Raw 7-bit window value (plus incoming carry). */
+    int carry = 0;  /* Carry produced by folding/negating a window. */
+    int o;          /* Bit offset already consumed in the current word. */
+    sp_digit n;     /* Remaining (right-shifted) bits of current word. */
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<75; i++) {
+        y = (int8_t)n;
+        if (o + 7 < 64) {
+            /* Window lies entirely within the current word. */
+            y &= 0x7f;
+            n >>= 7;
+            o += 7;
+        }
+        else if (o + 7 == 64) {
+            /* Window ends exactly on the word boundary: the remaining 7
+             * bits of n are the window. Advance to the next word. */
+            n >>= 7;
+            if (++j < 9)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 9) {
+            /* Window straddles two words: low bits come from n, the high
+             * bits from the next word. New offset is o - 57 (= 7-(64-o)). */
+            n = k[j];
+            y |= (uint8_t)((n << (64 - o)) & 0x7f);
+            o -= 57;
+            n >>= o;
+        }
+
+        y += (uint8_t)carry;
+        v[i].i = recode_index_9_7[y];
+        v[i].neg = recode_neg_9_7[y];
+        /* Propagate one into the next window when this window overflowed
+         * 7 bits or was folded to a negated table entry. */
+        carry = (y >> 7) + v[i].neg;
+    }
+}
+
+#ifndef WC_NO_CACHE_RESISTANT
+/* Touch each possible entry that could be being copied.
+ *
+ * Cache-attack resistant table lookup: every entry table[1..64] is read
+ * and combined under a mask, so the memory access pattern is independent
+ * of the secret index.
+ *
+ * r Point to copy into.
+ * table Table - start of the entries to access
+ * idx Index of entry to retrieve.
+ */
+static void sp_521_get_entry_65_9(sp_point_521* r,
+    const sp_table_entry_521* table, int idx)
+{
+    int i;
+    sp_digit mask;
+    sp_digit x0 = 0;
+    sp_digit x1 = 0;
+    sp_digit x2 = 0;
+    sp_digit x3 = 0;
+    sp_digit x4 = 0;
+    sp_digit x5 = 0;
+    sp_digit x6 = 0;
+    sp_digit x7 = 0;
+    sp_digit x8 = 0;
+    sp_digit y0 = 0;
+    sp_digit y1 = 0;
+    sp_digit y2 = 0;
+    sp_digit y3 = 0;
+    sp_digit y4 = 0;
+    sp_digit y5 = 0;
+    sp_digit y6 = 0;
+    sp_digit y7 = 0;
+    sp_digit y8 = 0;
+
+    for (i = 1; i < 65; i++) {
+        /* mask is all-ones only for the requested index, zero otherwise. */
+        mask = 0 - (i == idx);
+        x0 |= mask & table[i].x[0];
+        x1 |= mask & table[i].x[1];
+        x2 |= mask & table[i].x[2];
+        x3 |= mask & table[i].x[3];
+        x4 |= mask & table[i].x[4];
+        x5 |= mask & table[i].x[5];
+        x6 |= mask & table[i].x[6];
+        x7 |= mask & table[i].x[7];
+        x8 |= mask & table[i].x[8];
+        y0 |= mask & table[i].y[0];
+        y1 |= mask & table[i].y[1];
+        y2 |= mask & table[i].y[2];
+        y3 |= mask & table[i].y[3];
+        y4 |= mask & table[i].y[4];
+        y5 |= mask & table[i].y[5];
+        y6 |= mask & table[i].y[6];
+        y7 |= mask & table[i].y[7];
+        y8 |= mask & table[i].y[8];
+    }
+
+    r->x[0] = x0;
+    r->x[1] = x1;
+    r->x[2] = x2;
+    r->x[3] = x3;
+    r->x[4] = x4;
+    r->x[5] = x5;
+    r->x[6] = x6;
+    r->x[7] = x7;
+    
r->x[8] = x8; + r->y[0] = y0; + r->y[1] = y1; + r->y[2] = y2; + r->y[3] = y3; + r->y[4] = y4; + r->y[5] = y5; + r->y[6] = y6; + r->y[7] = y7; + r->y[8] = y8; +} +#endif /* !WC_NO_CACHE_RESISTANT */ +static const sp_table_entry_521 p521_table[4875] = { + /* 0 << 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 0 */ + { { 0xf97e7e31c2e5bd66L,0x3348b3c1856a429bL,0xfe1dc127a2ffa8deL, + 0xa14b5e77efe75928L,0xf828af606b4d3dbaL,0x9c648139053fb521L, + 0x9e3ecb662395b442L,0x858e06b70404e9cdL,0x00000000000000c6L }, + { 0x88be94769fd16650L,0x353c7086a272c240L,0xc550b9013fad0761L, + 0x97ee72995ef42640L,0x17afbd17273e662cL,0x98f54449579b4468L, + 0x5c8a5fb42c7d1bd9L,0x39296a789a3bc004L,0x0000000000000118L } }, + /* 2 << 0 */ + { { 0xf43e3933ba6d783dL,0xcf2fa364d60fd967L,0xaa104a3a35c5af41L, + 0xb3b204da6ef55507L,0x2c6e5505d769be97L,0x7403279b1ccc0635L, + 0x2fcb288148c28274L,0x3c219024277e7e68L,0x0000000000000043L }, + { 0x1be356d661f41b02L,0xeafcbe95edc0f4f7L,0x93937fa99a3248f4L, + 0xb3e377de9f251f6bL,0xab21a29906c42dbbL,0xc6b5107c4da97740L, + 0xa7f3eceeeed3f0b5L,0xbb8cc7f86db26700L,0x00000000000000f4L } }, + /* 3 << 0 */ + { { 0xa5919d2ede37ad7dL,0xaeb490862c32ea05L,0x1da6bd16b59fe21bL, + 0xad3f164a3a483205L,0xe5ad7a112d7a8dd1L,0xb52a6e5b123d9ab9L, + 0xd91d6a64b5959479L,0x3d352443de29195dL,0x00000000000001a7L }, + { 0x5f588ca1ee86c0e5L,0xf105c9bc93a59042L,0x2d5aced1dec3c70cL, + 0x2e2dd4cf8dc575b0L,0xd2f8ab1fa355ceecL,0xf1557fa82a9d0317L, + 0x979f86c6cab814f2L,0x9b03b97dfa62ddd9L,0x000000000000013eL } }, + /* 4 << 0 */ + { { 0xfbc87412871902f3L,0xa1d5025b08e5a5e2L,0xe8b88e9f078af066L, + 0x8659e24afe3d0750L,0x06c5d55541d3ceacL,0xc61c891c5ff39afcL, + 0x54b483487c9070cdL,0xb5df64ae2ac204c3L,0x0000000000000035L }, + { 0xe21f47fc346e4d0dL,0xbb7faef04699d1d9L,0x5224f750a95b85eeL, + 0x79f283e54ba38540L,0x5ae63fe2f19907f2L,0x5521aef6e6e32e1bL, + 
0x73e0178eb0b4abb6L,0x096f84261279d2b6L,0x0000000000000082L } }, + /* 5 << 0 */ + { { 0xd5ab5096ec8f3078L,0x29d7e1e6d8931738L,0x7112feaf137e79a3L, + 0x383c0c6d5e301423L,0xcf03dab8f177ace4L,0x7a596efdb53f0d24L, + 0x3dbc3391c04eb0bfL,0x2bf3c52927a432c7L,0x0000000000000065L }, + { 0x173cc3e8deb090cbL,0xd1f007257354f7f8L,0x311540211cf5ff79L, + 0xbb6897c9072cf374L,0xedd817c9a0347087L,0x1cd8fe8e872e0051L, + 0x8a2b73114a811291L,0xe6ef1bdd6601d6ecL,0x000000000000015bL } }, + /* 6 << 0 */ + { { 0x23731bedf79206b9L,0x2f66e95657f380aeL,0xe0727a239531be8cL, + 0x5fbcca16153f7394L,0x981506ade4ab0152L,0x623d30977fd71cf3L, + 0x2eff34f94480d195L,0x4569d6cdb5921953L,0x00000000000001eeL }, + { 0x1eaccd7858d44f17L,0x3dc7b8b55ca0dadeL,0xf96c984de274f220L, + 0xcab72d0e56648c9dL,0x7240a926201a8a96L,0x2aabbb73da5a808eL, + 0xe2dd270546e3b111L,0x0255ad0cc64f586aL,0x00000000000001deL } }, + /* 7 << 0 */ + { { 0x01cead882816ecd4L,0x6f953f50fdc2619aL,0xc9a6df30dce3bbc4L, + 0x8c308d0abfc698d8L,0xf018d2c2f7114c5dL,0x5f22e0e8f5483228L, + 0xeeb65fda0b073a0cL,0xd5d1d99d5b7f6346L,0x0000000000000056L }, + { 0x5c6b8bc90525251bL,0x9e76712a5ddefc7bL,0x9523a34591ce1a5fL, + 0x6bd0f293cdec9e2bL,0x71dbd98a26cbde55L,0xb5c582d02824f0ddL, + 0xd1d8317a39d68478L,0x2d1b7d9baaa2a110L,0x000000000000003dL } }, + /* 8 << 0 */ + { { 0x86f9ea54aa78ce68L,0xb56289b5a6f40405L,0x8b598c1bc8d79e1aL, + 0x5bfea5b8579f49f0L,0x8b8a3b05f826298fL,0xd4e29d8a9b003e0aL, + 0xa8348396b010e25bL,0x22c40fb6301f7262L,0x0000000000000008L }, + { 0x8ad642f11f17801cL,0x9f3ba94009471353L,0xf0ba0df065c57869L, + 0x89e9c0aa5911b4bfL,0x5083de610677a8f1L,0x44f8ede9e2c0715bL, + 0x48fdab6e78853b9aL,0x31911d5542fc4820L,0x0000000000000163L } }, + /* 9 << 0 */ + { { 0x1f45627967cbe207L,0x4f50babd85cd2866L,0xf3c556df725a318fL, + 0x7429e1396134da35L,0x2c4ab145b8c6b665L,0xed34541b98874699L, + 0xa2f5bf157156d488L,0x5389e359e1e21826L,0x0000000000000158L }, + { 0x3aa0ea86b9ad2a4eL,0x736c2ae928880f34L,0x0ff56ecf4abfd87dL, + 
0x0d69e5756057ac84L,0xc825ba263ddb446eL,0x3088a654ee1cebb6L, + 0x0b55557a27ae938eL,0x2e618c9a8aedf39fL,0x000000000000002aL } }, + /* 10 << 0 */ + { { 0x87ff09a04f2f3320L,0x7c2e411f1a8e819aL,0x9daa4da9842093f3L, + 0xa2c7c178fcc26329L,0x4a9246b11ada8910L,0x901d879ac09ac7c3L, + 0xfcfe7bb6721ec4cdL,0xeb8f22bda61f281dL,0x0000000000000190L }, + { 0x2954bc98135ec759L,0xf3689639739faa17L,0x536f6163dc57ebefL, + 0xbf5349d44d9864bbL,0xa97fd78a62ef62d2L,0xc2eeb2144251b20bL, + 0xbaeab3b0ca2ba760L,0x5d96b8491614ba9dL,0x00000000000001ebL } }, + /* 11 << 0 */ + { { 0xecc0e02dda0cdb9aL,0x015c024fa4c9a902L,0xd19b1aebe3191085L, + 0xf3dbc5332663da1bL,0x43ef2c54f2991652L,0xed5dc7ed7c178495L, + 0x6f1a39573b4315cfL,0x75841259fdedff54L,0x000000000000008aL }, + { 0x58874f92ce48c808L,0xdcac80e3f4819b5dL,0x3892331914a95336L, + 0x1bc8a90e8b42a4abL,0xed2e95d4e0b9b82bL,0x3add566210bd0493L, + 0x9d0ca877054fb229L,0xfb303fcbba212984L,0x0000000000000096L } }, + /* 12 << 0 */ + { { 0x7be69571bf842d8cL,0x3774c75c530928b1L,0x477fee9a60e93801L, + 0x44e90b7c3fb81b31L,0x107cf7a5967713a6L,0x81874157958457b6L, + 0xe4fae9749c7fde1eL,0xd9dcec93f8221c5dL,0x00000000000001c0L }, + { 0x79e7b1a3281b17f0L,0x884ba72224f5ae6cL,0xcc10a6f951b9b630L, + 0xd6d18843d86fcdb6L,0x5e404abf6a17c097L,0x63fe65ab71494da4L, + 0x3ce1d103a682ca47L,0x48b5946a4927c0feL,0x0000000000000140L } }, + /* 13 << 0 */ + { { 0x1887848d32fbcda7L,0x4bec3b00ab38eff8L,0x3550a5e79ab88ee9L, + 0x32c45908e03c996aL,0x4eedd2beaf5b8661L,0x93f736cde1b4c238L, + 0xd7865d2b4924861aL,0x3e98f984c396ad9cL,0x000000000000007eL }, + { 0x291a01fb022a71c9L,0x6199eaaf9117e9f7L,0x26dfdd351cbfbbc3L, + 0xc1bd5d5838bc763fL,0x9c7a67ae5c1e212aL,0xced50a386d5421c6L, + 0x1a1926daa3ed5a08L,0xee58eb6d781feda9L,0x0000000000000108L } }, + /* 14 << 0 */ + { { 0x2c9e682dd3432d74L,0x6767f6b812efbf5dL,0x79df3e4b7bc744aaL, + 0x74fc06c8b897222dL,0xd4fb0babe0b31999L,0x958b401494116a2fL, + 0xe1b8ccfaaf84ded1L,0x5bc7dc551b1b65a9L,0x0000000000000187L }, + { 
0x41669f852700d54aL,0x5b690f53a87c84beL,0x11e89bf1d133dc0dL, + 0xd07781b1b4f3584cL,0x0847ce9b86d7ed62L,0x8470122b8e51826aL, + 0xd66290bbabb4bdfbL,0xa4923575dacb5bd2L,0x000000000000005cL } }, + /* 15 << 0 */ + { { 0xe9afe337bcb8db55L,0x9b8d96981e3f92bdL,0x7875bd1c8fc0331dL, + 0xb91cce27dbd00ffeL,0xd697b532df128e11L,0xb8fbcc30b40a0852L, + 0x41558fc546d4300fL,0x6ad89abcb92465f0L,0x000000000000006bL }, + { 0x56343480a1475465L,0x46fd90cc446abdd9L,0x2148e2232c96c992L, + 0x7e9062c899470a80L,0x4b62106997485ed5L,0xdf0496a9bad20cbaL, + 0x7ce64d2333edbf63L,0x68da271571391d6aL,0x00000000000001b4L } }, + /* 16 << 0 */ + { { 0x3a6c508f81a8402fL,0x0dbf5c8007dcdc53L,0xe5965da7985630dfL, + 0x943ff18861ca00a0L,0xd632f8d20b188a52L,0x2a87f66192fd3f3dL, + 0xda97da26ac743b0bL,0x7d10d8a89c8ad05dL,0x00000000000001d1L }, + { 0x52e272f89bc73abeL,0x9e3004d28d186333L,0x0b3075ac9d998852L, + 0xc182e1398db547aaL,0xc7ca30c94b6b82c1L,0x1335e4c3b800d50eL, + 0xc714b18d3d3e6972L,0x37343c582d77001fL,0x000000000000007aL } }, + /* 17 << 0 */ + { { 0x76f817a853110ae0L,0xf8c3042af0d1a410L,0xdf4e799b5681380fL, + 0x760a69e674fe0287L,0xd66524f269250858L,0x99ee9e269fa2b3b4L, + 0xa0b874645923906aL,0x0ddb707f130eda13L,0x00000000000001b0L }, + { 0x35b9cb7c70e64647L,0xe6905594c2b755f5L,0xd2f6757f16adf420L, + 0xf9da564ef6dd0bf0L,0x8d68ac2b22a1323dL,0xb799534cf69910a9L, + 0xc111d4e4aeddd106L,0x683f1d7db16576dbL,0x0000000000000085L } }, + /* 18 << 0 */ + { { 0xfdf44d01dd99c61dL,0x84ce572f5a15c423L,0x2f0702c3096dec08L, + 0x6edbb8525bffe522L,0xbd0cfe367de4666dL,0xf7e86dbc7bf99a7aL, + 0xcb2edcc5b63d1281L,0x33425e72a12779eaL,0x00000000000001bcL }, + { 0x35d841e2d8f9a86aL,0xa79fe15b19f84fd1L,0x915d1b4b45209b9dL, + 0x234aeb201f7a9133L,0x2c8e4a00ac84c044L,0x4fb07ce0d196f255L, + 0xe3e74d33d9e63d02L,0x06e999885b63535dL,0x000000000000010dL } }, + /* 19 << 0 */ + { { 0x78ff0b2418d6a19bL,0xfecf431e725bbde4L,0x9232557d7a45970dL, + 0xfa3b309636266967L,0xfff0acdb3790e7f1L,0x45b77e0755df547eL, + 
0xc0f948c2d5a1a072L,0x8dcce486419c3487L,0x0000000000000099L }, + { 0xa9091a695bfd0575L,0xf5a4d89ea9fbfe44L,0xb0ec39991631c377L, + 0x73ad963ff2eb8cf9L,0xcc50eee365457727L,0x67d28aee2b7bcf4aL, + 0xc3942497535b245dL,0xd5da0626a021ed5cL,0x0000000000000137L } }, + /* 20 << 0 */ + { { 0xe2603bfb11b0344fL,0xad792e43b74e16d8L,0x11dd48b9bdb3cd5cL, + 0x09506339cb1049cbL,0x2e968fbed342b517L,0xbd767c2ab0d93fb1L, + 0x3deeae39cc6f8cc2L,0xdd7f1b889598a465L,0x000000000000018bL }, + { 0xdd3287fed0990f94L,0x37ccb22eee9cfc94L,0xedfaec1030da8ef8L, + 0x4da3bdddc6d4eae7L,0x546c550952d31120L,0x9ee7f52b410b9444L, + 0x6908296959bf0af8L,0xaadbe63f68ca5b6bL,0x00000000000000c5L } }, + /* 21 << 0 */ + { { 0x42ef399693c8c9edL,0x37ac920393a46d2dL,0xd9497eaed827d75bL, + 0x46257eae4d62a309L,0x19523e759c467fa9L,0x268bb98c2ed15e98L, + 0x3cc8550859ed3b10L,0xcbb2c11a742bd2fbL,0x00000000000001a1L }, + { 0xe60bc43c9cba4df5L,0x7c9b0f17649ccb61L,0xbeb43a372c63eec5L, + 0xdf741a53da483295L,0x180a296f6bafa7f7L,0xe83c0059c5193e6cL, + 0x2c12da7c5e40ce62L,0x209d7d4f8eeb3d48L,0x000000000000011aL } }, + /* 22 << 0 */ + { { 0x4e38650bb6f5a561L,0x904b3a12cae0af93L,0xadfffcca8b28ca1fL, + 0x74cbfe1018ffcd40L,0xbde3122096c4089fL,0xa7facca319d4b7aaL, + 0xf1f6e66dc1eae621L,0x00ce6190c6d36ca5L,0x0000000000000097L }, + { 0x64319ad5a535acadL,0x5c43427efea6210dL,0x2f22ec132ed14e5cL, + 0xf40ac9cdadfe028aL,0x95e0fd06c5b46824L,0xbe4a8d24e415f9c1L, + 0x61c78fad62100104L,0x78827e13569356b0L,0x00000000000001fbL } }, + /* 23 << 0 */ + { { 0xa1c6a5ece2af535cL,0x07e1f1ac26ae5806L,0xe9e3f8e9a5ec53e2L, + 0x1f6f83fc9af5b0afL,0x490d234d0cdd4510L,0xee7a39ba785fb282L, + 0xdb1cacec5f547b8eL,0x0b275d7290159376L,0x00000000000001aeL }, + { 0xd815c3536fa0d000L,0x213b4450a8d23856L,0x3c27b27bb07dd0c2L, + 0x10843361ee97fcf4L,0xb431647844c2dc0eL,0x7d759ff890d05832L, + 0x68a2858fc068471cL,0xc97a825e53853806L,0x00000000000000f2L } }, + /* 24 << 0 */ + { { 0x44a18c28b69bcef2L,0xc710838fa0a4198fL,0x6fdefaa4a86f6559L, + 
0x93ad7a9b2d0e4f66L,0x91391a8862c2cd7fL,0x0e49a0e6ec407366L, + 0xb447ce5d4930becfL,0x8e98d1008e9d525dL,0x0000000000000096L }, + { 0x8a0b537a0b446729L,0x0575ce65dd7dd9fdL,0x8385e09fa134f793L, + 0xa48f492cf3d07536L,0xb4cd2b81880aa962L,0xa555cd2f64a2b032L, + 0xe44476658a352aa7L,0xa7e96f293f75117cL,0x00000000000001e1L } }, + /* 25 << 0 */ + { { 0x3f8c2460bf70ace0L,0xe41e0eb1c25d3fb1L,0x56e87e2aa648ff27L, + 0xa91856917c36ee4dL,0x1c772c8c5499994aL,0x0073102651b107b1L, + 0xa35874a6f5dff9d2L,0xe84c6d5c5a9a1834L,0x0000000000000154L }, + { 0x4325bce404c78230L,0xeede2a54672e6b6dL,0xd1e2370a6a5972f5L, + 0xdee3543572fbc1a0L,0xf66c2888151666a6L,0x15a923eb0022a0c7L, + 0xe22a28f80bb60d3fL,0x0fdce9171910473aL,0x00000000000000cdL } }, + /* 26 << 0 */ + { { 0x60d9e9a7e8c685c6L,0x68dd29c10c73e410L,0x007102d5dd038ed7L, + 0xf686bfc27593b717L,0x09bc2cbefa11d6aeL,0x31a02407e9a85766L, + 0xd9857bb0c04dcd13L,0x61bd55cc8b533222L,0x000000000000017bL }, + { 0xe74aacc5b9b4844bL,0x5b8bff6c058ff572L,0xd74b5b2ac347d133L, + 0xb4ddb8a4191a4648L,0xcbdc724709e1f426L,0xc8b4ac79f5a29ba2L, + 0xa0196c0682be4633L,0x4da05ea4b9de3bbeL,0x00000000000001c3L } }, + /* 27 << 0 */ + { { 0x357643017002d68bL,0xb215604492ec4568L,0x51353aeda0d3163cL, + 0x80b2123da719d483L,0x33f35187e135854dL,0x8739535d0e4f862cL, + 0x62a4d4eb889e646fL,0x373edf8218f9b6a7L,0x0000000000000160L }, + { 0xf3e6aeca5d90b740L,0x463ffe709d45acb3L,0x13b874f4a8bb572eL, + 0x1efa491ed92ebc54L,0x4a56f78e1a1b2201L,0x9fd193c5cf52c3bbL, + 0xe5828401ac06a3faL,0x597050014dcfe1c5L,0x00000000000000f1L } }, + /* 28 << 0 */ + { { 0xe160709ed9985b0cL,0x291adc92433ee2deL,0xb93220fe2ad07f0bL, + 0x949da6c395738534L,0x116d7f87032133e2L,0x988bb828d895ed5dL, + 0xec5c29642dd68d1aL,0x95646effed8e5f6fL,0x0000000000000191L }, + { 0xc31c832ba74eed38L,0x886f4ded1741425aL,0x2e495cf2adcdd6b5L, + 0xb345b828c8b09a2bL,0x5443c5d9216ae388L,0x606d3f34656a52efL, + 0xce28b4724f834f49L,0xacd53b00c9f29f3eL,0x0000000000000177L } }, + /* 29 << 0 */ + { { 
0x2c3927618eda25dcL,0xf33d8595d51f6d96L,0x4003ab8e847ffb9eL, + 0x5ca9bc06876d7291L,0x28bef38f7664a130L,0xf9f45131e86265ecL, + 0xb3c1fbfcb65a085fL,0xc644d6c94b68287dL,0x0000000000000174L }, + { 0x187bbbc4821a0c30L,0x0679927c26ebbfbdL,0x50c2732d706d303fL, + 0xbe0e21952ce0d90bL,0xb5cf5eb795ad34b7L,0x0233ef8fcb6441fcL, + 0x05acc95b41b7b782L,0xf3a7c2f87f419e68L,0x000000000000011aL } }, + /* 30 << 0 */ + { { 0x6117fab72ec4b081L,0xf7d9b10f1b0af595L,0x352756cc169eac6bL, + 0x35573f35d14754c6L,0xea52dbcbae2a3e44L,0x3cf015e1f913a610L, + 0x0b6913261570a6b3L,0x87d70d11074b36bbL,0x00000000000000d0L }, + { 0xb64d6af6579dbed3L,0x069edd46ff01cfc7L,0xf279efee5d860be9L, + 0x54d5b99d1d7d4adaL,0xe47015660d29f153L,0xbcde1cd94afbdbd3L, + 0x811d7b95a6ac2dceL,0xd07f8c3cafdc1636L,0x00000000000000c4L } }, + /* 31 << 0 */ + { { 0x2257d0e0c16a8803L,0x0dcfb5e488e24812L,0xfc14c1ac09cd6b22L, + 0xd65543a904c9d429L,0x7260a83ca5e7726cL,0xb574589657c2a661L, + 0x006a58cdb7307b7dL,0xe9920cf30f0c6615L,0x00000000000000d8L }, + { 0xca4677c739792d19L,0xaa1bd97c7b54318aL,0x139a868cae4cc263L, + 0xf76b8c3244d14790L,0x0aefb72cbed1aa30L,0x8b5406328f10c806L, + 0xdf09c13a214a30ecL,0xb023b5454a663987L,0x0000000000000127L } }, + /* 32 << 0 */ + { { 0x47915ab876e56a9eL,0x9cf7475cd6c5c782L,0x67d69bd6ba5d1f94L, + 0xea04a35dc73596ecL,0xdba7ecefb9e5ebd6L,0x3e3e16654e068038L, + 0x43707af94ba15b34L,0xba3596d20a3a4694L,0x0000000000000166L }, + { 0x95ca9a9914a156aaL,0xcd98c965ab961636L,0xb74de1c7ea0f36dfL, + 0xe204cdb818cdd6b9L,0xc1df01c65c2dd77dL,0xff7bd8bf5ff01967L, + 0x1635e75ff3e08268L,0x7b0ff4d1940ab811L,0x0000000000000047L } }, + /* 33 << 0 */ + { { 0x5ea2e1fc649f308dL,0xa5ec59186b2ed12dL,0xe9a519a57aa53ac2L, + 0xabdbea7e2b77ef1aL,0xf381421a74236df0L,0x52086d482be92613L, + 0x8c76eb4e3c76f58eL,0x4195f0978fb969e6L,0x0000000000000028L }, + { 0x6be95a3dd3e11c4dL,0x88effd5c228b58f3L,0x00bd7216c16deb3aL, + 0xe7656ecbf3d138bfL,0x9e016769614ac5f2L,0x24d513abe063c663L, + 
0x7b7a3bc869056d3aL,0x43eb08c656dc636bL,0x000000000000007eL } }, + /* 34 << 0 */ + { { 0xdfeab253d54e4d87L,0x55ac0435b8317a8dL,0xc1f1f8993db2173bL, + 0xa866a98b697d432cL,0x0e7d84e588581cdeL,0xeb8db26b14dbf8bfL, + 0x48c58962f5b7c130L,0xf66451d0843fb594L,0x0000000000000122L }, + { 0xda9dba82fd9a06dcL,0x6baa5223db7571bcL,0x13184d9be8ea9140L, + 0x5097293e4f87a6a4L,0xd357d7c04965b051L,0xa373f5aee0a89a48L, + 0xc1f12141aee1538aL,0xd137b942249932f3L,0x00000000000001beL } }, + /* 35 << 0 */ + { { 0xdc1039c9ccd7d718L,0xa92ff614b5dcc8d8L,0xee4a618608f6b2ceL, + 0xb4806c84fda74023L,0xc4a780b56f97392bL,0xa267a642b593e0bfL, + 0x398b62069e0ba392L,0xc300757549630a78L,0x00000000000000ddL }, + { 0x3eb1d3ef241e07f4L,0xdeba4db422640a4dL,0x5c212522ee69e797L, + 0x9ab1178bd2c70142L,0x462796591a31db50L,0xa510936cb5d85bcfL, + 0x1b83431f0c30dbbfL,0xbd2d07f6ac7fc5ccL,0x00000000000000c1L } }, + /* 36 << 0 */ + { { 0xae56cbf798bd6ef0L,0x5323bdc821e585edL,0xe724672821b585e0L, + 0xf385da9cc679bb96L,0xe81a91dac8198aa9L,0x2a25afe471eb8ad9L, + 0x514d5e71894c417bL,0x81c98659bd21e082L,0x00000000000001adL }, + { 0xac7712f8455c99efL,0xd2c4ba1759a98b43L,0x75db963d8698121dL, + 0x1af50b0bb140013aL,0x61285bc6dc8f01afL,0xca7f475441227b97L, + 0x61aeda5705b3c0f8L,0x7026e6cf8e6fc4e7L,0x0000000000000121L } }, + /* 37 << 0 */ + { { 0xf1ac4d59b557a36fL,0x8c64e76ae4780273L,0x0e58e26ee0980df2L, + 0xbd445763a2885604L,0x29ed0ae6af9d5749L,0xc35c5d56c1d59e42L, + 0x39e798352ded5867L,0x4f64a6c21832b671L,0x0000000000000094L }, + { 0x312bf98394fb2a03L,0x31a4be4056988296L,0xb85c564995a057c3L, + 0xdcbdbc2471c83f81L,0x26317da6b7991305L,0xfe4e6ff21865f859L, + 0x0c4b9624f26cb192L,0xb7ee1b02028ab741L,0x0000000000000150L } }, + /* 38 << 0 */ + { { 0x853028d72a5f4a8bL,0x46f120cb19ae965dL,0x12ac2ecc9d4e6883L, + 0x18413823ed8d4ba1L,0x690815bcef8f3127L,0x9078207b610c2c77L, + 0x922563d71ef6e744L,0xaeb1530c7d932a18L,0x0000000000000195L }, + { 0xc87e1acd797bd130L,0xa29e51d193b701e0L,0xe3e3c9a002b7985bL, + 
0x51f9ba596291ef1eL,0x06c5bdf7d0e15448L,0x8611d8bd49b6090aL, + 0xc0ec0975884bb030L,0xc42d3cc093242000L,0x00000000000000e0L } }, + /* 39 << 0 */ + { { 0x0b704119ee33b77cL,0x5b4fa1d48083af67L,0xac0bf434e5fa3179L, + 0x256b0cc58626a1e2L,0x38d9fd62359c6ea7L,0x9e9661a49b5b9072L, + 0x5264126356a49902L,0xa0b8f411fbad6075L,0x0000000000000124L }, + { 0xd8dcdc61228b61a6L,0xf212e74b698e40abL,0xa3caf2415944e762L, + 0x18dc59feb96825aaL,0xdc0b1240c690db48L,0x68937baa8796154cL, + 0x602a9a406bbd399cL,0x29616edc7335dce3L,0x0000000000000010L } }, + /* 40 << 0 */ + { { 0xaf456a57824c9d93L,0x067fffb5ac53dbcfL,0xb513eb4d0d01e18fL, + 0xf85d832dc9767b2bL,0xc4ad6090a169b596L,0x9f67883679409022L, + 0x3a0c67f41886ad96L,0xfdef97e46cd54455L,0x000000000000002eL }, + { 0xb591f36738ae8e5cL,0x2c101fec75d81e09L,0x4f4ef79e44aa97b2L, + 0x46a5357b21dc2240L,0x5bb3141d0df516feL,0x23ffc6106dff96f7L, + 0xc4c2a4687e65a6a8L,0x8e2e3fdd283592d6L,0x00000000000000d8L } }, + /* 41 << 0 */ + { { 0x8e00ce952624381eL,0xddfda1a522cc2af6L,0x4c08c3ddc8297bfaL, + 0x16b931eaf495ccceL,0x85b85f23864a60c8L,0x52a523e4b28998a6L, + 0x63ebfd9d28830825L,0xa140ed79e85e24a7L,0x0000000000000175L }, + { 0xebeb760ec1028ecfL,0xf75dd758e7f3a3ecL,0x052a6e551fa28ebdL, + 0xb39e0e11ecf327daL,0x23de821b22c82111L,0xab59e580e9ee5632L, + 0x36f21343ca399be7L,0x9696d71855e2d4edL,0x00000000000001b0L } }, + /* 42 << 0 */ + { { 0xb7902612a9491855L,0x420bebfa3767dcd4L,0xea20d4e83c72305eL, + 0x2868fa8595d78243L,0xf5633364979e0bdaL,0x874f472a2a5de6f2L, + 0x7a0a1c0aa146f918L,0x960dcd7bd7263ed3L,0x0000000000000161L }, + { 0x38a50e352ad521efL,0x414fdd6ad7d07223L,0x324cba6a2c4cce08L, + 0xf728d67c4e39de86L,0x2da602f01cc43b7eL,0xa00e6a6b58d1eec4L, + 0x5c6ae0d1031bc121L,0xc1cfdd433815668aL,0x000000000000016aL } }, + /* 43 << 0 */ + { { 0x713e3083224f497eL,0x51f0b62fdee07d20L,0xa3a74e7a9b3d85caL, + 0xfc66ad7aed7d37ddL,0x8ef0f94438fea396L,0x70678aa2ec1419afL, + 0xd55022d90544d8a0L,0x0148a165ec58e4feL,0x000000000000018cL }, + { 
0x895829067683adbfL,0x94edb92f76b688d2L,0x932d602b547ce17aL, + 0x67fd6098879b1cf1L,0x7a3037819f1a0becL,0x2677e91db1d144cdL, + 0xebf7a83c7aa3bf24L,0xbbaa1f099e78869aL,0x000000000000010aL } }, + /* 44 << 0 */ + { { 0x78f253cdcde738d2L,0xb25aa02cb1d33a53L,0xe77fffe912d2b367L, + 0x761e3e73e6509ec7L,0x7c3cfcbef88c88cfL,0xe1cecbe9fd57f650L, + 0x5f0c48a04cca16cfL,0xdb51df11f0b29b48L,0x000000000000013fL }, + { 0x42343b5cbafa661aL,0xa123424f99a20b52L,0x57733ea23d7284a8L, + 0xa4460186aa2271d3L,0x9ebedc0452b7c2f2L,0x7aac908b39acf39cL, + 0x4e4f65b3d0338f66L,0x4c8c184a7228a703L,0x00000000000001d1L } }, + /* 45 << 0 */ + { { 0xa32dec600fc95c1aL,0xc2954607b66b70c4L,0xdc8ea1ace5703f0fL, + 0x79189b7df8a1f2d9L,0x7b2df5e3f1972867L,0x7a58f7f28c147cc0L, + 0x79596b8fff2bc020L,0x69d186aca09e53b8L,0x0000000000000049L }, + { 0x9498a7cd000903a9L,0x3463f5d90b37564fL,0xe6ed9e879be328e1L, + 0xb8d5b2802eadae85L,0x35584a1bbcdb40a9L,0x15862a4232708841L, + 0x4d6ac21f3149fc7bL,0x1ec9b50cd8d3847dL,0x00000000000001e8L } }, + /* 46 << 0 */ + { { 0x64f2ae8093e738a5L,0xf672b6d2bd57d22fL,0x07c09231d2ec1549L, + 0xa6ddb5626540f81dL,0x09f753aaf1c4d248L,0x1a9cfb9a69a8fc57L, + 0x1998e2d14f33a8c5L,0xdb162d8284a91002L,0x00000000000000bbL }, + { 0x42c142895961259aL,0xb7ec80b09fa59f0eL,0x7d3af42dc8cd7e17L, + 0x2a1831112f54132dL,0x2ece424b786f6c37L,0xb81c9fbd9674149dL, + 0x1fe9c7cabc10b00aL,0x86f0d5921216efe0L,0x000000000000013bL } }, + /* 47 << 0 */ + { { 0x2fda63c9abd59d11L,0x0a56a130d1ecbb67L,0x9cee75a3a9c4dd66L, + 0x381864a0b9e922acL,0x97233106c71dd0b2L,0xe4e4fe2714033ff8L, + 0x9cf5083ecc35882cL,0xe31f8907048afd5fL,0x00000000000000afL }, + { 0xd3065d2f1d90e1d6L,0x11ca41599f10673bL,0xebb760f478d825e7L, + 0x9276080b904982a6L,0xecea7f3276f06497L,0x605f079349209f88L, + 0xb4290cbbb54ff69aL,0xf95dc8657275fe9dL,0x0000000000000012L } }, + /* 48 << 0 */ + { { 0x87027d612ad879adL,0x641752bd214aea2cL,0xdedc21b10ff08ee6L, + 0x2da0980f3f6fbfacL,0x4b3cc8a5803e72f4L,0xae7d6d6672102c33L, + 
0x8158e5caec62c8ccL,0x35de3b21557462acL,0x00000000000001fbL }, + { 0xb1958193d256d46bL,0x596f24804841cae7L,0xb7c142e0fb0e3c28L, + 0xe6214d64a1d97e37L,0x871294f66cc2e1baL,0x76f27fc94d09e2ccL, + 0xa855b9ec4bb3f5c1L,0x34a5ac1587992dedL,0x000000000000018dL } }, + /* 49 << 0 */ + { { 0x0c8cb45049efc0adL,0x7258dab1ac4c04caL,0x91d8c84e2e345fa8L, + 0x6bc2a2df52f62842L,0xf581b8b111dea9ebL,0x9d45c347ae499839L, + 0xcba40a63bbaba0d3L,0xcb98fa3c0b8c1d57L,0x00000000000000a5L }, + { 0x0b9bf46a2c8884b4L,0x4b963fbfb0b88b1dL,0x0c10f2ceebb72bb4L, + 0x5dd9775543d575c2L,0xb072c39d9da8bf83L,0x749ee467877e5b7eL, + 0x1409b01bf72e151bL,0xdc95654090d77b97L,0x0000000000000015L } }, + /* 50 << 0 */ + { { 0x5386773de84d4766L,0x56b6ddab593aebf3L,0x250a859beaced3f3L, + 0x84804ab26d84fe02L,0xd8384cf1be365bc6L,0x5312b292e34a0cfbL, + 0x0baf0dbb1aa3f9caL,0x76204957627b7a12L,0x0000000000000171L }, + { 0x3f16ac6addc84f8fL,0x36298e21939f704fL,0xad93d2e5b9d0e753L, + 0xaf8c1321bb6c8d5fL,0x5def1db4859b504cL,0xd3032d3a9450b50cL, + 0x83e07ea5f688b03dL,0x713d5c2ffb0e1119L,0x00000000000001acL } }, + /* 51 << 0 */ + { { 0xa8d916fffbcc9504L,0xfba689ef1d7be2ddL,0x1321ae1b1054cbc0L, + 0xc2edfe6af07390dfL,0x3ba316cbf9beea26L,0xfdf9f5bde34fe9baL, + 0x025d93b68145f3ccL,0x395ba51e278415a2L,0x0000000000000168L }, + { 0x8eb9c45edd5c087bL,0x0600aabed3a89e18L,0x849c36096e3aeba5L, + 0x97bc2b68badb600cL,0x24e5b174adeb9b3cL,0x52dd878f21c480ceL, + 0x39d9531942d3f579L,0xcc10f3ca041a2456L,0x0000000000000071L } }, + /* 52 << 0 */ + { { 0x86177e368b29f6f3L,0x955114f2afcab7a1L,0xf00784311b6e5993L, + 0xb9259354dcee33a4L,0x58aef6be769f0693L,0xa4dae03e0c1b4b4aL, + 0xc39a55b518749158L,0x32e875ce65d1e6d4L,0x000000000000001fL }, + { 0x9301992d481b07f8L,0xbd6b45f989f292daL,0x112577769cd77ef2L, + 0x1e73a061b8098a91L,0xa0c126d318c39791L,0x2cbf93bbae0f4ad4L, + 0x895ea8b22ee1a6fcL,0x3c8e76fb7849180bL,0x000000000000005aL } }, + /* 53 << 0 */ + { { 0x6285684cccb69906L,0x08153da9c9880816L,0xd3af581fba4fe12bL, + 
0x4e210e637209a78eL,0x27e82f6f948100c4L,0xf8688be916895fb7L, + 0x7b3d0ffd310306dfL,0xf6e249195ee693f7L,0x0000000000000143L }, + { 0xfd6ddaddd40c7861L,0x040a3dfbc4abee6aL,0x0f6a7a9de3b4cf8cL, + 0x4fdf64f503cf3bb3L,0x35437e8053d10cb1L,0x7dc73fdde42c2169L, + 0xc5611a0257510987L,0x3e8fcc9618eb2a74L,0x0000000000000105L } }, + /* 54 << 0 */ + { { 0xf5ae2048a0ce100cL,0xf707cab63228fdfcL,0xdac4214e4dbf052bL, + 0x393cf55982451c97L,0xc6d866fdc6c98505L,0xcb09728861ea138fL, + 0x414997e998e0e3e1L,0x8e65f2a40e6899c6L,0x0000000000000193L }, + { 0xfa47b98bb8d1b411L,0x0f607d677492659bL,0xc972b99ee0d7fcbcL, + 0xc6069fa8017db8beL,0x222e0ed99fea4bd0L,0x943485186e2074e8L, + 0x9c2868a692e89dbfL,0xd556dd0941ce794eL,0x0000000000000163L } }, + /* 55 << 0 */ + { { 0x84ad7ccebd470f5eL,0x7fe9ab4bda96ac4eL,0xbe17486b1e0b549aL, + 0x8cca93917cad27e6L,0x6472d4028e8da1a1L,0x7cb03e9fd9d0a79dL, + 0xb551e4155c6daca9L,0x52fc4b6d310ce7c4L,0x000000000000004bL }, + { 0x7a196cd230a36ef2L,0xfa03a23006a096eaL,0xd69609e345b53586L, + 0x10aa85895c5a084cL,0x00fb114a7dbae155L,0x619f44311a16a0b6L, + 0x385ea7907a1a7b2eL,0x85e54fe81461ae21L,0x0000000000000035L } }, + /* 56 << 0 */ + { { 0xb3f739d5ae2df998L,0x1df04ea2e96c1840L,0x5730e56267acca24L, + 0x01378e253fef51d2L,0x9cc29b3435be97f6L,0x638c12398e4b8b01L, + 0x7e9056e95b0cbef7L,0xd775038527f32bafL,0x000000000000003cL }, + { 0xc10d9706f4083cbaL,0xeba60fb5d8b4723dL,0xc431863c1a8cbf15L, + 0xb52a737cbaa0ffa2L,0x7c1e5b54987f4a28L,0x2b0430e114f67a39L, + 0x70e1a3561fc3e123L,0x9bf7cf42095c5b17L,0x0000000000000078L } }, + /* 57 << 0 */ + { { 0x3475330a4e9a13e2L,0x8372c8fff95c8450L,0xf66fd5bc64b8a520L, + 0x4af300bbde9118b2L,0xef3c6d77a521b9f8L,0x5a8defe72e6dbf85L, + 0x4463da75efb25ffaL,0x53c3e7fcf3c902e0L,0x00000000000000dcL }, + { 0x33ad7f7fd9c4248fL,0xa9493190c62a5532L,0x4a3f82056a929f73L, + 0x3482530d0d3bd86aL,0xb62e7eb390dff3eaL,0xb13d0dd2caf9d989L, + 0x68073ee6c4d4f8f7L,0xe88fccd4fdbd992bL,0x0000000000000074L } }, + /* 58 << 0 */ + { { 
0x0f583210d81ab70dL,0x5479d3ccb04647adL,0x342a03e174a3106aL, + 0xb96a728ad56c650dL,0x9871b54c6764d3fdL,0xe61385c801f5277cL, + 0xd53b6ad821b5026bL,0xc46df1c09851ab92L,0x000000000000008bL }, + { 0xfa08b29292c1205fL,0xcdec69190bf94d71L,0x3b8ac3a45ca1b766L, + 0xee784bec8fca81a7L,0x52b53e4fe686134fL,0x765073f1f9a731aaL, + 0x7a4c58d40bfb0e0bL,0x48ff92784e9946a2L,0x00000000000001d9L } }, + /* 59 << 0 */ + { { 0x63be8a26eb16686bL,0xcba8524ec3472088L,0x1a90342d64373a8aL, + 0x055693c9b1344c69L,0xaae5c2934c222920L,0xc7223c5d98901999L, + 0x063c60342df29eadL,0xaeb454ad31876773L,0x0000000000000035L }, + { 0xcf4aa03c5381fa2eL,0xfd82c38cfaffe51aL,0xc9fd5fddd64ffec3L, + 0xb8cf8c44bc83d0b8L,0xe22f2ef3cb6efd45L,0x15a48db3660903f4L, + 0xdb0b0ca0aff1ba7fL,0x7ecbec147e7e43b4L,0x0000000000000018L } }, + /* 60 << 0 */ + { { 0x9fd17ffbfca45bdcL,0x051c3f6061c25274L,0x7fb6bf92d04b42fcL, + 0x92681661adc786c5L,0xf79836e37bbe1183L,0x36f33caef52de9fdL, + 0xccb82b562ed32ff2L,0x8a508e954435eff6L,0x0000000000000003L }, + { 0xf8464ad3910e9522L,0xd748f73799f3e5d4L,0x4b1ccdfa683285b4L, + 0x5edb1e7a605abfd0L,0x997df1a529560a5aL,0x9b0ecf8e37baa0e9L, + 0xdb7a7399b931e716L,0xbd87d8b1e7bd4d2aL,0x000000000000008dL } }, + /* 61 << 0 */ + { { 0xcb53d8cfcfc376a1L,0x952bad1671543c41L,0xa5e7fce59a7b32d7L, + 0x7193c11a77dc0b39L,0x4cd724b1be8b1717L,0x8b6cd17d5b660d1aL, + 0x92ca7c732a1c7d8fL,0x544c4a011407425cL,0x0000000000000115L }, + { 0xb8eff8cd4a17604bL,0x56f22ee9c3fe5e24L,0xa834ff603afa032bL, + 0x03f78d54b7f553a8L,0xec097a3aed58c6a0L,0x79af485fef422cbfL, + 0x07996d2a399c872cL,0x3df9c6c0ac6485b3L,0x0000000000000115L } }, + /* 62 << 0 */ + { { 0x9627fe31114dc85eL,0xf0a8c971ef2a1696L,0x213f3fe32bfd4e19L, + 0xa38bf246fec516fdL,0x943ae59581dd5885L,0x378b0b7bc177aeafL, + 0x20f6ed49f27734e2L,0x05dada534c5c8ae0L,0x00000000000001f6L }, + { 0x19f1002224aa57b5L,0xdda2d7e072a578a3L,0xcb156b6e7671eb7aL, + 0x6284a831f6b296eaL,0x50e685ccd164a220L,0xf2b0afd6935f4aaaL, + 
0xa2252c7a4be38a37L,0xf96d82698b78d305L,0x000000000000014fL } }, + /* 63 << 0 */ + { { 0x0f5beb6fce8888e5L,0x75af7d025770ac8dL,0xc63996847586265eL, + 0x3ae99661308e125eL,0xe7f54fe4d51b0845L,0x1ca21b5affa0ddc1L, + 0xe82f799acb0a6e8eL,0x32753b64640c457fL,0x00000000000001c1L }, + { 0xe9d15ad2a03dba15L,0xaf657b5bf664a2acL,0x13f78f82f0071283L, + 0x05d3dbdcec1896bcL,0x595e8c353aa22380L,0x1a9e093286414006L, + 0x6d81ac89b205d796L,0x47e184197a053817L,0x0000000000000153L } }, + /* 64 << 0 */ + { { 0xb53dc38ce320f742L,0xf5aab8029a04fd41L,0xa7c02c1603e7001dL, + 0x1bebd3c5efda3773L,0xb614b9ccf8872d27L,0x7ebdbba81860bbacL, + 0x4f3d7c0392fec2a6L,0x9aa17dce1112ecd1L,0x00000000000001abL }, + { 0x5af17058a5f8df5bL,0xc1c1a5dba0653dd4L,0xf4a7b9e3af958043L, + 0xac165ba1508d4b10L,0x0c63ba369e87ab8bL,0x9bb1cc8b2fb1dc1dL, + 0xaa10217c57b9586dL,0x8d70eb848e920573L,0x0000000000000074L } }, + /* 0 << 7 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 7 */ + { { 0x76b026e5736fa9e0L,0x2e27e720e23413b5L,0x51df70816a5f814fL, + 0x4e49496876271d7dL,0xd3124af75ce07d97L,0x3c9698bbc2d6dd2cL, + 0x631c070153023fa8L,0xd41259f27e381f83L,0x000000000000017cL }, + { 0xbc309412c9dfb131L,0x0e3434093472a8e4L,0x4e69c32464ace613L, + 0xa49ff4b0dc99ac8cL,0x1b4fabc3b1af9ca9L,0xdce01ed3e7df7966L, + 0x5330ffdefbc7a682L,0x6e5df506a8283dceL,0x000000000000006fL } }, + /* 2 << 7 */ + { { 0xb38e8b9069be00acL,0xe46b33cb95e50b81L,0x9c9152c41f5d4723L, + 0x7b8f4f6e8c2a465aL,0xb85fa9e8f269cb33L,0x03c7389ecb5b47a9L, + 0x458afc780f312e99L,0xf40a0f02b5ac20c9L,0x00000000000001d5L }, + { 0x8fac1245486bde7fL,0x1430cd504d4a7784L,0x58d2b99dbd29a67bL, + 0x965cf65738e3bcebL,0x062dc3aa57354f17L,0x40ed2a81e5b45635L, + 0x981bba93103b69c5L,0x3503aa73413c42fcL,0x000000000000008eL } }, + /* 3 << 7 */ + { { 0x56870723706a1717L,0xdee44d64cbecf7ecL,0x2120fc43ae06d588L, + 0x78086900695451dfL,0x596711a00b6e13b5L,0xb8e2b20d76c11762L, + 
0xc1f30fd6f71431ccL,0xc8cfecf557133623L,0x00000000000001fdL }, + { 0xb73cd41565e5811aL,0x688b3301550441c7L,0x9cb5a657d50fd76fL, + 0x724f5ed8b588f34aL,0x48eeb907f67f5927L,0xf71b353897aa02c5L, + 0x0239e51026db84caL,0x61ab0a5710fce663L,0x000000000000014dL } }, + /* 4 << 7 */ + { { 0xa48e3c3a9bd47f93L,0x4e1485e3636c0850L,0xd8264f7ff0ea4f3cL, + 0x0b9047fc044463c8L,0xb03311c98360fdf8L,0x007c068dd0db7f48L, + 0x5e82c7e7d429af87L,0x5a81286a6d64d318L,0x000000000000000eL }, + { 0xe1d75c7eef97408eL,0x7c54b93f58a5e905L,0x0d8eee9a08c607d5L, + 0x2aeac3b0e2dec0b5L,0x96db5ced9fc99e2cL,0x27764de20a51c0acL, + 0x7999170f1b15ad22L,0xdcfc7192ced85f26L,0x0000000000000137L } }, + /* 5 << 7 */ + { { 0x136100c21f0ab49bL,0xf2763c0d8fd640c8L,0x32576a60a10ab0fbL, + 0x45efed229b4b2fc8L,0xd50aafff5ed021c9L,0xb8246dec184ac0c1L, + 0xe2519fc446002b5cL,0x440b77c67d492f06L,0x00000000000000a7L }, + { 0x24fdbf51777310deL,0x1b57c3c267a603d1L,0x4daef7f2d4f9e51fL, + 0x2326062ffc847f9dL,0xbf6644f62f1175f9L,0x170083e32d9bbb9dL, + 0x7f64b63896089981L,0xb949d76a9accba57L,0x0000000000000165L } }, + /* 6 << 7 */ + { { 0x49eb72c3f62a4087L,0xec8734325b00b2bfL,0xbb040159643c12a4L, + 0x2ef972f4eac95679L,0xc7e358bd2bbea697L,0x924a6105db23f1b4L, + 0xd587c6bebdd61fe2L,0xf348d2f7aafc22ceL,0x0000000000000058L }, + { 0x7bf1deec930f49aaL,0xfd3224c43cec939aL,0xac7eced36d94e0bfL, + 0x5ef5a81cb2c7b10fL,0xa6e9c773f85c84d4L,0xe139b7ab2f547b59L, + 0x436bffd61fd522dbL,0x32ee281e0b189b69L,0x0000000000000073L } }, + /* 7 << 7 */ + { { 0xb97c4ac3ef64ffeeL,0xfafc66c9e0513ff3L,0xcb15b02551d57f8bL, + 0x5a94fca3a0f73040L,0x1df583781ecdef64L,0xdc88683a2e876043L, + 0x897308dfa547bc00L,0xf9d9cd0346cddd21L,0x00000000000001c8L }, + { 0xa05d56b56854aee5L,0x948513d41337a153L,0x96806e7b1461e392L, + 0x74d411bcd4f89728L,0xe39ed8aa54173ab8L,0x82acdb9ae17fabd4L, + 0xb0b61eeaa5fd0c85L,0x4ab388ee0a933577L,0x00000000000000fcL } }, + /* 8 << 7 */ + { { 0x8a4d6b672986daf8L,0xff67c35d3f0247b1L,0x3e45f2e50f48c2d6L, + 
0xba941fa43663525bL,0xfe5a47af97fbdb12L,0x617a16b820ae9103L, + 0xfd6eee9fc57a09d0L,0x4bfffdbead5fad3bL,0x000000000000018cL }, + { 0x7c2cd6fce8839b07L,0xee10cab923601344L,0xceed18245f0b8f0bL, + 0x6ea30adb68064245L,0x66306dadbf29705fL,0x4f9d0131be7db901L, + 0x1d972c0a531d35ddL,0x0ff3da3c08edcc09L,0x00000000000000c9L } }, + /* 9 << 7 */ + { { 0x69344fe39d6186adL,0xe85c33e690fc2732L,0xde9bf41a7d60ee51L, + 0x9d90a9105a70cda6L,0xff3b1a64879cdbf2L,0xcd89aba7dd686017L, + 0x9a891f87db1f6b9cL,0x6fa1abfc9b556eb2L,0x00000000000001f4L }, + { 0x9aeb96367a4d8d49L,0x7f968cba1b2933faL,0x0e04c95323abd39dL, + 0x4a4b0b0549509543L,0x95fb7e645e7d71ecL,0x9568b11718f7086eL, + 0xeb8a07ef0390b0c3L,0x7b8624d13cccb970L,0x000000000000014cL } }, + /* 10 << 7 */ + { { 0xb491062b1c6cac89L,0xf20c58359adecc2eL,0x488d7b916f1d7401L, + 0xff98b90e94761e60L,0xe63993ca857557a9L,0xc4ff9951acdeadfcL, + 0xed5a53c7fd8dccadL,0x42e02c14f101c24aL,0x000000000000012bL }, + { 0xbb9fe6f6bd3fe0efL,0x72f26ffcc1393d0bL,0x2e831b196c25ed19L, + 0xdb50a0d7ebcc6b99L,0x202ac3c8245c479cL,0x97b284d7436117a0L, + 0x13bd5e82fd694e11L,0x452b5a94022193a6L,0x00000000000001c8L } }, + /* 11 << 7 */ + { { 0x3d8c19764ac0927eL,0xcbc9ba6613389954L,0x727a3bb8defdd149L, + 0x4b2d64d24a3e6bb4L,0x4f5d185658ac36e0L,0x6d5e79b9f5acaac1L, + 0x426e868b4b602032L,0x1efd37220c41942eL,0x000000000000004dL }, + { 0xdb8ef6c4ebb168f0L,0x0b4d7fd2de0e0478L,0x9c77f534097fdb12L, + 0xadaaa54bde3ea9bdL,0x3bdbe93f45f9f191L,0x45cf748118bd8d2eL, + 0x422ed3d85e63012bL,0xd8c1b94ed20c2076L,0x00000000000000cbL } }, + /* 12 << 7 */ + { { 0x65c7757b84367fecL,0x7ff69a4b6aa22fb7L,0x197f4c3659b0a103L, + 0x897241b712350397L,0xefb60dd033bf2644L,0x3ed30a65944af798L, + 0xd67a5cbd5bd2d770L,0xe9d97ea6f3caa508L,0x000000000000006eL }, + { 0x94633f8662fc83a9L,0x6d68a34729a5137eL,0x88f2b4777bf9924dL, + 0x7bd70fcc00b533fbL,0x4dd3b695b7b8167fL,0x85dfca1cd0162981L, + 0x2f96a81f43e5022cL,0x2596b2ef14872ddfL,0x0000000000000131L } }, + /* 13 << 7 */ + { { 
0xf21f278ab1cca421L,0x69452c4fc9c944e1L,0xab6f0267b5e1995bL, + 0x6871f6bf8e371695L,0xa175e5747c561f3eL,0xa0a9a596fc97048eL, + 0xdeb9a72472d14a89L,0x6068a848beb4e020L,0x00000000000001acL }, + { 0xd187185e41589675L,0x9ab0c85fe7f8b9ceL,0xd866c2c25a6ef98bL, + 0x4fe3a42c88dd5569L,0x28e16344591c5ce6L,0x2ab62c9dac764ab7L, + 0x924cfb8417e00b2aL,0xb039e64f1804d328L,0x0000000000000039L } }, + /* 14 << 7 */ + { { 0xd59595d1d131f694L,0xce8a83752a7fdc21L,0x9e62e8b175869627L, + 0x336d78456e01b014L,0xb53acb366ea5a8edL,0x4451f2e57dd8ac89L, + 0x4c8cf19c2bc29533L,0xfbc29ef47c10e63fL,0x000000000000017cL }, + { 0x9bc2ec347a9357faL,0xde489a133b8a04afL,0xb3521cda230520b7L, + 0xd7699690af6deb0dL,0xace1af3a14655a0fL,0xf628697fcd89b94bL, + 0x40a71ea33c079ab6L,0xe7b89297fd251809L,0x00000000000001dbL } }, + /* 15 << 7 */ + { { 0x49041d4784ed9ff2L,0x199606dfb57c5ed5L,0x3b8805182e409600L, + 0x854b5c40f0548f8aL,0xc64b1a67810d2870L,0x7232957f7095700eL, + 0x84f1a370b4758cf0L,0x5c267b9e70f58198L,0x0000000000000132L }, + { 0x0339062438472718L,0x07734d06b8b84729L,0x1b78f06c26126b2bL, + 0xd92e040a77c87891L,0x2c4ba3849588cf7cL,0x09b19b0e230e1a23L, + 0x634174526980a2b2L,0xd34ae179bbe3f6e1L,0x0000000000000023L } }, + /* 16 << 7 */ + { { 0xd00804524043c91dL,0xe3362d2a01d9a152L,0x3d143282404673daL, + 0x1d1c9b70a881d770L,0x4254627ed4a785a8L,0xf801104f2d8b8d09L, + 0x306f6f22c6ceca92L,0xf0529d16130418eaL,0x000000000000011bL }, + { 0xfa1e73fd6d8f9fa8L,0xb402cacceaa0860bL,0xcb08d922ae11ae49L, + 0x79908f8f76387f9bL,0x126b0ebab8b14819L,0x156a5ea63bb333a0L, + 0x0ba63111fe7f302fL,0x225ee26a4cd4f889L,0x00000000000001d0L } }, + /* 17 << 7 */ + { { 0xfcc2afcec54b8cc3L,0x2c4032b2bc4ec6f5L,0x9e6c26d66c62aaf9L, + 0x7b93cc5ae1eb8fc8L,0xce80f55d20a95dbdL,0x841e250cb822f54bL, + 0x9b84e5a91965018dL,0x36e53589b969312cL,0x0000000000000002L }, + { 0x3c511a989eaeb028L,0x2efdcdb20cd5d5c6L,0x3226c4a858b01f2eL, + 0xdc5dfb8949ca64c9L,0x9afbb61ab0678018L,0x4114e7856d147d7eL, + 
0xef683c889ff02c8dL,0xe143f8689b0655ecL,0x00000000000001aeL } }, + /* 18 << 7 */ + { { 0x4300169a60b57216L,0x0143c544441bc961L,0xa682548d7fef586cL, + 0xf064f99ee993fe8dL,0x169a43dbdeba875bL,0x935ce8bf6d3cd064L, + 0xe986a7cb7c9700ddL,0x1447fbea51c0f10fL,0x0000000000000117L }, + { 0xf10cf577e3adf2d8L,0xfd750660bf433d46L,0x4944b8842395ce48L, + 0x63c24a4dc1725875L,0xb12376d0f3392f50L,0x9fb12a1a97588187L, + 0x3c03cc124bb92450L,0x26f27fe0b4e9c733L,0x00000000000000c8L } }, + /* 19 << 7 */ + { { 0x67ab4438cc4b3b6cL,0x9d47969be199d9c5L,0x95211e215e288cf6L, + 0xec223f59eb99a911L,0xd4408dad7fbc455bL,0xcf5037c6e8bd254dL, + 0x8d0f9f2ca920b371L,0xafbc9c94f0d5952dL,0x0000000000000100L }, + { 0xdd090be1bbfd4541L,0x91fb0b2960a60ef2L,0xa631624d7477e013L, + 0x121ab825fa1b4a90L,0x13446ddf0a2bfab6L,0xdf92e361a05dfd80L, + 0x5a41609e15079b20L,0x815020c4d656089fL,0x000000000000016aL } }, + /* 20 << 7 */ + { { 0xdcf42b383b9e35b6L,0x5b92cc18d2a99f54L,0x55d1c68c30e7c057L, + 0x3a66cba43a7bd12bL,0xbd21276482b879e6L,0xd0154d7d0bca108eL, + 0x1e0c926fb273e3a1L,0xf063b1fd5e005f6eL,0x0000000000000169L }, + { 0x7eea86aa702dd8e1L,0x4ed21f67fb054211L,0xfc634d80c3b592f6L, + 0x36066f725afadb07L,0x11244608875383f9L,0x57a100127c01dd73L, + 0xfa53012f4eea7d92L,0x32fc27c706e46a11L,0x0000000000000151L } }, + /* 21 << 7 */ + { { 0x0560b7a81cdbe62bL,0x48216ec02b69b169L,0x8473c01ca53cedc8L, + 0x4b28a72f421b41f1L,0xca176bcdb2a9e3a2L,0xa53467c885e7ef54L, + 0x5acde94e6e7b42b1L,0xca852c3fe58d357dL,0x0000000000000184L }, + { 0x361ac3bf65696a12L,0xa17302498773f839L,0xb67cda7b8af8aa33L, + 0x495fa40ff68d9e49L,0x1db30912ed1f64d0L,0xd69756d5040cd7f7L, + 0xf00a572051714973L,0xb1431d1bbd1c1d1fL,0x00000000000000e3L } }, + /* 22 << 7 */ + { { 0xfe87dde0d9e02e98L,0x416547e5c932a7d3L,0xbac5a7827d904335L, + 0x7077ab520fa3993dL,0xbe4eacb39ea4d3e5L,0xcddb4006f56df114L, + 0x0c4fcca5a34c7b12L,0xa1450a3a05dc8144L,0x0000000000000178L }, + { 0xb5d6b79945594505L,0xc1e8f726dd0ac080L,0x6269b2a1e2d97d82L, + 
0xe9c9e6e320004746L,0xab497cfaa7f067f5L,0x55fdf3a16dfbd572L, + 0xfb825d8470231374L,0xbe464555e606857aL,0x00000000000000daL } }, + /* 23 << 7 */ + { { 0x4aa75dbf3fe45c64L,0x984319bfac44599fL,0x6a9f5fe68d365824L, + 0xb188e2357af31facL,0x238f73c3d9a23fafL,0x55ae76a5b2011c25L, + 0x3fc45a1279e98b35L,0x4cebe6d6b9178aacL,0x0000000000000002L }, + { 0x2ad7f331d91ba2ffL,0x337d19a14528123fL,0x6966284d9172998eL, + 0x3dbe46a5a78104faL,0xd64c5cc3126b5bc5L,0x7c3b65aa76f1dc3bL, + 0x51807e032b762128L,0x5a96328ec6a60ccbL,0x00000000000001e0L } }, + /* 24 << 7 */ + { { 0xa0242d2359b58cfeL,0xdd86e8cadf33a5caL,0xf5108bc2b08cbf3aL, + 0x4eb5d82bda1733ecL,0x01eb740244dfac1dL,0x66f1ca152a30cd80L, + 0xde973c30bfd63e5dL,0xeda12faf640918e8L,0x0000000000000148L }, + { 0x250a8d93bd70515dL,0xb2d0389b15f9705bL,0x4845788f4ee84ceeL, + 0x88926b3283f6fe3eL,0xd487de769e834d93L,0x3e4a88e11b7dfaa1L, + 0x10b6759de684beb9L,0xefc2282ffc887b1aL,0x0000000000000021L } }, + /* 25 << 7 */ + { { 0x5abbd8f370fe7fa0L,0x3f9bb48bf231ffd3L,0x935b99d8645a2ba8L, + 0x6516eb26a79dc498L,0xb3f04c2b9205d3a1L,0x947f338edbe99243L, + 0x7fb24af62be752afL,0xfe1b2b011053a4b9L,0x00000000000000d6L }, + { 0x3bdc2af92a4ff7ccL,0x17cf61867902709aL,0x37394caeb4a54187L, + 0xdd2fa8aaa97f6c1aL,0x61478eb937775709L,0x9191485999524713L, + 0xa52dace45c79ed9fL,0xcf33ce2c602de95aL,0x00000000000001a4L } }, + /* 26 << 7 */ + { { 0x34cdf57179fba304L,0x2da4c0b855cba8eaL,0xfa08edb0e3ab7e7cL, + 0x89bb570a1176d554L,0x13f14aecf620989dL,0x070496cf73ab04d1L, + 0x0fd6e1e4e776b1a0L,0x6be78274e938cbf6L,0x00000000000000d9L }, + { 0x8c93bc922cea82b4L,0x841a245a8f61cc83L,0xf6d7371707b43460L, + 0x17789f54496e9107L,0xd79a87dabf60ab90L,0xccb42770f04f3232L, + 0xda9dc1deaaeb6908L,0x0389467e83b7bd43L,0x0000000000000081L } }, + /* 27 << 7 */ + { { 0x8a119d3b89293e7fL,0x8829872e7fd1cb46L,0x511969b796e8a867L, + 0x2ae47fde2fdcd328L,0x657ba478462c5116L,0xed1e2c23686b8ab1L, + 0x9e111f8d653b32c7L,0x507b05a0cf55d5efL,0x000000000000005eL }, + { 
0xe452817b4beeff01L,0x20683c1527b777f6L,0xe075294f9877e28dL, + 0x56526da46e018831L,0xc0f16bef1997f554L,0x167a23dd2b350ba2L, + 0xb656c0d37a19f7c2L,0x0f9c5b5b5adc0909L,0x0000000000000110L } }, + /* 28 << 7 */ + { { 0x536183888cc1b1d8L,0xc4caa4f436009a81L,0xbe295ba7db665aa6L, + 0x4ca4e46fbc0a3df2L,0xfa5d29b60beea0f1L,0xeb13931e86c30cbbL, + 0xc3fbe7b253ec9ee4L,0x95a931572b98e62dL,0x0000000000000003L }, + { 0xf8ef16326477ddd7L,0x87ad8c3b79fe61f1L,0x5d00661fb635ececL, + 0x181527820f321c67L,0x4819373f635bba1eL,0xde36d5089a09eb68L, + 0x48d4a31bb4c9aa92L,0x28c3f527bb549db3L,0x0000000000000182L } }, + /* 29 << 7 */ + { { 0x38c9f940c68fad90L,0x77d0f2254b76e5d4L,0x7df41a86096626aaL, + 0x377aea6d8c22d84eL,0x8228cfbb882544a5L,0x89f9f9b3b1669379L, + 0x7a0ef6c3e8ac7a4eL,0xccef3bafc96ea0a2L,0x0000000000000172L }, + { 0x70f4de9feb10583fL,0x76f7b047c27ee4adL,0x544bdfb63fcb3ebcL, + 0x632d453921a2c4e7L,0x4f70b8c6ab207ae0L,0x2daa1737ce209d65L, + 0x9f094efb4921286aL,0x725653ef5465a990L,0x00000000000000a3L } }, + /* 30 << 7 */ + { { 0x16a72eb79e6dbb64L,0x0e4a2f79ddd1a126L,0xa231a534482b6a66L, + 0xd1506e255f4424b5L,0x72869485b848cbb7L,0x8110e25977260252L, + 0x4f3776fa6132f329L,0x71ce95f6f911712cL,0x00000000000000c9L }, + { 0x85e3de361c4417d8L,0x26e85989703b3b41L,0xcb1f8ab41d0da946L, + 0xe042ea5196c2e272L,0xe2255e1f662c470cL,0xa1bd3d5672c1f8b9L, + 0x38698fd77fd14593L,0x4b9eda73f820ee6dL,0x00000000000000edL } }, + /* 31 << 7 */ + { { 0x6bdf5a24c96f6fd8L,0xde9420b80ec69af9L,0xef89755b25a46a01L, + 0xf7447d81e8871eb1L,0x1477c32e4ed21e9dL,0xdf036e42996bd5b5L, + 0x78099d1b9918c856L,0xfd2e3b70f8dfe807L,0x000000000000011eL }, + { 0x19437721fb1df8efL,0xdae12e1cfdd0d41dL,0x3fdde152bbb0b79fL, + 0x8f8c75324bd97ea2L,0xcdbac848d872e34fL,0x2b507f37168ac6d3L, + 0x3128cebe46a04043L,0xb4196978f5a821c8L,0x0000000000000092L } }, + /* 32 << 7 */ + { { 0xe070a145e4ffac9bL,0xf8295455aa280fffL,0xa2d6cf897010805fL, + 0x04e130893220ff7cL,0x5935e6e2ad11681eL,0xd91efb7bb9cef2bcL, + 
0x3c260b9914bb8c4dL,0x24bf88d53a265599L,0x00000000000000f0L }, + { 0x1ff2439ccc9c279aL,0xdfcd0b6e90bab4cfL,0xfa08a7bac3acbfdaL, + 0xc91b8f40cdb22a56L,0x9624ac18672be103L,0x1e59bac0a7cb0f83L, + 0xae0aa78eed5f94c6L,0x9655b8c3b43029e1L,0x0000000000000052L } }, + /* 33 << 7 */ + { { 0xf0abb749e0b45567L,0x35a3562be1b25216L,0xda2ba89fbebb512cL, + 0x4f4fc05d4d9c9ed4L,0xf4f488b08789462aL,0xc1b1ed9676223e7bL, + 0xf4702d30ca356c71L,0xf25425fa6075b8afL,0x00000000000001a4L }, + { 0x48f573eca93e0794L,0x336e5577b317dac3L,0x66be7dbc49940907L, + 0xa2da2289fe310627L,0x4d39847d129aaefdL,0xfc33556aa71e2383L, + 0x3f36017c8c962e42L,0x4a29a03939ef2110L,0x00000000000000e8L } }, + /* 34 << 7 */ + { { 0xdec3cf59ea27c05aL,0xa7f67083b52e2b6eL,0x38ce7a200474b960L, + 0xdd1975e729def768L,0xb114af497c50fb3cL,0x11ba4d4fb0b25935L, + 0x5b8088f0f6882daeL,0xdacff26b9f98d4a8L,0x00000000000001c2L }, + { 0x14dac24531675d19L,0xb28812683ad6531cL,0x335b8fbc4fcee598L, + 0x03d355986a6218a9L,0xf949f8bc00de3490L,0x5f3ac4d1197b4f84L, + 0x5d0134a9bf92f51dL,0x34d805dbf42b3ec5L,0x0000000000000106L } }, + /* 35 << 7 */ + { { 0xacde8c05303edc58L,0xe7448e83f30b2de0L,0xd01600b5f56693dbL, + 0x11dddd528d1de024L,0x504351d8ae9a52a3L,0x9575e24ebb9ae192L, + 0xa063f065f2894ae3L,0x3217c3923c876549L,0x0000000000000083L }, + { 0x212aa9d5e6932da1L,0xf7acd1f004c91e15L,0xb961a8a429495668L, + 0x943925040ea593f7L,0x834bfa64200847b6L,0x033b1ec77dd4647fL, + 0x4541a2f2334e806eL,0x90ec0295dc2bc63fL,0x0000000000000126L } }, + /* 36 << 7 */ + { { 0x35da5eaa514aa1a0L,0xcdd35c375b9bed0eL,0x91f1d3dc0834c326L, + 0xe529992bef9c0f44L,0x04570f257660418dL,0xbbfdcd21f2e7a423L, + 0x2b52019a1b481b2aL,0xf06a0cbd956bf66eL,0x0000000000000133L }, + { 0xf47b962bb2bf730cL,0x22ad11370b787697L,0xc2c14c015f9afe14L, + 0x8bd0f5f23b4dbb1fL,0x2787d1b7137d3f32L,0x0e60261f91a6f97aL, + 0x761bfe8ae5b0f47fL,0x9e0f42185e729f2fL,0x000000000000005eL } }, + /* 37 << 7 */ + { { 0x3c0cdc877d8589ffL,0x6b599da101ab0129L,0xed6ae833cb178542L, + 
0xc4206185c7014257L,0xfdee5dc18ed75972L,0x30422a42c46afcb6L, + 0xc990c82f85ac77a7L,0xc73e857d3a775c10L,0x0000000000000180L }, + { 0x2dd3442b8642a173L,0x89d3b9151d213119L,0x19f6ce0fbd1f66b3L, + 0x1cb3fe0ba7811243L,0xc8383bb3c3786820L,0x8b5dc0c1b06a0e53L, + 0x575971ed870c6488L,0x5c1eab4ff71666b8L,0x00000000000000daL } }, + /* 38 << 7 */ + { { 0xa112afd237c6742bL,0xf85c7e5425ef3985L,0x4e623a062fde58cbL, + 0x4831bb2c1b18660eL,0x070a617b8cc9ce53L,0x3b59092b1d9b9cddL, + 0x4ff5178379d76cd8L,0xc59e27c72cd5db8aL,0x000000000000018cL }, + { 0x18adadff364202e7L,0x7394b31421136377L,0x27ee75271da7b19aL, + 0xca9f1342464f6acfL,0x1f1d417206c24afcL,0x9b2124214cee62e0L, + 0xb63db356d4324e57L,0xc446ad3f5e72cf84L,0x000000000000013eL } }, + /* 39 << 7 */ + { { 0xb4dab7c1dd5c0101L,0xa2df684b3940fff8L,0xb22f144657aeb440L, + 0x20bafed817987125L,0x5d3a5c17f81f2ea1L,0x8542239d74810449L, + 0xeb05aff9d7f767b9L,0xe88b3034fd75c524L,0x0000000000000041L }, + { 0x1dc76528d9c83428L,0xd317a4a7299a05ffL,0x2868cf78d8e73503L, + 0x4d9140a24d6ccb4aL,0xe0b0f24894eb64c8L,0x4b38e9477c5e853bL, + 0x920c1373dfb43575L,0x069956acc7f6e5c1L,0x000000000000001eL } }, + /* 40 << 7 */ + { { 0x2120f7a02565cae3L,0x8b7701807772e523L,0x83d1d71da1a4e556L, + 0xac9de3546893b076L,0xfc06849cd643a9a5L,0x66cc453d6e46c73eL, + 0x295b72d3e6b4cb6cL,0x27d7ea49f8d2fd22L,0x00000000000000b0L }, + { 0x0a0022913905ce31L,0x58fa6abc37b6e43aL,0x639d7074f0bed901L, + 0x751cca9a0b2b80d7L,0x498f8be5356bb88fL,0xa1e192ed084fa7f7L, + 0x72123071fdabffe4L,0x375dd6cab6d31ec0L,0x00000000000000e2L } }, + /* 41 << 7 */ + { { 0xb986a1033fa0a922L,0xd8af1426471aa7a6L,0x142757f75da16f1dL, + 0xaa5aa2e5112ab9daL,0x2528729d7a36d194L,0x66f8e9d2a5931641L, + 0x647ff5ed954111d4L,0x91353bb782096582L,0x00000000000001f3L }, + { 0xd5e1357471f27d0aL,0x8a17cfc4bef718caL,0x045d82b76c25b86eL, + 0xc190470da060638cL,0xfe1ccd344fe469efL,0x7216f6a2b6e4a4e6L, + 0xe878f0013d4ecd71L,0xd9e6bef5c06db8e1L,0x000000000000000bL } }, + /* 42 << 7 */ + { { 
0xa2ac1ae9ac8574c2L,0xa94d700bd8e57fc9L,0xe553a50483768f7cL, + 0xf7ee17786a4b96ceL,0x9cd65ec7435476b3L,0xec29f389227fc1c3L, + 0x04d7c2cb02ab76ceL,0x955fc98eabeea649L,0x000000000000003bL }, + { 0x2caa7206a4347b25L,0x8f9c3dfc888d9c02L,0xd13abad5c7637e02L, + 0xdd4f2d9889bff5e0L,0x6639923e91237db0L,0xfceb7bec76b38fc5L, + 0x6f51b35946622c86L,0xcb45000bbc3b052aL,0x0000000000000096L } }, + /* 43 << 7 */ + { { 0x7bdd3a436fe42b7bL,0xd751294a80a13668L,0x0c360e45a02c31b6L, + 0xcadb32ec364ab022L,0x714b47b52915fc64L,0x12894036314a27daL, + 0x93988364131a29c9L,0x8345fcb41c11e126L,0x0000000000000020L }, + { 0x7379fc37adf71005L,0x763700973e92607fL,0x7ef1e34ade1aecfaL, + 0x3e9dccbc364796b5L,0xc2a20940b9b3b46eL,0x209ff5a7b71eead0L, + 0x8fc5eddf3e6a184bL,0xcaf50b8310b1d636L,0x0000000000000004L } }, + /* 44 << 7 */ + { { 0x24ac95c1b023a7a7L,0x900f86d152a600b3L,0xee5907d16cb1e79cL, + 0xac05c5459e15310bL,0x379b5f665baf1a18L,0xb936c3748ebdeb65L, + 0x2da06f140b4bb218L,0x3cce7e77a902a571L,0x0000000000000105L }, + { 0x6e88f5350d47761eL,0xcdbe953128102f67L,0x5fbc898b5d8a732bL, + 0x4e2b80b20674c9ceL,0xbb1b8ff3b9ca6e9cL,0xacfb7ce058cf73eaL, + 0x3e38eb1e246664f7L,0x23e34b6712c6e67bL,0x00000000000000b0L } }, + /* 45 << 7 */ + { { 0x788cfa862ce9987fL,0xcf87d7181f3a827fL,0xeda41d438963ebe8L, + 0xc0fc14f2352ede26L,0x1c887d1f9aab3086L,0x4b2ee360e7afe709L, + 0xb32dcc7781abfa6eL,0x93acc0e721e565c6L,0x00000000000001acL }, + { 0xda186d0d64798caeL,0x0e1245903faa2a4aL,0xdf259b1e17f2b48eL, + 0x08d00309e1738aa7L,0x55fad154488c2c04L,0xa49a1f6c8d1cb326L, + 0xc17528412a587b54L,0x1ae3846021b28da0L,0x0000000000000125L } }, + /* 46 << 7 */ + { { 0xc60450ffcfe88e41L,0x90e4a31ce80ee199L,0x129c8141054f77f9L, + 0x3d87883643d18e19L,0x7d538b08d0aa43a2L,0xaec78cbafedc871cL, + 0xc2512a4b22c45176L,0xe0fa4fd7adc1c057L,0x0000000000000054L }, + { 0x6977ed3cb3d0e464L,0x20392cf15876e2f1L,0x4ccc33533e070075L, + 0xb7aeeef1cf3dc364L,0xed66fe98757b2e2fL,0x5aaa91f114988622L, + 
0xe8f06691772fe270L,0x593bcbdce68972e1L,0x00000000000000abL } }, + /* 47 << 7 */ + { { 0x79f3c8d48b4979c1L,0x9f314f3ab34227caL,0x53f70c238542df00L, + 0x735c9c1f5d2925f4L,0xbc85ead47a520674L,0x03ed628cbaac2b52L, + 0x920fcf7c3b63752cL,0x82b2bbcc4ddbca03L,0x0000000000000025L }, + { 0xb3e15c93eee68bacL,0xcaeaf1cab4c6d5dbL,0x87135e437d63080dL, + 0x7aedd26eece9d3d5L,0xc2cf0281703d669aL,0x669e59b813ab55c5L, + 0x3682cc5bdc1f5faeL,0x27631c8b969d7e7aL,0x00000000000001faL } }, + /* 48 << 7 */ + { { 0xd8af5067441231b0L,0x060a5889b44f70adL,0xc8b668b8d34dc5fcL, + 0xefd712c3c0913970L,0xb399e81b39dca3dbL,0x78fe34977b823568L, + 0xe9898e44b2345a5cL,0x71d1075305abf156L,0x000000000000004aL }, + { 0xb207612eabd2dbffL,0xda4acee7f8666c01L,0xb5385d489eb81547L, + 0x54aef0260b0b50f1L,0x753b6de9117bb8baL,0x770ce21ac85f7040L, + 0x2c84b50d55ba9a93L,0x3d6e5e41ed6d5184L,0x0000000000000172L } }, + /* 49 << 7 */ + { { 0xbc90b46a54ddbec7L,0xd41023cc9f994187L,0xacd294e2e2bc97ffL, + 0xab4a43a925eb30dcL,0x73a2f91e7d271790L,0xfdddfdbd90babdcfL, + 0x075d1009786d202cL,0xe27659f112af11b5L,0x0000000000000132L }, + { 0x82bb740b5015e833L,0x1b146b579756a549L,0x0676b19bbdbbc646L, + 0xd905bde08b917c5dL,0xd612630a385d16b8L,0xa990c8d30750f8afL, + 0x9b692f8b964eca68L,0x0cb1417c6cbf6aa8L,0x00000000000001ddL } }, + /* 50 << 7 */ + { { 0x633c95cacfeb6e6dL,0xf310f055d3a913f8L,0x82b3c285962f7c53L, + 0x93cea71d41ab7d17L,0x3e1938c7c2663a84L,0x03e5c149da567012L, + 0xbef4569ed1fddee1L,0xec2c08d2c0a14e36L,0x00000000000000a8L }, + { 0x9f1984ea8c66bb0dL,0x33ec54fa24a7ab3cL,0x82363d70bc7cd59dL, + 0xcdc086c9dd7205b5L,0x81cefb78bd3f9b62L,0x2f7d9848d0c20358L, + 0x7d19925e827ab25fL,0x27432ca7bdc46e8dL,0x0000000000000103L } }, + /* 51 << 7 */ + { { 0xe5fb482725ea9627L,0xd1b15417eb4a6824L,0xec66b336f640dc0eL, + 0x58b67fde1f768b93L,0xdb052d30c38e94beL,0xb8f9bf0026c71d3aL, + 0x524029e3967c2608L,0x2cc1604c3e96c14bL,0x0000000000000018L }, + { 0x1e00bf024096151fL,0x4682903a3280e791L,0xd10747413f59b08eL, + 
0xf805afcf57dc3da5L,0x69231f31c41ed588L,0x8d88792c3a6f17f2L, + 0x8c7ed0f3e5562da1L,0x56210b8fcdd3b578L,0x00000000000001c9L } }, + /* 52 << 7 */ + { { 0x9560994f8b256515L,0xe6a1c4bfee0c60f3L,0x90616d735c40e06aL, + 0x96b06d488938c896L,0x3e91746cf7a0d6a3L,0x64101f1f323bb0dfL, + 0xb9063a273c5f05d8L,0x9a3278f8707f1505L,0x000000000000016aL }, + { 0x99599034cd47eb2dL,0x2445f57192baec6aL,0x3d775c01f4785d10L, + 0x5b52741ba39f1742L,0xc3b9b777ae6b587cL,0xf843aed8a8f986d5L, + 0xac95d65bc8e3e8a2L,0x5ae638e982675b40L,0x00000000000000e1L } }, + /* 53 << 7 */ + { { 0x02e5f7474e915b10L,0xfe984f976f9492f8L,0x9539886769709243L, + 0xf4525d86df138cd6L,0x0c4e8a47f691a83fL,0x7ad0d619cd6eb376L, + 0xc5185be22dc115d3L,0x0f31c58e3c0cb3c6L,0x0000000000000117L }, + { 0xf32d5bc157532f62L,0xb11abd34088396f3L,0x41bca25c0b5c4572L, + 0x85c1d94afd7dee4cL,0xa3bb272129b04077L,0x4ca6c9d0b087910dL, + 0xef465fede2824a30L,0x3bbcca9059bb45e4L,0x00000000000000c2L } }, + /* 54 << 7 */ + { { 0x5620edeac1805532L,0xb0d80cc507b46a79L,0x147fd42b4ad3c20dL, + 0xc5a3cfad1efb9437L,0xa8c731ffafcc3dd3L,0x3ba852716b815aeaL, + 0xae961cead66d3336L,0x3f046de2c4a83a83L,0x00000000000001d0L }, + { 0x87ee923ff0ea8ff8L,0xd0fa8affd0f83feeL,0x5164155928b3f47aL, + 0xfe93907dbda41a94L,0x8da252c4dadabbc1L,0x02c5a8792d3e40c8L, + 0x1ed3ebe41b678f29L,0xcc33244ff9b63773L,0x000000000000007fL } }, + /* 55 << 7 */ + { { 0x347a11bc92d26303L,0x59820ecf9064fc95L,0x32c8ea2bb66853a3L, + 0x3dd1cb41b71920c8L,0xdddf7e6914bbf36bL,0xb0f71e18c4d27229L, + 0x35218d027c56d60eL,0x0e1f568591c81b81L,0x0000000000000049L }, + { 0x94649487c1a83b3cL,0x79505e0ac2894f9aL,0x12548fe7f64af5a4L, + 0x0ee8e52a838817e3L,0x625ae0a8faeb95f3L,0x6e82be89f6fd174bL, + 0x16cb1065c3c4ee67L,0x43c8d753e4cf40ebL,0x00000000000000d2L } }, + /* 56 << 7 */ + { { 0x7dc3f4d8b36d4820L,0xc1367c104f2582efL,0x0d65009c600cea72L, + 0x663152830418692bL,0x973e1aad1b497fe9L,0xa89362316a9c3aa0L, + 0xa884d7d4c778e6f9L,0x241c3d34142bae6bL,0x00000000000001acL }, + { 
0x09636cb73ff4bc16L,0x7f924363dc70dd8bL,0x44ab005c916de001L, + 0x5995d92173cf5f83L,0xbc1528c1ae70dc46L,0x94335a4f065bfd79L, + 0xc50d05469b5a3a41L,0xfeaa8d5a6c039454L,0x00000000000001bfL } }, + /* 57 << 7 */ + { { 0x47a78493446416e9L,0xa88b09e0e09e1613L,0x8ffe68539241f3eaL, + 0xbf5369920fdd9ac4L,0xd619356d487d6560L,0x66a29c0b3a326f9aL, + 0xa4562dcb975f8c4dL,0xa9b11a8f6827192cL,0x0000000000000034L }, + { 0xa18d94c3beabe49eL,0x4c95b3d210247731L,0x7e4aaa62800c61ccL, + 0x192be7ad93aafef3L,0xacdb0684548c37edL,0x906c5dca6b217274L, + 0xe82905411aa5e87bL,0x99499377af948d0eL,0x00000000000001a4L } }, + /* 58 << 7 */ + { { 0x3ccbfc85947098a6L,0x51a647133cedcde0L,0xab36cd65840db808L, + 0x27f54065e1e6da2eL,0xacb7bc24c4dfb935L,0x0539fb6b85811a51L, + 0x9a834c153458dbedL,0xf200b042d91c1ffbL,0x0000000000000129L }, + { 0x64408f3e63cef0a1L,0x9248b34a1e8f0362L,0x666f2a1ef133487cL, + 0x47574c1e074d37b5L,0x747eddc62bfb71a2L,0x126ccc0ee2f3a687L, + 0xfe46cb0a74df9695L,0x76c6f165390f4609L,0x0000000000000166L } }, + /* 59 << 7 */ + { { 0xcdaa7ab5f50dae67L,0xaa79f66748a0f682L,0xb8527b90a5eece8cL, + 0x52140489e971d0cfL,0x46592debe5dcb864L,0x93b033d749d3a03cL, + 0x2ae696de6111a307L,0x92d90b30ac5a9212L,0x000000000000012fL }, + { 0x1237c3437c548a9eL,0x0a6a33f85ca220dcL,0x89efc633944f854aL, + 0x1720353b68491e43L,0x50c189b0beb38a01L,0x84cf858336ee395cL, + 0xd644ee4d644a684cL,0xc496e4eacb2536c3L,0x0000000000000039L } }, + /* 60 << 7 */ + { { 0xa12ab81202bf43adL,0xec13c1d85b4fa76cL,0xdc2693f35ef4cae8L, + 0xe77c16cfd10b16c2L,0xf0885709af73ee84L,0xa0178986db518b00L, + 0x5b09e2e2435f7a09L,0xb043059120378b2aL,0x00000000000001baL }, + { 0xe4f25ecfc35a9aefL,0x183d67e6923c79b8L,0xc1ef889887e007cfL, + 0x243bc11fdc0f34bcL,0x93f6d3cab5219ef8L,0xce0f0c34e7f8f11cL, + 0x2980543534f5dd6fL,0x425109831808c4ddL,0x00000000000000aeL } }, + /* 61 << 7 */ + { { 0x4a1c78eb6d3ef9b9L,0x8e378182ea5b2c00L,0xcc80009a7f17bc57L, + 0x4ac0725c3aa76f52L,0x67074a8316090e69L,0x247a99f906040d66L, + 
0xffc91223c202669dL,0x36cc81087cf6079aL,0x00000000000000ecL }, + { 0x0f4c307f9a9b6f1bL,0x784340bca4206c20L,0xb97598c2e2d0b207L, + 0xc7505ffe1ddcc638L,0xe9d636eee73109c9L,0xd85e9e014b69754bL, + 0x2a9802d5445c653eL,0x56e8b9e89805df1cL,0x0000000000000183L } }, + /* 62 << 7 */ + { { 0x50749bac0480a6eeL,0xd198789139ea640eL,0x13ac416185393676L, + 0xe28d116425c2eec7L,0xe636a59655b9f322L,0x4ba90f173a8ba194L, + 0xbca2eb38d49ed797L,0x9dc621d4f4e753b3L,0x0000000000000036L }, + { 0xe1f60197f0f3bf12L,0x9137f979e7460711L,0x65cf559e696dd5d0L, + 0xbcc8bc9be80803f3L,0xab56300de8693f04L,0x03ece5d00979cf03L, + 0x907f30ef073bc6e5L,0x3a7773c1344d837eL,0x000000000000007aL } }, + /* 63 << 7 */ + { { 0xe25c24651f92f8b4L,0x3c12a2ad947b1043L,0x159bbc425aea4035L, + 0xe2519611012a7a12L,0xd237c4afeec2c287L,0x070eb6c60bb8f6d1L, + 0xabc1864e29a8a0f1L,0xc6d6f9dc9a1ef77aL,0x0000000000000179L }, + { 0xca4e4bdfe6f42115L,0x86dc6504954f8811L,0x501d86c5c35fe50cL, + 0xf381c7b059fd3d0aL,0x2f3d1282a5a428a8L,0x152986e24130b67cL, + 0x2b3719068151bc5fL,0xdf41c90aa163dc68L,0x0000000000000063L } }, + /* 64 << 7 */ + { { 0x2172a8aa81a7b4c3L,0x97ca8dfdabf93bbdL,0x2525d8caf37d1211L, + 0x16442a63917e5726L,0xc67b9ec8e12467bcL,0x53240d523c5227e1L, + 0x6f6049267101b132L,0x70da1ac7de1a0f56L,0x0000000000000086L }, + { 0x8d0c56be53e7dcddL,0xca55c626eabc48deL,0xd4cbc52a585ca3c4L, + 0xa36f240277288d74L,0x42922ebcf7ff687fL,0x482022cd5efbb12dL, + 0xe539f2aa7a1abed1L,0x561c207037c7469fL,0x00000000000000c7L } }, + /* 0 << 14 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 14 */ + { { 0x6db672c63a46caefL,0x999560bf8bd02589L,0x53e343871d900fa3L, + 0x33c80c2c53d2590eL,0x3b11ead69801a393L,0xc6701879591e52f0L, + 0xc1c1982d60428399L,0x77d747971932418fL,0x0000000000000177L }, + { 0xa4f615a05b4fa0ecL,0xb5afe599059eb5c5L,0x1260ba581142b875L, + 0x3cc654abf4f69ae0L,0x0422ac16ab6d27bdL,0x119f316acae65c55L, + 
0x3ec0a913e15b6c6fL,0x1603620e0ae71c5aL,0x000000000000008dL } }, + /* 2 << 14 */ + { { 0x7d5f8e474538e761L,0x37c788724c878738L,0xe69f4711816427f0L, + 0x4da6f4677315c7d8L,0xf4665c460e84d136L,0x6b6c9c506a9d82d0L, + 0xab5d9fec1a96fcd2L,0x9ed96c86f7974f3aL,0x000000000000011aL }, + { 0xbecff7179a66d7acL,0x67d08b6d8f4f08b6L,0x0f7ce5bec02e60acL, + 0xcae69a50294aed52L,0xb1e2278e10970dfcL,0x618459af75b3e40eL, + 0xe395787ded5e54a0L,0xcd8a442be862c8ffL,0x000000000000001cL } }, + /* 3 << 14 */ + { { 0x501c5422910040b2L,0x3d8fadef6fd18a7dL,0x1ca9754cc159f2e8L, + 0x3e337074f3570cc6L,0x8d2e0b274f64c1b0L,0xb458964157d8e707L, + 0xdb9c20ff3b377e8cL,0xe202cb7e3c7a95bbL,0x000000000000014cL }, + { 0x443b521528898156L,0xc803d7d24d94ae93L,0x768497c3aa70513bL, + 0x48e7031adb916475L,0x7668dee27654c9a3L,0x9d9403b272eb0122L, + 0x28eaefd23999b060L,0x663b8a7974c55442L,0x000000000000017bL } }, + /* 4 << 14 */ + { { 0xa3480d58caa4bec3L,0x389489935989e4a0L,0x3f4d7b6c6f7ba4caL, + 0xe2913a55d39cb2aeL,0x615043ead7be6499L,0x9d4c41cbf504fec3L, + 0x4f93da968d44893bL,0x33a7acdc9a7928e0L,0x0000000000000013L }, + { 0xc865844f48219a09L,0xa6d0b17937426bdcL,0x7386a0a6284ae0e6L, + 0xddd6cba0c2dc16fcL,0x77d2934d2940d262L,0x3af6ed35741b88abL, + 0x96f43a65a6dae43fL,0xc71435351414083bL,0x0000000000000057L } }, + /* 5 << 14 */ + { { 0x7790d2eebb886b9cL,0x80caee478c4013c7L,0x2d7d404ec987fefdL, + 0x31a416b818d7d489L,0x00f6c0f7206e539eL,0x725d958fe52b602aL, + 0x5729d79e5c4f58e3L,0x2e03f6286906e10dL,0x00000000000001b7L }, + { 0x2e85fdc682793d69L,0x90676d2f89340b5fL,0x21b84c0e32410378L, + 0x0a3f3ec8596bac35L,0x986e33fa25754c0dL,0x9d278b18aa967da9L, + 0x60e45c3719545be5L,0x7c67878814875f0bL,0x00000000000001b2L } }, + /* 6 << 14 */ + { { 0x5f3b4a9387f6aa1fL,0xc2b1fe751d34fc74L,0xc437a7aee34cfc89L, + 0xed72014c30161cfbL,0xfe567e693f395ab9L,0xa1639cf06f0de2a7L, + 0xd21f55d847970dc6L,0xb5e535794207deb0L,0x0000000000000139L }, + { 0x594d169afd419bd1L,0xb52e9e26c90e05bfL,0x44d444b7a62de4b2L, + 
0xdd2f17762364737eL,0xc0f6a25563c867d5L,0xf2b52b0e19341f8eL, + 0xaf95afc014538a86L,0x9a4427ad1dd0c0a7L,0x0000000000000040L } }, + /* 7 << 14 */ + { { 0x11f2c66709bb5ac2L,0x09e0c0068b1bc2b4L,0xb8b059d595edf7cbL, + 0xe84466599cc3b6f1L,0xff0bb4d21a19ff38L,0x4b28f740daa847abL, + 0x4dc18b713f2ba13fL,0x82797e6a24f93563L,0x0000000000000066L }, + { 0x24647b9ce0b37526L,0x151f8e85a66e7426L,0xfbc49bd366cbd549L, + 0x35236b47f1bf9e2dL,0x22d57f5652ce20f5L,0x348388680fb86429L, + 0x4fd6f7c92729eb8cL,0x47760b45003362a5L,0x0000000000000104L } }, + /* 8 << 14 */ + { { 0x394d510624891f36L,0xb4265c4f825fa293L,0x65a54b87f6ecbd5eL, + 0x6a2f900f2a351a03L,0xfb26d52f8d1dad9cL,0xbd651f19ba2c39ccL, + 0x2d5894f2bb30ed5aL,0x4cc3f05794732ec9L,0x00000000000000c3L }, + { 0x8c03ccdc9bfec4ffL,0x4ce76352d52329c8L,0x0a92cdf03995d109L, + 0x44d28a5d7518ba0cL,0xbd74ecefce16a6a6L,0xcab52531123cb82fL, + 0x72065b99f34e1c5fL,0x4949379f03a9a916L,0x0000000000000164L } }, + /* 9 << 14 */ + { { 0x8ee1c74910ad8ac4L,0x110157d24a29d219L,0xf118918bb59bf09eL, + 0xfbba7a0ed6b61104L,0x63da0b9a32b9e1deL,0x3fb49a8689f92a39L, + 0xd229edda291d7f6aL,0xc1e45caaf69a443cL,0x0000000000000044L }, + { 0x08deb935b6066acdL,0xdd28bcb7b88f3d6aL,0x2e6be43d91b9a3ccL, + 0xb2480a1a17c1ed87L,0x6bde69bbb5b2b5bbL,0x97dc1fe8cc8f7010L, + 0x7e2807e7fdb1b32fL,0xe7b130d5f70fd8a0L,0x0000000000000043L } }, + /* 10 << 14 */ + { { 0xc3b8439c8b56ac59L,0x59cf8542da6f121bL,0x025a79c1032590e5L, + 0x3f98129081a8cbceL,0x757cdfa5b3cf905bL,0xea0bba9b91354970L, + 0x59482cb1dedf20afL,0x67d12c761e59c982L,0x000000000000001fL }, + { 0x639ba69e08e7113dL,0xa47020a6d9a39024L,0x9fef97a3cb50be8aL, + 0x5a7a5e7f9d733c97L,0xf2794e015c28cf0dL,0xc1dbb356cae9bfbbL, + 0x77793cc224716c82L,0x5ccc298d50e5d617L,0x0000000000000037L } }, + /* 11 << 14 */ + { { 0x9812a6d442817149L,0xb73e1c2efd845130L,0x842633677ea37239L, + 0x3074c70bd66a5ae2L,0x15461fbba435a562L,0xe1457c9dcfd53243L, + 0xcb95324a4b559ae6L,0xd21b06be80fee082L,0x000000000000005bL }, + { 
0xb2a047a250bd939bL,0x50f8174e9ab3c538L,0x7566dd4371fbec0cL, + 0xd8e989b442a8285eL,0x40b5fb08c129ba84L,0xd3febcf542eb6910L, + 0x85627da7e425b9cfL,0xb5baf38fc4232846L,0x0000000000000071L } }, + /* 12 << 14 */ + { { 0xd49bea9d0d67ee97L,0x3b54996ff8ce59aaL,0x609f5808f5862792L, + 0x8fff6ee3bf16a7b1L,0x2d515a919ef889a0L,0x9fdad17ecc6a85c3L, + 0xa93c168b54d161d4L,0x269cbd3fd42907b7L,0x0000000000000077L }, + { 0xefbdc466cc9bc5aeL,0x562163329f2d0abdL,0x567a46bc78b7f191L, + 0xfe6f5ffe55cb85ebL,0xb6a1bd3a1fa1e9b7L,0x92a1f94db23aa4a1L, + 0x8706df4e85b417c5L,0x80f8e27972c38e1dL,0x00000000000001f5L } }, + /* 13 << 14 */ + { { 0xdac5bbdfc3346801L,0x108dec419bcb0f02L,0x5fe3f06d48153635L, + 0x5ce5a8679d23b153L,0xe5dc5b536531411eL,0xae4960cfe36feb3eL, + 0xd2e67e4524c4fa67L,0xf00a144f30702333L,0x0000000000000182L }, + { 0x746a744e9428502bL,0xe12542d5e48622c9L,0xd694eee1a88fe253L, + 0x971c241e1cd5c3a5L,0x7de4e880e0e6cad2L,0x76187547c4ea49e2L, + 0x0bd43790d02af40fL,0xe7cbcdee5dccd37aL,0x00000000000000edL } }, + /* 14 << 14 */ + { { 0x7c22cc01f186fbd4L,0x658ab99c126df416L,0xe558e64094889f6aL, + 0xa2bb3d4d6138f3ddL,0x0b566d593c7c1262L,0x3d6e21d39de86bbaL, + 0xf58a8bf45b3fa663L,0x5fcb32d5e5a9783aL,0x00000000000000deL }, + { 0xe003185eed8094b2L,0x8c44e4e9cb093e1eL,0x9b22bf6557b21153L, + 0x4e77fcbff755775dL,0x60aa9c223918bf3fL,0x02efccef93854e6bL, + 0x8d5c09d076894b99L,0x000562319adbd877L,0x000000000000016dL } }, + /* 15 << 14 */ + { { 0x63d27e60b67b7904L,0x440e4862517bd828L,0xc6f01d917b4009d7L, + 0x111a41ce5fd8c6d0L,0x711dda3365429b9eL,0x2cd2af55551c4ccaL, + 0x5fb416e681cbda87L,0xa20d3108da546986L,0x000000000000004aL }, + { 0x0554dea9f7d2d8e0L,0x8b6bfcf7eb41abb9L,0x40ee181804e3c7c0L, + 0x530c3e7220c85212L,0x235f022bc9a2fca7L,0x95fab2db0300639fL, + 0x48426194585506f5L,0x61a8465852018681L,0x000000000000014dL } }, + /* 16 << 14 */ + { { 0xb3ee676376229445L,0xf39bdb10fed827feL,0xbbd917775abbf64cL, + 0x260ee1d529a83fa6L,0xa0fb51a9e14c0321L,0xe285da4285203855L, + 
0x5c8ab2d255550c91L,0x88a94f055e3e844aL,0x000000000000006cL }, + { 0xe2c20af1885e1b36L,0x93632bfb5132eb84L,0x8ec0a26aeec26e58L, + 0xa189aa9bc1a0b075L,0x3786be8fff4a93f9L,0xaf4ace63e4302fc2L, + 0xc19ae28d5856207eL,0xcd791d4e5425099bL,0x000000000000013cL } }, + /* 17 << 14 */ + { { 0xc2e311642e400acbL,0xb36b8691858c3d81L,0xa51d8133cc1c343cL, + 0xb58a9c83c55818e2L,0x8ccec493946b84c6L,0x5665d20eb30779ccL, + 0x2edf3534a8d10a72L,0x865f8ffe3962aabcL,0x00000000000001a3L }, + { 0xd992bb1c76d1bd21L,0x1b61530c574f2eebL,0xc5cea02a6431db08L, + 0x5abac615b6643c40L,0x839a739d9f0146c4L,0x0a6bd0d2d7b1dc8fL, + 0x225d01259f6f6dcbL,0x413d64406b6c5498L,0x0000000000000047L } }, + /* 18 << 14 */ + { { 0x72eff79a8d1df15dL,0x71e97979714870c1L,0xe66f939f9eb85672L, + 0x88eea0f77089e658L,0x4d2e37bd4bc49047L,0x3acc988fdd1bd5e2L, + 0xf4436daa725264deL,0xd294eb310c6f167dL,0x000000000000012bL }, + { 0x37dcec51defe6cb2L,0x0eb39b1e82ae447dL,0x0e67d023461c386cL, + 0x67298ef032d4e679L,0x70c9d3e74c18fd88L,0x322b18eedc3a0158L, + 0xe643985c8d036e18L,0xafecf85494a080a2L,0x0000000000000102L } }, + /* 19 << 14 */ + { { 0xbea1d3999258e609L,0xade5b232992aab46L,0xa937d66aabb0b737L, + 0x75919dc971c1be45L,0x11f1edf6dcb6b3e1L,0x62be1beef84fc1e9L, + 0x5a7e2788d3a531f7L,0x5cc174fcb1eb09f9L,0x00000000000000d5L }, + { 0xec6dc93ff199ad60L,0x7966f0725067e8eaL,0x5db6fb626cfa4a74L, + 0x9b7792ea5a8e325eL,0xd0165798a79ed9e2L,0x618db06c17fea9f4L, + 0xd4f26caecd5efe3bL,0x8848d8a2b1147f97L,0x00000000000001d7L } }, + /* 20 << 14 */ + { { 0x782a46b3a45742d3L,0x7fb395dc9ae4f831L,0xaa0a249906715575L, + 0x336dd153987995c6L,0x5678c82a70d00fcbL,0xdc87a87233b45f8bL, + 0xd5b269003498dfa5L,0xa8ef2e81410f7c71L,0x00000000000000ecL }, + { 0x00e42ef069a89d17L,0xdfd638d21de60ee9L,0x5e3e68a98e2a3b24L, + 0x19d1cc8ad7a07f7cL,0xd84e18094d2c9b89L,0x7674bfd1fd778f5fL, + 0x962735a61e2b3490L,0x9ad592a5f535952dL,0x0000000000000031L } }, + /* 21 << 14 */ + { { 0xc8f1c0548a0c0bccL,0xa30f26a5b54457fdL,0x6fc1b24c95252d41L, + 
0x7c17f5b425f17d79L,0xaa30b0d126e54e69L,0xc7319dab8072a467L, + 0x1b3b1209f653c077L,0x95dd97a541b0d682L,0x00000000000000b4L }, + { 0x48e4678c3f3185d7L,0x1558aab9c5f4f58cL,0x8ded1080b9177b55L, + 0xb0f7ed32d457739cL,0xfd11aae5a6f09d9fL,0xd8c1338874a99d1bL, + 0xe68c2f61727f9c0cL,0xbe311c7779ee6a1eL,0x0000000000000165L } }, + /* 22 << 14 */ + { { 0xf5eaf9f335acce1dL,0x0b96a6023b9738ebL,0xad95bc9e3eeb44d8L, + 0x0eec1a5bd535a06aL,0x772a256b21ef687fL,0xb4d097892f4e9fceL, + 0x5f51391a31e0fab4L,0xe19ddf7c24d4a11cL,0x000000000000001fL }, + { 0xe78b3824810e0d87L,0xb9a45999c38b789aL,0xef8856703d63565bL, + 0xc5c41e8704fbb216L,0xfdad741d95b5b15dL,0xf47540283617de9aL, + 0x270e0d35c4ebd6d3L,0x38c2d45b3166287aL,0x00000000000000b7L } }, + /* 23 << 14 */ + { { 0x87dd73b253e3448fL,0x60a7de8cc6d5fcc9L,0x0f0e775e7d9f372aL, + 0x7d3d0454356ca0ecL,0x67ef63091202240bL,0xe19efd47f65d2cbfL, + 0xd3407ca767184ce4L,0x6e17fc95a294bbdaL,0x000000000000011aL }, + { 0x4e28296f4ece1453L,0x1c7af2e5626ebbdeL,0x9701e52b02e2a703L, + 0x17ae7fa8ab279190L,0x8db8f694dcaf71a6L,0x1ab5594633e49a0cL, + 0x38b2529c4c4de6d7L,0x16f59109d0e0b997L,0x00000000000001e5L } }, + /* 24 << 14 */ + { { 0x80d754cbaf63a6ffL,0xa4f718c114970f91L,0x33767545b184b25cL, + 0xcb615b6126115598L,0xddc1e848b0ec3a99L,0x238885ae7a9ee0f4L, + 0x4f434c1a70cff1afL,0xa5d8f62804f92877L,0x0000000000000103L }, + { 0x889a468cf5e7b21cL,0x42c6b1284f73f4c1L,0xc825355fea99c1c9L, + 0xdfabd44eb8502cabL,0x2c19c4f8078ff997L,0x766be192f948436eL, + 0xc086ddd4d3dcdf09L,0xf28f946c1d8fd86bL,0x00000000000001ffL } }, + /* 25 << 14 */ + { { 0xba6292380ed67da3L,0xf098937a18b18a45L,0xf40e4ff532aa3697L, + 0x65f0f6d484f25d73L,0x0447d49a2409ee55L,0xd8f3f95ebe06fc9aL, + 0x6720b40c38a09cf5L,0x498d11f28fdced99L,0x00000000000000fdL }, + { 0x30ba67a673dfbf54L,0xe1f24bf629950888L,0x0cb868f331bfd446L, + 0xc17fd67b7f9e5043L,0xa7da16fa4623e449L,0x3702bc7d2f358149L, + 0xc350a191ea209cedL,0xe0d4d94b80089030L,0x00000000000000a6L } }, + /* 26 << 14 */ + { { 
0x4a2e8b7588b8ffebL,0xac8299ad552743eeL,0xcc88211f46a49114L, + 0xeb39502e41625dedL,0x55322cd961f01503L,0x5dfc1640b9fcc446L, + 0xa4dcb9b9752c2d0eL,0xbc5ff858b4084347L,0x0000000000000016L }, + { 0x842af48f1d8ace0dL,0x0c7fed42216d4efaL,0x2f280f63cee88219L, + 0x15a76cf8278f154dL,0xc1dba01188ffa0d4L,0xe18e7b9a5dab9669L, + 0x1072ce8351c3451bL,0x81466b0bc0625ce7L,0x000000000000008dL } }, + /* 27 << 14 */ + { { 0xc6532dd495f1a993L,0xed2d9c8e994550dfL,0xcd980625dc3206d1L, + 0x44920d5d8a99860fL,0x179a1fbfe95e6ac0L,0x21586f6e642b18a0L, + 0x382e467bb9bc16e8L,0x1bdc2a08eb4a3081L,0x00000000000001bfL }, + { 0x2dcdd601a46e68e0L,0xd015240b93b19720L,0x77749858ce57fd5bL, + 0xb3cdcd8216d48d32L,0x65ad981f28365bd7L,0x392c56e9dca4a70cL, + 0x76767327573481d2L,0x5827f187873f6cf3L,0x000000000000005bL } }, + /* 28 << 14 */ + { { 0x9ddc754cae4622faL,0xac04c42905c5947fL,0x0b53d6db2dbfb6a4L, + 0x1fb16f7b883c6d42L,0xe38b1745e5dbab18L,0xa320f80a0087b164L, + 0x4a5c274f32cd4694L,0xe32650c10855dc1fL,0x00000000000000ceL }, + { 0xf1c1a8c55997c5b5L,0x3a20ff072e051a16L,0xbaaccce236a4bc59L, + 0x72ae4a192c407bfdL,0x0ada1e57c128c710L,0xf4ce04dbb2d91ff3L, + 0x8616da8731fd0538L,0x5c19b57f245b59f8L,0x000000000000010bL } }, + /* 29 << 14 */ + { { 0x31597b8c0a73b81fL,0xcd52d82105ab588aL,0x4baf4239e28ced35L, + 0xc53a092e39673af6L,0x9e64a4d0f36bf7f2L,0xac2e493576c02cdfL, + 0x4ee3570ad7fd8566L,0x3a35bcc242fcfa04L,0x0000000000000178L }, + { 0x556867075b8e9c9dL,0xba2645ac9c6ad053L,0xb43529a90a748b98L, + 0x2f9af439901cf7c1L,0xd7996db75282357bL,0x11015af01ce89b7fL, + 0x67b216b74963a6caL,0x8a8db55b74ad3678L,0x0000000000000054L } }, + /* 30 << 14 */ + { { 0xb2d55efa0c44bf54L,0x8f646047b5668a6cL,0x5e399e3f28000a00L, + 0x50551969f61e1838L,0xc32196b12613df05L,0x3e838233f7a1478cL, + 0x392a6c1fc508d707L,0xd93d05d6311b3998L,0x0000000000000009L }, + { 0x6b31024ef05a16e5L,0x5a914eedc202d6aaL,0x2ecfe24db04a8795L, + 0x596a539e3e88e191L,0xebfa53cb697fdba9L,0xa69b735d1dc0d0f6L, + 
0xa6a8ce767ff79787L,0x071e45f09aefaf38L,0x000000000000007eL } }, + /* 31 << 14 */ + { { 0xf27472c1ae93f8c0L,0xf04c6ea4a8e48c7bL,0x31d58b6422cad4c6L, + 0xc7ff26cd06556907L,0x44e71c873b03e6f6L,0x01eb2ef5eea10ed3L, + 0x61bc27e81b7996e9L,0xd0ab98cdb523dd48L,0x00000000000001ebL }, + { 0xfe27e8cd3a19cd0fL,0x5ccf20a0d70e567bL,0xf2e641e142c65e55L, + 0xba46dfaf1ed8e850L,0x1af42222e5ed2072L,0x691811faab988c72L, + 0x94e7f4fa93d8842eL,0x6442a9c5a1152efaL,0x0000000000000186L } }, + /* 32 << 14 */ + { { 0x44edf8be84357b3fL,0xe5df9129588059d0L,0x83e65cfaab7d5b23L, + 0x79c2e1982121a504L,0xe6a19633eb3be7d1L,0xd9f8869586b85bc4L, + 0x78508fa279f2187cL,0xa2eb8fc012254f09L,0x00000000000000aeL }, + { 0xb1a6704e2797d3faL,0x95b4c2682c78669fL,0x5294a8e42997c68dL, + 0x7246b0614085bad5L,0x7ca017c24159e5efL,0xb8fe1a6187d1a013L, + 0xdba0dcde44bc4ea3L,0x1fdd53c6a8ce296fL,0x00000000000000feL } }, + /* 33 << 14 */ + { { 0x8476dad138c75952L,0x256608000e53b9c0L,0x6f5f6ffc5002a11bL, + 0x025ccfea8d537febL,0x1d09d62b2eb845d9L,0x5d4596b2e1a65903L, + 0x49528722d492b0f8L,0x444b3def4cf2b0fdL,0x000000000000008cL }, + { 0x25c1941661967bd6L,0xff50303688985e0dL,0xd180daccc95cd952L, + 0xea18affd18456eeeL,0x6ccf36af65317267L,0x8fb2380251dc5738L, + 0xaf16178d33439112L,0xb342543e0b99b0cbL,0x000000000000006eL } }, + /* 34 << 14 */ + { { 0x53277acc3ccbc40bL,0x218e751f6d939a22L,0xb3e92769c121b67fL, + 0xdbf0fdc0522b77a4L,0xbed0af74fafbf581L,0x8a9c509e162b2417L, + 0x1ee6bd74f86831a5L,0x14cbe9f436df364bL,0x0000000000000100L }, + { 0x013a60b8164ffbdaL,0xfc05d43a28203c5bL,0x112489209bc7a027L, + 0x2ffa9cf550713fb1L,0x9c1d0bd5b07e598fL,0x207fcec72df3eb6eL, + 0x632ef362e16f2bc5L,0x20bb3d440a840efcL,0x000000000000013dL } }, + /* 35 << 14 */ + { { 0x9a5c53712dbf76edL,0xf5cc66adfa183c55L,0x84ce4c1f1c020230L, + 0x4c2d3b4490f2990cL,0x9150627f643357baL,0xb7cfd07f31fe37c2L, + 0x1e5a915b888b5198L,0x5ac98b6e98bbacf8L,0x0000000000000185L }, + { 0xc79d020aeb775585L,0x5974d3de45a3257fL,0x72b152daa6a4749bL, + 
0xf3603d175ad334feL,0xe3d82b9532e115dcL,0x15a6b62edf4f90d7L, + 0xda84eab108fdd614L,0xd893690b706bc3c1L,0x0000000000000199L } }, + /* 36 << 14 */ + { { 0x7dbc777cc8eda3e4L,0xf93443a53d138b69L,0xf88673fabe32f323L, + 0xc53f686eaf91501eL,0x2c583a94a9e8df0fL,0xdbb476a579a360bfL, + 0xd6d61debed573377L,0x8bf9489dd85e7ce8L,0x0000000000000186L }, + { 0x4ea12cf58c06659eL,0x3a5df265fbc275d3L,0x434f440ba5ea596bL, + 0xa386793e82ec727bL,0x4f4d4575b4af3144L,0x465aae52b64cd799L, + 0x4c7c443401dd7df8L,0xc7d3008e40ec3273L,0x00000000000001e3L } }, + /* 37 << 14 */ + { { 0x296d12f19fb85a94L,0xf9e671e189de482eL,0x596ff3b73de0eed9L, + 0xd4bdb6b976feff0bL,0x8ac1d1fe0133e043L,0xf3a91297c19b02d1L, + 0x247fdc3217bba0e5L,0xadb9cc46caed480bL,0x00000000000001cdL }, + { 0x71f9bb404eb96e74L,0x786704f425cbd4f9L,0x35c01daf4127f783L, + 0x53bcd595ca68c06aL,0xf7a45e8f548751daL,0x8c0b80dbc36658b5L, + 0x0eccb01f3182ead1L,0x7b4da8ae3dd4577bL,0x0000000000000111L } }, + /* 38 << 14 */ + { { 0x0f80cc7b2aa68d6cL,0x2aeea46ba5b1edefL,0xc1a582e38e4246c2L, + 0x4890117c468c83c0L,0xb203066fc0f4c50cL,0xc3d86d97753bd8c0L, + 0x7c7796b45821c82dL,0xeab746ef789602bdL,0x000000000000008bL }, + { 0xfec63dc6d79ff090L,0x3f34e577fcc4f8c9L,0x755ddf0eeb73c7c6L, + 0xcb6ee6d1c400c474L,0x070ef40f27a0f15cL,0x3e30e74ac75fb5ebL, + 0x203e4d10f0681326L,0x1933a5161906e321L,0x000000000000015eL } }, + /* 39 << 14 */ + { { 0xf40dcbdd97c06856L,0x5093f148ba7c198fL,0x9b63c31888a4e5e1L, + 0xfbe0d089980bc362L,0x0d1d1889d1a2f1deL,0x56df5ede9fb22fc5L, + 0xcb790b388b0d329dL,0x68de9e9507e0dd4aL,0x00000000000000ddL }, + { 0x870b198434557937L,0xbc4de915cb8041c0L,0xfb1e1e3b25df82a6L, + 0xffd486000bb66bbdL,0x408f76d32485fda7L,0x5eb686cd56698db2L, + 0x9b466dc1a16d4f85L,0x50657a6ce17cd57eL,0x0000000000000085L } }, + /* 40 << 14 */ + { { 0xd06fcbb8cbdd8953L,0x5a7495784a83da15L,0x9a509298fbdbf149L, + 0xe87856427b745cc2L,0xf473e129b6b6ea45L,0x325e846f5982df34L, + 0x2761bcbf39c922dbL,0xd598487a4c8a1843L,0x0000000000000141L }, + { 
0x4d4ce00e255dbee0L,0xb990df3e75a0e0faL,0x947755b894297a3bL, + 0x4797193fbf7a683bL,0xe3be49554c1f5e73L,0x0ebabf7ce3832a6eL, + 0x55aa92bb16574df2L,0xd05f35e66ace50f8L,0x0000000000000032L } }, + /* 41 << 14 */ + { { 0x0fe114732d5dd3eeL,0x387d15619e3b9f56L,0x6e68c4c4a2b445d9L, + 0xea666a0682103ef1L,0x5d1c563756e00144L,0x6167168a056a94dcL, + 0x75af25224dd46bbbL,0x9d5ac0de25f2477fL,0x00000000000000d6L }, + { 0xb2c55ad2ad8ada45L,0x5e2a1d14c04094efL,0x2c7f76f93c359f4dL, + 0xfec96532462b8fb3L,0xc5ddd1a99a542665L,0xe0ff14b28d3fb2adL, + 0x802a81d51b16e7c2L,0x02eda0edd5bd8418L,0x00000000000001b9L } }, + /* 42 << 14 */ + { { 0x19d40ead8c3208a2L,0x921a28ae4b57239bL,0xd9c1cdf87c048e0dL, + 0xc0896b487c1924adL,0x6f8e11b70015cb8fL,0x93fbe340e490613bL, + 0x34f8734f8758f850L,0xaa9acf4f5cf61cbfL,0x000000000000014cL }, + { 0xc3636b8815758f79L,0x7c1d4f0928dc60c1L,0x48cb27e6b77f1edbL, + 0x5d5a94b007580b50L,0xb6934e25a129e7bbL,0x6f00412da44d9a6eL, + 0xe6e347f209f16d89L,0xa60de5594470a50aL,0x0000000000000086L } }, + /* 43 << 14 */ + { { 0xd94e16dd0f5545c0L,0x99311bd1655e0f8cL,0xbd28f252b6084433L, + 0x5dfc8beacb8a0b6aL,0xca27033f7e2964c9L,0x4e1d96942b5eacc2L, + 0xca11c059f2eab44eL,0xb9a4ae9feb61f161L,0x00000000000000deL }, + { 0x6840e5d242e77d55L,0x58235511a7df2d6cL,0xb00763753af2b1f8L, + 0x7ac404ff9f430068L,0xf5b2bc675e6129d2L,0x3d474d6c30d6fbbaL, + 0xa0c66d83afc5cd14L,0xa93c5ccf84651070L,0x000000000000019eL } }, + /* 44 << 14 */ + { { 0x28e8d7cdab4f88e3L,0x898baec98ec7fd82L,0x13328d8ec459c3fbL, + 0xa635e9b13b0493efL,0x039493381ddff9b3L,0x93f2886cff82a4fdL, + 0x9cb165730c609c2cL,0xab282b67ae8e6e06L,0x000000000000018dL }, + { 0xc16fbd160792dcfbL,0xc42bbe74adf64db7L,0xc85534c4315f21d9L, + 0x66cd4cd3747b8361L,0xcd26cbe3f33752b1L,0x185add8554d2bfa6L, + 0xac366f9f3364f9b0L,0xdb6b5e544188b82eL,0x00000000000000f0L } }, + /* 45 << 14 */ + { { 0x40394dc28eb60051L,0xe85d4323b94b7c23L,0x35ece007398df4b0L, + 0x4048e7668f3837c7L,0xd2968bf381699670L,0x341d0b90ef321d08L, + 
0x9fb3d866adb69687L,0xf649f7a01e95fbc8L,0x0000000000000040L }, + { 0x637c2c6a230521a6L,0x538783613fbf1391L,0x624386a14812d57dL, + 0xeac925d71c92e4b1L,0x886160dadd9f8fb7L,0xafade790ad700edaL, + 0x4795d6eac4a75e99L,0x30a507699c844dbeL,0x00000000000000a4L } }, + /* 46 << 14 */ + { { 0x7749f0fd407325c4L,0x82a14463fa3e21b6L,0x456f01251367a25fL, + 0x7fb481cd59345788L,0x18b408080fbab520L,0x44f57759821b00b4L, + 0x2260d7a7e88bb2c7L,0x3b6a935d609f7ea5L,0x000000000000004dL }, + { 0xa078b27909d38bfbL,0xa5a0db3da266eacdL,0x5b022ed5eb2a2fafL, + 0x6596a4aea49586caL,0x401a450d6f1bd9bcL,0x2a272cd4fe972a51L, + 0x1af0c66897c701beL,0x74c04b6721402335L,0x000000000000012aL } }, + /* 47 << 14 */ + { { 0xa046a8084e621a9eL,0xb61cdc55514a1be3L,0x45b894ae05e335b8L, + 0x2a7afd2f3f2cbd40L,0x9febc8b21d4cdc78L,0x8022fb1d990da9acL, + 0x2a544f9786ea08feL,0xc8f50cd46af5d246L,0x00000000000000faL }, + { 0x8325acc8dbea7bddL,0x86ec5d580a254408L,0x0328bd2f6b93bde0L, + 0xc4a75a47256ef2b3L,0x61b14aee0d43792cL,0x660894bc15f60963L, + 0xf06c411a9e2ec909L,0xc7bb2f80846c3b1fL,0x000000000000017fL } }, + /* 48 << 14 */ + { { 0x5e597d089b0a6b1cL,0x3ed100fab66ad53dL,0x9852718497c12a70L, + 0x295556e631779d59L,0x9733135a0d5a0720L,0x4cf945e66f937863L, + 0x8289d86b87679e05L,0x1979cf02f3b45d70L,0x00000000000001b1L }, + { 0xeca4684be1874574L,0x4cde87b963ec3ebbL,0x74486ff281b929caL, + 0xcf9c54dd5113e211L,0x95bbc0e1b8237581L,0xa92320f70fe355e7L, + 0x945a1a0345b9dbd3L,0xebbc1d0d4c99791aL,0x0000000000000143L } }, + /* 49 << 14 */ + { { 0xffd45977b2f6d655L,0x2a2895ec5dca5a02L,0x4b12c0cb0f0229d7L, + 0xbc5066638eb2d617L,0x7859a404b4414ebeL,0x25a4ed7bd0414b93L, + 0x967f9aa382670c9eL,0x1ad77f6fc430405bL,0x00000000000001ebL }, + { 0xf88be831313b212fL,0xe2e6cab7348e2d56L,0x357676d439019ec4L, + 0xff936f3869846bd4L,0x6345ca8d66ce27ceL,0x82836c0ec72384c9L, + 0x5ebe05967beacdfcL,0xfc1d54f4150fd26fL,0x00000000000001abL } }, + /* 50 << 14 */ + { { 0x6649d0ddd7587da5L,0xa9f6c67abc3b988fL,0x7ff9da31921c0c68L, + 
0x47453a7901728d6aL,0xf29a285af14e4958L,0x559fef9ecb3695aaL, + 0xf00d26d90d698abdL,0x9f2a87adf0e12bf3L,0x00000000000001e9L }, + { 0x1cdd21ff7823d0cfL,0x7817d994456ba521L,0x11a2d178e6b7b13bL, + 0x272ed174315c7508L,0xee909a0bdf995ad9L,0xe9432355b9194220L, + 0xfe5bd9b1b2458aabL,0xea8e397b7dcd8a61L,0x00000000000001adL } }, + /* 51 << 14 */ + { { 0x3dbd43387b55498bL,0x6e596b6bc82e0ebdL,0x382ef7e250a694bcL, + 0x3c840753865dac40L,0x37b13cd0267c7b8aL,0xa4767ad5cdd6b7e6L, + 0xf700ba8d60af4929L,0x3fdbfdfd4c96867aL,0x000000000000019dL }, + { 0x4f711cb378e1611fL,0xeaf744da23723236L,0xc3f6bda778ebdacdL, + 0x9aeef0e09e0053b5L,0xbdb6d71a90b16d38L,0x3765112f3dacd8caL, + 0xf6d3a468704cac3cL,0xe6f258412646b586L,0x00000000000001e0L } }, + /* 52 << 14 */ + { { 0x74ff5c5188e250d1L,0x46e6a1dedc911cecL,0xc28ba25b48c5291aL, + 0x0f571b2aa9fde68cL,0x1c7d3fe989952038L,0x40b66dab6a4850f9L, + 0xf2a3db3956af4d75L,0x9861204f03123ba0L,0x000000000000016eL }, + { 0x1ecdbbd45ac22cd7L,0xe7349a910000be6fL,0xff94c6a2327586b9L, + 0xe78c257f7e123652L,0x18e795c5aaebcaf2L,0x7dffc5b363c70794L, + 0x6c3b3d2067e76a08L,0x820b7e33afba9f7eL,0x00000000000001cfL } }, + /* 53 << 14 */ + { { 0x28046bad1bed718bL,0x89dd1a2a107fad6bL,0x72d4a58ce5be292fL, + 0x44c2bfb40b8f0633L,0x175b44b9acf0bc64L,0xe77b0389e2a0082bL, + 0xba56099662ce72f4L,0x88d47c05977a1195L,0x000000000000001bL }, + { 0xe08df997f1937128L,0x7be24d4e6d7b6de6L,0x2c1da8cc2895c158L, + 0x867b4dbde6095143L,0x2f9fd14167dda364L,0x746c8bb98f5dd0a7L, + 0xec978e1931fe3f41L,0x3c2b278da2b6be6eL,0x00000000000000abL } }, + /* 54 << 14 */ + { { 0x53305692201e8211L,0x453de0b663165a91L,0xb5787597bc0e4356L, + 0x49d7a66f6036286aL,0x4f3ec2a9545399b0L,0x87b0f978e70c2bdcL, + 0xc41adf47db793dc5L,0x6394b7ef20acc60eL,0x00000000000001f1L }, + { 0x8f24f67bcb1ba826L,0xfde4bb5c9e783ae8L,0x4ab170a0b072e2f2L, + 0x01968ba82be627d9L,0xe8c1a0115710c838L,0x9aa5552eff2c4c0aL, + 0xebf694e1aadea875L,0xcd34f51184feeee3L,0x0000000000000101L } }, + /* 55 << 14 */ + { { 
0xc1e4d3db1ef1e686L,0xb8a1403857560e2aL,0xbb3d623b47538074L, + 0x18921f9026b5e77dL,0xbb7096d508be51c1L,0x1118eb73c714f7a6L, + 0x0dfe997369e0fc89L,0xc28cba21f8a93363L,0x000000000000018bL }, + { 0x66c5603da2f03e1dL,0x4f17de957d775555L,0x26e00c87c2bb36bdL, + 0xf5a4806a3288e106L,0xf934a912a9a42ed3L,0xe374aa3df76e08e9L, + 0x3bb52d94983fa9ffL,0x74b832e55d421496L,0x00000000000000c4L } }, + /* 56 << 14 */ + { { 0x932645e87ada2a97L,0x21c7eba2e8300b13L,0x948ee3bec714208aL, + 0x7d4c3d65d66c4b41L,0x8373248e8530bdeaL,0xb053676048b836e1L, + 0x9db23d3975656dcaL,0x37da5fa90e294ebdL,0x00000000000001fcL }, + { 0x8715df66bb355ca4L,0x9f0eb8f4ca81ae22L,0x477877be167325c2L, + 0xde6d697564174912L,0xfd9770aebb0b5cd2L,0xd83513421999f713L, + 0x14fbdc847fecf8ecL,0x1700b51341521ea8L,0x000000000000006bL } }, + /* 57 << 14 */ + { { 0x1fb72fa4e1a987e1L,0x2ec447d4b3a419e1L,0xf414904f0af68019L, + 0xbd78ef312a752878L,0x84280607c9d3f03dL,0xd234588a676d8a40L, + 0xfd83257c8405dfdbL,0x9e40b20e5b0e8942L,0x00000000000001aaL }, + { 0xad94e86e8e352cd5L,0x28598b94e481b980L,0x6bf8a3fa3441e4f1L, + 0xcf0e604288c65230L,0x9e3cb7518097669eL,0x205666a00e1cc1eeL, + 0x0c18df29b2f5cc87L,0x418ba30d6986decbL,0x00000000000000b3L } }, + /* 58 << 14 */ + { { 0x440f76b5efc7c941L,0x2655270e0f135411L,0xf29c7ba6095b4edaL, + 0x7ec4aadc12c0b3b9L,0x162b1a6e2637926dL,0x25e1e9634eef2802L, + 0xd413ab20e56edc9cL,0x0d6ab23eca7901c4L,0x0000000000000019L }, + { 0xf1d31cac5c98bcd9L,0x939dbd4ebdcfca60L,0x0492c4cae0be4dfcL, + 0x68e939f3422574f3L,0x80996ded137cc107L,0xc714e969aff399fcL, + 0xe772477000fc77ccL,0xd980f985472af5ebL,0x000000000000014eL } }, + /* 59 << 14 */ + { { 0x5a28ef5122dc1e97L,0x9da0854654fcd9c8L,0xc6b613d635ad72b3L, + 0x413cb175fcfc0f36L,0xe16aa604bdfa0b08L,0x3e1bbeaf35235a92L, + 0xbcc910ff3b7d1193L,0x4f6b1df31c21441eL,0x0000000000000087L }, + { 0xa940ba897d940cdbL,0x93708a405b4b3a34L,0x67579e47cc7f3b19L, + 0x16260fc86b5c6b95L,0x0bac93e0a1c9c2e9L,0x5dd70dbe2d9c44edL, + 
0x01ed179d47ff1001L,0xd7e891ea2eb5f1fcL,0x000000000000006eL } }, + /* 60 << 14 */ + { { 0x330f31b6c916e988L,0x601fca7fa6e737c0L,0x99e063dcdf9c7b04L, + 0x49e97a9627cd93b0L,0x576fe3429a7c06deL,0xa6d8c4796eac19aeL, + 0x4c5cc139fc72dc81L,0xf34b69073533fe76L,0x00000000000000f0L }, + { 0x8d444d34983c41e6L,0x650af8bbe2e49bd6L,0x1794d4c0c1bcb88aL, + 0x798cb3d8b2fe435aL,0x8544a707e7433605L,0x82dfc6359fd393e0L, + 0x572bd64292981c51L,0xb451731fe001273dL,0x000000000000010cL } }, + /* 61 << 14 */ + { { 0xfd6a06190f7d5e62L,0x363d0d2820d126daL,0x56b94c82ec82e67dL, + 0xdc493f04d5e8870aL,0x936ac4b4c9cd9fc3L,0xfb6d59b2ad27c8daL, + 0xc94e470edf8f5ef6L,0x9e47d7f32e818bf9L,0x00000000000001a3L }, + { 0x384d552bcefa9b33L,0x02d8353268aade38L,0x61b0f08d8208634fL, + 0xbf13ac956235c74cL,0xa44deea4a108f58eL,0x485fa5c8a991ab4eL, + 0xd53cb2c59682a856L,0xa590f750072f2accL,0x00000000000001bdL } }, + /* 62 << 14 */ + { { 0x5dc324b538f9f14fL,0x0e1b29ca25417001L,0x244cc7596932af3dL, + 0x19787a6c2765e15cL,0x7144d48fb7fbbde0L,0xe898a059012c68f9L, + 0xc66ebe6d9d907dafL,0x02aa0fd3e114e429L,0x0000000000000101L }, + { 0x138b94bb6551e782L,0x326631f7bb5ff6d2L,0x21c17feac81f02a6L, + 0x72f829c4a1d73b84L,0x941c0390bd578fceL,0xd2c600c409bbf7a1L, + 0xb849f9bdd3438945L,0xc09307c24d330616L,0x0000000000000170L } }, + /* 63 << 14 */ + { { 0x6c553f60c70835a7L,0xdb249d1d454158b1L,0x34eea92e29f411e6L, + 0xd75f12f67037c4c4L,0xfc5cafae3a77bd41L,0xa7309f006680eae8L, + 0xa04eb002404a6360L,0x359a53cfc069275dL,0x0000000000000147L }, + { 0x2bb3ac825af98268L,0x1f3fdf5f50056aa2L,0xd82b90bbcc7a2c08L, + 0x5c7b793a7c4dbbeaL,0x7bdbb47f568ef299L,0x3b7716307b4f839dL, + 0x2fa53e49d475515dL,0xa0ef2b77608ea974L,0x0000000000000101L } }, + /* 64 << 14 */ + { { 0x4f0e025fa2302041L,0x4d50d98948d262b5L,0x01720a0ad622c7d4L, + 0xe3b5a94158d1c97cL,0xfbc5b18338aed2a5L,0x27f02177823b5d8dL, + 0xadf74074775f38c5L,0x95bf91f44f1a6ea0L,0x0000000000000104L }, + { 0xf9cd82998e2d6ab2L,0xa75b296b6c27631dL,0x077000491e00bac9L, + 
0xdc99e546ebd66975L,0x44fa528275e54993L,0x25fbca4bd50e44d1L, + 0x4663139381057b1aL,0xc22eff26bdcdf45aL,0x00000000000001cfL } }, + /* 0 << 21 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 21 */ + { { 0x3519394e7f6b9b0bL,0x76e8c23501aed422L,0xcb70fe5c64bf111aL, + 0xc6c025915b3c7eeeL,0xb53b8858f9ed8925L,0x51503afabb66cd7eL, + 0xa2479b22a6b3f2c4L,0xfbcb06c48eeee4e4L,0x00000000000001c6L }, + { 0x29f2ff47198bb516L,0x1895fb6877671ddcL,0x10a1e7541e2b80eeL, + 0xb19efc72a7732627L,0x88f9176e1e0ef24cL,0x22ef41af1fa7183aL, + 0x994ea31c47d7c8d0L,0xe838b3f2e9793ae0L,0x00000000000001b1L } }, + /* 2 << 21 */ + { { 0x39f73a5667d26eacL,0xe15b72b9938fa0e2L,0x16e09f8b554f85dbL, + 0x0e6d1cee02a16cc2L,0xcd989df35e548e22L,0x9ba195fc3cacda0dL, + 0xcfecc6f94ce28d02L,0x8b8f51e0c91fa6d8L,0x000000000000019fL }, + { 0x79f455b2c1a33bd9L,0x76514d33fe83b781L,0x3f5ec772f2ef9526L, + 0x2d1e47946ea54f2aL,0x472fb203c1e2d62aL,0x6e94a20beabf2c9dL, + 0x11da7502781a8076L,0x12ccf29a91ba87c8L,0x00000000000000ddL } }, + /* 3 << 21 */ + { { 0xc9518f3fb711958eL,0x74d3a13f0bc5dfb8L,0x7da464ac62025afeL, + 0xbfb139fa9a3e2fa5L,0xb30511cc9a8d7c09L,0x4d3a313f17e41faaL, + 0x7c53ff823a10c83eL,0x2e85a451abf3f866L,0x00000000000000d5L }, + { 0xe2844777df1d2939L,0xc3a4ccfe21fdaeb8L,0x47b7e6138c8f6176L, + 0x038b43eb1b6571d0L,0x9f380474e3f47c8cL,0x623e3416b97bac68L, + 0x625b6bf1bed4caefL,0x6ac6e0c81664fcddL,0x000000000000011fL } }, + /* 4 << 21 */ + { { 0xe7bbf4a8d2249935L,0xbaa56999adb64049L,0x5e0ecae945f8df55L, + 0x20b22a2f64e2a249L,0x480102231420d76aL,0x24bd8f9ffb45acfcL, + 0x66e9c51167673f8dL,0x719eda3c1f02406fL,0x0000000000000138L }, + { 0x56f88274d07c2fdfL,0x2b82612a54c1db4eL,0x0d4ae93896c3b009L, + 0xf97a730e5897ed7eL,0x3e3505c0396f9c18L,0x20d682e4daea8f31L, + 0x05fe6ea4e85bd289L,0x6e05507b14613e5cL,0x0000000000000164L } }, + /* 5 << 21 */ + { { 0xf0307ea7a3abdf0fL,0x25bc4d1d49de2354L,0x2b8ebd094b8160b8L, + 
0xf437b69e3a63c866L,0x59c64b2fdf07da1aL,0x817d2723ba53a71aL, + 0x1ec10e309715b466L,0x852eb71d4b5f821fL,0x000000000000010dL }, + { 0x3a53ed2dab617319L,0x0279fb691e3accc8L,0xe2746511030858a1L, + 0xd5411a7f4fd0acffL,0x8f31def309c66cf1L,0xf8b414cf85c0c9f7L, + 0x15222a47d2565b67L,0x732e7eb6471411c7L,0x00000000000001a7L } }, + /* 6 << 21 */ + { { 0xbc36eed32f7bf34dL,0xfcdedaaa7cce180bL,0x3b719b5b5cb22fdcL, + 0x6ae5bdd9e4111433L,0x865a0148381f4186L,0x170a523b9245c6f6L, + 0xe3be816ac2b1118bL,0x30e7aa62174efd73L,0x0000000000000033L }, + { 0x1abfe12c4ca2f7c2L,0x60485268733b1f6aL,0x82690e31a1d38bd6L, + 0x3a55f831d862dc8fL,0xfb3e4436e03e590aL,0xf265ed1075222c2cL, + 0x41cf2d8753b1fb51L,0x12922d525a82041bL,0x0000000000000060L } }, + /* 7 << 21 */ + { { 0x5b6a6ca5ef7dc356L,0xc47b9c0f55c1a178L,0x89eb8747c07b52b0L, + 0x097775cf736ebbe8L,0xcb84611f506fbdb6L,0x2135b7da68c82d60L, + 0x1084122dac4454a3L,0x115da2f59c93e278L,0x000000000000016eL }, + { 0x743591776721fbbeL,0xd051798061337a71L,0x78b344de3aa24943L, + 0xf41a1bfb0c600b70L,0x25f8d92f0af82b82L,0x1ca931aa74d2845bL, + 0x0a49f46da9adc76eL,0x78678a328a130e6dL,0x00000000000000bdL } }, + /* 8 << 21 */ + { { 0xb5f5385146d39a88L,0x58a99a0f9cc76debL,0xcfdd909ef3825b82L, + 0x8f49dbdcf4694014L,0xefd505fe9cfec7b7L,0x66a7f2a156925281L, + 0x6f478d9b2a604fb8L,0xcbc72e9e60f7c436L,0x0000000000000021L }, + { 0x5bf75682d9c31e98L,0x74d5cd73c8bce143L,0x778bd5f1c700e94aL, + 0xb01480c73a66b233L,0xc667f078688df0ccL,0x780c73e416bbcf57L, + 0xb77f5ba9485890e0L,0xd62addf5b9ddacb3L,0x000000000000017dL } }, + /* 9 << 21 */ + { { 0x672c965fd2ab32f9L,0xab2a374db95c87a1L,0xe4fe8139aa132e05L, + 0xb98cbbc73f85bf7aL,0x725d68d9d7ccb3cfL,0x582b15ae213f62efL, + 0x11c9b3fbd26e9f8aL,0xbeaca8d74076121eL,0x00000000000001f7L }, + { 0xa1ba33fa48888485L,0x9bdae7eee5ea2744L,0x4ac9910c709eeb17L, + 0xc40fb92fba0596edL,0x5ae8178f018b8c4eL,0x195d2620536bad49L, + 0x6ae6807272578ac1L,0x6c759dc05272df37L,0x00000000000000b9L } }, + /* 10 << 21 */ + { { 
0x28b82948eb24a59cL,0x0bf25c631b236d05L,0xd387a58c49bfeed9L, + 0x2dbced2791685729L,0xa1cb27d76177f87aL,0x090e003097e13ddfL, + 0x20eb9c50449d6867L,0x83a4a7d47cf7d05eL,0x0000000000000055L }, + { 0x65e05f87d0f35fe0L,0x655acaba791ab720L,0x55a8ce4f5da1223cL, + 0x995fb0091d34dd73L,0x20286c23a837ca4eL,0x9fb1050b368d1ad1L, + 0xc5fa244d09ee2148L,0x6cfa02a5fdb0a25eL,0x00000000000001b1L } }, + /* 11 << 21 */ + { { 0x22421cd9ffa96a6dL,0x9d55702c11437d61L,0x7321fa9eeefe0024L, + 0x6dcd329c64264faeL,0xc46bfa2b9df8f072L,0xae3d0b0bb712b9bbL, + 0x58872fd2843ca51fL,0xfd7ba4be6a587093L,0x0000000000000085L }, + { 0x6a5d962f34e5cb31L,0xbee0621565638aaeL,0x1c6f68bce79e0fa1L, + 0x2a909815c8d586b0L,0xcd970a5f1e6c2e8bL,0x5d111730d4788cdeL, + 0xf1e99fa3f9502cb7L,0x2820507bb7a2fea9L,0x000000000000015bL } }, + /* 12 << 21 */ + { { 0x2840a083925c2709L,0xbeeb776d988e40dcL,0x9d2307cb281c1df1L, + 0x17b077868a93579dL,0xec15d5f787d44e7eL,0x78b701a8e8f57a6eL, + 0x514706b67b1c05ecL,0x7bde81a7918bd719L,0x000000000000014cL }, + { 0x41d47f48d22c8109L,0x48389e6b0794bcadL,0xb520a2fd65114ee0L, + 0xad7526c779202240L,0xc25e665af69e5952L,0x180f827aab014a8dL, + 0x7ec96758db437718L,0x16b626051ae01bf2L,0x0000000000000159L } }, + /* 13 << 21 */ + { { 0x338766eecc06a7bbL,0x38189d60f59f6fb9L,0xca63fefc094f4b7cL, + 0xfadabb8f08ed44efL,0xf56c6c633c5674aaL,0x2ec1c71c3c5178f4L, + 0x9a2f00872b225bd4L,0x7cbf794d2016171fL,0x00000000000000d0L }, + { 0x08fc4051c49781faL,0xacefd0a45580fd95L,0x2f99b4e639b0e279L, + 0x88bc9e961fdcb4f4L,0xf3a44b826f6877a2L,0xf16942811529b850L, + 0xc94ab84f2fac2a36L,0xdddea7e816a99317L,0x000000000000007bL } }, + /* 14 << 21 */ + { { 0xb6fa89c0c3d79838L,0x05627f88d9737181L,0xd82f21517555d940L, + 0xc6f6a5ca0013b808L,0x54dd8d247472b0c4L,0xbe50f040fb2fe264L, + 0x43656c23cfed9e37L,0x35bd9e4cb5cb0103L,0x0000000000000150L }, + { 0xb9da98d1c1640493L,0x6f00d8cf3420374aL,0xf9b4f2fab2892822L, + 0xb2d967e790404bd7L,0x520b40fcf966a3e1L,0x4eb41624d081f30bL, + 
0xfdf4f5d90f5d4fefL,0x49b561471a1cf19bL,0x00000000000001a5L } }, + /* 15 << 21 */ + { { 0x5312245c641ed37aL,0x88c4f135b0b9a3d5L,0x30d1dde090934ee0L, + 0x2b24f2af7109bd4bL,0x9894c85859f2fef6L,0x0c3a394918ea397aL, + 0xb69afb5c6556487aL,0x3e722abc2b37f147L,0x0000000000000014L }, + { 0x4f3625ece9d75120L,0xe8bc9f6e760f4777L,0x5ec9f7ad26990adbL, + 0x12ce89f5a71fa52bL,0x162ca13a59e66240L,0x30e35e8e2c327a19L, + 0xcc9671c352274f0bL,0x1c3179a8ad16ddeaL,0x0000000000000043L } }, + /* 16 << 21 */ + { { 0x173ab94cd4820fa0L,0x6ac5ae1344f0e094L,0xf67c3f7164fc4c70L, + 0x2648a59a2ad78517L,0xa7b344939505580cL,0x8cab6b445a741d25L, + 0xfe41bd4563355fa6L,0xd2731f41d1091a6dL,0x0000000000000096L }, + { 0x2550a62286e4f90dL,0x65fa978b80070b03L,0xf3ac57b5e3b526a5L, + 0x8e9207bcf84fa4bcL,0x675a8e31f88e6047L,0xf648cc7c9050ddadL, + 0x1fd62dbcbc5b7dc3L,0xcc0960d696f0aeabL,0x00000000000001daL } }, + /* 17 << 21 */ + { { 0x35b9693643afc702L,0x7ab761253f64bf46L,0x74536901af94fce5L, + 0xc398a29f000050c4L,0x253372e11f61a444L,0xbec32a601a7a968eL, + 0x30f79ca4d32cdd9eL,0x8fc8a273d36aa188L,0x0000000000000199L }, + { 0xc1011950aa637877L,0xcb16308c8348afadL,0xbdc517d0d75d1a04L, + 0x2416e9476e49496cL,0x536ff3587ffa91ddL,0xebfa91de093d5f53L, + 0x153def3fb776f2e9L,0xa316de0622aae585L,0x0000000000000077L } }, + /* 18 << 21 */ + { { 0x993330c9fc3f22cdL,0x79577855bd4eda29L,0xa63d19d45c666141L, + 0x0ccb6a6a0146d928L,0xb074cc0b404bc7b7L,0x1f7a96006b8f40e2L, + 0xbd70c74eecad6cd4L,0x822420f05348f77bL,0x0000000000000151L }, + { 0x4a3aacda27f09596L,0xaa0514071e083dd4L,0x0424ddfeb91831f5L, + 0x0131f82ae4d16705L,0xd233fc30983d9eb6L,0xfc5e1759f3dd3cd0L, + 0xa114e50adc95864fL,0x5ccf299fd01ca0b8L,0x0000000000000019L } }, + /* 19 << 21 */ + { { 0x4addcb6e89602aeeL,0x0ba1477fe7a864c4L,0x6896c0d0f2013ee4L, + 0xf5447fc09de2ee99L,0xf2325a0a377aab46L,0x96715e9cc03d1c27L, + 0x3c1b0e332e7c82afL,0xb8a1bc163c32e96fL,0x0000000000000124L }, + { 0x5285e546d427a41bL,0x1e61fb9d07e6faadL,0x6fa31b8c157445aaL, + 
0x613502752276672bL,0x53c4145cc150c553L,0xefdeade47ae2f69dL, + 0xedfc8ce4622f4c92L,0x30cacfbd5ecf6936L,0x0000000000000156L } }, + /* 20 << 21 */ + { { 0x49d65486dfd05db3L,0x18ecda88925480efL,0xe5b4aff93f652a32L, + 0x80fbd64756e37478L,0x62442ef6e9be38ecL,0x8dea0ede229ca487L, + 0x59628d74c45becfaL,0x235003453f7398eaL,0x000000000000014aL }, + { 0xd54645dad2ee9439L,0x35245813ba3abc64L,0xe3fc1027aa7cd95cL, + 0xeecaee6787f95eb9L,0x9985e5e9fadb71b8L,0xfe172fffbd62ea7fL, + 0x08830ae729eba895L,0x32f7a9479ea9c27aL,0x0000000000000167L } }, + /* 21 << 21 */ + { { 0x770d7b309a9f0e06L,0x205abc30d3efacefL,0x2066b6add8556b1fL, + 0x2c8caabeea298703L,0xaf766f04735b610dL,0x7c5f3a1083028232L, + 0x92971e06f3c16a5fL,0xc5f89af1fb4e3fb4L,0x00000000000001f1L }, + { 0x0a43e9f2974cc63fL,0xe8c4f4a95430a8a6L,0x7ddaf6fbfcb45c3dL, + 0x40f4d1e3666ef219L,0xe11fa195c2a08ba7L,0xe4176c0448cc0ad3L, + 0x31da8dd0809145e8L,0x528f736389a99e31L,0x000000000000009aL } }, + /* 22 << 21 */ + { { 0xe30c5fbf95151b35L,0xaaffaa4c155b8c7dL,0x4033f52aae3e24abL, + 0xefcad4675b76fa3eL,0xa0bcb1bd465985c2L,0x7640ccc01c9d6d1eL, + 0xae11fe7dc809f0feL,0x33f7a2ae8005b8ffL,0x00000000000001c7L }, + { 0xc69cf35dc2423d81L,0x773a30c54c072a0bL,0xefdf7d157240291cL, + 0xdd1706dd64a77c60L,0xd313db32365ddda5L,0x3d4114c15a818bbeL, + 0x83081bbb0394b6b9L,0xf661d4963d411defL,0x000000000000011dL } }, + /* 23 << 21 */ + { { 0xe5de8769bd6b5de4L,0xbcc2c3222ca8a529L,0xc18bcfbfe5d4e70dL, + 0x9756709ea7699d37L,0x3b89a6dbe98a7c6cL,0xe184ab40dd776ae3L, + 0xfd37deff67292edbL,0x462b1ccb170fc654L,0x00000000000001fbL }, + { 0x3009aa92647fbcf1L,0x8785ad4545f17f6bL,0xae5f59f29bda68c3L, + 0x77316cdb713844d1L,0xc3895dbe491a5e2aL,0x6dcdee854abfe1d4L, + 0xfcebdedbde5388b7L,0xf805a43359094b22L,0x000000000000019cL } }, + /* 24 << 21 */ + { { 0xf7ba638d767335cfL,0xb6fb9463733c2346L,0xa0e475a1ed81c85fL, + 0x5d2fc9f1cb58e955L,0x941fbdc675a1dfd5L,0x732d6cea0f14f1c2L, + 0x4c13e55921367160L,0x5ba5a950ef0e8d39L,0x000000000000007dL }, + { 
0x355dbbb2bbeb43f6L,0x0adaf151b3505972L,0x01976a0482f0d0baL, + 0x080824b6fd4be88bL,0x8392b272f4424f37L,0xdc69d67872f6efd8L, + 0x0a4b8ba4a06b0ca3L,0xb731404cad3fd7c9L,0x0000000000000119L } }, + /* 25 << 21 */ + { { 0x8c4b893e8438ab04L,0x426e9c015b7cbc7eL,0x3a294e956f565d53L, + 0x69f98657888e6f0cL,0x6a9486adf5b803f6L,0x687e5264ebcd577aL, + 0x6cdc93244a205a78L,0xe10a9e9101c97e5dL,0x000000000000001bL }, + { 0x42d58686fb52764bL,0x157871b80b39ec31L,0x75a0505c0126704dL, + 0x96dfd8c2437c5a08L,0x5302bc74e97bb9d4L,0xcae0092ca0fbe05fL, + 0x4fbf134085caaaa1L,0x6d4de19cce5683a5L,0x0000000000000139L } }, + /* 26 << 21 */ + { { 0x45d1b5e3ae0d2bd0L,0xc13cdbc8ef5c8f0bL,0x662be7feef589951L, + 0x423d544a2ef4171dL,0xe796a4fd9bfaa54cL,0x5e9de83af9b57752L, + 0x2042c3e8074e5cbfL,0x6c4961a3ae564697L,0x0000000000000001L }, + { 0xe64d92cdaa69b5edL,0x7ca022404de0366cL,0x385792b3c4de9d34L, + 0x75b4e2b8b781fad5L,0x77d2671da5772294L,0xc636df75604302d1L, + 0x35987ba57c003a82L,0xcb61e2501756909eL,0x000000000000011aL } }, + /* 27 << 21 */ + { { 0x399a20701976dae9L,0xf6f2e4571b71d3a9L,0x0bfd6fc7e9b58f96L, + 0x8164107dd991b706L,0x7fb03af7769ff866L,0x0af6920107c255c8L, + 0xdf5033bbe1aec752L,0xedd2e23d0349d656L,0x0000000000000030L }, + { 0x48b220ca845c71bdL,0x60aab3582feac689L,0xc6d4210a5c225cfeL, + 0x5b76afddcd64d859L,0x006a820512c63e37L,0xfadfd8bbe741c044L, + 0x1bee33806de9fb14L,0xc9e8b7a6ac7fb855L,0x000000000000005bL } }, + /* 28 << 21 */ + { { 0x03e7b349f5aa1704L,0x2b9687f12b58ae4bL,0xffcdf578bed31f46L, + 0xa9fe8c654e9c3260L,0x6ba91059a84d3576L,0xbe8c2ce937adc2cfL, + 0xee24bad42fb77257L,0x44fd20b693ddc7b8L,0x00000000000001e9L }, + { 0x0712f3d6af6ae6cdL,0x514a5d50bce47d88L,0x4c6cf0d6b218f3e1L, + 0x7d9faf6bbe3ebfe3L,0x819449e21731f3f1L,0x7f24451d0be0a5ebL, + 0x5fd2cc6d6f0fce5eL,0xb9ad4dddfc0930cdL,0x000000000000015bL } }, + /* 29 << 21 */ + { { 0x89bcb72f2965c1a3L,0x78ed158faec32473L,0x267c90783c9aaa3bL, + 0xd76fa6e14f0cd352L,0xebd853f87afb0f9bL,0x95af77ace6185ea6L, + 
0xe4203b726f3b8b65L,0x7ac011da5e19c531L,0x00000000000000d2L }, + { 0x6a32dcf32bb4a48fL,0x175e8a50892a8375L,0xd4c5362a9b6237f1L, + 0x82a7c950596608d4L,0xae9aded7238fb8faL,0xfbdad9713df49d9bL, + 0x53f6cf7db8a3ecdeL,0xdf1be8c023bb25c6L,0x000000000000001dL } }, + /* 30 << 21 */ + { { 0xe0af085cad265654L,0x96a9b06d0e9ba333L,0x0498e6bb80075bf9L, + 0x0774434cfcf89a88L,0x15763bab9d96f967L,0x7ad273ba38775e47L, + 0xa68abae987147b51L,0xc5afb6878f85dc73L,0x000000000000009eL }, + { 0x189583a3e29591afL,0x6ab0c471c24ca220L,0x76fefacd27b4fd7dL, + 0xc550d3513e87b357L,0xae78a070f6961730L,0xc3a5de91c814eb1aL, + 0xdcf655c51e9f0ec6L,0x1603d1090f632d7eL,0x00000000000001dfL } }, + /* 31 << 21 */ + { { 0x655aac17eac519cbL,0x067206c2476f0c95L,0x4ae4ba9e51b4934fL, + 0xb1c15e7aa0cbfd9fL,0x7fed60c898298b00L,0xb3e6e1a265b6c888L, + 0xf518f046f462720fL,0x3639f35c665b8764L,0x00000000000001c0L }, + { 0xdb15a3075d4b45e6L,0x84723914fcfdc41dL,0xcaefb6d932ec163fL, + 0xd1731f430c7b5872L,0x85d686fd05f934e2L,0x513e444327b0a36eL, + 0x2d051e22c7b3369bL,0xb4398e0fd3c6239dL,0x0000000000000184L } }, + /* 32 << 21 */ + { { 0xddd7a0733f0741e6L,0x6d74b1e2a4689d58L,0x6f6037646798ba3cL, + 0xdc038772b996b179L,0x6e7e797ae1668e1cL,0x551f4a8e3e285251L, + 0x705484e5e67fd032L,0xa65fdfcdebcceea3L,0x00000000000000eaL }, + { 0x33a1c053acfc4123L,0x25030e6abbe1fd24L,0xd9d6af7100d3d90cL, + 0x063f818bade7d232L,0x99630ac3521ce597L,0xbb594d0eb9ea28edL, + 0x3f61acbd3a1fb8eeL,0x7e00686f88caade2L,0x00000000000001a8L } }, + /* 33 << 21 */ + { { 0xbf04c2f10188a108L,0xd97a675f9bd5cac3L,0x7cf307ba3d550f85L, + 0xd92bf5638175fa0cL,0xc0b15ced245f8262L,0x21bc381f3ac2e314L, + 0x0546ca1b630688d0L,0x175e7dc3c4b4496bL,0x00000000000001beL }, + { 0xbf335d90428c7969L,0x0b0387dd9505806cL,0xa6ebd4a0fef04f3fL, + 0x019c08a6f16cd1eeL,0x159f950c8516a54dL,0x884c6b4c08e3a0ddL, + 0xb24a63a5f481327fL,0x0f59393fc58c7694L,0x000000000000006dL } }, + /* 34 << 21 */ + { { 0x9e5dfe7511482722L,0xd5cab6054b5e60d9L,0x4fc124c7091e9a56L, + 
0x1bc8ffa6100bd803L,0x415d2974a1c1f05eL,0x5deb11b2328462a2L, + 0xad47e4ee7cb76829L,0xe28e8cb0734c35faL,0x0000000000000156L }, + { 0x01bfee8e695d79adL,0x632fa08695129a0dL,0xdf01f6b258964d2cL, + 0xa535f7780eb0c0feL,0x437acc8b6f055b47L,0x1e090aa3051435eaL, + 0xabb69b8a1ab3ecedL,0x1ef06b846365da48L,0x000000000000018bL } }, + /* 35 << 21 */ + { { 0xb6a65d464fa97150L,0xb19c96e55e1dcaa9L,0x2a6a862687e3b1c3L, + 0xd4b9123d8d849559L,0x56b91af2419166f0L,0x84129be9531e2638L, + 0xfb4a404a41f85918L,0xd871249af9933f3cL,0x000000000000012eL }, + { 0xefc9fb3fc819be78L,0x3c47fb8f43aa377fL,0x431fa13f86e32edbL, + 0xa6fca0c0f64f51c5L,0xd04a845a6d65828aL,0x2c545f532d623133L, + 0x979e09ca20bf1330L,0x64b6bd355d28805eL,0x000000000000004fL } }, + /* 36 << 21 */ + { { 0x2303e5e45b1a63a6L,0xa8610396589d2437L,0x27c48ce9d7d1d687L, + 0x5032499a49a2c92fL,0x30727e711af2969bL,0x2668c8f074c335f4L, + 0x9d11c2808dc25fe6L,0xc1523a37506d1e4dL,0x000000000000010bL }, + { 0x8709ecf62fa178dfL,0x0eb58c3af8acc7d4L,0xed6a352bd28b9c62L, + 0x9b232108faaa811eL,0x0cb3f9aa038718f6L,0xd05191930f05ad17L, + 0x67bf6bec1293e6a5L,0xa3508a7f9abd18f3L,0x00000000000000dbL } }, + /* 37 << 21 */ + { { 0xf1555428b42a7bd1L,0xca229f80867797e1L,0x23196735437fd7e0L, + 0xd6fc95759fda4046L,0x8731b144c2ba4501L,0x748a43a0bda4783bL, + 0x52878286165a1976L,0x345e999725fe070eL,0x00000000000001d8L }, + { 0x0193b975d77d2d63L,0x9d406d507cb07465L,0x5d583f3f36396728L, + 0x236de10de9208950L,0x5f11d5c8ad10ddd5L,0x5d6a91ce792b9f41L, + 0xf7889239e0f9ed3eL,0xa6278c8f7ed1f09aL,0x000000000000003aL } }, + /* 38 << 21 */ + { { 0x4658a9436e35bfe0L,0xf279bf8ffef18cceL,0x3d61d8ba12d3e8a1L, + 0x67988c8558473f0cL,0xecd48fb0d8032914L,0xb5c547e7a044eadbL, + 0xf1f61987b5cf8144L,0x90c69abfbd65f4c5L,0x00000000000001d6L }, + { 0x6dec092ab30cf08aL,0x5a64596a78a36a3eL,0x53cfc2f19e10edc3L, + 0x82422ac095ce9b54L,0x0b10c3c0c6b2c86cL,0xc4dfc68d6242e7f4L, + 0xcd0fc4557cc7de9fL,0xe0a9c2568e4beec1L,0x0000000000000147L } }, + /* 39 << 21 */ + { { 
0x7dcd4b4eb68ba235L,0xcaa7d77bd7b38b21L,0xcd6cb4594112e083L, + 0x04b016fdda7d8cafL,0x5c12612e23c89818L,0x02ba59c59b3e18caL, + 0x834f4c029a9ffffbL,0x5da192d72e725278L,0x000000000000003bL }, + { 0x3aede0a9980ddff6L,0xcc057c5354ec8b06L,0x7467ec0755f24b26L, + 0xb3e3163b82166403L,0xf3b64bb023f83247L,0x92f1e85be0a32218L, + 0x7995216504903b6eL,0x0e3d1d200ffbaf22L,0x00000000000001d6L } }, + /* 40 << 21 */ + { { 0x1730b2a5c9b816e5L,0xf2c2a1bb5531ee33L,0x1da21329659e0b2fL, + 0xbec25192c88bce0cL,0xddd4b9e6ac6f52abL,0xf15bd68f9e4b9f7dL, + 0xea9e12e67d249b4fL,0x56a65055451abab2L,0x000000000000015dL }, + { 0xc4d7dc72c4b6910eL,0x5fd335922ddff22eL,0x43e429e2b8287af6L, + 0x258a4647a1585685L,0x31f72f413b62ce95L,0xf63172fa57933b4fL, + 0x54749d3a0c5cb4d8L,0x514da5d28cc931c6L,0x00000000000001deL } }, + /* 41 << 21 */ + { { 0x896165235e844515L,0x0445b78da349dfaaL,0x2d97809a98cd85abL, + 0xa26356f9acd297c9L,0x23138b25dfcf5a42L,0xf5c56101752030d3L, + 0x561b7e4288014607L,0xda38360218c12c54L,0x0000000000000052L }, + { 0xa13a8ae1f3c90e7eL,0xdc8c1d54456ccc88L,0x88f1c0dc733a77aeL, + 0x55e1567fceb2924fL,0x300ab8cef0291fc2L,0xe4710c77ed9f68aaL, + 0x623ef9b378f1c0afL,0x90b6c115031e610dL,0x0000000000000055L } }, + /* 42 << 21 */ + { { 0xaf881c6a5ed47898L,0xd583a12d314be8cbL,0xc871d32a13385ecbL, + 0x4a3b26ef9abb78b6L,0x10bea9273313c278L,0xc5e4fef09e2de3c5L, + 0x899e5a357f670b65L,0x7af654120a813003L,0x000000000000000bL }, + { 0xab30982a8e0cd895L,0x3ab1a9f32bbae855L,0x1cb2164cf9f45ea1L, + 0xdad048026dc7de22L,0x31422acf309178deL,0x86ba73a70880dc8bL, + 0x53255902a44f7762L,0xf8b3d2e6ac090679L,0x000000000000015fL } }, + /* 43 << 21 */ + { { 0xc81e8be2e96fc1efL,0xe9b8e8ecdf4a79a0L,0xcef3624371875b4bL, + 0xb76d0eb976c9298fL,0x04a9f25a7a2f4f86L,0x5ebfbe16b98eed69L, + 0x591258f21cb923a3L,0xd2704654ca8f4451L,0x000000000000008cL }, + { 0xa74e395adab82d0eL,0x1838dbcba15563c4L,0xcb3ad95626785cf7L, + 0xadb39a9392ecd059L,0xbf1440198a788f6cL,0x75a7eb68cdfe152fL, + 
0xb637e415a521d720L,0x2224b489166357b1L,0x0000000000000144L } }, + /* 44 << 21 */ + { { 0x7201e2d03e49e1acL,0x11258a5fa85d74edL,0x3d5f97d1aa5d5c55L, + 0xb0353c296874d83cL,0xbb008111fe79c52fL,0x1bb9d0a52da4d5d7L, + 0x6c0efdbd45897053L,0xc65636e774481945L,0x00000000000000c9L }, + { 0x71b21b9b33347b52L,0x84ca3f41d1ced26cL,0x49d65cb043c68921L, + 0x601a6edc743b9e48L,0xa97dc603e90fc101L,0xb42db9c7d4aa7153L, + 0x4440eced6c624cd8L,0xcf52d4b8c03036a6L,0x00000000000001e2L } }, + /* 45 << 21 */ + { { 0xb6b5541f80eea99bL,0x5e05ba1d0d5c66e2L,0xda681cef35ca870fL, + 0xe9ce25959f52ac0eL,0xaa144a0897103546L,0x2e8adeea8bc0324fL, + 0xde71c23196352390L,0x5ffc158b1190a231L,0x00000000000001eaL }, + { 0x5a6d7ebfd3c77423L,0x087f3be2aadf36cfL,0x44b7382ecac813f8L, + 0x9a7ea85fda66aca5L,0x7e5e106813af7225L,0x40e6c5730690c976L, + 0xe2bb8ee55cda95bcL,0x89ab8fbbc7d15d87L,0x0000000000000131L } }, + /* 46 << 21 */ + { { 0x2b26bc1e17ca1ea5L,0xabb1ea0d563a9b23L,0x4cc07a7612c5592cL, + 0x752dcb1940938649L,0x8fc2268dba4358cfL,0xc345a36ba21931cdL, + 0x89df99fef439d9cfL,0xd9befa2b4f0c2580L,0x00000000000001c4L }, + { 0x01e1ada7484a52cdL,0x4e80d9c73ea4c56fL,0x48c0b1526b8a8abaL, + 0x8a3649d3a4a434e3L,0x9c200c7ff18df9beL,0xe4cf8e1570e9b087L, + 0xe5ac0206645b233fL,0x1ae5ea925372333fL,0x00000000000001fdL } }, + /* 47 << 21 */ + { { 0x56086f88e91f967cL,0x5b068412777b581aL,0xdc912cb06758892fL, + 0x619754c616bcc341L,0xd9e47125e612c547L,0xc12c85685dc7a039L, + 0x40e97019a27fd186L,0x7959dd2a02b1678cL,0x0000000000000006L }, + { 0x586f5aa0a27d91b6L,0xc2305d49c6dda71fL,0xf2d049553d6d14cfL, + 0x9e617cf9d31a1d03L,0xa356ea57cf1d8491L,0x65704bd38b75a624L, + 0x5fa767156b36400aL,0x975fbf3565a5489dL,0x0000000000000180L } }, + /* 48 << 21 */ + { { 0x0febef06cb4a809eL,0xa8af434c261b4facL,0xe3b7185059d1e70bL, + 0xb62f01e45d1c24f4L,0x8abe4e3db303a2ceL,0x91a5a10f36e2c2a8L, + 0x710bd5ca7e472ba9L,0x14a31aa677a05ff7L,0x00000000000001bcL }, + { 0xbadec183453e07baL,0x54c92ec41e3087a2L,0xb87b0b724a6e06a3L, + 
0xe26c6162b5507e77L,0xeb64a07175faedaaL,0xde1c75464988a08dL, + 0xe99b34851ff0a98fL,0x29a660c2053f0e60L,0x000000000000013aL } }, + /* 49 << 21 */ + { { 0x06ed9df68c85e279L,0xde8923fb07d26dabL,0x9de21f265df321eaL, + 0x46573469d2739ec1L,0x9bd77ab28eaffbefL,0x99493fc132417837L, + 0xdbdb09c5fa664abaL,0xa128e72276e35b38L,0x0000000000000108L }, + { 0x89bd3d466e299be1L,0x909cae0e56999cb7L,0xd14abf38cde8b394L, + 0xb449bc09d7c8d422L,0x0a2f935904ef1f5dL,0x164492d22c962ea5L, + 0xc334ff1d4322bdc8L,0x0ac21031d0e9b113L,0x00000000000000ccL } }, + /* 50 << 21 */ + { { 0xf0185304edc95e01L,0xc200becd9186d5d5L,0xbc9946c03722a9d4L, + 0x73d2b80aee0c0f7bL,0x3ffac114c25f680aL,0x86a201b64940567eL, + 0x1744de6eddd392baL,0x9a30fb2334cd027cL,0x0000000000000078L }, + { 0xc5e2e29d26372a22L,0x82a3e1cb9ea293dcL,0xd470ef090b09afa0L, + 0x44a6a43be89990a9L,0xffabe7ec82f9f8bfL,0x8c1227e6f274a2b8L, + 0x73b2f7bba3b60869L,0x1b2510533ae49e15L,0x000000000000003aL } }, + /* 51 << 21 */ + { { 0xc6ce482996fffaafL,0xc3f952ef261e06b1L,0x0d07b3310380850fL, + 0xa2e1b0974bb1fc31L,0xf060f318c059a3c1L,0xef8ae3b2316feff5L, + 0xc8cf197fa5686663L,0x5f974b59e2cba233L,0x00000000000000e0L }, + { 0xcc26bce6f24e9213L,0x5d59296170c1bfbdL,0xbb4681d331d2656eL, + 0x4271c96450f0683fL,0x5a9976d944783a96L,0xd3ed0a6a41eea6ecL, + 0x9f24f797beae86afL,0xf38112f792dadcecL,0x000000000000001eL } }, + /* 52 << 21 */ + { { 0xba2cf7c45cb6324eL,0x7bba7a0805b7da17L,0x53084c6758c59b62L, + 0x2cd1c9a43cab8df1L,0x0cfd3c151f38d256L,0x373df9d2a34a65bcL, + 0x88a92c31e815057fL,0x11b2a0e5b8c0cad1L,0x00000000000000c5L }, + { 0x97f74ae939d6533fL,0xb04e5e9516c73570L,0xf7a53b4ffa446dfbL, + 0x61dba2b24b4d094aL,0x24fcf80996f67da4L,0xea60ebf50b21ab78L, + 0xe4d76de60bf97bb5L,0x95032a893c1883bcL,0x000000000000008dL } }, + /* 53 << 21 */ + { { 0x55ae0b1461612346L,0x9b0f5c01b19b690bL,0xed0a114eb5185164L, + 0x852e56403471d444L,0x54c370df67c7af55L,0xf3573f9e9df278c9L, + 0xf658b02c593c852fL,0x353dc49805bbf9f2L,0x00000000000000ccL }, + { 
0x3f5d30ef08d3da4bL,0x40be4093f479393fL,0x67b87661778da315L, + 0xf50ef8637512f2ccL,0x4c8c728ab69c92c9L,0xb8a11ef63cbf14a8L, + 0x75df83bc26cd8287L,0xa4cf8319c99c3d51L,0x0000000000000054L } }, + /* 54 << 21 */ + { { 0xd5b6600ff0c1462eL,0x532c6476588cdddfL,0x97737ad126ec65e9L, + 0x88033b8f801516dfL,0xf9c06f4a55cfa617L,0x5be87436cb5d8557L, + 0x4245d3d2f48c909fL,0xeb3b54849a6b09b1L,0x000000000000016dL }, + { 0x377762c7c7ff074fL,0x62240f289ec0af00L,0x47dfdc3ecc3b8a22L, + 0x70a10d983d40f5bcL,0x61657aba05e18781L,0xb7c112fa31910613L, + 0xdc29680dfb3b31a7L,0x00aafce3d41fe224L,0x0000000000000102L } }, + /* 55 << 21 */ + { { 0xcb71ba5711096104L,0xb0ce7ba638a2b20aL,0x808983acaa6ba1feL, + 0x21cab98eea2ddbe8L,0xf7e656439f7d2f9aL,0x687d4d7ac9df8af6L, + 0x342af3932ccf2d82L,0x0352c43ac6a8fbdcL,0x0000000000000052L }, + { 0xa968c9be5fb47434L,0x3135271d91ca8cbeL,0x7cb25ca5e3f8970fL, + 0xc427d97cb50ea245L,0xc5144f82bfadd4b8L,0x54473d162937e958L, + 0x9b5c789b8cb41622L,0x409ccf55ddc0786eL,0x0000000000000114L } }, + /* 56 << 21 */ + { { 0xd495b0ec16eba181L,0x834a505a76f4fb91L,0xd1950cf8db3b0c92L, + 0x5108e0cdbf6bd68fL,0xd8492e459f777ac3L,0x2caa1a4e184f5a9bL, + 0xfbb91ea5ce860f91L,0x05957c32597025f9L,0x0000000000000097L }, + { 0x1774a862b07d29ffL,0xcb4eda89cbcc928eL,0xa19276a102ad819bL, + 0x3d0cacf6f5af5b3eL,0x8dcec31f3bdbc653L,0xaad437092f1ade3eL, + 0x544e97c3574e10b1L,0xd4364902c62c4debL,0x0000000000000077L } }, + /* 57 << 21 */ + { { 0x1c617b0ec7454b5aL,0x192f8da5688d212fL,0x8bb3ad324dffab82L, + 0x921d0b102ac4cb94L,0x11a93e54698b156dL,0xbbffdd46ba176214L, + 0x29fbe579b563b65bL,0xf339c5ead20f3224L,0x00000000000001fdL }, + { 0x425dfbffb94c9ba7L,0xd5bd477b25caf944L,0x99c838ffe4b5755eL, + 0xc25eb25038c573fdL,0xb5118d7f75ed773bL,0x44f6e5e8e6ea5b48L, + 0x2ca7102a8a7e4c4aL,0x0124d2c29c4f4fb8L,0x00000000000000c0L } }, + /* 58 << 21 */ + { { 0xa364e55cbfc650f0L,0x1a2b834413857077L,0x0f38bd537e8e2025L, + 0x61829061f7c11262L,0x7e9710e7c3c52c38L,0x9d61eb0b5437249aL, + 
0xe968f5e104c73e0dL,0xc086474678394fb5L,0x0000000000000185L }, + { 0x08333228a65f7f47L,0xcb026a48269ef9c3L,0xdf7cf0f3902d129eL, + 0x7fba1a847a5fd75eL,0x3f7338c577e5e332L,0x24a06483c7545ae0L, + 0xb84bdda926f22ee7L,0xef0538ff3c3d5295L,0x00000000000000e2L } }, + /* 59 << 21 */ + { { 0x7cac9163c6f9388cL,0x91c49193705210dcL,0xa5c1088c1358a451L, + 0x7b6710a56e928e99L,0x98d4263179fa67b4L,0x31932728f9f623b3L, + 0x27ed2f252417899eL,0x6900af3df6e9651dL,0x0000000000000044L }, + { 0x7021421eb17ee12aL,0x9dfd837bbcafe310L,0xe54f136b84cdd344L, + 0x0943bddb5f7875fbL,0x23786b2bbd2ff324L,0x758d4cf386597884L, + 0x8e831868849cb1f8L,0xfbb48f93c01a2e6fL,0x00000000000001b9L } }, + /* 60 << 21 */ + { { 0xaf3720cc32afdff7L,0xcc84e0db51d60c35L,0xedf89c0a595d2b23L, + 0xfbb1fafb0c9c0f55L,0xe3556b3b8b9b6aeeL,0xc208eb382f5e0a50L, + 0xa8ee4fa226e707feL,0x65ce95e5b043519eL,0x00000000000001e4L }, + { 0x05eb17d3ca16c880L,0x617a63a31f1e4c1bL,0xb8aa601ff35ebf94L, + 0xaf515b4fc5022f1aL,0xc98f8f0a17a2b7fcL,0xa768b9ca67e03fd5L, + 0x67e6d9afb2e5670fL,0xf069de05d217f764L,0x00000000000000c1L } }, + /* 61 << 21 */ + { { 0x90211eba92f1a89dL,0x8a3418ed81b3b80aL,0x0150ed13b0ea17d5L, + 0x5fc05e99bddb5ff9L,0x3db25dcec5bd9918L,0x50bdec38c09beb7fL, + 0x34650128159c352dL,0xdf63ab922366df69L,0x00000000000000f1L }, + { 0x15595a9127aaacfaL,0x4b262b893f0cad94L,0x750f5c4e8ffe4bbdL, + 0xc7c6cbdf6617b6f5L,0xc96e50343fc54c60L,0xc0bc517912e64bc3L, + 0x145b25c60d79b77cL,0x049ab957d4b9a2fbL,0x0000000000000044L } }, + /* 62 << 21 */ + { { 0x127e4b38a542a8b1L,0xdd85f0c3e2f444f6L,0x44ee07fdb6e479fcL, + 0x59ef243a40f4a51dL,0x1e0e83ac6f8ae9ffL,0xc7de39577fae0abbL, + 0xc911c37f62b89c14L,0x998e3826b3ff9facL,0x000000000000010cL }, + { 0xa45e8947bb8b084eL,0x5a4ef9bf80029d67L,0xc371f57529b4c9c3L, + 0x1b4c0fbe77732c86L,0x3bd5da09a6282a9fL,0xd668e0a2ffac1e00L, + 0x2df2a2a08aae6398L,0xa4ab9fcd898755b4L,0x00000000000001ebL } }, + /* 63 << 21 */ + { { 0x1935900b9cdb1e23L,0xa2ad33fe0d798a47L,0xa53f7a0a7d291929L, + 
0x320277e12e3a69e3L,0xe5139222cfcf9a38L,0x2ff86f3bdb790cd4L, + 0xc0bf4565f22c11d1L,0x565340d652a81f8fL,0x00000000000001a0L }, + { 0x16ce3752e31fc3cdL,0x948e3a7d7e402490L,0x2e2c0f7e5343bb9bL, + 0x17956f666ce73ee7L,0x93ca1925cce89b93L,0x6b3d128c41464004L, + 0x11370aca5c4abe8cL,0x70abf93623032806L,0x0000000000000124L } }, + /* 64 << 21 */ + { { 0x87daa931dbbc43b6L,0x882a2421d1895005L,0x6013f2ba7aae1124L, + 0x01d2a8f0bad34df3L,0xdb1b7f432f0b3d59L,0x59039f352a95ac2cL, + 0x555077aaa6c0a8b9L,0x543b69a862685a03L,0x00000000000000e0L }, + { 0xbb104059ec48d2afL,0x691c1f18930fa171L,0x8c434c7637d9fd85L, + 0xd1e56d52068741abL,0x1117a9fc130e01ecL,0x126d517b1f510538L, + 0x25ce40cef290a27eL,0x0d0b0d8ae90e675dL,0x0000000000000028L } }, + /* 0 << 28 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 28 */ + { { 0x93d61c073ca98249L,0x04c4f43d907aa44aL,0xee3d2c3151d5b1d6L, + 0xe74d6892f859df2aL,0x16ab4838922f8fa9L,0xb0b6f7d1757b0010L, + 0x7ca940d8a918f320L,0x45e9325b97b8c9b5L,0x00000000000001e1L }, + { 0xdb18d7bf37316056L,0xc34532048fc0eb89L,0x3929db79060e2db1L, + 0x2ccbf668b1e67403L,0xc7ce019a9b05f538L,0x9377f4464dca0ffeL, + 0x42947eb68a570683L,0x6eb25e34e0a1bf92L,0x000000000000009aL } }, + /* 2 << 28 */ + { { 0x8b60dc2df6130e66L,0x8ed41be47b6448fbL,0x0862c67e5ea7dec5L, + 0x97cd528272e431edL,0x944a76da0bff84c7L,0xc168a8fda2075162L, + 0x21af8de713ad271dL,0x27ebcd24437e9e66L,0x0000000000000044L }, + { 0x399007d74c7b3dc7L,0x5128a9febafa8850L,0xb7e7cff44db4a4b4L, + 0x923571173410d116L,0x47320eacb450efedL,0x47ad42e6f8042421L, + 0xe4ef55c263b6aa24L,0x7a97987c3f882833L,0x00000000000000b9L } }, + /* 3 << 28 */ + { { 0xc76e5f4d022667cdL,0x2c78870a3e07fb1fL,0x9e001396d1c68926L, + 0x707a9c2eed2a502eL,0xaa9a37f8e68933fbL,0x1ae458ac4e4c8f98L, + 0x8047ed5da71656eeL,0x06826324a9df9d4bL,0x00000000000000adL }, + { 0x2c4cbd9319c176efL,0x8dac64428092be6eL,0x0c349ee87a074bffL, + 
0xec35b47c0129e1a0L,0x4096e01235253255L,0xb3e6e7fd06b23b09L, + 0x8b36eac316fd165cL,0xb7d5f97294ad125cL,0x0000000000000154L } }, + /* 4 << 28 */ + { { 0x58241d2f43416440L,0xad00daccf36bb0ccL,0xef105aade1b517a4L, + 0xa88fb367418524a2L,0x960efc2bd69f903fL,0x58b761e2bb316fa8L, + 0x634a724e43593289L,0xa341dd2178b2c871L,0x0000000000000178L }, + { 0x29de72f83261324cL,0x53767dbc0ee348c0L,0x7f5a005b2ebc04f2L, + 0x8c6987a70f7b3b9aL,0x0f387e9d7e48f271L,0x644134585b456a10L, + 0x015a5da5d0a2a3c5L,0xb236cc58708be977L,0x0000000000000057L } }, + /* 5 << 28 */ + { { 0x13c413632bb30ebfL,0x1c51e2fb84a62b56L,0x0ffaefe0883cb2caL, + 0x23a651151997f036L,0xf030712c9c64e7e3L,0x06320295457aaf0dL, + 0x945bd82fb2498539L,0x2995df47623a481dL,0x0000000000000161L }, + { 0xf962f403feb7f8b3L,0x244d8a3a872dff16L,0x59fdaabc89f6ddcdL, + 0x41661bc87db8acbfL,0x1b641d50c256c87fL,0xd7f8aadb27ccdb57L, + 0x4daf613731f353c8L,0x33d401882a2a6f9eL,0x0000000000000125L } }, + /* 6 << 28 */ + { { 0xbd4062286eed0f95L,0x8693a9d97c6c1c3fL,0x0392e931c066dcc8L, + 0x2e1eedc6cc819d2eL,0xc7a315d6069e2217L,0xa7495b1808e6f72dL, + 0x364f38117051ff13L,0xeeeb3cc4bb59d612L,0x00000000000000acL }, + { 0xfd9e0c1078616d6cL,0x585de5ada8d4af09L,0xd08d9021c3143504L, + 0xb38fe6557f67ffa2L,0x5682040cd0fcb4d0L,0xd56c8348e6329179L, + 0x76229a732621c84bL,0x0c98cb02de4cc81dL,0x000000000000000aL } }, + /* 7 << 28 */ + { { 0xbfb85a9ba8f70eb0L,0x9fa45d83e59b107cL,0x90207916cff39e25L, + 0xdf20520f61816661L,0x250a81b64d88eddfL,0x3583bce4d4c88e4fL, + 0xfcafba316121deecL,0x40e71ea745ff2c20L,0x00000000000001d6L }, + { 0xf91f204550452ed6L,0x4c7be74a00fca85dL,0x6ac750dc0fcc66daL, + 0x829ccef87bb3a7cdL,0xf232419e97039a3fL,0x1647039db5f00ad5L, + 0xc72ebf12dd7f9785L,0x42bbfd03c675e619L,0x0000000000000119L } }, + /* 8 << 28 */ + { { 0xb2308971ed0d691fL,0x31c3c54e8cef282fL,0x5cc3d26a97a5ed69L, + 0xb611a35d43da55beL,0x97492fe68fe11b61L,0xc10a18f0e26f0805L, + 0xb6e0f2a2ea78e6bbL,0x2570f42e71c3f3daL,0x0000000000000043L }, + { 
0x30048b23d0630da9L,0x3756d05e0f635ba8L,0x1e6e383a8ce0e80bL, + 0xcbe43361c723d456L,0xb1f089e45cd18501L,0x53132cf36ff2c160L, + 0xc759b7a24a0406d9L,0x17917e066dc5143eL,0x0000000000000141L } }, + /* 9 << 28 */ + { { 0xf8c5b2eb11628dfcL,0xc66d10fc0a42e49bL,0x44c184b4e26fe77fL, + 0x22ef369da4d3daf3L,0x6662870a2cbe0115L,0x80846b9736ca9e6dL, + 0xbbafd1317ed388a8L,0x4da614274029082fL,0x000000000000011cL }, + { 0x68db20a0c06f365dL,0x46005f812ffb8bb6L,0xa0a17ce3bec18577L, + 0xb7627d8dc3cf8b54L,0x441830cdff30d00dL,0xa53ed5cd8f7a941dL, + 0xfd1147c999071de9L,0x619db4eb77dd8ad3L,0x000000000000010cL } }, + /* 10 << 28 */ + { { 0x7fc5ba6013564f62L,0xe82397841238d231L,0x8249f141e7b6cfbcL, + 0x37e1b845668154d8L,0xa99523bc5095c5eaL,0x1196969c76fb9d42L, + 0x6428aa7c92185a3aL,0x428e78d88246676aL,0x0000000000000095L }, + { 0xe44cef632b1e5685L,0xd9665e7e0d228ca1L,0x066367719dfacf7eL, + 0x42b09d00fd5605acL,0x0b101edce65b8e60L,0xca4d0290e2210994L, + 0x50ed6bc6cf9cc92cL,0x2ca63c9ebdebc35bL,0x00000000000001fdL } }, + /* 11 << 28 */ + { { 0xa32a9d65fd85284bL,0xb08333a5a4fae26cL,0x8aca0a12ef75e239L, + 0x9035b7385a77bb33L,0x466f97979aa71a08L,0x375dd05662f91bc6L, + 0xd93ea401f5cb1c8cL,0x7ed00ca316d691c4L,0x00000000000000d1L }, + { 0x8ccbd8fae0a3542dL,0x948625d070b071d9L,0x8654df26586a94d4L, + 0xd6fbfc0f80290be1L,0x2e103eaa260729e8L,0x342394e5f5f52aa6L, + 0xfaa21f70fb684270L,0x1348fabd7c862797L,0x000000000000002eL } }, + /* 12 << 28 */ + { { 0xf5e5237b52fecadeL,0xe0a50e6e4db2795eL,0x7481845f2a4c1493L, + 0xfea36aefaa8fba83L,0xe7a2e20bee8eab2aL,0x3f4ceb5766a0ad1dL, + 0x9be294e7c15afc26L,0x7952338d711c1de5L,0x000000000000016aL }, + { 0x8a9806bba64d25ceL,0x6e72057d8eb8e0acL,0x46eb31f2d3f7e99eL, + 0x4a408b8d7c3f1998L,0x5df31a262f59604bL,0xd12dc298137d937aL, + 0x60c9a1480678810fL,0x3011046b48418a2dL,0x0000000000000139L } }, + /* 13 << 28 */ + { { 0x317b098c5ca5f978L,0x18fd83202c21b6e9L,0xc147ab01f1193accL, + 0x7ef3414deaff0d9fL,0xef1a3d6fe28e9173L,0x2603ccbeb25fceacL, + 
0xf825502d5affa9ffL,0x539a125e066a68e2L,0x00000000000001dbL }, + { 0xaf4b16bfa90a3b2dL,0xe0d81048ff63d1faL,0xb203e68707286721L, + 0xec631a327bf45b93L,0x4d38cb033b10b512L,0xe08eb57aca57f26bL, + 0x737d9cd0d7d6ddd0L,0xb00845a86c572d39L,0x00000000000001e1L } }, + /* 14 << 28 */ + { { 0x6f24af3eec231c64L,0xa4edad38e7c7bee4L,0xc6ba273ed1eead3fL, + 0xf68b581f08c4d123L,0x1db45c2364b13567L,0xcd3d42cb5772318eL, + 0x5cd6e6cb007bf13dL,0xf02d313b071bfdc9L,0x00000000000001aeL }, + { 0x09083eda4ed3782aL,0xbcdf9ce75b29280eL,0x014232bc63b04675L, + 0xbd4bdaeceefec679L,0x8712c19ed47c5385L,0x4bb00b447ae76a01L, + 0x1f847ebd2fb2cf2cL,0x9f2b12b9a7a56439L,0x0000000000000050L } }, + /* 15 << 28 */ + { { 0x6ee9325abf53583dL,0x3b7ef054921dd595L,0x95c35ce8c0abc07eL, + 0x08a3a510646fe810L,0x77fe478185ec11baL,0xe382a5134e870f3eL, + 0x4eeb7787a9f1b8bdL,0x8521f39d9d8f6459L,0x0000000000000068L }, + { 0x10aae6d8428a37fbL,0xf0578f9fd66092f9L,0xb90f50ee1ab36081L, + 0xbb41bcf30ae841eeL,0xf73fa600cf79eb81L,0xeb88461b991afd1bL, + 0x1b9feff62ce5c0f2L,0x80183105cab1b304L,0x0000000000000029L } }, + /* 16 << 28 */ + { { 0xabf17feb008c7a44L,0xade7917a7b12def4L,0xbfba65b83786b033L, + 0x8ef4af44f0d9f9f1L,0x8f47615cab136caeL,0xe42f100dbcbd5d3eL, + 0x8f3dc658f05f9b27L,0xd95a6b2dd6f2dbb2L,0x000000000000008eL }, + { 0x23bb06a546bbc412L,0x15718699d0135eb6L,0x8f1639c4b4ed9e10L, + 0x767fee42d7f644a0L,0x668267b0fe8ec420L,0xec4f548264e90133L, + 0x679e614e536d132bL,0x0d72fbaf174df6c5L,0x00000000000000a5L } }, + /* 17 << 28 */ + { { 0x0d423fc8b05c732aL,0x1a585cfb9fc565afL,0xd0f09ba8438e8e22L, + 0x87cb63973b53004fL,0x30673a866bc86afbL,0x555ad65d7e068e3cL, + 0x4da2eed69da1d5afL,0xe25456e54cb910dcL,0x00000000000001fbL }, + { 0x412e1fd8c5516135L,0x5465fd3139d159b8L,0x36d437af1a1ab099L, + 0xed474e6a03e10008L,0x4f229235c14fd291L,0x6f4a44ce3622b070L, + 0x5221ac85c506ac3cL,0x3e9f54bc552fd421L,0x000000000000002fL } }, + /* 18 << 28 */ + { { 0xbe5ee60713182632L,0x4d463baf9bbc5212L,0x658fc11c5c21a796L, + 
0xb81542941474c364L,0x66b925171f6fde42L,0x85cc97916fd16ba1L, + 0x57c38a821feefec5L,0xb5c51d319b44b1d1L,0x0000000000000145L }, + { 0xe2da12a3c6228bbaL,0x2be11b05a8522096L,0x8b60f44c6f83d963L, + 0xf3bac9f672782a82L,0x32bd85e32757d236L,0x901c50c6d59ffc8eL, + 0x1075eca86ac7c1deL,0xe78c786cf1302741L,0x00000000000000c3L } }, + /* 19 << 28 */ + { { 0xe045587941df76d7L,0x5fed520b829a93bbL,0x8eb7752aa9c54f24L, + 0x0d73e9371b21ee03L,0x4a09582102686b1cL,0x8f887b9944c1e014L, + 0xc7dd142035f32864L,0x0ccee70dcd5896e3L,0x00000000000000f7L }, + { 0x73d4e288f84e6409L,0x37444a9e84a45459L,0xac888ad1fc6bd60dL, + 0xd0f03008ce257cdaL,0x1eba5a906f0e3546L,0x556839e8a7788e16L, + 0x1fa94edc1624a710L,0xf1ee65bb28d9bd43L,0x00000000000001bbL } }, + /* 20 << 28 */ + { { 0x9685604a378806e4L,0x5c0e5a8763fdb760L,0x47b37fd23c4d46e7L, + 0x911e9f7c46ed261aL,0xab42ac68497d6281L,0x656f59410a8e88d4L, + 0xcfe3ffac3a296c4dL,0xad1cbe5719241e9dL,0x0000000000000070L }, + { 0x880193368cc01741L,0x8d354ea83f0eadf5L,0xd334ed2c30cf53f4L, + 0x4490515e7d521d3eL,0xd5d3d50f83d830dfL,0xde5fac26a718d969L, + 0x45707e7e4e0d85a2L,0xbac03693e2568e37L,0x000000000000003eL } }, + /* 21 << 28 */ + { { 0x05691330a54a03daL,0xf9e12a7e52a5d05bL,0x4e0a20c105bb2074L, + 0x27872197491f88b4L,0x9314134b0a02eb07L,0xb5840bb05633b498L, + 0x0348bb60dac28206L,0xc7eaa69e4fa82ef0L,0x00000000000001ccL }, + { 0xd9d4c45319e54096L,0xdd3587dceb3875e6L,0xd6fdfca14a4b4149L, + 0x1abc691467148a91L,0x6061e274b3da5e59L,0x2f9eb868f7cea854L, + 0x057bca4f05aa6a2eL,0x3aed5fe77cc21838L,0x000000000000000cL } }, + /* 22 << 28 */ + { { 0x718f892ef3348160L,0x9f4c88c8f6acbf45L,0x2f886753c5fc4654L, + 0xb078063657a5c32dL,0x63248d855ab020c2L,0x490a231ebebbd4c5L, + 0x6daf12ab6bf2e47dL,0x144885a078e76ccdL,0x00000000000000d2L }, + { 0xb5c08a51682e7203L,0xd3b7632dbdc76ca6L,0x1272869f10ad6667L, + 0xa07122fb431c8b82L,0x226ced3c14371122L,0x2f17db3f67cbe25dL, + 0x74e6946b54858696L,0xed3a84c544ffb855L,0x0000000000000102L } }, + /* 23 << 28 */ + { { 
0x40dee98563bc734eL,0x0ef45f6e0c38c9b4L,0x9c5858bc94b7735dL, + 0x4368630de30fa193L,0x4b9bc14602726a71L,0x18142c21960891e8L, + 0x08d6b5780dc0ea24L,0x055ca09abe408a2eL,0x000000000000008eL }, + { 0xfa8cae11987c9c95L,0x9f692d092b011ce8L,0xe5f725ad8e7c6b42L, + 0x6b54b6a3f4d13859L,0xfaa64fd0e8384d5eL,0x66a16b11682b4c97L, + 0x04b57cd76809bc31L,0x1afaad69d3eea7e7L,0x00000000000000faL } }, + /* 24 << 28 */ + { { 0xf7692e60b5cdfb7bL,0x7fd887faa548923aL,0xe43623edc2d58828L, + 0xd8047114e1e2f3d7L,0xadd5a16d7c6abbb8L,0x9fafe28a2f1133a2L, + 0x5ecaa9bd69688723L,0x5281e4addb5c34c7L,0x0000000000000153L }, + { 0xfc1a8cf1c42d278eL,0xb76ff008220d26e8L,0x6749770f1fb6e918L, + 0xa7e6f520aec33172L,0x23c7456c247a2e94L,0xe4ef1adf8c38913cL, + 0x26c0c817aa8455e9L,0x9e38ea10680454f1L,0x0000000000000155L } }, + /* 25 << 28 */ + { { 0xc4a03e90b431c373L,0x75a014321dd2d074L,0x9ef6f893b6235777L, + 0x3e6557afdd83b252L,0xc30056fad6d21946L,0x49175c7ccc4e6bfdL, + 0xb48d402316ad5f7fL,0xe1f1aa86dabf2fa4L,0x0000000000000054L }, + { 0x7fd8a69da617a3afL,0x3a6875aeeb6b1e2bL,0x1b42637effaff5c6L, + 0xa397fc03cb327e27L,0x251106b73e60a49aL,0xf36242809c3e2b31L, + 0x04a1243054509c5dL,0x023238269992caa5L,0x000000000000001cL } }, + /* 26 << 28 */ + { { 0x1375e971b4520f2dL,0x1507d0b367025850L,0x9d9c351920db152dL, + 0xb9f63cb55e5e5b1eL,0xe4cb0a89428af0f0L,0x9e1857404a4140ecL, + 0xf05173ae168f1d13L,0xe6a8daa0b553bdd1L,0x00000000000000d8L }, + { 0x5bbcc749353c767aL,0x85192aa1f811d60eL,0xfbe7b89e7cbc2769L, + 0x752cc4426f68ea1eL,0x0e5d76f4f550fd90L,0xf2ca39620ee19f5dL, + 0x1fb501b48398c8fcL,0xbeeab7ad6047081cL,0x00000000000001d3L } }, + /* 27 << 28 */ + { { 0xe4f13a4678187349L,0x1a7986452cdad07dL,0xd12edd3b435e558eL, + 0x0a466b2500c452f3L,0xa1dbe185e3d65c74L,0xcc10132b5bd1dd39L, + 0xbb42cff31919d652L,0x711748162c26481aL,0x00000000000000f5L }, + { 0xeae22d3ba0f7e4f8L,0x29d360c402dd3bb2L,0x016d91b5e3b86cf6L, + 0x888b637cdf4dfbaaL,0x72a94792ab525be8L,0x69d628cde606b3baL, + 
0x34904e50a95540daL,0x18979662c4644642L,0x00000000000001ffL } }, + /* 28 << 28 */ + { { 0x1f06341d13a9aa39L,0xedc0d26a7415e0c4L,0x1ba18cd0ee8fed64L, + 0x6d8fa3e4fbfac509L,0x394bae0b08dda417L,0x44ead62fd835f0acL, + 0x793b6a8501b1d873L,0x0956227870f859e9L,0x000000000000017fL }, + { 0xc42d6d2dc699d304L,0x41e68ef38ef5407fL,0xbfe24ff5a825f2ceL, + 0x36108ae9bdd3f4d2L,0x7fd3cdccbe47aabeL,0x93e1da7ca8b5af7fL, + 0x29fa9925b0cf1bc3L,0x5c9f946832411132L,0x00000000000001eeL } }, + /* 29 << 28 */ + { { 0x600a0bf9d2dac618L,0x685a2a4448291771L,0x0e7bac6514e41416L, + 0xff8f4f544c0a19a3L,0xab91c47719bf697bL,0x8874baa15a8ae505L, + 0x4a95436eb79474baL,0x7ec81267f0b04bbfL,0x00000000000000b8L }, + { 0x61aa15a6d7dc681dL,0xdcbce70b90573096L,0xed2174ff06e003d3L, + 0x5118698a1f70c6b5L,0xf3f6ebce94529a6bL,0x2e3c16ba7facd875L, + 0x56ff76fa9c233c63L,0x48d58232b25308c3L,0x0000000000000040L } }, + /* 30 << 28 */ + { { 0xe0fe382608b55604L,0x2c1942a38d460615L,0x9527fdfe82cecf84L, + 0x2a193e252ceeffc0L,0xc238cb9d25d20cb2L,0x357240a611d898a2L, + 0x94c467330a554201L,0x860fcb2f93f09e27L,0x000000000000019fL }, + { 0xe19b84d262304b4eL,0x7d06c611375b7f95L,0x59267a24db7fa504L, + 0x2712fa55cb322b42L,0x31e2690a2bbe4428L,0x0fd9a28a599eb8cfL, + 0xff80495e26c5d99bL,0x6de22868dd44826cL,0x000000000000002fL } }, + /* 31 << 28 */ + { { 0x18e983ea9babce7fL,0xd794f3872c121aedL,0x95377a1ff9be1cb0L, + 0xec3a779fe94176eaL,0xd1e5c67084093fafL,0xcb651415f62b3cf9L, + 0x1b0e00bd178fec50L,0x84529ad9e1300de2L,0x0000000000000019L }, + { 0x4cdd2ef87fd07920L,0x987b7c86fbae0a4eL,0x7fe7d1505007327eL, + 0xdf55c0da4a8c92b3L,0xcbdaf5ad3b61e07cL,0x0a768f308803468bL, + 0xf887801ab2ef831eL,0x5dd31ed28493948eL,0x0000000000000166L } }, + /* 32 << 28 */ + { { 0xd256b6072de9b28fL,0x286fc763cbefe110L,0x736ff52a138783fbL, + 0x6f6d822b4ebcd973L,0x78c5a0b685eca174L,0xbfcc5ab679ea21b7L, + 0x9ccb278d4937e0daL,0x9f50f4422d009304L,0x0000000000000049L }, + { 0x566947d729f8a798L,0x9793a90581cc1ed4L,0x73da631e851115d4L, + 
0xa0b5c181fc7da6f5L,0xa70593d29f5a7634L,0xc9f71df41e6250a8L, + 0x2dfd10897a97cd35L,0xc3cdac7ed4e0074aL,0x00000000000001fdL } }, + /* 33 << 28 */ + { { 0x527dff175e34cb9eL,0x1185849f852371a3L,0xb9d706ca6726ca47L, + 0xc1dd38933d915bf0L,0x9a9f8c7c35256ac7L,0xe2c3c09136838dcaL, + 0x05e64ff7870a320bL,0xa994d8aef3132cc9L,0x00000000000001f2L }, + { 0x46e991b9272d18e8L,0x8a25edf2fd58a9ceL,0x3785868f285cac3eL, + 0x5fb2e743d880fc4cL,0x3a1d2ef3b2c3e9a9L,0xfcdc85b5e00ef7a8L, + 0xbd24e872b5b4076dL,0x668da80789caff99L,0x0000000000000017L } }, + /* 34 << 28 */ + { { 0x97bd38162078f256L,0x5d8f6c9b986e1064L,0x54503fe4f389bb49L, + 0x926f338e896b4713L,0x456413e55b6b0d6aL,0x0c55b0104f321c4cL, + 0xff7b9d8b68144d46L,0x5558ef1ff5a77121L,0x00000000000000acL }, + { 0x1d11903a5e46b3dfL,0x5d07f4024329d68cL,0xe7fc54483d4584dfL, + 0x85cb4edde10f716bL,0x2d8138c27ae7e0dfL,0x23fa18aa0e362800L, + 0xd29f622742704470L,0xd019bbc004ea20cdL,0x0000000000000010L } }, + /* 35 << 28 */ + { { 0x77b70582bfc725dfL,0x40b2fec1c7ac2712L,0x3c12e6c61950a057L, + 0x854deccd121c54e6L,0xe6650cb5f20f8d75L,0x68bd92942455567bL, + 0x4d88a2b9b216af0dL,0x690d0fed06150c6bL,0x00000000000000edL }, + { 0xd17bece2f2efbe2eL,0x231fafc9bb1f57baL,0x06f9776b85b23326L, + 0xaec0776ef3f362f2L,0x21e0b5369e0c259eL,0x944c4d19139e4f62L, + 0x6d3067c700a6a022L,0xf58f9dfbd6539e9fL,0x00000000000000b0L } }, + /* 36 << 28 */ + { { 0x61acda55111366a8L,0xdec95e38b7f07d93L,0x8222e3d6baf73ce6L, + 0xa8d24dfa339b0560L,0x205df5b8caa53e45L,0x53ceb84c07f44c35L, + 0xd44ff89c819e3f50L,0xab02c3c1a9d0fa4bL,0x0000000000000156L }, + { 0xdb16ebcd79987e96L,0x986ace08fa013307L,0xb83bb30a0c1a370cL, + 0x9d2cd27dc95cec3bL,0x4c125471240da52eL,0x8f9b0f23588d5cc2L, + 0xb03e0c8a0e21e5c9L,0xf3a55d8b860c813cL,0x00000000000001a3L } }, + /* 37 << 28 */ + { { 0x90c5a583c3835851L,0x4219b5de3cb23bf5L,0x044d34effe2a24a9L, + 0x5ae5eb83762f96deL,0x9d476c6487fa31c9L,0xbc6aba41aed7972fL, + 0xbea75febc9ec7341L,0x8ca0dd60627f5b64L,0x00000000000000a8L }, + { 
0x1b50115ed6888e70L,0xa4545e2d794f75c3L,0x401861f9dace48fcL, + 0x2d5a89afe11b82bfL,0xbe38f972ae3391a5L,0xcc1f6db06d026c05L, + 0x084ca28ae7a3ef5eL,0x176e9f1fd2864659L,0x0000000000000050L } }, + /* 38 << 28 */ + { { 0xc2019c0271b15a05L,0xd48bbe070860ed8dL,0x2361677c20c6be37L, + 0xa957ea9259c63464L,0x48bdee912c443f75L,0x44f7cc053758b6c0L, + 0xf4910cca5bc5717dL,0x149c230cd5085dc3L,0x0000000000000100L }, + { 0x6daa5d33b9ce132bL,0xccf4fbdb2fe80f7bL,0x844149b7b34a3216L, + 0x66a4b73c9372de80L,0x54b0e50ab74feee9L,0x733214dc6d44764aL, + 0x06e5f78a04b817acL,0x179ba42cc714eab2L,0x000000000000006fL } }, + /* 39 << 28 */ + { { 0x41ca17fe324ebe57L,0xe5b597fc38248859L,0x25a9281f71b1fc77L, + 0xec8c0f355257f629L,0x9951712b32763a6bL,0xba39f3393983150fL, + 0x59a52f445396034fL,0x1272ba122070c464L,0x0000000000000195L }, + { 0xfd4339486ff38a26L,0x0b94645a82186928L,0x37e034cc66c1e676L, + 0x82bb11a0826343b0L,0x90d5b4c1572a5785L,0x1cb39ed5535b33caL, + 0xa751d3ca20e812b4L,0x99eed4a961c771afL,0x0000000000000120L } }, + /* 40 << 28 */ + { { 0xe819cc844c848a52L,0x97d6bb24a95d1c11L,0x68b512de29b00048L, + 0x61bc95c028390416L,0x51f63fea5a21876bL,0x109aebd7e3075459L, + 0x7735f70c233315b7L,0xef66a59c6e0f35a5L,0x000000000000006cL }, + { 0xad0aafcd784af74dL,0x3c5bae42c1b0ebe0L,0xe12e2963f5a634d7L, + 0xa0bbcbda7a495138L,0xa4c2f0c2863b8befL,0x1d13dc0a0362bfb8L, + 0x04be3b6554a111c8L,0x9eefc66cb793f1e6L,0x0000000000000149L } }, + /* 41 << 28 */ + { { 0xaff95949aba4045dL,0xef288e2241d0dbc4L,0x0ab889d979410183L, + 0xc48157059989f404L,0x2b110dc9491692caL,0x6d5786920c0aece5L, + 0xd6afdbf4189fb2dbL,0x6188a57807a0d0aaL,0x000000000000006fL }, + { 0x9daa0f2d3790f377L,0xad1c16553dbd8164L,0x806e1f9d98217107L, + 0x10c67e47fc30518dL,0x80f2ab513af331c8L,0x36c8e5f435bc6788L, + 0x698054e25e7b589fL,0xff145a30b144fd63L,0x0000000000000039L } }, + /* 42 << 28 */ + { { 0x77f03fcfcaebe2f1L,0xb032ab4fdb8d0df5L,0x50298bee11271b80L, + 0x7c9b538213044e40L,0x1e63fb4cba1f0404L,0x0cc77790e5221558L, + 
0xfb667df9d87c07c1L,0xa1bd9b5d4fa5461aL,0x000000000000015dL }, + { 0xe666b8a3f59d0fc7L,0x8cd99856f93b329fL,0xf15ac9dc87d43f51L, + 0xd62f1d4413c7726bL,0x35987d28cf157054L,0xe7319cf68090fc08L, + 0x4fb5e518a58e7727L,0x3d916c071005294bL,0x0000000000000096L } }, + /* 43 << 28 */ + { { 0x26387c347fa8da38L,0x006b5bc95663e995L,0xa81762db2fdc672fL, + 0x5aabce64d76c766eL,0x6c65a1c46b285f33L,0xecc5dab81d758143L, + 0xdf983510b9f496b5L,0x2cdc07a59176541cL,0x00000000000000a3L }, + { 0x290335c934e7f5cdL,0xddb7748db9b7f197L,0x109b9ae09bc63c8dL, + 0x29e888ac028d3fc2L,0xbf292ad344771ea8L,0xdb60dfe2168360c0L, + 0xf3f5490d7d7a936fL,0xe387ba89057dfc8eL,0x00000000000001f0L } }, + /* 44 << 28 */ + { { 0x9a4b7f6493d68b0bL,0xe6e6ab700404d1feL,0x6879df3a95063fb2L, + 0x6aaafd74fc273d08L,0x71f6336497fb7eb7L,0x4ae28c634362c275L, + 0xf5848a9d8c1a487fL,0x67c1efe8eeda4a03L,0x000000000000005dL }, + { 0x60e266b622da687dL,0x0a676ac5b651741eL,0xe39e0012736dd734L, + 0x7cf92513a3880359L,0x8b6c609629913ad2L,0x2c5b829daf6a7412L, + 0xd6fb88f34ff49696L,0x2eb1377cdb4b807cL,0x0000000000000030L } }, + /* 45 << 28 */ + { { 0x7a3663d88d937dadL,0x505e614c7a1002c4L,0xc785b58d85142711L, + 0x656182e5d3ad2984L,0x8d10353e265220cbL,0x3b1be55ab5b54894L, + 0x5cee86bd6fdc5483L,0x3405297d76c6839eL,0x0000000000000198L }, + { 0x872c51c47e6df1d6L,0x924e0616fdbd9f55L,0x8dfc23caec4f6876L, + 0x14989c5e42c59e5bL,0x9cb5b0b59f20ac1dL,0xb9a5c77bffb1f896L, + 0x9eeec4ae52e7e29dL,0x6b0353efc45b8bddL,0x00000000000000bbL } }, + /* 46 << 28 */ + { { 0xf2d4f1c72688cd8bL,0xf192d22d5ed62791L,0x40228bada8326f24L, + 0xad8b562974aeaf7eL,0xc7d3b568ecf92ff6L,0x23108db20fd0b496L, + 0xafc3418f1ebda53cL,0x029bfd1a165a4ef6L,0x0000000000000160L }, + { 0x340cb9d7c908ed51L,0xef88826762017c05L,0x1313d7669ae63181L, + 0xa8d9c2cd6c8a1b89L,0x524cb90b35bad6c9L,0x91af48d2c6aabfdaL, + 0x7b7487b93c3231cdL,0xf20201abf5028680L,0x00000000000000acL } }, + /* 47 << 28 */ + { { 0x414b33978ac56c3dL,0x721029e16bea57c1L,0x9cee0c176997d034L, + 
0x0e066baa743206c4L,0x54151f4c5f961984L,0xed5900facc869502L, + 0x0d9da2d227e7b18eL,0xc7fa2af5e3783266L,0x00000000000000a0L }, + { 0x6616bf15ad5ac8b4L,0x72c8d17c80102d46L,0xbc1f78c8168b5c6eL, + 0x48018dd3c400914dL,0x20cd1be54cc9c130L,0x7fc70e61d99d9e60L, + 0x8b14cc2a42d62a83L,0x74df1bd13070d89dL,0x0000000000000074L } }, + /* 48 << 28 */ + { { 0x8fd8dc497694f566L,0xb2ab052d1fa6e13cL,0x7e4a708e9aabc7cfL, + 0x68a9c33a6b81f0f0L,0x96e4988af1f7cb07L,0xdca8d731ea264c22L, + 0x245371e321b3372fL,0x02216476308d2300L,0x000000000000017aL }, + { 0xebf5411633696cfdL,0xa10c5784419623a9L,0x4c6a119c38fa84d5L, + 0x5c8eee646d4e5bafL,0x7f99d26d9b4959d5L,0x58708101174f417cL, + 0x48cab8f38294b72eL,0x828aaa19d9710f9bL,0x0000000000000192L } }, + /* 49 << 28 */ + { { 0x8a45f3c1f711e80fL,0x9f5a1b9df5d9caccL,0xbc1fc975a7625035L, + 0x80b67120b5f7fc73L,0x9bc987572eafb656L,0xf20515ca406e0a1dL, + 0x83305c67b23372c6L,0x40f4c0ce9ecd6e90L,0x00000000000001c9L }, + { 0xb3598c05be9425d9L,0x255dd9d8a49eb383L,0xd4ec4a2f04a8bd10L, + 0x5c0d36abd5c340f9L,0x0d568642795637acL,0x728aa0f2e00d6487L, + 0xf8f49bec3e20b8a8L,0x65d71712de23750eL,0x00000000000001fdL } }, + /* 50 << 28 */ + { { 0x73e49f63e25b0952L,0xb877a9e56030d626L,0x3ce843972e0cf3a4L, + 0x3d1a2de214820b67L,0x528fcf23c2261e9aL,0x8a9318e1c47264bfL, + 0x87331e933d4a4fe5L,0xc5f4d3321ad39c92L,0x00000000000000e1L }, + { 0x3c22a52df23bdfd4L,0x78a4fe92df000b45L,0x359fa4c23eef9355L, + 0xebd16e3f4c272143L,0x661c7c241736536cL,0x72a7416408be5d5dL, + 0x417ef58e88a433ebL,0x79f30b9d3a0aa85cL,0x00000000000000d0L } }, + /* 51 << 28 */ + { { 0x968f8fd66f2f9e3cL,0x0dbd79fc8adb4e50L,0xf78190a5bc410644L, + 0x0167515013eafb09L,0xaf9a05602b728353L,0xed98a1b3fb685f2bL, + 0xe07d0806e04f3502L,0x6607fe2785954a39L,0x000000000000007cL }, + { 0x7169286c817b5efdL,0x8cc453557b7f8a16L,0x0c3339433f68a9c3L, + 0xf6f5c92ef13d3edcL,0x1e09ef6cd115e7cdL,0xad3b1ba9f6271d3fL, + 0x4cc6ee4fc6be55c7L,0x32cd2db2e20d8022L,0x0000000000000151L } }, + /* 52 << 28 */ + { { 
0xa2b57e47ee31f1eeL,0x35ea17041c5d6c0eL,0x5e23fbd525856990L, + 0xb560d2e20908d00dL,0x5c0804d5b516dceaL,0xb84fd1cf6a938abeL, + 0xc1f15c276ad24548L,0x42850a0f4de8022eL,0x00000000000000b8L }, + { 0xcf3f0f8c86fac159L,0x1e9b9f2741cd1461L,0xc470786c27f36196L, + 0x8eb042646ab8c405L,0x237fb49506bbbc7dL,0x40c30b4714750e91L, + 0x9db7193bc17373a0L,0x597f1ea9b153e953L,0x00000000000000c2L } }, + /* 53 << 28 */ + { { 0x4fdf2cebc8434a58L,0xe2d310b11587a33eL,0x7870a0118476a5e1L, + 0x23a5c69ca4d72f4dL,0xc77d905fce3550bcL,0xd61a54a01c717549L, + 0xd97c3c91864059b2L,0xe1f1c2f0d6636fa3L,0x0000000000000036L }, + { 0x312ce02a59481b67L,0x361cf25529cb2c16L,0xa6cb28f9c8ad1e71L, + 0x82729b40b59d68f7L,0xf6d4ac7bb8b4e098L,0xb4993a2c711612b7L, + 0xac9ba748b7a1e41aL,0x5418d9a322a5eb87L,0x0000000000000005L } }, + /* 54 << 28 */ + { { 0xd89fc40850c82edcL,0xa41578bad7465868L,0x6fd92f67c937ccbcL, + 0x7564cfdc2467d5adL,0x32ba16a72b4babafL,0xf6ff32a4cfe6d562L, + 0x2797b1689ef2ceb3L,0xb7a640f2f578b176L,0x000000000000008bL }, + { 0x4f319670b8ca6146L,0x972e72a0742476efL,0x7bfd9206f2f25ce2L, + 0xc68027e007eae81cL,0x2eeb468847fc7d16L,0xaa08354f502dda1cL, + 0xe3137c68ceb141ddL,0xe5e27505d1f6ba8eL,0x00000000000000f9L } }, + /* 55 << 28 */ + { { 0x62a6970950bee1d4L,0xd1f4674013839f16L,0xab48f6f44cb023ecL, + 0x683371fa89ac6079L,0xec8abd69ca14e962L,0x3f2a8f4619f6cf07L, + 0xd2ceec98cde7fef2L,0x4615e03fcac0741bL,0x0000000000000196L }, + { 0xcfe7cdc5821caba6L,0xbd82d1481006abb4L,0x5725410af4919b55L, + 0xab26f4c1af20960eL,0x1bef3c7b9558b455L,0xa4b2fad5d098a394L, + 0x9a656c097f01c091L,0xdc7048d7fa70a8d2L,0x00000000000001efL } }, + /* 56 << 28 */ + { { 0xcd4f21b88df62d16L,0xa99200834bc94f02L,0x95211c6856a37590L, + 0xc933ae22dc0e83d6L,0x8e733582aa26236eL,0x129991995e6f68d9L, + 0x43f6cb3853e4cbe4L,0x6d14bf9948098f89L,0x0000000000000177L }, + { 0xa3e3899f9173bb49L,0xbf5bc771d1969f3fL,0x09207ffae5b5b91bL, + 0x29d14eec974821e1L,0x0d3fdde868cded21L,0x4d99062ac9895973L, + 
0x836aaeee85928705L,0x9ca8345688cad913L,0x000000000000013cL } }, + /* 57 << 28 */ + { { 0x7b16f87dcace418eL,0xd2a74c4165a2b019L,0x95ec1e15c7d4681aL, + 0x01c830f417e4aebdL,0x0de6f0ba401c8bc1L,0x8114c81627d5149fL, + 0xa01d3c00882054d5L,0x2eca68b0abf0dd69L,0x0000000000000105L }, + { 0xe2858d41a6369d0cL,0xdc444fb36d9ac709L,0x8d51a6eb2ea50b78L, + 0xc3c1f95e17149666L,0x39e712190439bd89L,0x5f3e04cc1fa5b6d4L, + 0x40c776d524944156L,0xf7859b8892597750L,0x00000000000001ebL } }, + /* 58 << 28 */ + { { 0x18a3a3122877dcf9L,0x4c57231db0789aa4L,0x1f9758d9f4e77a15L, + 0xc5c1c2ddea02469cL,0x4f33f024035a3628L,0x33b60b241bf0ca55L, + 0xb957e710bdd64bdbL,0x96d0abb164b24ecaL,0x0000000000000059L }, + { 0x0b9145eaa2035647L,0x929ac0d3196cc1d2L,0x0daf403c10192c2fL, + 0x7250de726ce0a385L,0x5027595cc2217f3eL,0x7650cda46818396bL, + 0x7df3f9df94ef5f3fL,0xf9ce383e3e9c1a38L,0x000000000000010eL } }, + /* 59 << 28 */ + { { 0x80307b9548f639d7L,0x089dd1b7fada9eb1L,0xc0f43d7948380214L, + 0x2cc2421afbf50727L,0x24111e56f3ae1323L,0x33dfbad162541943L, + 0xfa604e1b9bf30a36L,0x3a41e9980ca23f2cL,0x0000000000000051L }, + { 0x463d1718e369ad8bL,0x4b789649cbdad74fL,0xd921f71d3229fa15L, + 0x179d3710aa4ffff0L,0x347a47e251e078e6L,0x79ded1a24aaa0a8dL, + 0x93fba0dacc6d871eL,0xf4b3e8f626309221L,0x0000000000000158L } }, + /* 60 << 28 */ + { { 0x0220cc1183aab8fcL,0xb66eba5bd21536a0L,0x7f537a8dad7476c7L, + 0x6250e935d154a65aL,0xe3e8bf57ecf46b3fL,0x8f3f800bb0133ba3L, + 0x3b16a5be59bfbdb8L,0xe575b5ca3f0f52efL,0x0000000000000096L }, + { 0x41211e957280531eL,0xc1287776078368adL,0xae87eec633a5db39L, + 0x2d076f221846c7c5L,0xe11fe8f1bbe5f70eL,0x7cb1cf6f8bc46087L, + 0x5c2a40382eb6325dL,0xb0aa793ef76b0b80L,0x000000000000018aL } }, + /* 61 << 28 */ + { { 0x2d5e2db75759d562L,0x036f50c2f0d645e9L,0x5a2f49d295e73061L, + 0x43a6194ca19819ccL,0xac1f9da3bdf2a175L,0x5bc51f5ea6e0386bL, + 0xd932dc0aca202aadL,0xab9d361dead9e506L,0x0000000000000179L }, + { 0xc3fd69caa5500bccL,0x35ec63c388bc2b00L,0xaccf525c4e34d8b0L, + 
0x7689d112db03dce1L,0x6686d2b5ebcfdfc4L,0x6cea5f5186d46983L, + 0x52e8e77117e4417fL,0x279048cbca4c2157L,0x00000000000000eaL } }, + /* 62 << 28 */ + { { 0x5cfa362a96323595L,0x10d1ad380b2a3e9dL,0x59baa91aad12493cL, + 0x0f5bb9d432c9bd68L,0xac141dcc0464141cL,0xc3d395e08460a809L, + 0x6919e44f14719d6dL,0xa66fc8237e5aa5c2L,0x00000000000000a4L }, + { 0x7d4fe91ad7fb6604L,0xf0cdc6166125d587L,0x52bd3f11ba69e22bL, + 0xbcb33f67f9af69bdL,0x4f3824ca3296150fL,0x2490712b2ab3bf72L, + 0x561da32946d6d180L,0x1a7607b4bb6fe954L,0x00000000000000c7L } }, + /* 63 << 28 */ + { { 0x6f5340ce32ebce2aL,0xf8403e09331393daL,0x4cedd42fd5eed810L, + 0xd697d934f24a2dedL,0x7253178f7f2eed40L,0xf637a3c22e487a59L, + 0x3a562449230bbe9eL,0x7ee1c93313a9f00aL,0x00000000000001e0L }, + { 0x3e9c3f430c67ff5bL,0x415d74ea77e94e6aL,0x4ce10a8fdaa7e03eL, + 0x3a38ac77ca7e82ebL,0xb63762010391062dL,0x9e261b95276309b3L, + 0x33787055f4fd6e43L,0x0655512a1e763082L,0x0000000000000114L } }, + /* 64 << 28 */ + { { 0xaed480e002f799d8L,0x021a2f24d635c04cL,0xe49a60d8004d0abdL, + 0x58ab8fb04d8873bdL,0xf36a43c81ecd98f1L,0x555f15c4cd2f93f7L, + 0x295d868d4eb95e03L,0x1aa8e425b50553a1L,0x00000000000001deL }, + { 0x2efce59878fcbf33L,0xc499e9fdf4e93711L,0x0f5220ad0c6e6fadL, + 0xed0d0a9b9fc9da8cL,0x84235958f7813802L,0xed21398e62729b3dL, + 0xdf6c25ab3c9bb158L,0x87d306d9edd78e48L,0x0000000000000085L } }, + /* 0 << 35 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 35 */ + { { 0xa0bede1baa9a08abL,0xa32364531fc48289L,0x27f83d5a8c4e95c5L, + 0xd4e05998a98b2122L,0xe4118aec7d51821bL,0x18ae6c6a883ce4e9L, + 0x686d2a6e0d65e4afL,0xce00c72768d87702L,0x000000000000004bL }, + { 0x8c8c0c984c0b4b11L,0x09c31a00977e63aaL,0x1999f20642100b94L, + 0x7314df12c4f0cf33L,0x0bb59f1a5f5da290L,0xf08e85bc03d4fd95L, + 0xcb0b361fd6fb0524L,0x9192a173f35d713fL,0x00000000000000b6L } }, + /* 2 << 35 */ + { { 0xd23357cbc11b2a0bL,0x70de0d3fb179a631L,0xb57a0fad122b12b6L, + 
0x3203600f78489416L,0x99fe0b86d26d2267L,0x36f3262f3b9deed0L, + 0xb6e6116024a32dcdL,0xeabd7c06aa745047L,0x000000000000009cL }, + { 0x3f277e18cdc4cff1L,0xcad81ada6eb7045fL,0x3f82d6dabb804212L, + 0x7eca837503b6a618L,0xda88796995f2970dL,0x21fc350feb3a7834L, + 0xfc332b6e55f9ac1cL,0x0d85b43418d4111aL,0x000000000000003aL } }, + /* 3 << 35 */ + { { 0x0a06f1def4fbe878L,0x09f64854ab82ebedL,0xce87ff16deccf5daL, + 0x8da229260c3ee54fL,0x9daecdb54b3b01beL,0x3235049383c3f13aL, + 0x83009a5fce924497L,0x02c32dd344f6445cL,0x0000000000000099L }, + { 0x29fe64280889ade6L,0x53e62dea97b76d1eL,0x2bbd1dbc85849c69L, + 0x2b35472d511dd92aL,0xce302bb9a1200de3L,0x8da55e0b188c8660L, + 0x4beaf0cbb581d96dL,0xe8b2a18ff7b7e9a3L,0x00000000000001edL } }, + /* 4 << 35 */ + { { 0x9b7b7da8ccec2c14L,0x1ebd382dcf4fcf1bL,0x72a07d22f1c3f836L, + 0xef166de452992679L,0x5add8684088555cdL,0x61fbb803c2f8fff6L, + 0x7da8e738e52770faL,0x36a94145f9f2506dL,0x000000000000016eL }, + { 0x50af6f099e6f7092L,0x541acac8bd540556L,0xf927547a616159c2L, + 0x4d2a84617c7dfe4cL,0xb603c320754fdadfL,0x0ada986235c19059L, + 0xaa95304ebe17a78aL,0x50f8baff41244129L,0x00000000000001d6L } }, + /* 5 << 35 */ + { { 0x260efac5aff9e319L,0x8e3650c87d4f1d91L,0xce7bee9d3b3fb7b4L, + 0xa6d415cc99eed5f6L,0x8041fbf9974686a2L,0xcfa2aa4f379d32bfL, + 0x920a936febfae5bdL,0xb75e33b8282e5ad1L,0x0000000000000050L }, + { 0x979059cd6175e89dL,0xd630ac31f3da1ed4L,0x64f8fbb8d3225d27L, + 0xf78de657c1c566deL,0x6759e1cfd17fb6a5L,0x50864db03bc3a849L, + 0x225685c9365020c5L,0x16ef56abaae66d87L,0x000000000000013bL } }, + /* 6 << 35 */ + { { 0xa8187a5fc0c3e8aaL,0x89107b456dbae123L,0x023bb57a57ee86f9L, + 0x7b574f5dd0a5e4b0L,0xf832b673e8c79ce3L,0xc10bfdf66771037aL, + 0xf448e37efad2b5dbL,0x198293caef6701f1L,0x0000000000000070L }, + { 0xee3e2714deb4db92L,0x5af53d653e2a9e0fL,0x8fe35f6e0fa38e48L, + 0xe512d6895dd085e7L,0x8b6aac03ead33215L,0x44cf0c3c6dda0f8bL, + 0x1d642848a861f599L,0xaae60d552d02dd4fL,0x00000000000001bbL } }, + /* 7 << 35 */ + { { 
0x79710d49dbb8281eL,0x2296ca841e2a213dL,0x7f8e1a9cd847ec78L, + 0x158f24bed7aecd74L,0x571e8ca657b870e3L,0x8748e407df98feb8L, + 0x822192b857348ae5L,0x7eea8f745f73e93bL,0x0000000000000136L }, + { 0xa56d856b87d43834L,0x9fe0ff564fd91500L,0xf52ef09924aebb47L, + 0x3c311496864cba01L,0x93d2bcd8a74253feL,0xfbd223f96fbb71dbL, + 0xa7776b83bb1badf6L,0xcbdd63a911f4e4dcL,0x00000000000001abL } }, + /* 8 << 35 */ + { { 0x6a7316dd5dc62efbL,0x377777eb824cf4dcL,0xe349e9457255c5a0L, + 0x64111cb9bf497e75L,0x1d91dcc714b1eaaaL,0x87ea1779cbcf5454L, + 0x070f848d42c890f3L,0xaaaf9a3881e4bc6fL,0x00000000000000baL }, + { 0xfd521f1d853096ceL,0xa44cca3f3c8f7ac5L,0xd960927b3f30cfceL, + 0x0d30feb7c5999722L,0x5b0d38e22adc3822L,0xa00f4a022aed7af7L, + 0x1bc83e716ed4dd28L,0xba2213a238615d83L,0x00000000000001a7L } }, + /* 9 << 35 */ + { { 0x7df37450630874ddL,0x028f47c9e0e446b1L,0x1ca7c1c3881cd7a0L, + 0xbef2f5ce04bd0fa2L,0x6cb6bfe1bb0716baL,0xfe1d702d263901c7L, + 0x8537e484e098aba3L,0x9f5aa1332885a9ffL,0x0000000000000196L }, + { 0x82ad05da072c7a4fL,0xaa09589418c27ca3L,0xc39c9cdde3b68587L, + 0xe94da9f0e85d5f74L,0x7287ea7df479ea5cL,0x7b4a94403825c3aaL, + 0xb8bfa948d0cc984aL,0x713f8cc029742546L,0x0000000000000073L } }, + /* 10 << 35 */ + { { 0x4a7912f4453a46d0L,0x1d2e76231c7cf115L,0x2208de2b8f29c610L, + 0x809daa6181e502b5L,0xa8f149fc6bd724b5L,0x38af43fc31ce0e18L, + 0xa2fdba97e8139cfbL,0x665271bd9eb155f9L,0x00000000000001f1L }, + { 0x8b86fc468d28ea7aL,0x8cf2558a51d8fa58L,0x2c97c546d7ba83e6L, + 0xde7cf223acf26595L,0x7802038e4ce7407fL,0x95574ca7ac6b1de1L, + 0x98631057f0e9c573L,0x43fa05d155e8bd77L,0x00000000000000a1L } }, + /* 11 << 35 */ + { { 0x35492249be9bd7e4L,0xcafe206f52521c89L,0x47dc978d77be09a2L, + 0xc0ea0080ebf81c97L,0xc3e10de2694d91c7L,0x9811bc56772426ecL, + 0xd1f4fedea3e2d3faL,0x4305664eb53e0995L,0x00000000000000b6L }, + { 0x25a3583d1be78b26L,0x8ddb19664de54cd5L,0xa766c88dde4b1b5fL, + 0x5b23a2a7fcd74bcfL,0x1ed2d0cd3a4784f6L,0x14020b80b6c02a2aL, + 
0xec21146ed646ac46L,0x6896cbd4a392e47bL,0x000000000000000fL } }, + /* 12 << 35 */ + { { 0x6f3326291afb08a9L,0x9f159db2908c995cL,0xa36f91bb5e8dfd58L, + 0xa8ee1cd8736ce4d1L,0x45832ab2cf58ae21L,0x08ee6a41316e9061L, + 0x0e39db0b1a2ffc32L,0xea9414e1380323caL,0x0000000000000042L }, + { 0xff010e54fc832689L,0xee1f4824c59214ecL,0x8ac0dc4a62b52f22L, + 0xcca082d2dba63497L,0x03b4dafd826e8cd4L,0xbc3c59580f8ac57aL, + 0xfa53329789525cb3L,0xc00ed59547ce0d1eL,0x00000000000000d3L } }, + /* 13 << 35 */ + { { 0x38ac2c1f27599c6fL,0xc8515670058fc921L,0xc95ee67684954ea1L, + 0x7d87418fc0523bbdL,0xa9b8cc71138ac6b7L,0x0230bfcbd4f818f0L, + 0x114c9539beee08eeL,0x32b57cb454435a17L,0x000000000000019fL }, + { 0x997e7382d0b079caL,0xcd0eddd90952e376L,0x80c215bbc512362dL, + 0x0d7da33c06ac1492L,0xb6eeaf325c2720a8L,0x90b67f842106208eL, + 0x2e08a3ff7e0e56f9L,0x76e9b031c6afd704L,0x0000000000000120L } }, + /* 14 << 35 */ + { { 0x50c0ae1bbc78973aL,0xda11769b5afc5571L,0x72dca0e13989fc6eL, + 0x270e2d671f23be5aL,0x0c9d34d48bb2aef0L,0x37f07fdb48ef02f4L, + 0xff1c0206e0c549efL,0x9a62c16a6fd94255L,0x000000000000005cL }, + { 0x368d1944785a3c92L,0xbab00ef3f92d17b7L,0xe8f15f69e6b2113aL, + 0xd58342fff6b46f5cL,0xa0edf615ada12adeL,0x12d90798e7b4926bL, + 0x284ca9e1671bba9dL,0x15def43a8ed23f9bL,0x00000000000001d2L } }, + /* 15 << 35 */ + { { 0x132dfd2814ffdde6L,0x9e0ba5fef2e1c419L,0xda21747db1dbfe86L, + 0xd0fadabd78a62587L,0x50ce9d488a2d37bdL,0xb4ee4b8999d70e25L, + 0x12ade8a38db8fbb1L,0xc03f3ecad2ef5fd5L,0x0000000000000188L }, + { 0xfd17694aa2e6a2faL,0x1225dce9e3196a27L,0x25b09ef2a191bfcdL, + 0xe260325f7975942aL,0x3e62541bc98342fbL,0xdc572b5772876fc5L, + 0xadbbfef5c5ca0ccaL,0x16960b62028d57f8L,0x00000000000000d6L } }, + /* 16 << 35 */ + { { 0xd714194e2f415999L,0xa4b29d685128d8d9L,0xd1046cb83324ca57L, + 0xe5e6a47504828300L,0x9c5c4f6c694c18e9L,0x84d00de8d01e5707L, + 0x8ed94290bc2b3e66L,0xafe23a51ec6a6e31L,0x0000000000000141L }, + { 0x2b996fae976e9c3eL,0xba0cc494be1288dfL,0xceb07fa6de8aab8eL, + 
0x7f460647985651adL,0xee200258189376f0L,0x21f13320f9a7ac9eL, + 0x66a57bc58f3cd359L,0x7b13832775ed8259L,0x000000000000018fL } }, + /* 17 << 35 */ + { { 0xa54091b5fe742428L,0x9fb8d0ae9ec0b6bcL,0xcc9d3cfd658202baL, + 0xbd1360dcdec5a1f7L,0x62b0e1a8e8f09c97L,0x66efebd288db9d06L, + 0xe55656f90a30c3dcL,0x62ae035fed27ad52L,0x0000000000000190L }, + { 0xdd740f0ce530b69fL,0x3d2642b8e83c5d80L,0xf42b632fcb53650bL, + 0x94ffb5fb97b9f17fL,0x446299b5b4231998L,0x43df639e2a9a71deL, + 0xeca5ac9a024e9ab0L,0xff1ee10a967d00dbL,0x00000000000001e1L } }, + /* 18 << 35 */ + { { 0x68d62207babd7e5dL,0x7618829b2368aad8L,0xd8bc9d594cb2e88bL, + 0x8f4545f76fb516efL,0x0a5769f13078ffdcL,0xa7ddbd1aa5fbf510L, + 0xc4c70ba312ba6fd0L,0xef602078f8e0dafcL,0x0000000000000016L }, + { 0x6035217344f95785L,0x945017d70d3be590L,0xc12442d3ec7355cdL, + 0x57fd71d8b71b36eeL,0x65aeb2d8ced7c563L,0xcf050f810155581fL, + 0x6cf9c035b2f83a15L,0x302a86fc4cb4ddf5L,0x00000000000001fbL } }, + /* 19 << 35 */ + { { 0xd6296332f0be10e3L,0xaa891752113c0106L,0x3b36e13f465f6c3fL, + 0x90cfbe2263274782L,0x3c2b63160c7104f7L,0xa12c5ed0f74fb1bdL, + 0x6687e14716325f64L,0xeaf5b0a897e27f93L,0x0000000000000012L }, + { 0x627645cc46e1d4f7L,0xfafd755a48296d3bL,0x5bb39b18554edbf9L, + 0xe91e5bcea18115ecL,0x292b808ed880befcL,0x2a8abf44b42f1efaL, + 0xfe48a1ec96bc93bcL,0x081792b8168933f9L,0x0000000000000077L } }, + /* 20 << 35 */ + { { 0x0e5eb14161f457e7L,0x92a3bacfeea01ebbL,0x564ad7678fc26d40L, + 0x4f48057b446b177aL,0xf1255b23788fcc70L,0x1ac8aeff459f99eeL, + 0x24731f8fc8af6c59L,0xc44122dacb32a633L,0x0000000000000114L }, + { 0xce32416d6fad3db7L,0x59d803bd73ad309dL,0x211e641d0cc0c9aaL, + 0xe385bce0bcc90bbcL,0xd24b7461c6690793L,0x31d7bc5e4908516dL, + 0x268c54c4acbf7371L,0x95c7374121b9407aL,0x000000000000016bL } }, + /* 21 << 35 */ + { { 0x25a6ec1a4565257cL,0x56da9c6f45d4b786L,0x5200f490abb04ea5L, + 0xf553a6bfa633d526L,0x8a563d61827dc74bL,0xd459b61db4bc0d1fL, + 0xd0762e8ca49cce8fL,0x29c625e7ab89a9c8L,0x0000000000000091L }, + { 
0x4ce3b817f18f870aL,0x07ae3556c595c23cL,0x314772b0444d3dd1L, + 0x7f09c046fc8efafcL,0xed25d47e330b71bcL,0x604967dfcf801ce1L, + 0x67101fd2467cf3b9L,0x3ec2c9be15a86011L,0x000000000000005eL } }, + /* 22 << 35 */ + { { 0x5ee4bcbf9aa0a8c1L,0x3f701e41a37cddbfL,0x57790d50253a2decL, + 0x594f2c7ff30ec59cL,0xd104ac878a383d81L,0x28c238510097bacbL, + 0x961c3dc36de5b94bL,0x21b3dbc6dd673666L,0x000000000000004bL }, + { 0x223f6e40bde60673L,0x8109ed96d88c2017L,0x3ce877773b960bf7L, + 0x36b20625e3dfdd86L,0xc959fc6a737dc9e9L,0x9f3acc42635e8d98L, + 0x4c171a9b4e87cf60L,0xec218d66033b148cL,0x00000000000000ecL } }, + /* 23 << 35 */ + { { 0x7f91a5a7bade2b11L,0xb2db06128afc083eL,0x17ac0a6acfbf6469L, + 0xf0a36b26bbf78b3fL,0xa69725b4b4b48af4L,0xda7604dc4099e049L, + 0xeaafe16b1c309bb4L,0xf50567b89f231c92L,0x00000000000000feL }, + { 0x4fbc4f36d6af3953L,0x704506374fa5818dL,0x71c67575b7d018d2L, + 0x1ed130f60d98cd48L,0xbba32ed1f5abe4f8L,0x90a0d5512ef01ac1L, + 0x45da1ef510ba9c66L,0x28d732789d8a6865L,0x00000000000001e9L } }, + /* 24 << 35 */ + { { 0x4b52f6fc34afd3ffL,0x246423c4e0c6879cL,0xd21db87bae1f9e19L, + 0xdb92edd51b56152cL,0x4a4aba09d420e95aL,0x21e9b73d912bbaafL, + 0xc22a8b330f004632L,0xa90e4438f3b78c5aL,0x00000000000001abL }, + { 0x7bb69d03a21ab7cdL,0xb0658b924a91742bL,0xd7386c4d4adc7123L, + 0x981e8258ee6c6852L,0xb2ae71400efe8332L,0x5b91d439245b9f6fL, + 0x8f2118290fda3b1fL,0x54beff28842e2ae7L,0x000000000000006fL } }, + /* 25 << 35 */ + { { 0xe8b3dacc713db46fL,0xcb8349a7f7365288L,0xa68504d17f939e4bL, + 0x51411b60ddbe781aL,0xf7a8ba4b0fac5648L,0xed3a51aa99f6d936L, + 0x4ba023ffd3588b2bL,0x16e53adf129cf4e2L,0x00000000000000c6L }, + { 0x02ca237308fbb8e1L,0x90ae9b6d0b60e3caL,0x6776eee587d8385aL, + 0x0d6b1b9314bc9d4aL,0xf68d1f6c8be2915fL,0xd0e01dedcaf0f0b9L, + 0x31e1a76c60db9ce0L,0x882c3e86268d5f62L,0x00000000000001adL } }, + /* 26 << 35 */ + { { 0xb7715ceaf14e77a5L,0x30b15cbf85d3eae0L,0x99aada434b1d66a7L, + 0x11e13473f303a321L,0xe075d04ca3c2fa64L,0xb02855205f4d07aaL, + 
0x3d838d13e5d0dbd5L,0xd65785326adca13dL,0x0000000000000082L }, + { 0x3785f4d05fa73d0cL,0xfaba0633c4098f4dL,0x9d0a4e294f749d9cL, + 0x5966c3debe4319fbL,0x97e059ae1a5367dbL,0x0333cfc26fa3debcL, + 0x38e9675b283be3d5L,0xd800fdc824d7367aL,0x000000000000017dL } }, + /* 27 << 35 */ + { { 0xe425802eb9ecf83eL,0x67179c5efe330a56L,0xa25d8b602db73eacL, + 0x6e954907ce31e6bfL,0xef2500fedf0e3b24L,0xacd2851f01fa4396L, + 0x836d694c844ce475L,0x1a7d6c59d48a71ebL,0x00000000000000ebL }, + { 0x25d87aac9839f111L,0x09fa18f8953ff5f0L,0x9b079ca013ca276cL, + 0xcfa23f4558b56ba5L,0xbee77e5598b2b4caL,0xc8c1a305977b27daL, + 0x8481c92fbcc31ad3L,0x88afda5311734146L,0x000000000000010cL } }, + /* 28 << 35 */ + { { 0x7532ad64b77714eaL,0xaa8b0d323e2ce07eL,0xaeaa4744b51e888cL, + 0xfd08f7188e7b5816L,0x57016828e993b6a7L,0x69a9d410ebc5408dL, + 0x2c0a373da91c70a1L,0x5b583311992d9800L,0x0000000000000197L }, + { 0x93f02d300402323aL,0xeefa9c5481b765f8L,0x99557a399c28c31cL, + 0xb410e5842d8a08a0L,0x70568fa7fe1a202aL,0x4ac9eaec07bcf04cL, + 0xe4f69dc4367c9a04L,0xb483bddb581e3bc5L,0x0000000000000178L } }, + /* 29 << 35 */ + { { 0x54c022d2e7b598dfL,0x79e2cccef073700dL,0xa5336c980669c555L, + 0x27cdeec12dc4509aL,0xcfed8ba84197e364L,0xd45ef416e15c075aL, + 0x29454994f444e86bL,0x799e40a7cb4d8ee7L,0x0000000000000060L }, + { 0x0000babdf8d904ecL,0x7653dc720e4e5eb3L,0x0d0087d05b6f7b42L, + 0x38be9b10a6df9d85L,0x1240afd3b918bb23L,0x23d4f0c546176345L, + 0x8242efed0c47ed2dL,0xd81070a93597fe3eL,0x000000000000002fL } }, + /* 30 << 35 */ + { { 0x1a52836470b32e66L,0x5b76dabb0e9b3086L,0x48b57c94382fa85dL, + 0x49de76759cd9f361L,0x6c744071b5f63c2bL,0xb7067ebbe741cfa6L, + 0xa95efc93702d19c9L,0xfd647a47cb409c01L,0x00000000000001c6L }, + { 0x4c31f670f9f06368L,0x775b1698a075c748L,0xfb6cbb0fe7656e7dL, + 0x69c0f805da1881a5L,0x1267496cc2fe6b31L,0x0612fb94d8926ee1L, + 0x561807476cefc9dcL,0x5518665cf90f2aa2L,0x00000000000000e4L } }, + /* 31 << 35 */ + { { 0x64d0965b1258699aL,0x4318a056f7bd1b22L,0xe86e065bcbafcc09L, + 
0x7607f0446a179875L,0x368d4f652cc4b9b5L,0xae4dd2ebe34674b6L, + 0x8ff7af5a487e0831L,0x6c65f9af48388679L,0x00000000000001f0L }, + { 0x30078bec2c49e55bL,0x353adc525131ccecL,0x1a60d3e32e7e4f72L, + 0xcffb559b0d8f9cf7L,0xf7ba89a6b2eb6515L,0xee0b6927ba852015L, + 0x691a678d8a982a09L,0x70d6cf66d8d36085L,0x0000000000000178L } }, + /* 32 << 35 */ + { { 0x1683448de93ef527L,0x158fd9220e5b6f98L,0xcd0bcea6e23f2fa6L, + 0x455d35de15982d3bL,0x2fc788d7bda98d71L,0x65d44c08f139b889L, + 0x219749fd4105fd65L,0xa9ff816ae02f1fd1L,0x00000000000000f0L }, + { 0xa2a53148e55948cdL,0x2d952f500cc0a0b5L,0x26f77e7161839836L, + 0xa8bf589aad87bb45L,0xaf21e0fc6a9296e4L,0x917881207d394e96L, + 0x47f3d38968622361L,0xf17141da090a8cb2L,0x00000000000001b6L } }, + /* 33 << 35 */ + { { 0x01279781a9d1af36L,0x135633e6209d1854L,0xca9959c34bcc2c26L, + 0x9ea2bc68a8553b2aL,0x5f97d137804b7466L,0xa1790fc774b3739fL, + 0xe150cdf4830b9a2eL,0x3b7a3f5850e4d1e3L,0x00000000000000bdL }, + { 0x75a53d5299105ebeL,0x4fafc9b8bd00206aL,0x9fadf1e5d48baa99L, + 0xcd2c901c2526d67dL,0xc2888298bf4b9ef1L,0xa185bc6b5615d788L, + 0x66f7aae4c02b90eaL,0xb58dfd6d544bc9c7L,0x00000000000000d4L } }, + /* 34 << 35 */ + { { 0xbeac5a04dc3598afL,0x57afe9de8974646fL,0x729e165d14018728L, + 0xdb5aa250c48f69fbL,0xbd3732ca6a2a9532L,0xfb8b66ed0953ae9cL, + 0xc4b2791764a63ee2L,0xb3cee5a7b501889fL,0x00000000000001d5L }, + { 0xf01d492bc01b6386L,0x940d92c8f50d3221L,0x5fb0717d1df2850aL, + 0x53198fadecdc9e39L,0x3b43aaab37221dedL,0xa776ceb1fdfd723bL, + 0x795561a2d6f780ecL,0x0ddf9112bd9c4c20L,0x00000000000001f2L } }, + /* 35 << 35 */ + { { 0xb2b958b192246bb4L,0x9ec1fcb042a8ec26L,0x7d035e6622501d65L, + 0xef96b924dca60d56L,0x40aa7306904e72ecL,0x65df4ceb51cae3feL, + 0x5588726f2e43f4adL,0x01b8b3c1ae60824eL,0x00000000000000d2L }, + { 0x274d226496d3ef76L,0x706cf68d25663f29L,0xe211735d2bf5fb14L, + 0x40d4f39b286e96f9L,0x2c31dc264a2cee83L,0x074f6ff7ccc7a3dbL, + 0x049ab010073b5faeL,0xe8f39718a3d95de9L,0x00000000000000b2L } }, + /* 36 << 35 */ + { { 
0xa58e956bdb750655L,0xfe67d0fba11d4441L,0x802a67090052ef49L, + 0x7b37040d5393e1e1L,0xad57cd92a8bb9edbL,0xaedc34ed19bbc0edL, + 0xb8126e59b39c8f27L,0x6a9f6ff2bda531eeL,0x00000000000000d5L }, + { 0xc8b6e197c6cd7feeL,0xd9b0106ebfc7cd1cL,0x3c0e289b85aecd74L, + 0xedb8f5b324b94e66L,0x3d0c40fa4ce2b7b8L,0x849aa40d48dca638L, + 0xd6fc723dfa48d109L,0x4515db8af5dd0476L,0x0000000000000193L } }, + /* 37 << 35 */ + { { 0xb0683294383e3ce8L,0x844dc47e33ed51a1L,0x50c94e00d5996d55L, + 0x267925bb3bd6b4f2L,0x604bd4192b0e3cc3L,0x52095d9846421982L, + 0x55192496666ba499L,0x606df30c07e53b0dL,0x0000000000000092L }, + { 0x3f2abad16f768ae8L,0xf2e7735ad41589c9L,0x61e8c3dce8516d55L, + 0x28e7a45b76303e72L,0x2b702b792e5c0204L,0xdf2882d56c2ea1b0L, + 0xebb4aaf6636de2e3L,0x9004996c994b5397L,0x00000000000001ffL } }, + /* 38 << 35 */ + { { 0xcc3ce574ac6839afL,0xd81a4213153dc2e1L,0x64829f778956f5daL, + 0x884a66eb32d3dfacL,0x9a52938ce79ff6f0L,0x0d56f215990e8a04L, + 0x723fbeb5a65e0186L,0xf97b1537897e6514L,0x00000000000000bcL }, + { 0xf78198881229ee99L,0x08bf9c3750e4f43aL,0xf0b1d230513be81dL, + 0x8d920ee5da4f7856L,0x9e796bc35527eb47L,0x55d5a35111049ee3L, + 0xbb55397b9ccf6b0dL,0xfd8390b6d17e55b2L,0x0000000000000180L } }, + /* 39 << 35 */ + { { 0x8234b440d5d9e622L,0x32e428882d0e1ea8L,0xe6c81ec8c99d38ebL, + 0x699cd82e1c6911f6L,0x6de7dd4f91d95087L,0x61e4d9397e50aea9L, + 0xf41214b481faf2c5L,0xc1542f137ad1dfbcL,0x00000000000001caL }, + { 0x17093e561e2098e1L,0x6971fadfde3dc089L,0xa5dd0f64be5223dfL, + 0x8d77620a6d9a65adL,0xc3f34c417cbf0ed6L,0x7d4db21d9d25c237L, + 0x09ca81b6e8037141L,0xc7fdf36262a876abL,0x00000000000001b9L } }, + /* 40 << 35 */ + { { 0xb817b9d19327a3feL,0xdf8ced7438fc86a0L,0x2f4869ed9e7c4a3bL, + 0xc669a7c7022e4d73L,0x0712075affe0fd8eL,0xc59f461860ab5941L, + 0x4c87781b6b168844L,0x0500f12a63e6a599L,0x0000000000000105L }, + { 0xdf244b348e82038bL,0x78b822897d03382eL,0x8a995d805630be38L, + 0x217987bd6eec82f1L,0x58d1319f3e083c91L,0x21230442c486ee90L, + 
0xd86f468d72c75e0cL,0x18abe0c5258783d0L,0x00000000000001f9L } }, + /* 41 << 35 */ + { { 0x2cd9bc83a782ee3eL,0xc6a91ca1e958c3feL,0xa54debb7cef64e5eL, + 0x13b5af135fa73ebdL,0xbca16674c51b44e7L,0x338f4db0f95d0250L, + 0x180e25555828c384L,0x6986868c0f86e48cL,0x0000000000000191L }, + { 0x142eb0de6eaa2da6L,0x9637e5a057a6f82cL,0x7d8f0f3eb84c21f7L, + 0xa3ad6bbcd40afd1bL,0x1a3cd6cdc1aee54dL,0x279abcffa6b9ea25L, + 0xbd7ea9b9bada8b66L,0x326276b4e718fe19L,0x00000000000000f1L } }, + /* 42 << 35 */ + { { 0x65ac0bb815dc08a4L,0x9b10b48fc7249272L,0x2f699538403ae1cfL, + 0xc4f720464df2cc13L,0x17bc249e14a28ca6L,0x8df067e5a41e3e9aL, + 0xebbdab21cf185139L,0xe1eb3c38424aadb7L,0x00000000000000ecL }, + { 0xb6e0e2277b05b001L,0xc762d6e11470a2b5L,0x6bc7a06efd43bfd1L, + 0x5a47815c8ded3264L,0x1c717f27bd22d935L,0xe646d0b9b9dd3b0aL, + 0x214767e6b264d058L,0x02cd9ec33f311afcL,0x0000000000000060L } }, + /* 43 << 35 */ + { { 0xc955625993b6ff60L,0x86bf8b7f7fca1753L,0xb14d3c3b96187d86L, + 0xab1072c272d71a34L,0xce7cbb0d13fdfdc4L,0xae9ae97bd5bd0917L, + 0xf3f150fb81d80e76L,0x5c972596c81ad986L,0x00000000000000b5L }, + { 0xa46c1d9b50ed6bbdL,0x7d7a19d1be2ea4dfL,0x60b07dd08bbf9adeL, + 0x6588d7bf1aa70d31L,0x289483929a089f74L,0xcf439ad0c4f70486L, + 0x8753838310931614L,0xef1f89c0cfba8634L,0x00000000000001b1L } }, + /* 44 << 35 */ + { { 0x830e21f1e0d139ceL,0x999277dabb3c2b4dL,0x61d61a69008b64d7L, + 0x3ea63636ecc3e4e6L,0xf278f8248d368e08L,0x5b5021ec52c1a135L, + 0x49d77b1348ac51c4L,0x75a6dad231d8b33aL,0x0000000000000184L }, + { 0x8979245368b03863L,0xeee0ec59459c5d13L,0x2215a723a762a714L, + 0x3c61664047adac25L,0x3ea9bf8cf74e1d76L,0xe81f677183aef634L, + 0xe90d300c801a214aL,0xbdacf70fbbdf6cbdL,0x00000000000000d6L } }, + /* 45 << 35 */ + { { 0x549ca8a62fc82ebbL,0x0966172fefb4da37L,0xbd657773f05f137eL, + 0x9273f3b9a0a14c1eL,0xae7674bea56d37c3L,0xe796fd98e7eeb54cL, + 0x690f7da335a31dd8L,0xa773d549f2bc1053L,0x000000000000019fL }, + { 0xc61a2aee6e4461e5L,0xe788fcf6f11cab4eL,0x1e15e9ca7de73225L, + 
0x203c64df06c82462L,0xb6c8a5f360b7c125L,0x10a30ced94087cf9L, + 0x244298d7aac7db9fL,0xadd593c3e38b5928L,0x00000000000001fdL } }, + /* 46 << 35 */ + { { 0xac92aa67efd692f8L,0x461a4f0414aa1b74L,0xf756506f6e0d1b70L, + 0x49b9332e7202c5a0L,0xd458392bf4a3eb34L,0x70a6f237903e034dL, + 0xf7e74bff376f0e79L,0x1bc49634e7406700L,0x000000000000003bL }, + { 0x69e839c6aa0f8fe8L,0xa0b25fcee8e918f6L,0xf06f66d255fd9ea4L, + 0xe54dd6f4c525cfd7L,0x0fcb1d3e1bab49adL,0xfe97828a8d6460e0L, + 0xc8f87d5388be0227L,0x32bc6479a1578dfeL,0x000000000000010aL } }, + /* 47 << 35 */ + { { 0x05a8ef77b79714baL,0x221f14fe8c448461L,0x036b54d7ebb08ec8L, + 0xb1fe5c5abc4ae646L,0x68b64241fa20e161L,0x6c4ed74f692e9ad3L, + 0x906e788265157d53L,0x34dba9f715097f12L,0x00000000000001e4L }, + { 0xc9480d587381885cL,0x028e2690706a98a3L,0xeeec9a743a34e3a5L, + 0xb249016cf655d964L,0x305946104c688049L,0x9e7bcdf9637460ebL, + 0xc1a40951e8f969beL,0x144d1507a47ffb9aL,0x00000000000000b1L } }, + /* 48 << 35 */ + { { 0x93f63503110a8e2cL,0x8bcb929fed94cafcL,0x44934ed35baa912cL, + 0x4dfc0eac7920fec0L,0xf2f642bb86b99a2bL,0x0e2e59bfd0903505L, + 0xcc44edd3ef99237aL,0x98cde5795af6f89aL,0x000000000000000fL }, + { 0xf279af6dd1ecb2d4L,0x2c520d717a4ba4e6L,0xce33faea662b4f22L, + 0x5d2c6df333ddbc4eL,0x5c2f283138dcc3b7L,0xbeea8bbc5e64ea59L, + 0x434aad230772c227L,0xcd6d33c871062cecL,0x0000000000000007L } }, + /* 49 << 35 */ + { { 0x59bd148246810d15L,0xbc56caa70ec29531L,0x11fe2559ba32ad70L, + 0xcdeaf7da9b04c10dL,0xa556a5eee042abdbL,0xf5bada661c47b667L, + 0x134006e409d62e56L,0xede1067f8a871ddbL,0x00000000000001a0L }, + { 0xe87c38b88ef287f2L,0xee72603243670334L,0xe21a09b60809c366L, + 0xaf24199e33fbad00L,0x99a879c66232ebebL,0xfb8b08a73b1c8edfL, + 0xe8cbbe4589ee6cf4L,0xf4ba2e5b2ecf414aL,0x00000000000000b9L } }, + /* 50 << 35 */ + { { 0x332d72a9ed527173L,0x4c71937716c49b89L,0x7a127d87b684d892L, + 0xedb45e899f77ace9L,0x68ecf908296cadb1L,0x917a4d8b8ce0247fL, + 0xc7c634e3094ff83aL,0x095bd5da5ccc4d40L,0x000000000000000eL }, + { 
0x27e9246e4f7ddfa2L,0x7ebad205fca8cbe5L,0xc99c9b0be453080fL, + 0x1896f632232df338L,0x2e0647ce4226a666L,0xac91062c634cd754L, + 0xf0df151c71368fc4L,0x53d1a5a831f38365L,0x0000000000000114L } }, + /* 51 << 35 */ + { { 0xe1e928f1f39f6d35L,0xaf73a1624b4eae8dL,0x599d1e428e29612fL, + 0x6e7b4c2da2bd2b03L,0xcaa94b7e5ee08f6fL,0x26452280a708c9e4L, + 0x4753c0e9eff87fb1L,0x9b9aacc612d79464L,0x00000000000001ffL }, + { 0xece3bccc055d6691L,0x80f7b76baa64ed7dL,0x63a1053ef4adacb6L, + 0x3f5e833d69482071L,0x8ab2d2e776d37d8dL,0x7b4984d4509e5d79L, + 0xe5a32d95009cb254L,0x12f2ee9dde547828L,0x00000000000000ddL } }, + /* 52 << 35 */ + { { 0xeb1a46cb0fe7547cL,0x7ac12a6c6dc6c819L,0xaa7ee65698dfd5bbL, + 0xb28ce5904b306758L,0x4d7945bd2c268cacL,0x10ac23b8b94ae165L, + 0x6e1efae24834d9ffL,0xb2b5efa68220fd25L,0x000000000000001fL }, + { 0xbff886d8844e5b7fL,0xc242fb3908602f14L,0x9f9bce94f58b4aa8L, + 0x80e46cc5c48583ffL,0xcf29272952d4ade0L,0x38f3ac0b3f65f345L, + 0x8ff60c1263b097e5L,0x35ecd555287b7dfdL,0x00000000000000d3L } }, + /* 53 << 35 */ + { { 0x278102a1a5b222e4L,0xdc5f6121b47a6032L,0x2d38216511dbcae9L, + 0xc63c21bf42bdb2c3L,0xafb68446d7dcac98L,0xe3685e2c3cd2e244L, + 0x414b4974731f6884L,0xd741f91c9dca2bc9L,0x00000000000001e0L }, + { 0xd0a669e178ac8c45L,0x07fa58a7b7b43ab5L,0x5fd07638f2052d03L, + 0x4f0e4b2faa9999feL,0xcb9ac71370fd1eaeL,0x9410da48ebee3806L, + 0x7fcbdae832a700beL,0x488b3853abe7c10cL,0x0000000000000154L } }, + /* 54 << 35 */ + { { 0x8868bd8fe499eecbL,0xd00762e79257c00bL,0x4e58b972773fae1dL, + 0x0aff71c0bac2df50L,0x3ed0b4a37887e1d6L,0x627024cc558b9dacL, + 0x9e13a0f7c4135286L,0x4491810f6ebacef7L,0x00000000000001f9L }, + { 0x6d08b913f5189d9fL,0x8312190a24b2fc37L,0x3c282ef702c1068fL, + 0x40cba7b5bcc75f8aL,0xb992a9c6dd4704f4L,0x437dc8551cf0f9d2L, + 0x7f902ae7786d0addL,0x464c08c2f6b23723L,0x0000000000000125L } }, + /* 55 << 35 */ + { { 0x153be35473256476L,0x632f6ad7cdbfb55dL,0xeacb063c1701fa3dL, + 0x711992817e50dabfL,0x8e70efe0e361247cL,0xeef996913d6c2584L, + 
0x99342b365f1a7172L,0x82aa3e46a3be3898L,0x000000000000013bL }, + { 0xb6926ffd3bcc0564L,0x80027de19f27bfb8L,0x0a6cec9deb09b4cdL, + 0xf952ee6b237c12e6L,0x20b2d4160475ed2aL,0x533c12b6f0643f11L, + 0x11bbdecdac6468acL,0x651e84e95b039866L,0x00000000000000c3L } }, + /* 56 << 35 */ + { { 0x90a60e09bd58611dL,0x19f9e897807acd15L,0xd14b65a53359e712L, + 0x3a8cd601ba2e3c02L,0x3ebf3dc780855419L,0x0ba3dcc9993da1c9L, + 0xd926fa9c763aea46L,0x5ee8f9003720bb78L,0x0000000000000094L }, + { 0x9ab2f1db266ccac9L,0xc5c9efa5c6c9d71aL,0xe65768f4ba1a6c65L, + 0x80f0f12041ee29cdL,0x8fbbe2f2a9fda865L,0x678e08db3f325a3cL, + 0xd72a224576840159L,0x934c1904a4d8a7b7L,0x0000000000000108L } }, + /* 57 << 35 */ + { { 0xb8c07819e38a2661L,0xf92e9402c650e59dL,0x850a80f7d03a96cbL, + 0xf0ea0c7a76ee02d4L,0x02d96e07379b1ef3L,0xd5859369a8a6f8e5L, + 0xb4f7b27644e88c39L,0x8f739a71219c6d30L,0x0000000000000186L }, + { 0xf88fbdd26ae83e11L,0x1fc231f84717aec8L,0xce4d216fe7ac9dc8L, + 0x8c12832abc741f40L,0xa80e842a9f5a5cb4L,0xb9bc23a867283096L, + 0xf430ff4d9f6dbbbbL,0x45c97a92f754b3cbL,0x00000000000000ecL } }, + /* 58 << 35 */ + { { 0x16ae9ad4b70d4afdL,0x9c91d2ff7b8ce81cL,0x3e1a68ffeaeff601L, + 0x99980d7d3c05df00L,0x96c29797ec215736L,0x6b6bf83e0a99316dL, + 0x8054259cf62899d4L,0x9f39008f36676143L,0x0000000000000179L }, + { 0x2a1d68baa09fd65fL,0xb072da01ec5477d0L,0x140d29fa404cf420L, + 0xdea8aef7aeb69f20L,0xea9606763125f663L,0xa26de345935e02bbL, + 0x4c0152b7e9df1fe1L,0xda6d781f6b1162bbL,0x000000000000019dL } }, + /* 59 << 35 */ + { { 0xbb583f128d5542aaL,0x1e29f9ecad0e801eL,0x02f793efaccc9b85L, + 0x1860130f78b3365aL,0xc802165b57c401c1L,0x6f5837856b64648eL, + 0x78f10b17e6256a68L,0xcd0480700f5c43c2L,0x00000000000001b8L }, + { 0x1e171d092dd5f89eL,0xc80605272a1458eeL,0x63b259191e335fe5L, + 0xddf7e4de6c2fe24aL,0x80d9ad40e1a3ca49L,0xba447a3c45a9751bL, + 0x79a8b213fbe20918L,0xf998902bbd826959L,0x0000000000000114L } }, + /* 60 << 35 */ + { { 0x4d0c9dda2123b077L,0x3719ed5f6ec3e16fL,0x76e14fb3c9becf2aL, + 
0x7dc73e7accf6aa22L,0x0a75e100ff4a5f27L,0x2989ac9a67dc8054L, + 0xb33b298936f3d4bcL,0x0d79fbdf97f57f76L,0x00000000000001b4L }, + { 0x375f79ecdba7f5bbL,0x74babfbe620e1045L,0xaab000d015cd82b4L, + 0x8d9157aa4d992702L,0x6bcd5a63d172ff7fL,0x21e3f97b1b8b959cL, + 0x441526998f55153bL,0x77e4a535e4da53acL,0x00000000000001fbL } }, + /* 61 << 35 */ + { { 0xc760c5eb35a8ed76L,0x8541ae9427f20985L,0x8a3d6f6810483c73L, + 0xd591830c3769b499L,0x398ce1dcc595ef94L,0xbc869a47fb6918d3L, + 0x9142551146b1edc9L,0xf492ed9dd2a4555dL,0x000000000000002fL }, + { 0xd7e24885b950c177L,0xf90c3a0f3f6c9fadL,0x415add2251346351L, + 0x756021d26e45e3f0L,0x72d65b4ecbc2463bL,0x53f3c07f1d91adb8L, + 0xfd0b41b9a53cbedfL,0xf9a3af4ca9280662L,0x0000000000000112L } }, + /* 62 << 35 */ + { { 0x6a014cb6220edea6L,0x00dcaecc734585b6L,0x8d16a80d06d56992L, + 0x3aa9c24e844d8382L,0x71822c10f37e3563L,0xca2e4d4a8db2350dL, + 0x799322647ddafb6eL,0x8529609524430c95L,0x0000000000000125L }, + { 0x8c63f30844734d8dL,0xf46aabb1d60d0ba7L,0xe8395fc9b18fc0c3L, + 0x042d21115a21c6b9L,0x1b5e8a27cdcb4261L,0x50632338e3efa848L, + 0x60640eddc6805c4cL,0x6273c772ea293267L,0x0000000000000127L } }, + /* 63 << 35 */ + { { 0x386e53df690f045fL,0xf7efec6f9776e13eL,0xd24ce88d1988fcf0L, + 0x0b5d478a3fc9102dL,0xc08a3154a9efc4f5L,0x06d0820155e4fb51L, + 0x03e1529886b656d1L,0x456f88bd833c233cL,0x000000000000013dL }, + { 0xbba27abb7db3562aL,0x05c60b050a210d7bL,0xd46a2c3778da9e61L, + 0x7b6c46bc07fcae8aL,0x3993caae6c9035bfL,0x48e1c8d96b90aeb9L, + 0x01561d3435f0b54aL,0xe9cf0fd81ed47430L,0x0000000000000003L } }, + /* 64 << 35 */ + { { 0xa6149ac821a91383L,0xfae0515c63176a50L,0x436d0d6a696e3e7eL, + 0x3d44ea3eb32b2e16L,0x142644cc66f3bbe0L,0x697d5cef82322b00L, + 0x384543c64c388edbL,0x4afedcffd395d1d1L,0x000000000000002dL }, + { 0xa7b35fac84cec5f9L,0x003daaa65d827e5cL,0xc7c5f3d3dfc8263dL, + 0x8dca1315d98790d2L,0xb1f54568f00346beL,0x3e6f6876de680d87L, + 0xde73e95b6af2e269L,0xa5969ec77e8fae93L,0x0000000000000185L } }, + /* 0 << 42 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 42 */ + { { 0x747e8dddbc0ffee5L,0xa50f058afb9178a5L,0x14f41c93b2d52777L, + 0xf7c95492aecac13cL,0x9698bef3b7f715d3L,0xbd7fabc2f132c199L, + 0xbec856136e9b47a4L,0x08c7486e62f85cadL,0x0000000000000036L }, + { 0x3a44d74eb39c9ba2L,0x22b4c6aee89de157L,0xfe4985bbb93dc56eL, + 0x85d435bfdd1201aeL,0xa01e7f53aa68d442L,0x2a4fcf509d9afcdaL, + 0xcc4c5b5c6b95d6f4L,0x5e8afa0e8c390221L,0x00000000000001c1L } }, + /* 2 << 42 */ + { { 0xaa498bba799e30a1L,0x7e193db1562eede4L,0x4dc49402f4a94bc1L, + 0xcb63d972c4d7d024L,0x98d59716f6778b61L,0xe6c0a829b40261e4L, + 0x5f1738fb65e3b28cL,0x3c4e80d216a75ce7L,0x0000000000000035L }, + { 0x376d19ef733ded62L,0x9400e36e0a93203dL,0x013ef7caf98ba94bL, + 0xb7556ca035756aaaL,0xc6b8b05a2f643611L,0x23891b32438f759bL, + 0xecc9927e27f69733L,0x2ebe0180969ae58dL,0x0000000000000069L } }, + /* 3 << 42 */ + { { 0x9417c585174bac49L,0xac479ed35fe90109L,0x40cb51ddc374556fL, + 0x2767e7c46cf0ff89L,0x0df9e9546a284db1L,0xbf89af89b317ca42L, + 0x863018a69d05c5e5L,0xb68c6f8129717e9eL,0x0000000000000151L }, + { 0x69a97e79a12e3bd5L,0xc85780bf4a057173L,0xab4d585f2bfaaeadL, + 0x039589dfaea4c334L,0x38823f8a9a840138L,0xe4448b345b0d8117L, + 0x9fc3060247e9d309L,0xf9f42989a44f2124L,0x000000000000004bL } }, + /* 4 << 42 */ + { { 0x6f7f2fe8b6813f72L,0x95f979fe940210f0L,0x42cbd8d71a409657L, + 0x2e2d2f08474c141bL,0x63e5eff052a6a362L,0xcd24503eb1c5a27dL, + 0x30335d77db08b256L,0x827057cf9724bebfL,0x0000000000000003L }, + { 0x6b372ca7dc5feef6L,0x7557c331d06696b6L,0x4b10d90b4a42090fL, + 0x2c93e9846f10352bL,0x72ac24ff2b8ded01L,0xfa53f5dad8c833a6L, + 0xc166ed4b7c9354caL,0x81df59a09e3b4cf5L,0x00000000000001faL } }, + /* 5 << 42 */ + { { 0x3d37b5c3f1e8092fL,0x52f9d7f02083e034L,0x2f7de776ee86398eL, + 0x302298c9ac0cfc7fL,0x08b41a8f7f5bbbf5L,0x331ce968d91f52b2L, + 0x1ec1c1dc1cb43d1cL,0x6c93d72038545bc3L,0x0000000000000121L }, + { 0x9fbb15c9f1925474L,0x2779a1a6c6e0020dL,0x367a921594e71878L, + 
0x4a1ce464e2692460L,0xa3c5e4560c3a5aefL,0x5dc2ef3ced47ac93L, + 0xd27ada21415bd438L,0x90ca8c3166b0f5ebL,0x000000000000007fL } }, + /* 6 << 42 */ + { { 0xce8865dc68f7cf0dL,0x9734bed0051db980L,0x6c1ebfc3805247b4L, + 0xd80a3c973be516f2L,0xfdd123b9beafd70bL,0x6dec300ae3a84da4L, + 0xe6d7e14f24178c07L,0x39810dafc4a67c89L,0x0000000000000067L }, + { 0xfea3bf4eb47aea7eL,0x8394f3c24a4fe2ceL,0x50753f252a191024L, + 0x39846eca1cc794e3L,0x6ce5aa37c806fa24L,0xacacd14890982be7L, + 0xbab241b1ed46b6b3L,0x7b696b06ecc56027L,0x000000000000010dL } }, + /* 7 << 42 */ + { { 0xda69cd2586c38b08L,0xfca9f4e06f3fb63dL,0x80d0a99f6a96c788L, + 0xd69615b854e77f31L,0x17eab3a318a78b2dL,0xc66044b4227cf99dL, + 0xd2ed45988076af9dL,0x2bfd33d7c6fdca97L,0x000000000000014aL }, + { 0xd6701f8ed292c052L,0x0b5afccd75d952dbL,0xf6cb5727fc4b2866L, + 0x39ec93d2bb220367L,0xafb0b16b7c18bfc9L,0x198e2dfd9803aff0L, + 0xc5ff9fc4d2b124a9L,0x7ff8ab6a82414a7eL,0x0000000000000009L } }, + /* 8 << 42 */ + { { 0x8ce9e114e43c6a8bL,0x4ee9f8b0804168b8L,0xf5fffd5b5fa74509L, + 0x4589d84cdc20088eL,0x00cdb864d8cca5adL,0xfab8ee1674cb7285L, + 0xd5e46fc6ab702098L,0xcaeb2ef79cdb1896L,0x00000000000001ecL }, + { 0xf5886bbc26b04438L,0x61613d0ed9567198L,0x749eb5d8ca99b91eL, + 0x5861944d53b41996L,0xdc22071c54df4765L,0x1f6288a8b8414aefL, + 0x6b34db2f7ff1c5eaL,0x5edf2873ff5262b0L,0x000000000000002eL } }, + /* 9 << 42 */ + { { 0x239a57b62bd9413dL,0x341d26f30136f66dL,0x9e4222cf78b3a484L, + 0x76a6fe313e227fb8L,0x9d724ac0af025ba5L,0xddc57fc691ab5699L, + 0x8b768f35e278e0ceL,0x485f7b177c12687aL,0x0000000000000198L }, + { 0x3d6e79f4688c325fL,0x45b4d50680cb1ab1L,0x92f89190ab8640eaL, + 0xd70c0dc4e079ffe5L,0xa80c729bdd78e9ecL,0x2b7956208591ab90L, + 0x5cd2ea6e7f90dc9bL,0x7790417ef057c6c7L,0x0000000000000024L } }, + /* 10 << 42 */ + { { 0x3688c2cfb08b842dL,0x46e70a8b509af85bL,0xa580b032ae4e0ff6L, + 0xb945d72433674d0bL,0x58bfa0dbb95b1c05L,0x3e5e9d8e35ca8038L, + 0xe0ef29bd3ad503b7L,0x9f300fc22fae5708L,0x0000000000000148L }, + { 
0x7c225981128ae9feL,0x1d205a4dd0101b45L,0x95c33725f98e3575L, + 0xc2c9df812443a70aL,0x1be7bd1c134b6ea6L,0x70b8bd656f1a4e1fL, + 0x7d3e41455ee45a2eL,0x27bc05477e8f38cfL,0x0000000000000117L } }, + /* 11 << 42 */ + { { 0xd8495a1a4ffde107L,0x82acb1112b7f0d9cL,0xaf106329c3622c88L, + 0xe61140ff59f698b3L,0x75bd953659068866L,0x03d5841c591b7a27L, + 0x967eb69f7bdc7639L,0x504f158a5e4ec11cL,0x0000000000000137L }, + { 0x4b1ed56a9f2d48a4L,0x9f0ade2220312abcL,0x07898cc964b74837L, + 0x082d0eea21d92392L,0x6c1103086781ebf3L,0xef9ecbe4e9ad8425L, + 0xbe46c0f330b02c57L,0x2b1d60726267db5aL,0x0000000000000139L } }, + /* 12 << 42 */ + { { 0xcb9e50b3cc6dce31L,0x0b656fda60399388L,0xd3104fa3600d4b62L, + 0x7cfc68acb660e0eaL,0xd8c2218add9f415fL,0xb3e0d7c626c649d9L, + 0x37e6279f4d09f7fcL,0xe9ef720e136ad787L,0x00000000000000e3L }, + { 0x0bada672d03870d7L,0x553ef23f815592e0L,0xc1132ad8b8dd5471L, + 0xd8666e9fb80b521aL,0x38f49d542c62908bL,0x3095f1bec9f15b3cL, + 0x5aa771bafe8866d7L,0x0fa253a28590b3cfL,0x0000000000000015L } }, + /* 13 << 42 */ + { { 0x6e37d1fe56b126a7L,0xdb1215088ecf27a8L,0x3012eca692def89aL, + 0x03761cf976effd5cL,0x9b49af9365fe555cL,0xd4cd598969a41926L, + 0x506c406f65022be0L,0x4b49c23050baebc6L,0x0000000000000154L }, + { 0x6fdd3549c425eca7L,0x42690f8c7dd8dd57L,0x866a6a5097938d4fL, + 0x6e77025f9055da56L,0x7ba19f35e0dd3935L,0x57a71e7db1e74f01L, + 0x4a9fffaa48000494L,0x5c18a259085743ccL,0x000000000000011cL } }, + /* 14 << 42 */ + { { 0xd38ceb253d23e873L,0x94c2501ab6c80ee0L,0x8d03f7c2d5c233a4L, + 0xd86ee22efb9f0759L,0x8b57c3981344aa31L,0x21f15c8d4b44d7e4L, + 0x22cba67334bf4ed1L,0x2d459d4f587690f1L,0x0000000000000038L }, + { 0x3b954f93b9acda8fL,0xc41e5265e7721250L,0xc76c15110881961bL, + 0x2a0b4efd6e53d2c8L,0x350455dcec0adfc1L,0xd7ac97bd922255b8L, + 0x39d023ee4c4c50cfL,0x1fc03d079473b093L,0x0000000000000183L } }, + /* 15 << 42 */ + { { 0x57c3a1e1315a5a83L,0xb75058039091dc2dL,0xb9c26bcd9d320a7bL, + 0x13c81a8a12c8df41L,0xab970177f757a70dL,0x8f2be088a4a27340L, + 
0x01636d7437506668L,0x940d42aa8a92e58fL,0x0000000000000025L }, + { 0x32a6a8b7201c31e5L,0xd1c8f9cb0fcf80e1L,0x7fc5fd9302fcef41L, + 0xa09c4c02472f6386L,0x6a852fb75767dd02L,0x995703ec3d17f69bL, + 0x10cde7934fac3a32L,0xafa0f0edf71bbd98L,0x0000000000000065L } }, + /* 16 << 42 */ + { { 0x49d0e9bbf1a37365L,0x650f8e4e523ffafeL,0x37b691a0517b3aacL, + 0x73372e761824cba0L,0x0c0a1ac3063372e3L,0x2387470841aa63edL, + 0x1d70995daf840388L,0xea733e27491b2e8dL,0x0000000000000122L }, + { 0xeb6e7c3b573b43edL,0xa645f29308f87584L,0x19a56fe0af100aa9L, + 0xb767ce92b1d5fdbaL,0xdbf9b1b20c349030L,0x42f0cdf3586e1ab8L, + 0x65876bea99631140L,0xbf016fd8268af542L,0x00000000000001acL } }, + /* 17 << 42 */ + { { 0x746b65d463a3307eL,0xdcff899fa57f17f0L,0xd9adca93946f6eaaL, + 0x0ebe65fc04756a3bL,0x2653a837dfb99071L,0x3a80de4c3b8ce452L, + 0xc64562cd4fff1594L,0x02dce3ea859d741aL,0x00000000000000e9L }, + { 0x1a949222944de68fL,0x1535fa32a48135b2L,0x2691dcbf6a03cc87L, + 0x99ddb4450fb1f4e3L,0x621c99c2eee93dd0L,0xb9debe99ffbc6597L, + 0x64a5568250d2fbbaL,0xdeefd277ceda3428L,0x000000000000017eL } }, + /* 18 << 42 */ + { { 0x379bfe948e2cc41eL,0xef843d504cbaff27L,0xa541389d608dd952L, + 0x7b3cda475d24e091L,0xa66d480fd051a610L,0x082af70b435609c5L, + 0x00529182762a5791L,0x6f4651518460f5f8L,0x00000000000001d3L }, + { 0x53c20bfeab267f94L,0xb8917621293967f1L,0x176c8af2c7ca82c7L, + 0x875c7792331a2dfaL,0x0a34cdbeaace8970L,0x2228eeade29878a0L, + 0x64ebbedcb9e6b75eL,0xebc8fa2c87e5fbcfL,0x000000000000001fL } }, + /* 19 << 42 */ + { { 0x677eec9a5c8cb438L,0x898a62466444b1f1L,0xfd46a21196472331L, + 0x7a4f96db39ad4d79L,0x24a03f04113e2f4bL,0xe39a949531a0af3eL, + 0x7efd887bd3774f4dL,0xc58e8360638e2e62L,0x00000000000001cdL }, + { 0x8e289acabcd909afL,0x5c99d510d5131f29L,0x5d5122b8e85edbd9L, + 0x919b2e2d6e18283dL,0xd740dddc5070b20aL,0xea31401ae4d879e2L, + 0x14ff112abc0c3eeeL,0x1a6ad04b61dd98e5L,0x000000000000012fL } }, + /* 20 << 42 */ + { { 0x3133809747525f3bL,0x49551e31c6cdf694L,0xdfbe83625b9451f1L, + 
0xfed9c23bee491931L,0x5270a02f324d645cL,0xe323781f4175d154L, + 0x31511e52ff475990L,0x0b60cda369b0445eL,0x0000000000000082L }, + { 0x822e418092fa5405L,0xe78f939b6b4d67b8L,0x6f09c296265e1b54L, + 0xf6ef60ddb6baa0baL,0x1d83f2968be593baL,0x777e06327e71d499L, + 0x5db334162e0ded5eL,0x6800dc32357e1be4L,0x0000000000000077L } }, + /* 21 << 42 */ + { { 0x519d71ba17d9dbd3L,0x70caf81bde95e99aL,0x067d64fb20043aefL, + 0x533a02d15cd816c7L,0x662d9255fa0316f7L,0x7a04f551f523f7a0L, + 0x5e00f334ee1e5d9aL,0x246b5aebe7439ed3L,0x0000000000000132L }, + { 0xdf48079568d4c2bbL,0x945bf6c579f10fbeL,0xe817703aa572433dL, + 0xd9c8580c61013cf1L,0x0e77739ac9d9bbe6L,0x2d2a8c7aceebe40dL, + 0xe5561526c853e2ccL,0xa6cf89623e9b763fL,0x0000000000000147L } }, + /* 22 << 42 */ + { { 0x284322075173f4b6L,0x774e95f84181e605L,0x36b97d1c4247bcb6L, + 0x46efcfce781dc2fcL,0x401d5ab2e22608d3L,0x6b854062c32b8fd1L, + 0xb99d2e90c0486953L,0xefcb9abbaa770dd4L,0x000000000000018cL }, + { 0x44a667184ef47c94L,0x3a5e72bf4f3e8c89L,0xff303101df7dc70dL, + 0x4029bc66a63c3782L,0xf74a21b993c6a3feL,0xca320f1afb0d401eL, + 0x80d8bd7447c8d5c7L,0x20da72fc97e610dcL,0x000000000000014fL } }, + /* 23 << 42 */ + { { 0xe29af3f05a5540f8L,0xe1ce7efc815f0154L,0xe3ff74e8e67013a7L, + 0xc51d9c7ce89998d7L,0x93913371cde4787cL,0x72b290126ed58fccL, + 0x61f6d6b908f0c14fL,0xe3aafef0f44b2791L,0x0000000000000119L }, + { 0xc72229012a036f1bL,0x1ecbb2c2387c8c54L,0x6ee742220b32e10dL, + 0x51440a0c8a95e719L,0x3d20b7fb998fcd81L,0xf52f182651a10e47L, + 0xbac05579a9edb43cL,0x05aefa586b0f7532L,0x0000000000000143L } }, + /* 24 << 42 */ + { { 0x7992da26804ed5e3L,0xac4677b05fcc6497L,0xb1a09585208ebd95L, + 0x1c0cfae7199d3419L,0x7c862f540715a11dL,0xd23e934f9cab55b0L, + 0xfd5d250ad4310fabL,0x0976782caef426bdL,0x000000000000005eL }, + { 0xf78b0a7a2a7d5b37L,0xf31c5594fedf69deL,0xb2b9e3f157f76616L, + 0xe9cfa2f23040cca6L,0x8a91bf0c5a72502eL,0x71f8a5b63c810d16L, + 0xc9701ce42a7666dcL,0x2da80d6c216a59c5L,0x00000000000001e8L } }, + /* 25 << 42 */ + { { 
0x869a4db4310de1c9L,0x81d5423c32f9190cL,0x42e3244d1db414efL, + 0x09c8b865ced2746fL,0xd49a9c37e3ebcd63L,0xf0120bc11b4c24b1L, + 0xabf538a709b52979L,0xe04be5a8f0f3fce2L,0x0000000000000139L }, + { 0x17738b903126b962L,0x723f5845c088121aL,0xe3d9c64015629c9fL, + 0x509e97d95b8dfc68L,0x578a3869fc16c27aL,0x1bab4b8ea2a47461L, + 0xe1443bf0c957e6f6L,0xe9cc17852bcdad9dL,0x000000000000002eL } }, + /* 26 << 42 */ + { { 0xa2f2f2c1f23794e6L,0x6f6281621181e396L,0x2e9bab64fced0d2eL, + 0x631f7e60c6bd7b02L,0xf0db7f12468fbec3L,0x854afaf573c8b79eL, + 0xf579fbedb0de4a3cL,0xee13e418678e1e08L,0x00000000000001b1L }, + { 0x1c725f6e874ac8caL,0x85414cf3fbbfc22cL,0x5262e129ec3fb934L, + 0x6facf53f4a0f37deL,0x6811fea52216fde7L,0xf63e665515b977caL, + 0x63c2344877533feaL,0x2481232a38302196L,0x00000000000001c5L } }, + /* 27 << 42 */ + { { 0x5e06ff9c56e023ccL,0x8a89de8fdb3e3bc7L,0xa103d54745fa4924L, + 0x788c59a2e1c033e8L,0x4f7c33e967f782afL,0x2f37668d9ba93c12L, + 0x73d6fb8aa10f70a2L,0xdca22ebded2fd7b3L,0x00000000000001c7L }, + { 0xf6b6e6fbe8f38522L,0xcc7d7c45a5a249aeL,0x6a5235175bb4caffL, + 0xbe60043dbe38ee66L,0xf0dad8cd1c96b5f5L,0xb597c1546c3e4799L, + 0x3f06c2ab2c9871edL,0x4581dffb3963ae2bL,0x000000000000013eL } }, + /* 28 << 42 */ + { { 0x46fbcf70aad63226L,0xc023eb8af2c43f64L,0xab1b684ca3f42111L, + 0xc6262621d12cea22L,0xa2539d86ad523f98L,0x76b98970b45f0b6fL, + 0xf9874df8d788809cL,0xb24620c7ab358b89L,0x00000000000000c1L }, + { 0xdd50316f92e3b976L,0xb6a2fed9ecdf1dc5L,0x746bb5d67f40b693L, + 0xb76ce9e40572240eL,0x5c65a823084f25baL,0x51ba6ed9b9890ec6L, + 0x3930dea2cad192d9L,0x2c78150416dd9bfcL,0x0000000000000126L } }, + /* 29 << 42 */ + { { 0x3e8797c7ef8491edL,0x7842b2dd5f13493eL,0x2c99f9363afa2890L, + 0xe3e157b785014501L,0xaea92ae17774f4f0L,0x05b7b3bc560e63daL, + 0x3d2b8875bf8030a7L,0x21095ef3004c61e7L,0x000000000000004bL }, + { 0x203687226361e4fdL,0xc9d8ecca7cb95f1aL,0x1063e19ed5c7cc5eL, + 0x0bb0e71d020a9508L,0x5b895541fb092e99L,0xb1733fb8dae4a59cL, + 
0x2a3ef56b29edfb53L,0x75c1ba1c389ab3bfL,0x0000000000000090L } }, + /* 30 << 42 */ + { { 0x0c3e572a0fc1e4e8L,0xdbded0b529787f66L,0x103a656853b84d7fL, + 0x939253ddc74bef10L,0xcc3f928f5bb84406L,0xbab921429ba6615cL, + 0x41a9b386cf35dcdbL,0x750c32e133bd6fc7L,0x00000000000001cbL }, + { 0x0fc37bf5d0b0d141L,0x213a97d1ad59d2e5L,0xcc1dd07db86e3859L, + 0xb3b4668f4a229914L,0x258c47aac41c94f3L,0x51d26a397745d941L, + 0xebbd50931809b9a4L,0x9a31e3caa6f1976eL,0x000000000000010eL } }, + /* 31 << 42 */ + { { 0x12b398e7ecb584d1L,0x71379d000fd02cf8L,0x5d69a03de25d53e5L, + 0x724445bba9cf6c9cL,0x97942b084184b050L,0xd8e7770f8a903d8cL, + 0x23be2cd6e285a39bL,0xd8262c7f54e43dfbL,0x00000000000000faL }, + { 0xb1dfd3321171711cL,0x91720264c55e34d3L,0x6c2fe07176ef48eaL, + 0x50a86d8419f04ea9L,0x21b234876ef9f5adL,0x2dd2953198ff68afL, + 0x2ccd5b31e5be2b6cL,0xdad0c4408cb81a45L,0x0000000000000078L } }, + /* 32 << 42 */ + { { 0x2631b3c36ea5e7eaL,0x0576e1edc566c837L,0x1e50e071ac9c1eb8L, + 0x5e74360ea8482d3bL,0x2e037e7718ceab4cL,0xf7d17456c8424fabL, + 0x202efbf582fe1841L,0x410f708c8cca4c68L,0x0000000000000165L }, + { 0x659dece0d00ea056L,0x495170f43d1301caL,0x749e3276cf2c6e53L, + 0xc0f5d00087b6f996L,0x46b7c068dfd50725L,0x83e9d3de67ef401bL, + 0xec1cee4e3d4ef196L,0x31583197bef2dd76L,0x0000000000000023L } }, + /* 33 << 42 */ + { { 0x4aaf3d3a2d846afaL,0xcf35de6580c606a7L,0x022deb760cb93fcaL, + 0xa0e60afc96fa361cL,0xafcf7cc90b377305L,0x876c066cbc5e9abeL, + 0x5632b4763bfd78a7L,0x1271111fa016a308L,0x000000000000007aL }, + { 0xe762fb74fe8f5299L,0x7b179b4e4b614b25L,0x7e5de5c81f840510L, + 0x69047ec18d783eabL,0x98b381ec3516e026L,0xe7778e602da90ec5L, + 0xf3b4d6cf5930e668L,0xc0bc0326fa9e8051L,0x0000000000000157L } }, + /* 34 << 42 */ + { { 0x2618f715359a6e41L,0x9132721a8b5e4de2L,0xfbfcf4aad2b349c7L, + 0xa4719bf086415128L,0xac080de43058d78dL,0x588b76300c29ab89L, + 0x847249def2b1a566L,0x090c25d01545fdf3L,0x000000000000013aL }, + { 0xdaf439760ee334d7L,0xd847290c51ce020aL,0x62246b88adafdd5dL, + 
0xc4fabcc7285636d3L,0x5f4bbd8e908964baL,0xd76d498d43ac7a30L, + 0x5beed326f425209aL,0x7bc22416f9dc7d90L,0x00000000000001e2L } }, + /* 35 << 42 */ + { { 0x512fa0c9f59767e3L,0x2b8b565b236870ecL,0x646b5d0b30d8fd55L, + 0xc0726c70bbcfff1aL,0xf09c125e16cc9832L,0xf7d5fd367c1cfc6bL, + 0x22257c4b1e01063cL,0xdbd248561d6d9bacL,0x000000000000008cL }, + { 0x9ed1b5e18f3af826L,0xc42dcea6f3160e2aL,0x3f3e13b1ff834019L, + 0x39d61040dc48b3b9L,0x557fd39ac47f458eL,0x452a16ac987520beL, + 0x2ec6d00219505190L,0x66fac607d5b3ed1aL,0x0000000000000174L } }, + /* 36 << 42 */ + { { 0x1eb52d8c8961c678L,0x9318b87f5188a0b6L,0x449da5f7998c4703L, + 0x814e23ebba1284a8L,0xc4d98c41bcd03139L,0xb1fe6bef965a1803L, + 0xa2b5b201889a0bc5L,0xbc93a3519b13ccdcL,0x0000000000000181L }, + { 0x17c9763ff4e51116L,0x26c730852a842e35L,0xdd6481a71296397eL, + 0xd588b1b854f7416dL,0x59f846cbc3d62325L,0x8f0d9ad920321e09L, + 0xd9c7bb569e1699daL,0x42a8b57f15de9318L,0x00000000000000e6L } }, + /* 37 << 42 */ + { { 0x90c29dc2d1043639L,0xbbc7cbdcf5e17066L,0xc249c81d9a986ddfL, + 0x081eb4d8c19cc606L,0x2e390f829fa13a21L,0xbb500f2ce369c007L, + 0x2f716b7cb5aa598cL,0x7ac490d538645632L,0x000000000000004cL }, + { 0x1d73543fd62a0b86L,0xed5674cc9eb85ebfL,0xcb4ba15fcbdf1a2aL, + 0xbe0c1e77d0caff4dL,0xcc8905381e3e3f97L,0x9513a987b2624ae3L, + 0x6a93bf660437efabL,0xc7ab75cc119e6bc7L,0x0000000000000146L } }, + /* 38 << 42 */ + { { 0xc2ae2ccee84ed2b5L,0xefb47683e2c4c512L,0xe86fe38992bbe0b4L, + 0xb027a47b2d4a353eL,0x270a319fed2c3482L,0xb5470e60df72b036L, + 0x0264dc8895967134L,0xb410a2f4bfe6af47L,0x0000000000000063L }, + { 0x1d1024bb7ae0582cL,0x88675bcfef5e48e9L,0x0d928096f5e8824bL, + 0xf172a15d94f0edf3L,0x1deda1cb3b823a69L,0xdc3722f122ea0b88L, + 0xdea7f8583437020bL,0x16543f8ce317d065L,0x0000000000000187L } }, + /* 39 << 42 */ + { { 0x1df3bd54a11f8d7eL,0x76b9ed25db6b8ddcL,0x5bc6ebdf63a01f8dL, + 0xf96b8f72b7f1589dL,0x94fa3a83a5476f8dL,0xcb63499a5fde5d1eL, + 0x7f89c36d688048cfL,0x3e2f2d487a377a21L,0x0000000000000169L }, + { 
0x8537c54896c0c39bL,0xeb98619a0dbf2821L,0xa5abaf6ffc11be9fL, + 0x88d3c0a10a5b0478L,0x4cb55dcb4c72ea75L,0x7585e394e753fdedL, + 0x797b588de3417ae1L,0x82dea02b45a0ebddL,0x00000000000001ceL } }, + /* 40 << 42 */ + { { 0xb6f28395a29d2024L,0x77ac682616726da8L,0x295c447073fd20f1L, + 0x9b2ec3f65856531fL,0xca27a7659555237aL,0xe497546f7a7bffedL, + 0x4f51b73ba508d541L,0x6b23a204befd9c3aL,0x0000000000000039L }, + { 0x264295052ddd93a9L,0xda92059c3ccf01dcL,0xaeca4b1d851d4cdfL, + 0xf7fcfb537e0ca450L,0xe87d6e18cddb9a15L,0xb126c191d895153cL, + 0xc8a744137e5a46c8L,0x36ad8a91de12f4f4L,0x000000000000018bL } }, + /* 41 << 42 */ + { { 0x74ef02d265e852a3L,0x324fed680f658cfaL,0xd466f103eca3fc85L, + 0x6034b5f0a0f02951L,0x806ff94ed0dc2633L,0x70ab663177e83219L, + 0xbda70dbc6db52c50L,0xbb7c538955b0b5a5L,0x00000000000000e1L }, + { 0x0e0d975de1aff7efL,0x3e308234bae3e688L,0xaf6711bacf272359L, + 0x62d70d6cffc92c50L,0xc95de419f4cf9c6dL,0xdc7dead78879f341L, + 0x33e941e09e25e4beL,0x14dc267622519153L,0x00000000000001a4L } }, + /* 42 << 42 */ + { { 0xd434b84d74697824L,0x40539e16acf24795L,0xd572b3054ad638d2L, + 0xde69e6788dd379a0L,0x675895fb4f7acb10L,0xa9e7010e28b809ffL, + 0x0c46b090bb932c71L,0x67faa77393164c0fL,0x00000000000000faL }, + { 0x929c93f16059ac46L,0x18d944565f740f53L,0x329b4c8fea065a97L, + 0x84919b306f560f7dL,0x861d07d5062ec3f2L,0xb7f0c7801f09dfe4L, + 0x7bb618819bc97c71L,0xc37b2767a48c1b40L,0x00000000000000dbL } }, + /* 43 << 42 */ + { { 0x3e450ce0a8d0cff3L,0x2e663e6b78ff9f55L,0x9f15134d5c0ea3b0L, + 0xa8e53ea772ce0e5fL,0x6668ae1188e1d08aL,0x11c498e6b3e48748L, + 0xcf8e174401b97c6aL,0xa7d61b3ffb5a55faL,0x00000000000001e8L }, + { 0x9be324b502984ae9L,0xb2ada2ccdc986c8cL,0xb73282ff92e4acd9L, + 0x59645b6929444216L,0xb462b0ec08094d4cL,0x3a282fab8a220754L, + 0x01b6567e6f43ca4cL,0x3413e0b6dd2bfc33L,0x0000000000000035L } }, + /* 44 << 42 */ + { { 0x32c6a931b227157cL,0xd276f7d0d347b7d4L,0xda50395f0ab44f59L, + 0x5d0c517699a52be4L,0x7b0f28e7b97054dcL,0xdae56aefd8e7b1deL, + 
0x05b0180087b4ccaeL,0x21c81809274a0ab5L,0x00000000000001edL }, + { 0xc1c7e06b9f9761c7L,0x3bd3309fc2a78191L,0x2c239fd7702a4f90L, + 0x5686716434a9f65dL,0xf8097255c1695583L,0x9b59c0bdf8597cf8L, + 0xe0c0ea8558996101L,0x30afb4de2622f5edL,0x000000000000005bL } }, + /* 45 << 42 */ + { { 0xc37c982c301c80f7L,0xc0bc3df13218b36fL,0x6c7959568f8447a3L, + 0xd3e11b8074d687a6L,0xba0e78bbacfeab37L,0x3160260448e100f1L, + 0xc59f32cc2759d36eL,0x919a699fe7c183a1L,0x000000000000014aL }, + { 0x1db45cb6b237b42aL,0x6be246b1cd20d06fL,0x71175e1852d92e5fL, + 0x1b1a559dc6379ff3L,0xf4a86e0f067063c7L,0x4f55eca2e61fb16eL, + 0xf4aa39140b9b68b1L,0x26746312b83d83f1L,0x00000000000000d8L } }, + /* 46 << 42 */ + { { 0xc3d57a8323e01244L,0x6c1765687554530fL,0x17e5b0c355b8fbe0L, + 0xea068a1726350a44L,0xf06413c117ff3090L,0x2daf2cf8baef2995L, + 0x499bf72a2db9ee9eL,0x1a0ef8eab6efdb69L,0x000000000000000fL }, + { 0xe1a9efa76d4eb0a2L,0x6e8182ea78617b26L,0x9b7d9837881857d2L, + 0xf18d42363a64071bL,0x21e43124c7df9bb5L,0x223bea1620366b6aL, + 0xbca36c195f3b8b74L,0xf6998f39fdd2b391L,0x0000000000000036L } }, + /* 47 << 42 */ + { { 0xa5e08e2708c182faL,0xe7dbceb058be3eb8L,0x454381379f440918L, + 0xfb10aa677a37d898L,0x8ca45c4a838e089dL,0xae3e13c06477efedL, + 0xf42788377871ec9cL,0xa4efd9487c02bcddL,0x000000000000014bL }, + { 0x8058ab501915d2b1L,0xf0e0b18507a23009L,0xf312041cb21746a5L, + 0x19702c8127ff31ffL,0x5bfc0ffa39269d49L,0x11f8306f5b2bb7e4L, + 0x7fffa345134e7ca7L,0xdca4d5ef3f626ad8L,0x0000000000000138L } }, + /* 48 << 42 */ + { { 0x882e70bcf935169aL,0xbe4f1a7e9faa4bf4L,0xc00ee4b63f49bf19L, + 0x7108876fcf2ef2c0L,0x475d94d6ee33e82aL,0x88381a2fba2b1bafL, + 0xd6ba047a0e1d8204L,0xf49a4704c9ad93abL,0x0000000000000112L }, + { 0x64a6ca0222fcc815L,0x35c840ded006e9dcL,0x44777fb5d7dc2178L, + 0x3dfbe7fd91ca3731L,0x026fff77e116256bL,0x6ffb7c2a648b3f43L, + 0x6688f6c0470a3209L,0x632e1094a98c1c6fL,0x00000000000000e2L } }, + /* 49 << 42 */ + { { 0xeb117f6f5c5e8510L,0xb1cacd2d52aeb1d2L,0x62bdb4954f806911L, + 
0x230a593c916da8a5L,0x8f5cf40280caf0c9L,0x4031db58a1e8c7e9L, + 0x34c5ef907e23e556L,0x276981361c3c4694L,0x0000000000000148L }, + { 0x99ac9916ec9b4f91L,0x432ac8cc476a48a0L,0x25d59bd336ceb46aL, + 0x03e24d9a547005eeL,0x01435f74efa3d224L,0xa6cf7444a4870b84L, + 0x9725e6046f067e62L,0xba420cc6d694cb2cL,0x000000000000015dL } }, + /* 50 << 42 */ + { { 0xba476d873030cc65L,0x40c8a7233f53e13fL,0xf15a7ebc139ddd99L, + 0x59473015dcdd8ea6L,0x06ad0d0e2ca9faa1L,0xf95d33cc9672c4efL, + 0x183bc0d13b31f16aL,0xc7e7132613880e33L,0x0000000000000079L }, + { 0xd4ca8c9030b61d4dL,0x293fb721dfe76d10L,0xcd4378cd025faf7eL, + 0x496f67ab38987186L,0x96bbf0c70b0caad8L,0x5a441f7ae9a127fbL, + 0x292a39d18bc38c28L,0x1fa309288bff6412L,0x0000000000000006L } }, + /* 51 << 42 */ + { { 0x41cf0341f2c97897L,0x07ecbc3155140e3eL,0x25dbd2d5fd0e65a5L, + 0x89d802f8ab0e731eL,0x200aa9cfaeb60ed9L,0xf2abd47318270058L, + 0x2290980f7222134fL,0x80dd2156d98c893aL,0x0000000000000033L }, + { 0xe4dbee4746b5197dL,0x5d110c69d34bc910L,0x2551116c7c6d43e0L, + 0x03391f080b0a6c40L,0xe11b366d39a3ac3bL,0xda60ccdb29749bb2L, + 0x32d05f2bfab0625bL,0xb2a3d728348b774dL,0x0000000000000057L } }, + /* 52 << 42 */ + { { 0xc3120df78ea50ac8L,0x124a99bf819551e9L,0x69dd9b95eddf11e9L, + 0xde805c83adcd1283L,0x3cb8d5f8a354632dL,0x1d9dcbaf4d718278L, + 0xebf2d04742c9aa97L,0xaf43c4b6b9b68e10L,0x00000000000001adL }, + { 0x21edfcb3d2451b29L,0xcc9acf06a48336f3L,0x8471e8c311e3e8b7L, + 0x3fe0082e3e1e38e7L,0xf9e09f1df6a8aa86L,0x3b445adbe946eb4dL, + 0x83d1b8d21579db09L,0x602db173ecaa7cd2L,0x00000000000001feL } }, + /* 53 << 42 */ + { { 0x7d0146ba1a83feedL,0x0be06076b6b5a896L,0xabd622a1b574173eL, + 0x4511461a9687cf2cL,0x6a8662ecba2a9970L,0xcd138a7026840ba5L, + 0xb4600008dbae80eaL,0x446cc5d10e75802eL,0x0000000000000077L }, + { 0x9d3f027ffb4be66eL,0x155462bc96f39f6eL,0x573003efe04e4009L, + 0xd4b2edd219bab2ddL,0x82a7818a37b54094L,0x2619e20c3540a3bcL, + 0xd9815146946f623dL,0x393c8bfaa09d03ebL,0x0000000000000115L } }, + /* 54 << 42 */ + { { 
0xa34c9ed658a89822L,0x42082c28ec5d8ce6L,0xe8d9bea36ba32197L, + 0xad2b7b6391123266L,0xc27acd5a18a3ad12L,0xcb8ed647708ef7a5L, + 0x0c5b0ed49fb1e72cL,0xd6580f7a5e30a10bL,0x00000000000000a1L }, + { 0xc52666b0438e766eL,0x3526912dd840853eL,0xfe1aef567ea68037L, + 0x0d8d383e9ccb43f9L,0x17788627acf36e20L,0xa28864ec0e1a7491L, + 0x822d8f2efcc40c94L,0xdddffd26a2b0bf5dL,0x000000000000012bL } }, + /* 55 << 42 */ + { { 0x48dff182c056cfd7L,0x4520fe724282fd9aL,0xfa0e53108d3bc991L, + 0x298fb724742f02e4L,0x403a647af5b8e1dbL,0x5bcd5a1655262f9cL, + 0x92889ba779b16c76L,0x1e26ed2971a16b7fL,0x0000000000000109L }, + { 0x749c137ce8e9b2c2L,0xd3b85753db690723L,0x940eb70133649fbfL, + 0x89e374f678597b64L,0x02cb67149414281bL,0x024bb7dd9f5a5741L, + 0xe048fe4adb3b50aeL,0xc6de99b14e7a1fffL,0x00000000000001c8L } }, + /* 56 << 42 */ + { { 0xb48fa75a590e92d6L,0xab1888ebeeda06b4L,0xffb140dd9e71fec0L, + 0x8fb4c8ed2a623d2aL,0x645c4442bdd3fd9aL,0x34adb68f2de96e6bL, + 0xbd7819ec59225d29L,0xa0a8b0c5768b22e9L,0x000000000000014eL }, + { 0xfc0ab8407662f537L,0xf75bde0788b7fcc6L,0x6dcf7e22656a1f21L, + 0x6c1116415318b15eL,0x731751bd77a8529eL,0x23e7c2723c846197L, + 0xc6ab7b666ad27061L,0x7e315762aad0e5b0L,0x0000000000000163L } }, + /* 57 << 42 */ + { { 0x6fcfbba1bbd9d928L,0xf3fc85d8a6ad07f2L,0x4eb1ce1bd5ec3e9bL, + 0x35a18838bba23455L,0x83f9e697aa05e005L,0x51b2c15bbbab3722L, + 0xf2f76c2f91080b97L,0x6b4c3d7b30adc94eL,0x000000000000017fL }, + { 0x676703ce17466400L,0xb003d1e144ab5f8fL,0x5c71817b4472d0d8L, + 0xef5dfdab35b276e8L,0x223dce9a94c083b4L,0x8fab9633c842f4e8L, + 0x7487d2a71b32ae51L,0xa73df4521e91b61eL,0x0000000000000149L } }, + /* 58 << 42 */ + { { 0xe3081ef15ee16ecdL,0x572190eaa301933dL,0x717f1fbb78b476f0L, + 0x53f3424bbecdccd6L,0xd6e3646020c12be9L,0x1a9aba50bf672196L, + 0xc677242c64fea24aL,0x1243d8cf3163375eL,0x00000000000001beL }, + { 0x649ea2f620d364ecL,0xef9918f9ca1347d1L,0xa690c23690a25227L, + 0x46de1a1ee9d7a9a7L,0x04d980378f454a29L,0xf92c691733fe5162L, + 
0xd8fca5190ab84b9eL,0x142af9fc4db77590L,0x00000000000001a0L } }, + /* 59 << 42 */ + { { 0x9bd789058423f66aL,0x4a1a18da7b4303c8L,0x9bc361e5f078f42dL, + 0x6df051f9fb9c9f42L,0x25d75614bbe4b87aL,0x17afd17b86701f66L, + 0x5d6afce29d60c6f3L,0x167f180eae9efcb6L,0x0000000000000029L }, + { 0x74ade397a1f06cc8L,0x450132df74fbcdd8L,0x5fd754812ec58631L, + 0x7db6f5af073ebc91L,0x29bfa114e046dfe0L,0x955659d40764e251L, + 0xc8420c2c3dc0bd0eL,0x50e6309a6943b512L,0x00000000000000fdL } }, + /* 60 << 42 */ + { { 0x8b2c940f98c0aa63L,0x6b8f11b6552f4b66L,0x36d68942208bdbd8L, + 0x1a27268d6d35a682L,0xffbabe3ce3c883c6L,0xb2f48c34c7b51fa6L, + 0xc307666acff5f20fL,0x988421026823d193L,0x0000000000000064L }, + { 0xa307127da730707eL,0x89c67ae8b63d595aL,0x235a76d5075c1933L, + 0x4e1488e6c680da9cL,0xbd34618dac7d5a89L,0x22131bc390fe0290L, + 0xb6709cefe5f6d0e4L,0x6d74a464cb81bb60L,0x00000000000000f8L } }, + /* 61 << 42 */ + { { 0xa91ebf5aa98f49b1L,0xe9966de520c72be4L,0x399c3525d2df5446L, + 0x30b0a731b8d64fbdL,0x85f30a4fb17ad183L,0x4178f5643bdb7251L, + 0x0497c93c32984a4fL,0x8a5d97eb380b5a7bL,0x00000000000000e8L }, + { 0xf2cae7dca29fd5b5L,0x73d2fdf8ff1c8977L,0xd5fc8a2e7251ce7cL, + 0xbf3e822b747dcc22L,0xdc9332f10b7bb04fL,0x80bd7f3cbf0baf6dL, + 0x063050956dd4d5a3L,0x2a6fa9aa22dfb616L,0x00000000000000ecL } }, + /* 62 << 42 */ + { { 0xbf786c4fd9712fbdL,0xd9e5f5873f8f4ed7L,0xdce2f57471fe2a47L, + 0xb1e0427d219a6430L,0xbff2b1baae521bdbL,0xc5166a723324479dL, + 0xd150919d6a6c33dbL,0xaaf9eba7e5412b6dL,0x00000000000001a8L }, + { 0xcb1c3eba6cba10baL,0xacb0b7075d5d30ffL,0x25ef098c370207e7L, + 0x74fb4cec078d8b39L,0xe0ef8c1cdd7b66d2L,0x678bee9533e9c36eL, + 0xdcbfd88e938220e7L,0x6c67d672b4dec38eL,0x0000000000000049L } }, + /* 63 << 42 */ + { { 0x40811a23bc3787fcL,0xf1e0938a0cc25bf6L,0x8ea5e4b9a83d5a2dL, + 0xeeec61008466a4b5L,0xd1e8499efe0d444eL,0xe27ecb8978890749L, + 0x901b2a6fa043632dL,0x424899f7baecac92L,0x00000000000001ebL }, + { 0x1c781c1973b26d1eL,0xbccb957b8f157a3eL,0xa374f01ccf29cb56L, + 
0x3271773277420ca6L,0xd147cbaa49904865L,0xbaa719955c9dce3fL, + 0xe8d401d705fee441L,0x47f583418a001ff8L,0x00000000000001a9L } }, + /* 64 << 42 */ + { { 0x62b631691c3be7efL,0xdab254bf8074207dL,0xf6f1466b235a4c77L, + 0x66f8f60fd3b01424L,0x9da540693bae9f80L,0x1a840763500f8a6cL, + 0x542e2dd9b25994b1L,0x996307e5183b3ceeL,0x000000000000006eL }, + { 0x45e2ae85a4b301edL,0x0640d46de1fb6112L,0x7f102d1983fc8f4bL, + 0x6eeece3a4f30038eL,0x74d062f5912f39c3L,0xa802a55316906c7bL, + 0xa8d8c58fe6d6e8c1L,0x77cfd9bca57b4387L,0x00000000000001f0L } }, + /* 0 << 49 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 49 */ + { { 0x3b61c3204268e7acL,0xf9fef1976dd3709fL,0xabd3a2d551f7146dL, + 0x4c03d266f40188f9L,0x56e27dfd088bae58L,0x5ba4660d03fa2b77L, + 0x56889642a39ff216L,0xc11b9feaca849201L,0x000000000000007eL }, + { 0x357c77971dbca40dL,0x23c449c26d3c6441L,0x6ad3b6a626ca2d6eL, + 0x32cadcb3b2ca4aa6L,0xdddc43e5ad004aa8L,0x7994e3ab37600864L, + 0xf09f263976e4580eL,0x6a2032a0a270cd54L,0x000000000000017aL } }, + /* 2 << 49 */ + { { 0x598766ce569614a3L,0xfc1534f1d485cc29L,0x1f5e87ce4f099a45L, + 0x93e6071f387d4d84L,0xa3f72298a130f8bcL,0x51372018edd37225L, + 0x165340e9b856f72dL,0x4d64ec749011bc47L,0x000000000000004cL }, + { 0x975d2c0d5b62d194L,0x845d25cb1bbd10bdL,0x1326f9d742ee849aL, + 0x61f90228a333154cL,0x4993f33d7500f71eL,0x3824dc401b6dcf4eL, + 0x1c71e193b65b8ecaL,0xa8ddc0c7dca1396bL,0x00000000000001a3L } }, + /* 3 << 49 */ + { { 0x35a51eeba9c350afL,0x22821a636097710dL,0xf5a5c2c95f6ee6e1L, + 0x8f9f201269599ba9L,0xed36867f12918ea9L,0xf1a5450d0ea832e9L, + 0x58c5a7311e15e027L,0x5a6460d600416b3dL,0x0000000000000198L }, + { 0xa0afb24fcf8c23f6L,0xdd6966eb2d40bca9L,0x7c8d20aabf132ab2L, + 0xc24efb7d7d32b64dL,0x26d793380c9547f5L,0x7e7d62251fb47f5aL, + 0xf1f5880d9a488725L,0x8ec9740d38749037L,0x0000000000000030L } }, + /* 4 << 49 */ + { { 0x7bad211d4ca94ce3L,0x7b7bed87d66cc144L,0x7eba714bbb5ee0ceL, + 
0xc26e3a3cf5eb8337L,0xe5c617983dfec21fL,0x9c679e26117ba62aL, + 0x6d67db8c1b6e1641L,0x1c2e077ee31c06d5L,0x00000000000001a2L }, + { 0xc0d71279cdc2d75cL,0x0b15b14709371b29L,0xba1f3359dc2ceb6cL, + 0x9a701873be25dad5L,0x03c466b8c8bfe287L,0x9697f32941ec3009L, + 0x4e21e900b4a54990L,0xee1b53c8a99d3093L,0x00000000000000d5L } }, + /* 5 << 49 */ + { { 0xcacf7227489ba5d2L,0x1a523c29f67d6ed6L,0xedfb4ca69923509bL, + 0x571434694ca275c9L,0x31b2e485340e8fb1L,0x1839d1e3fa9cdb14L, + 0x0f1f4aa874423836L,0xa1f66938027614bcL,0x000000000000012fL }, + { 0xddd453d05635bbd0L,0xf078bbfbac909f3bL,0xdc827eb809ce7304L, + 0xbeecd2de0f10ecffL,0xee375c35e43d5225L,0xd47f1c6dc1caf971L, + 0x6d9b9daf56133721L,0xe2e43b93763d052bL,0x00000000000000b9L } }, + /* 6 << 49 */ + { { 0x9a842cd1d7d39f33L,0x45d1ad8c4bca4affL,0xe490b9caf398d092L, + 0xdb73aab1dde5bfc4L,0x3b7984d7741c8e35L,0xf0b828d5f5819fedL, + 0xce2fc89bfbaad5b1L,0xda615df411c9e58fL,0x000000000000015cL }, + { 0x4b01920ad96156faL,0x221381dccedf3b14L,0x98da481c5d9b1206L, + 0x826399dbe4e4c2f0L,0x51dc45b6d73dbbd5L,0x68f639eb3f275528L, + 0x7093a3ab2d235520L,0x520f9416e8429996L,0x000000000000013aL } }, + /* 7 << 49 */ + { { 0xb3638db98048a2d6L,0x0526971177f15306L,0xa09f7697de04e4f2L, + 0xc782f1073cfbd4a7L,0x0232781e01adb58dL,0x2caf85d422724ee7L, + 0x952f12dda5ca3245L,0x658d58a74cc18585L,0x0000000000000191L }, + { 0x35ad5dc0145a3b22L,0xc6acb7e2d56fc847L,0xdce25662da9b5fadL, + 0x3c6d9c6620861f44L,0x4c3680ea5c573776L,0xf59066292ac95553L, + 0x5e70ec1c5f814c5eL,0x3ec45db4dfcde9c1L,0x000000000000004aL } }, + /* 8 << 49 */ + { { 0xbb59f2675c346588L,0xac37b36876f373a1L,0x4826e015a4c6d860L, + 0x4ef341b061bdf224L,0x70b01fb4becac30bL,0x5f518cfcf0bb0c51L, + 0xbbec7a919d163117L,0x8a65bee8de2da58eL,0x00000000000000baL }, + { 0x965caf8e566098b5L,0xe8ba0e81afbd8436L,0x77f4e27d7299fbc1L, + 0xf5169f5c367e2155L,0xc88fa56101a49a6fL,0x8a6a0228557b66deL, + 0xc867f1fa8e1bee40L,0xbf4b5f806de8fcedL,0x0000000000000042L } }, + /* 9 << 49 */ + { { 
0xb9524e1cb1f9f441L,0x0598193ea2b3656bL,0xa4259f49f5540942L, + 0x1cfdec1f6adefa22L,0xd923db68e30584a3L,0xa2a82559fd521cccL, + 0x3411a7edd7abf365L,0x40a53bccd2452d25L,0x00000000000000f2L }, + { 0x56b5c3f7d570f6f4L,0x692c9ae1fbd422beL,0x76027f68bf2bc5acL, + 0xe51a3a373e31e89aL,0xaaf34aac1dfddf2cL,0xf45a034e9d16cfcfL, + 0x5c8fe990ce407aa9L,0xbbe2e574acf3b5ffL,0x0000000000000149L } }, + /* 10 << 49 */ + { { 0xc3a9433d7169c63fL,0x3551fd4733c6d323L,0x507699e2bf2ca336L, + 0x5f6565d46878f701L,0x006acf880d019638L,0x788dfdcf4671f89aL, + 0x4d2c708a3224fccbL,0x7802c681810c6074L,0x0000000000000002L }, + { 0xc5576f9ad0c38110L,0xa8c3419961cfd5a5L,0x48b1d5180a8a1b9cL, + 0x9d1cb1dff3740051L,0xd7f5a27ca860e4a3L,0x9f149f487f8bac83L, + 0xddf84b81ac7f5c55L,0x30e0002ad9d529a2L,0x0000000000000047L } }, + /* 11 << 49 */ + { { 0x9585b7bc418dfaddL,0x4c58f686b5a32cefL,0x7efb7fb586cf7c76L, + 0x87512eb00cdda427L,0xd3b27213699021c9L,0x3a1741d24f969176L, + 0x58bc37cb14fd5744L,0xbfe9f937752fcbd2L,0x00000000000000e4L }, + { 0x10b3c0c658cc73abL,0xcaf09eae9f733202L,0xc729ced840fbabedL, + 0x997df02c3ccd6909L,0x4898124828c2898bL,0x734c9f989c96b3a4L, + 0xf025ebc3f20c8071L,0xd3a570ab384f54dcL,0x0000000000000066L } }, + /* 12 << 49 */ + { { 0x35e45c4b84376818L,0xead0b323c0564a2eL,0xf9e0790fb6852540L, + 0x3336a9aa55fed5a0L,0xa1dbea8ed6d817e4L,0x66ca0e37a8cb6e98L, + 0xfeb1493cc60383bdL,0xf910ce15208ea92aL,0x00000000000000c6L }, + { 0x25d128bd3a6aa1eeL,0x0bcabc8337d1b4d7L,0x31cb792ba825d7afL, + 0x288f5d5b7a6b56c0L,0x737a14be66b37a0fL,0xe9005d89b6662a43L, + 0x446d1492f1b00aaeL,0xd0be38cc545ace31L,0x00000000000000edL } }, + /* 13 << 49 */ + { { 0x87493232bdf7f4e6L,0x14541c5b49d814e6L,0xbbad3425fc906a15L, + 0x2cde8410c26b9540L,0x03d8b69a2269726cL,0x522113c8df932996L, + 0x329dda1640f1b59fL,0x1eb39fd5a2a62e23L,0x000000000000018dL }, + { 0x3e9e4ab1d977f9b5L,0x06b3afd518f3f70bL,0xee59a274d12f3910L, + 0x94e07eede16dd544L,0x8adc0bdaed27abd3L,0x7e4135b59fa28de8L, + 
0x6818fb61b0186541L,0xcf0f9186cc9fae56L,0x0000000000000184L } }, + /* 14 << 49 */ + { { 0x8532e2b84de469aaL,0xd1324820fefd245aL,0xbf47d6669b108318L, + 0x33c64bcbd7c86054L,0x4859cbd052374f8aL,0x8339b91418337de4L, + 0xb95a65009523dc40L,0xe759895961b6fee1L,0x00000000000000d1L }, + { 0xf85b9031652f4cf1L,0xa2fc5e77406dc843L,0xf12c0af404306a60L, + 0x2ad58232e20d0293L,0x884487ac3ca21308L,0xcc3570635bd3f20bL, + 0xd673170784ae6af6L,0xb8ba5ac32757be45L,0x0000000000000091L } }, + /* 15 << 49 */ + { { 0xa659f5656e835f6eL,0x7ac653f268f73febL,0x9c6e43b5e00fa5c1L, + 0x2b9b4245746bd49fL,0x3d7a750dd88d0e3cL,0x6728829167922860L, + 0x67eac654cd8acf74L,0x5d155498c194238aL,0x00000000000000a9L }, + { 0x7ac1ed3044b273a5L,0x4387bf6824d195a1L,0x5491474be5f20156L, + 0x961ee2ad04598989L,0x6ba547225e180905L,0x0119d4fa7801d12dL, + 0xe0f12a42f7d5390fL,0xb4fc030b3479de65L,0x00000000000001b6L } }, + /* 16 << 49 */ + { { 0xb19a5828676a875aL,0x40729f28c83a4911L,0x511949b1c6dce05bL, + 0x25914c8c3fcf0490L,0xc49490de0c6c2742L,0x3b21feefdf6cd770L, + 0x012d2802dd1a82a2L,0x72f5a0b76453439dL,0x0000000000000020L }, + { 0x3e85d07d9b58e26dL,0x56527bdd1c50af24L,0x564b56587df377d0L, + 0xf5b0335865e97a30L,0x2b22d6e96b31bc52L,0x8ad449abcccd4bbaL, + 0x84b8b888f7c162a2L,0x3b647f0e4d56f29bL,0x00000000000001fbL } }, + /* 17 << 49 */ + { { 0x538386b50142105eL,0x6a36896e5d844432L,0xa3f6bbe123903414L, + 0x806672513da50509L,0x2989b59047596bafL,0x71137e6cddb7c04bL, + 0x6b76f4c5aaf9107fL,0xad1575a18f0a7798L,0x0000000000000118L }, + { 0x8015bb4110007622L,0xf09b16c927c41f67L,0xc42d63b3961b905bL, + 0x8e8d60bf0b64722fL,0x1d44e65b31893514L,0x17aef76a971aab52L, + 0x63b990c7275e43b0L,0xa16c2ad0eabdb348L,0x00000000000001e6L } }, + /* 18 << 49 */ + { { 0x07120374a0e09362L,0x693d91b9dee217daL,0x69d89c4913f0cc96L, + 0xc39d993228c1b06fL,0xa20b517e54c7a529L,0x1076b98fce82be67L, + 0xacbfb44d04839b7fL,0xa0e7e6cbab7a6a5eL,0x00000000000001d8L }, + { 0x5aefed1d047ea917L,0x01ab000dbcbd7218L,0x9c9729136a369a58L, + 
0xf9b364f2995e7c83L,0x15d114f8c4e2de2eL,0x04720f6fca00e9fcL, + 0xbf1f54934913f69aL,0x4128e1d050a572a8L,0x00000000000001ceL } }, + /* 19 << 49 */ + { { 0x04052b0bc21341c7L,0xaa3738e32824cd0bL,0xd61bd9fcff92879dL, + 0x1f81dbbdef405b1fL,0x50764b05b9087702L,0x1c477a0b30be79faL, + 0x3bcf0f99f3adedf2L,0x3270cd7bfb1671efL,0x0000000000000099L }, + { 0x2dce98e903485ca4L,0xfd79314bc4e9dc13L,0xf64590430750dec0L, + 0xbcc8ef428a2f200dL,0x2a48ea838bcdc7daL,0xbd19939a616d34d9L, + 0x46c4674e673a3ce3L,0xdd5c17afe21d038cL,0x0000000000000047L } }, + /* 20 << 49 */ + { { 0x807392c4a8bd340bL,0xed20f6e0c0dd8190L,0x9d0f81d8fd1d379bL, + 0x410ec93adc0a7e5cL,0x660be24545c6a3ffL,0x920e232a68bfc7cbL, + 0x3a8e589e4992a0cbL,0xf7aaf37ed0ba8827L,0x0000000000000004L }, + { 0xf799075916cfd13dL,0x593652e05843ca4cL,0x8520e39c721fc81bL, + 0x01c1aeddee26ecc0L,0xed48e68e20064495L,0x2049cfb8c4b50a1dL, + 0x8e4545d403b6baebL,0xd1b2962aa041443cL,0x00000000000001aaL } }, + /* 21 << 49 */ + { { 0x9442441e21dd7a46L,0x5c5c5dfe1855582fL,0x411281d4104a43f9L, + 0x6fcbfb5d529c0255L,0x69cc3d43ef383628L,0x3e5adc17455ff08aL, + 0xb5aabfb47a16eb28L,0x464de4ae5c39b1b0L,0x00000000000000d4L }, + { 0xb36f33a4ee570b8bL,0x7f2f0809885f3677L,0xc110d6674adb4bc8L, + 0x17ebc77514f16716L,0xfcb24ac6d06d0b5aL,0x2a8eb070bb467ee5L, + 0x82498e6dff7b6a27L,0x651560a7b3be99c1L,0x0000000000000152L } }, + /* 22 << 49 */ + { { 0x5974d160916366acL,0x4c1117ea317518b7L,0x0d910ce0a9e0482cL, + 0xc472a9267522314dL,0xb531e038788e3d76L,0xc33babe371936389L, + 0x651b8b5dd7e44d40L,0xff292597d3f61049L,0x00000000000001e2L }, + { 0xe0dab29387469e8fL,0x94f31bf9b9860039L,0x49baafd0dbea96daL, + 0xc0dac82a69701943L,0x3975417bfbced35eL,0xd932a0c9387455bcL, + 0xedee6eec6ae9c37eL,0x004ab4ffb7a67355L,0x0000000000000005L } }, + /* 23 << 49 */ + { { 0x796e9c557ca11ad2L,0x58f34f3f7d85a82aL,0x591f92c027ca6530L, + 0xd83841d85ac34027L,0x465a3ea45f65d747L,0xde92e4766398a5caL, + 0xcca220e5e6df968bL,0x71b01c2ad75b85fcL,0x00000000000001afL }, + { 
0xdabcb750ef8429deL,0x6ca8ddffc37a94d1L,0x20d1c1e8c06f1a6dL, + 0x16cedc9406a7eca2L,0x2af3694ef6b38940L,0xb88e20dd41111d23L, + 0xeaedf8e6ab3419aeL,0x233e0966baf248ccL,0x0000000000000074L } }, + /* 24 << 49 */ + { { 0x59fa6794628233edL,0x1c30a04a25a1287dL,0x8dd4fdcabf3fad38L, + 0x522aad840d0b6f87L,0x12a66e90d160f8ccL,0x714213f3e673af6bL, + 0xd5e6feebad0ab724L,0x0ddf08f16a892358L,0x000000000000001fL }, + { 0xaa50abcec077feddL,0x36e6411ee7610feeL,0xbea885d1eb815b83L, + 0x9710ed6d5d57a6abL,0x794e6f1b85645674L,0x7608184df124cbf1L, + 0x356377f6cc8be47bL,0x56cbd2604b3e7161L,0x00000000000000d7L } }, + /* 25 << 49 */ + { { 0xf3da8295fec48fcaL,0x4aa389b04868517dL,0x1eae63410e4fb849L, + 0x2d79adaf3e627a33L,0xeb057943c16bf394L,0xf68653de184ebe57L, + 0xafabe009c84066f6L,0xeadf4244ef303ea2L,0x000000000000008eL }, + { 0x9cf14a2e9d04a4cdL,0xeb45f173a1896d1aL,0x40bfdff6e61abc9aL, + 0x7fc85552930f6a60L,0xd9ca2a4361801770L,0x545752c1d7ce2682L, + 0x75f003dc8e904de4L,0xf7d696c1e0d28d1bL,0x0000000000000138L } }, + /* 26 << 49 */ + { { 0x318921d51607513cL,0x4f4691aba028aaacL,0x6b89bf2c45571403L, + 0x6a33bd35bd50a74dL,0xa12697537320b060L,0xbdc0601f7b3d4567L, + 0xa3a545d0adadfa59L,0x1d57a713fb791746L,0x00000000000000b7L }, + { 0xde5af490b3ccc7d4L,0x230148e32096ef20L,0x1728c17afd7a6a82L, + 0xfe9ae61d832185caL,0x5315c8993c95585cL,0x1c5382864602f4edL, + 0x8e2eb045d51e72feL,0xfd69ab13b1dfc663L,0x0000000000000103L } }, + /* 27 << 49 */ + { { 0x907945a9e512926eL,0x0b965ac514dff417L,0x6e735674cee891d7L, + 0x9560323e84d70b73L,0xb20c6b12b867ed3aL,0xa8c57b78f3e266acL, + 0xaacc72a9031e4af9L,0x21a577716c8def4aL,0x0000000000000042L }, + { 0xc3c3673e5eba099dL,0x9adb0cfb96d46b1fL,0x360e2fce94637eeaL, + 0x96d93ecf8c797569L,0x2044f2cabf3878dfL,0xdbd03f0c8a4a46abL, + 0x0aa2542188fd829cL,0xe10b564de070ef95L,0x0000000000000024L } }, + /* 28 << 49 */ + { { 0x5a14d2a9e7beab9fL,0xbb93a9d2eb30ccceL,0x033eff382f6c4435L, + 0x241389884ca1d8d9L,0x683fded4354372d6L,0x3a5733b79f237e56L, + 
0x8cf9a5daed9be593L,0xfb98100be321dc4eL,0x0000000000000062L }, + { 0xe3c8df880c2b9ec8L,0x1e3f4b351aab0f71L,0xea412d009efbaea8L, + 0x89c2b8592a711842L,0xab502e2666337728L,0xf8397452aa5e9d63L, + 0xd1426326e61ce0f3L,0x3c65ba888b2a3595L,0x00000000000001d5L } }, + /* 29 << 49 */ + { { 0x7c40492595f69136L,0x45e971ba99e00647L,0x8f52311b8df162f7L, + 0x37be3333b9625948L,0xf78c980fdcd3f0dbL,0x04fd571e95450e66L, + 0x2c8c94a45ad646dbL,0x7aaf42e5cbda65ceL,0x000000000000017cL }, + { 0x839547cff6df4638L,0x95d2aac672edd7ceL,0xcfdfa802e0028305L, + 0xbf7fbb9b592ed18cL,0x828e423790ce00f9L,0x83653bee85dae1c7L, + 0x92cc5b1aa352c38aL,0x323f48337c389854L,0x00000000000001ccL } }, + /* 30 << 49 */ + { { 0xa8b2a97fb6cedd11L,0x97dbaff9f8032972L,0x342989110d29a5ceL, + 0xace98046cd01a3cbL,0x2a3d2959be278299L,0x5178699b7d9f96a4L, + 0x275e90c83aefc4d4L,0xed40ae71c9ddc6c5L,0x0000000000000018L }, + { 0xc68d02ce0082825dL,0xeb8c04bb63e05e72L,0x734a75cc8b7a75f2L, + 0x1c6ff3f5f996c116L,0xc23fa867564c6f4bL,0xf13cff17198d779eL, + 0xa4f04051d621edcfL,0xd86ea3fb9bc3a2beL,0x000000000000003cL } }, + /* 31 << 49 */ + { { 0x8687c8d286fb5666L,0x51bda4adead592d1L,0x9a47bc7e8d9bf5adL, + 0x0c16fc8e3f314ef5L,0xcb5262e50b653a4dL,0x4dee35618e512af9L, + 0x41aa85f125232f51L,0xb4b485ed7af6c2e0L,0x0000000000000084L }, + { 0xe3326af2a937424bL,0xb091d60435264ac9L,0xb7fe1c75607363fdL, + 0x8d3fc5e05ce8b153L,0xfea7f23d9ac5e150L,0x38b3592fc8c1f2f9L, + 0xb5346f4f01996117L,0x485d0162ede54b07L,0x00000000000000ddL } }, + /* 32 << 49 */ + { { 0x0b91d5244cfc2928L,0xaada5377f9855767L,0x43014e69652cad7fL, + 0x8680000dee500ebeL,0x95eea80462086edbL,0xea6484375ce7cd0aL, + 0xee14680d51a5bb78L,0x28363f309167ae22L,0x0000000000000047L }, + { 0x5335dbd719692e28L,0x30c79590c6495ad7L,0xb9f87b1462c85189L, + 0x5b87c196c5b0545cL,0xbbdfef3cbe7fb4f3L,0x2b531d69f3216d95L, + 0xe7298f7361c8215cL,0x9652e358d96015dbL,0x000000000000011dL } }, + /* 33 << 49 */ + { { 0x4ecfdcbc609f01e8L,0x43cce6f602da9b00L,0xb5ccb510cb875b01L, + 
0x38ef08459d14985dL,0x5dc8f4f88e28ff2dL,0x2123c91386de81d1L, + 0x80691159098d59d5L,0x44715192dbc14f82L,0x0000000000000171L }, + { 0xc7d2bc1e420737d6L,0x04d07a8634926b1fL,0x4a558a526719bbdcL, + 0xad7181739558b878L,0x0d2e6fca5af3a191L,0x6c49dd8fa73f113aL, + 0xf57eefe8b3f72cf4L,0x39b23c1b37c5645aL,0x0000000000000093L } }, + /* 34 << 49 */ + { { 0x1a21f7609e382dbfL,0xbe278ead5c4326a2L,0x72f12ef962016fedL, + 0x21bf913c8afa14a5L,0xb7c4145a9f9b8899L,0xc17a7b5fde4dd03dL, + 0x14f34601ab717706L,0xe6192f11432876c5L,0x000000000000013bL }, + { 0xa2f7972ba69fd71cL,0xf6ccdd1f9a0289f6L,0xc4634027e67f1f93L, + 0xc87dcef88ebcb728L,0x2053a2db4468602aL,0x6406b53cacbdae11L, + 0xf3f29cf3ee6ee32cL,0xcb79a551a0e19432L,0x0000000000000110L } }, + /* 35 << 49 */ + { { 0xef5cb254481df44bL,0x3184a229b38cbae8L,0xc1b8f38dd3bfe1cbL, + 0xd054eb6b1523138bL,0x9635da2935029a99L,0xed8ad119918794bfL, + 0xb3b9ea73d86ffebcL,0x174afa73454d5e5dL,0x000000000000007dL }, + { 0x0b7e89f996442409L,0xf308ceb46bf1c207L,0x7bf551a8d7488abbL, + 0x06be62570d2826dfL,0x7da841c4c45d6b49L,0x90e0c656320fcdd6L, + 0x625d0ad9c57ae245L,0x1117fa49a6216b67L,0x00000000000000eeL } }, + /* 36 << 49 */ + { { 0x740389608eaee36bL,0x7df35d2c162d1f6bL,0x2cebbe053186aa89L, + 0xc17db617944dc7acL,0x1c6a5a0781e3d0d1L,0xc1e8ce8e29c741eeL, + 0x8edd5a72fc708460L,0x337ba607bd44d4a2L,0x0000000000000143L }, + { 0xc54244b594eb3f3bL,0xf6303630ea66f38aL,0x72aff79bc6149c29L, + 0xecb88c9faa4b1c13L,0x648ec54762873c5dL,0xec17c8cc65222b9bL, + 0x887c31775c4b3466L,0xcdc8fe2b7e3e7954L,0x000000000000008dL } }, + /* 37 << 49 */ + { { 0xcefcc053278778a6L,0x8b23916cf443f1e0L,0xb54c427ecbee97faL, + 0x24c75ee812bc4efbL,0xb7b0cb176a167e91L,0x1b307498bacd972fL, + 0xd744a51b519cb27aL,0xbc28421783cd2333L,0x0000000000000020L }, + { 0x81f745e1b02be1a4L,0x9de30681c6537840L,0xc9bdcf9e9fe1b32eL, + 0x850f2187eae10736L,0x5dc72e3f8cbe70ebL,0xff288749c832cb6eL, + 0x648754cfbdaa6ffaL,0x24e3f97d26354f2aL,0x00000000000000c0L } }, + /* 38 << 49 */ + { { 
0x09c04a0f926bc690L,0x43fbfa9f6286e106L,0xd1a8fb90e1c3b305L, + 0x7b4ee666fe37cdb9L,0x404be05f9b915078L,0x5d886ece2e0fdca6L, + 0x58cfc29cedf36049L,0xf94ec764dd30d905L,0x00000000000000ebL }, + { 0x4ad5bb20650c67d2L,0x760e0784377c5de6L,0xa6fa0ad7a47acd80L, + 0x91e26191802a68ceL,0x932e9b57c492de6cL,0xf51aafe3c0d70872L, + 0x17e1ee20c0a4eb8fL,0xdadd7f5ceb04a2d8L,0x0000000000000101L } }, + /* 39 << 49 */ + { { 0x4b6c35299c3fb75cL,0x2020efc7e2f85697L,0xd2dfcd4892684019L, + 0x366e38a2ec2edf42L,0x7217d73d4aec48b0L,0x098c0ae01adb4cf6L, + 0xbbbcb22ae102cc11L,0x46785dc5b1740371L,0x000000000000004dL }, + { 0xe2cbce804c10fb38L,0x2ae649622280d0afL,0x7e3465aca1b0c46fL, + 0x8bfd57b6dd9e378dL,0x01f628f91dde62e5L,0xf3e4adf12526bb39L, + 0x1734ed33ba88260dL,0xe6b9194041672e7bL,0x00000000000000d0L } }, + /* 40 << 49 */ + { { 0xf42c064a910fda3aL,0xfbafa50fcea6aa25L,0xafe5f5580de9bbbcL, + 0x24e9d4ae0e89fb39L,0x5b71973925c76201L,0xa87ea5295475aeeaL, + 0x1e5495e03c4be889L,0x7866138b995ad46fL,0x0000000000000087L }, + { 0x5f9f803e46afb538L,0xf8c0f078ca875a75L,0x63f1050a89b71552L, + 0x0858baab8d43bb0dL,0x8dc61935fe38cf7dL,0x50fbdae220f815eaL, + 0x6c6a84e52e070889L,0xa524045e73fb1857L,0x000000000000001cL } }, + /* 41 << 49 */ + { { 0x18d8ccdae2479e67L,0x1baa53f2a74a289fL,0x6edaa01c578d7dd5L, + 0x39deb2bf383bedf1L,0x55b2cb09c0c843bcL,0x4389566147a692a7L, + 0x6c9ae23f7e9cda94L,0x21cc9bceba7b7179L,0x0000000000000026L }, + { 0x19db39e25f4eedefL,0x5c22018976727eeeL,0x2bc4e7bfd2b991deL, + 0x9c136af758617aebL,0x25f438605bacc62cL,0x8e469c5668e7b7a7L, + 0x905da2b18fd497f0L,0x5ca49b9f114ad2ebL,0x0000000000000067L } }, + /* 42 << 49 */ + { { 0x1a6f8d8de1a52dc5L,0xcf96814d03a474a1L,0x89bf6b6d11521565L, + 0xdef2de686891d9c1L,0x84acad656e7c7d4bL,0x886b8a280175a6cfL, + 0x9ee8240c926fe7bcL,0x152a76cc39bf83daL,0x0000000000000091L }, + { 0x85376a369b33ef6eL,0x59cfd5a4f181f89cL,0x679c3adc2593d6caL, + 0x6ed84b8bfbae89b6L,0xee128a9017cb08eeL,0x4f29fb137726bc40L, + 
0x20ed5454dd8a2a9dL,0x4e91664a378adbadL,0x0000000000000191L } }, + /* 43 << 49 */ + { { 0xa5dad36b52b70595L,0xf87e799e31e1f282L,0xd2cc0cbf9e1f33e1L, + 0x4cf0cf552d62e0a1L,0x18d2a4d5fc30b065L,0xbc92b38273812cceL, + 0xb252123d18868d58L,0xedac40192d70d6d7L,0x00000000000000a1L }, + { 0x06ef9212c3f4e226L,0x59efef442a3fda9bL,0x9bf2803a42146a29L, + 0xfa8dab397c4a93deL,0xe1d34b9562d8ca13L,0x9989f1f7ef4060ceL, + 0xa57df883280ad2ffL,0xa7672319b32b7398L,0x00000000000000d1L } }, + /* 44 << 49 */ + { { 0xb841b3a2716363bfL,0x1fe55966930c285eL,0xd32798db70bcc6e5L, + 0x69e8e3f7cd08aa70L,0xc062c65836522b1dL,0x5f540d1d845d3226L, + 0xc3fb106d65d5893cL,0xa7e29ee98ae43be8L,0x000000000000012aL }, + { 0x10107bd45f12406eL,0x09597eeeaff644d1L,0xd6016ea4a82208eeL, + 0x91ae818f1b864b66L,0x443f4f76f39bba56L,0xfb1e51720ed82a06L, + 0xa79c9ef8d5b29a93L,0xcf62b0acdf4bc546L,0x0000000000000095L } }, + /* 45 << 49 */ + { { 0x786dddbd9a6770dbL,0xfbba1c37c9bdf259L,0xb0ee1d34c50b403cL, + 0x1d67be441e207360L,0x186f06bc422c455bL,0x595806c00c1576b6L, + 0xb88abc273985699cL,0x47398d6d704fbcd1L,0x00000000000001c0L }, + { 0xdf0187cd0bc02f06L,0x98eb35357610f278L,0xa253493558e633dfL, + 0x633c5838abe6ba6dL,0x98e6522d8345acf1L,0x403be8d023f02650L, + 0x929e6fde26185a91L,0xe11ef0da93f223bcL,0x0000000000000106L } }, + /* 46 << 49 */ + { { 0x0e407af213d053ffL,0x90d5045e06f74badL,0x066fbdeab7245e4eL, + 0xfd7b75ca58eb3154L,0x1b71f8fab1085c0cL,0x0813f46a32f91863L, + 0xcb798c7cf9b79419L,0x89ae2cf7b03a117aL,0x000000000000009aL }, + { 0x5c04b5fd596b9fb0L,0x39cc235d1bb7e7a6L,0xbd27de310638c16bL, + 0xd12cf68dc62576c7L,0xdab34467be49955fL,0x6e9ff707f0e83b90L, + 0x9be7a05a7f860800L,0xfceae2f729c5a536L,0x0000000000000076L } }, + /* 47 << 49 */ + { { 0x839213697b1d62c9L,0xd22e2902a95f6724L,0x29d3cff45962cbf4L, + 0x74adaf534e973e6cL,0xad2f70c1a766f389L,0xc8eadf0ac2b8cd2dL, + 0x8206870d8d1654d4L,0x99168a47a8feba9cL,0x000000000000002cL }, + { 0x1b25ea9fef1068fcL,0x48d00e1e844eec1fL,0x5e405a57497cdaeaL, + 
0x9ea279918c28de85L,0x162811a8db094da1L,0x83072388e11f3bc4L, + 0xb0b78a7ce6b0b528L,0x97576dd0cdb3fabdL,0x0000000000000046L } }, + /* 48 << 49 */ + { { 0x0f9052c7b597ec4bL,0xc6f41c80ad573ab6L,0x347a20e1e3a60425L, + 0x8b466cefc852cbb5L,0xf38734d5dc78e008L,0xf347cd72d1ad9a04L, + 0xb82d096f30eec82bL,0xe43cc8e9ca4f7ab3L,0x000000000000001bL }, + { 0x78d630caf659615aL,0xd686695eb1617e24L,0xbe1c2194d5983cf9L, + 0x4522d8f78329caaeL,0x672fee6b101fb01aL,0x1b1f4c0950d301abL, + 0x7dd87dac5e431e1fL,0xbf07ee0cea0100dbL,0x0000000000000172L } }, + /* 49 << 49 */ + { { 0x07f921760f9f20faL,0xfc96b84ecb68b725L,0x7718b02302cccb62L, + 0xedd4dd7985b5fbfaL,0xf6e1e454f4db3795L,0xb4888ae03a7653c6L, + 0x8fb8fc4474baeffbL,0x063c86b72d45c22bL,0x00000000000001cfL }, + { 0x0c41e282815b2541L,0xac43abd94c2ee392L,0xe24e214a71c286d3L, + 0x1282830043728b88L,0x3bd6814757828877L,0xf197e9452febf81aL, + 0x8a1c679cedbea19aL,0xe251fa945967dcd2L,0x000000000000016eL } }, + /* 50 << 49 */ + { { 0xe01d086a1075213eL,0x8f1b00cbea347edfL,0x8498f8c425c21dddL, + 0x95abb4086d820738L,0x557eaed7ec5afe32L,0xe497900ca38f0434L, + 0x0515c9bc903e17b0L,0xc02387e3dc40307aL,0x00000000000001d3L }, + { 0x96a0bc63eaf352c3L,0x579c49029ee03dddL,0x757a34d6c98eb9dcL, + 0xcfa9d25abd86a2baL,0xc8809301f6054b1dL,0x30b0c70afff80255L, + 0x69f40403c5ff5831L,0x6e5fd068c60c11ffL,0x00000000000000b6L } }, + /* 51 << 49 */ + { { 0x5340fc5166c83cbeL,0x507640cb9f3024fbL,0xf750a8d5a1a0a1e1L, + 0x9e7f6338adbb9bb1L,0x09a208b43976445dL,0xa6cb9df3457f574fL, + 0x7de5c8214fbd3509L,0xde3f5ae19d50d9a1L,0x0000000000000027L }, + { 0xc82405f8ed17132fL,0x43256e646a46977fL,0x8a353d2a994b1950L, + 0xff3152f2ad9a40c9L,0xfc9cfa4f87e47049L,0x3ac2a3b0c168d8b0L, + 0x80742ee9bb4d7fe6L,0x3d2ba8aa8678a9d8L,0x0000000000000046L } }, + /* 52 << 49 */ + { { 0xa04a119dcd2fdac3L,0x2ee2c11c9ce17d39L,0x197cfc5f32fe1c10L, + 0x656774fe3aab0424L,0xb43b296a067463a7L,0xad338aca55d78e93L, + 0xc74471b7b87e1358L,0x5bb5cfc201be2553L,0x0000000000000075L }, + { 
0x71ed2ff019301a86L,0x49d0b18dabb3a3f6L,0xb14c65b0aba86284L, + 0x4a537637cb47beffL,0x340621ea976f6945L,0x66fae29b5c4ae1ecL, + 0x8e56e0fba5e5cc76L,0xc09a72fd7cd3d0bcL,0x0000000000000035L } }, + /* 53 << 49 */ + { { 0x349d9c4dee613f38L,0xef68106f51fb9826L,0x92f4c8386d78ed38L, + 0xb98420f7586485edL,0x0ec1b91d60f3f9deL,0x93d3828b84a7a66eL, + 0xb9f3374876299a8bL,0x4961a01e9f7fcae6L,0x0000000000000106L }, + { 0x9f606ab04ed763c5L,0x1d8f91a3e9a674f4L,0x97fc8242a521bae2L, + 0x7008aa90dcb2af6bL,0xba359079b4110efcL,0x7e54f8c1fd447eaaL, + 0x3aad38aaf746ed2eL,0x3eb1cecef0df9b09L,0x00000000000001d6L } }, + /* 54 << 49 */ + { { 0xd32d280e3ccd90f5L,0x91e87e9e7cb176bbL,0xb5a8a77f30f3973aL, + 0x1f6389b465126792L,0xda9c26d978cd0d47L,0x5207f05eb2c99703L, + 0xbd89f16910a3caaeL,0x79ecf9d1f547e27cL,0x000000000000005eL }, + { 0x5a9846021ad57e4bL,0x29373c083b6dc43eL,0xadfe6bad320676e8L, + 0xd4122508a6380857L,0xa83a74ecd1d1ad46L,0x98c04d58f1207f89L, + 0x19948a9ec12d1d1fL,0xa3cf675347dae287L,0x00000000000001bdL } }, + /* 55 << 49 */ + { { 0x69a5518193a902f6L,0xdc736d4689ee0441L,0xea81c08bf0cc23a0L, + 0x9a63c879a61c1beeL,0x9b8c27d1f2c2dedcL,0x14c7458b5f0dcf06L, + 0xaf32ef31e8763a8aL,0x46821a538c0aa44cL,0x0000000000000108L }, + { 0x0c7e06114965b20cL,0x69154474ce496d11L,0x964e6477e160f351L, + 0x841083e9d6e4fb2fL,0x70640bc3c0e71fd2L,0xbed0bbf4db30ddc2L, + 0x1b30d4d54c9e01f9L,0xc58137f9a143b3feL,0x000000000000017dL } }, + /* 56 << 49 */ + { { 0x32966f76bd667641L,0x7de72be5f0d579a6L,0x0a6ffafb81276712L, + 0x0c76a2135e91a169L,0xeb960ed85f953cedL,0xaa5f7f8f0be111cdL, + 0x2cd0b477911f59faL,0x059a70c6e5220a86L,0x00000000000000dbL }, + { 0x16f99e8ba61c4277L,0x011745b656710251L,0x13cb7d19f1ef7ff2L, + 0x2162edb414f0c0e0L,0xb0a459e867467a1cL,0x2cee2121248e6fc9L, + 0x2ba236eb98142385L,0xbd7465c91e63fa6eL,0x00000000000000daL } }, + /* 57 << 49 */ + { { 0xed2afb1c966951c4L,0xd8fd93de5daeedf7L,0xe5fa9c4cc93d1c57L, + 0x386b2c9bfaefcc9bL,0x4b59656fc59929c0L,0xce1d20e5eab86992L, + 
0x3c11b1a137676f63L,0x1a35c38f85420d79L,0x0000000000000171L }, + { 0x8f88bb55620441abL,0x47af796eaf094865L,0x8888bcd9ef98fea4L, + 0xc3f31342413e8555L,0xdfeb4b48c2b75c9dL,0xfd99c9a4a0c1d15fL, + 0x94241431cefee2e3L,0xab9e1f0a23fd9b10L,0x0000000000000026L } }, + /* 58 << 49 */ + { { 0x25ccc78e776cdc28L,0x34bfe1407f0407e4L,0xfdb8fac8d9e8897dL, + 0x49c0ddb3814ce374L,0x4c983cdc10173846L,0x9c4d4eff62108873L, + 0x12fd9caa476f5b17L,0x8658c9a0731d2d9dL,0x0000000000000171L }, + { 0x14b0aedbf23ff601L,0x632cfdc58f68ccdbL,0x8485d63227078299L, + 0x1b83394f4856bb41L,0x5f905b4a04999398L,0x9645aee589030563L, + 0x30ce56159f7c6eecL,0x53aa062997c9b6f4L,0x0000000000000182L } }, + /* 59 << 49 */ + { { 0xddcbdedee68e0fedL,0x99ccbf1f05063476L,0xb89a94876cb38f6eL, + 0x3022a2323bc6e121L,0xbc4aed50fab2bc77L,0x46060acead728bfbL, + 0x6c950f528b881519L,0x9c0edf597814d1f0L,0x0000000000000137L }, + { 0x58b14d40d0e8fc6cL,0x5e61d01c942ae23dL,0x6bdbad564712f262L, + 0x61fd6441155fe00bL,0x1ab1151ad617185cL,0xce8ea53c623a376fL, + 0x5f0cac41c768b147L,0x1f5f832e22a26dbdL,0x00000000000000ceL } }, + /* 60 << 49 */ + { { 0xb9532a46e6ebc565L,0xde038371b7c6883fL,0xa934fd5002bbe9b7L, + 0x4b82ec8ef3837d8bL,0xc00073fba5d11dfcL,0xae3a22c2dc5ac742L, + 0x8adb521a73b3415aL,0xe0902743f43bd62bL,0x00000000000000dbL }, + { 0x3cf82ff6173b48dbL,0xec4e00c39af49c1cL,0x786b4099b3a267e2L, + 0x16e02a248820f967L,0xe0689ca4e4fc99deL,0x2a5f17575fbd3f1fL, + 0xc0c02624ba0f38aeL,0x586ca28d58cae5dfL,0x00000000000001a5L } }, + /* 61 << 49 */ + { { 0x25b05b4ed1f10799L,0x76cd6f8bc800ad41L,0x13fb260460b2ab0dL, + 0xf59f0cccd05a52c0L,0xb65218b0288de748L,0xc03f3f749f3e5ca5L, + 0x777a1100abf69427L,0x69340a66f7b3528eL,0x00000000000000eaL }, + { 0x1dcae8f5b7b29411L,0xa34600730dfea3c8L,0x13b469f258498ba4L, + 0xdf838dd3bbab9202L,0x4dbec5fffe347b04L,0xb84fd1d91eac8f9bL, + 0xe74760c46e283eb9L,0x7c24f5141a49e088L,0x000000000000008cL } }, + /* 62 << 49 */ + { { 0xb330cfe8ae5124acL,0x4205f06afb62311cL,0x21c0f9cbdf03f5d1L, + 
0x0f4d3d7709c90889L,0x970803101dc16211L,0xae129fff455f228eL, + 0xb209416415043833L,0x95312756bac62236L,0x00000000000001b5L }, + { 0x74af54c370d0fd85L,0x55862bd38d6ef975L,0x52ae68bcf7bdbbf6L, + 0x92615cfc17424b12L,0x3711586fe21a24ccL,0xc0afbe33e3739cefL, + 0x46f3ae0794c296afL,0xb16803df799e4152L,0x0000000000000031L } }, + /* 63 << 49 */ + { { 0x0e910ae76e30834dL,0xcc082cb386c72df4L,0x88a278b2b0dac2d3L, + 0xddd107b085c2a06eL,0x7423df9623246789L,0xc10556841087702dL, + 0x75cc98651081b21cL,0x5279c1a2e7f4c29aL,0x000000000000006aL }, + { 0x0fcaa2f8953245b6L,0x828ab5a68cb75480L,0x06381dfffccbede4L, + 0x0c3f8eae499f669cL,0xc6fe81f8f5dc5c99L,0xb5083730b4aae613L, + 0x8d2038c777622821L,0x263228451f870cc7L,0x0000000000000179L } }, + /* 64 << 49 */ + { { 0xdeed672da38be841L,0xd62d42d762febdd5L,0x002aa47e2d4af6ffL, + 0xdadcda955be24d99L,0x91ec05c46727939bL,0xe7535c591b7966d0L, + 0x83d7f5f023c47893L,0x470bca4e2c4e6181L,0x000000000000010eL }, + { 0xfe45f9698440535eL,0x240e6b2c49adab54L,0x61089282db726b76L, + 0xcff3e30bfa643350L,0x3aafb169a7c01f88L,0x51923c30e0a5514dL, + 0xc077936f3ccc700aL,0x23375769586f4854L,0x000000000000006eL } }, + /* 0 << 56 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 56 */ + { { 0x9aa53b475a714194L,0x01163dd86957d5ebL,0x5bd2c24f908f5929L, + 0x87f519f4dbe42b0fL,0xff767379fb6b5ad4L,0x4d0f8bba9a193f28L, + 0xf3c42bd0ee549d79L,0xbf851d3f825244faL,0x000000000000009bL }, + { 0x825ffb609f78b57fL,0xd1913668e0f74b73L,0xf42a5b035468f89dL, + 0xa2347499aacae46cL,0xc771e34cc85272e8L,0x295684c1d427d3a1L, + 0xed749903433451dcL,0xb872072c0cbabbd4L,0x0000000000000049L } }, + /* 2 << 56 */ + { { 0xf98db50e8435735fL,0xeb51c34660bd7b77L,0x200adf1e996ef096L, + 0xecac256b57b8d488L,0x889639b07e65ea9fL,0x4f763f7313a4a243L, + 0x401c22a42957db01L,0x86cff8bc68eb751eL,0x00000000000001c7L }, + { 0x1c0549ebe47805e9L,0x57716280b223b355L,0x69a29cf4db5c068bL, + 
0x63fb8a8c432af690L,0xdcbb94f97f36ebd6L,0x693983dd08609c4dL, + 0x69875f98d458fa6eL,0x149758e81fff90b4L,0x00000000000001eeL } }, + /* 3 << 56 */ + { { 0x8452736310a0d011L,0x299e1cb30bbaddd0L,0xf5f3079b5294c956L, + 0x9edd5fa24f0a3584L,0x764bb175641f1fe4L,0x0217a94a93b38db4L, + 0x61c992b7e31e6fdbL,0xeb231629764cabecL,0x000000000000013bL }, + { 0xbd3b12a4d2e8c15cL,0x8eef9b5d8accc1f1L,0xb931fbc752747252L, + 0x8a7b86c7f9817befL,0xb52330ba584e1181L,0x9bc4926d25a0a0abL, + 0x002feb9b89a625d7L,0x653946b5db41831aL,0x0000000000000061L } }, + /* 4 << 56 */ + { { 0xf1efe65befd91d43L,0xdd872fb0aa3323e3L,0xcb7c357901e9dd62L, + 0x04677143cd97ebd7L,0x906eceb8806e055fL,0x334b997f2fd96082L, + 0x41a732b0caaad051L,0xe1b97e125bc0500eL,0x000000000000008fL }, + { 0xfc4e9baaa1bee510L,0x2b887e327d63c5f0L,0xcfb9c52eee6cad4eL, + 0x596bb714b0b8dd96L,0x2db254c665f9a215L,0x000359bdd799749eL, + 0xc4ecaf126d91b457L,0x8153709ebe4f02f6L,0x00000000000000c4L } }, + /* 5 << 56 */ + { { 0x2218489b09d9e2f2L,0x563769927f4fe3c0L,0x61e365211ed2b98eL, + 0x166d671a178d242eL,0xb65381c97e8d39b6L,0xaefe583a82851b43L, + 0x6caa4459a24a7af4L,0xb421380f5640d447L,0x00000000000000b5L }, + { 0x706a413011d91549L,0x4214be92e3064f4aL,0xa7c3b7b8b7649258L, + 0xa782e2fc71c05ddeL,0x0fa90ef683d30793L,0x1c77425b15dbd474L, + 0x917cdb42fc0b16d8L,0x7ef5faf8c7bfa045L,0x00000000000000e9L } }, + /* 6 << 56 */ + { { 0x5b408946a60059ebL,0x0d707cc2a1e69f42L,0x9be262d3cddd4401L, + 0x0c2d64bd01c14145L,0xcf8b305a27c198a5L,0xadccc32e05c03494L, + 0x22e5f02b6c1de307L,0x2ae62bac48f43edfL,0x000000000000001cL }, + { 0xb02341cd598a8ef6L,0x0e6041748456480aL,0x159e591932132353L, + 0xe03d4f2c8b3a8a62L,0xd1190722f861d8f0L,0x59d3d3b6a37c89a2L, + 0x07fb0e16a2987768L,0x5dd98e191da91a16L,0x00000000000000adL } }, + /* 7 << 56 */ + { { 0x09238de907ae5402L,0x14fce2b1444b8b61L,0x37ee7affbd0d2eadL, + 0xecbc75de97804504L,0x343a5d1a3b5e5fd8L,0xbf806ee5a90dcf6bL, + 0xd07f1054b2bf2a3dL,0x6f590326ed9fffe2L,0x00000000000001fbL }, + { 
0x9c42832de6198aabL,0x3eacd0ed475ec98dL,0x715b6aa8937ffe67L, + 0x4b35d1e7891bffc8L,0xfad8af9cb36a8835L,0xa3a21d6f7b8f6f81L, + 0x4faaf12e771c3b1cL,0x4e05313304fad4c8L,0x00000000000001f1L } }, + /* 8 << 56 */ + { { 0x7eecdfe4e541183aL,0x7b0dd2285aa8b868L,0x8d38cd0c1de22f71L, + 0xa9ca796dbd2638c1L,0x1819373249e5ea36L,0x4fc5461729212b0fL, + 0x4f3874d22debe3f7L,0x46499322cd4b02fbL,0x0000000000000015L }, + { 0x7977cdaaa3bc23c0L,0x954aa6d88ba98676L,0x6b8471c793c7bf98L, + 0xf8025a0b43c70437L,0x0aca0d09b1a8c71bL,0x9fb1d00b6fc48277L, + 0x50bccc4ee312939bL,0x5aeaf55665e5df83L,0x00000000000001abL } }, + /* 9 << 56 */ + { { 0x296c4e5e4114124cL,0x8f760836d47c56bdL,0x9fda0a36f6758211L, + 0x2bc5e75b36cd51bbL,0x32e6873f1bba9f2aL,0xed7a7c288e6c979aL, + 0xbec902bb47cccd6cL,0xacf4c76c48dca66aL,0x0000000000000080L }, + { 0x44a892f6355a975aL,0xbbd44a35bd1b1666L,0x6b49014ca612f865L, + 0xcd50cbb36b81bd2eL,0xb25e204fc26beab6L,0xddcf6123f93fe188L, + 0xa3c651667a980260L,0x87f09af9e19ae815L,0x00000000000000c5L } }, + /* 10 << 56 */ + { { 0x36fafde5fbbd4e98L,0xaf44b924868ff31bL,0x0281a7fa945ab6b9L, + 0x2a4bc2c85323df28L,0x51959c042a3e0fb6L,0x6aba3103143c1a63L, + 0x3a572db88876e600L,0x418cd1b4506efccfL,0x00000000000000b6L }, + { 0xf121c2f9beeeb151L,0x69c65d11d76943abL,0x74063608addb71d4L, + 0x072af1c6702d26b0L,0xe0fd470b473b6ef3L,0x765b0f46bc36697eL, + 0xa24b34bcc6acd252L,0x0401f4521c3311e8L,0x00000000000001d2L } }, + /* 11 << 56 */ + { { 0x3f298d941c6a01eaL,0xb6ee6874dec1f22cL,0x189023ac496d5522L, + 0xca543ecfc08a48c9L,0x240ca6a0a2db1ea9L,0xa591ef2e54492401L, + 0x22027038344bcd0bL,0x34cb84596dc4ced7L,0x00000000000001dcL }, + { 0xb5a949080414ed02L,0x8bddf24ba14739f7L,0xa788ab9679ac753bL, + 0x754163a977fe2ff1L,0x49803f05af045bc3L,0xf482041842d13f83L, + 0x9f86243cd7528fa3L,0x9b4c2abe08588f48L,0x0000000000000131L } }, + /* 12 << 56 */ + { { 0x34853c5d35da05aaL,0xedf100f6b15b5d6cL,0x1dc6e841d3da67b0L, + 0xaa5d83f0310fd872L,0xa686fa787cfd885bL,0x297a0e48b09f9c96L, + 
0x9e899c7dfbd40e05L,0x2bfbaf44b9589b4bL,0x000000000000013dL }, + { 0x937376913784f654L,0xaba39ee088f316f8L,0x7fc611358a3c3666L, + 0x5a09da1f80a1fadeL,0xffd6018133651828L,0xc400cf569a7ac400L, + 0x64fc091f665c4152L,0x8679f2d7c350f654L,0x00000000000001ecL } }, + /* 13 << 56 */ + { { 0x3af9dcd14473f37cL,0x7c31843080e36492L,0x5115486ac8d049ceL, + 0x594ee800b10d6316L,0x3c9aec97f913f656L,0xc9b99ae4476865fdL, + 0xf9a597f80fecfa86L,0x59313aafc2664276L,0x0000000000000020L }, + { 0x431f1976e5a1239aL,0xb406a1eb58838d43L,0xc023f4698d681f4cL, + 0x5f1dd612dcccc654L,0x9032c342434f82c6L,0x99bc4322c7965da3L, + 0x8c2f2ce32745885eL,0x415ff1a86cf68162L,0x0000000000000009L } }, + /* 14 << 56 */ + { { 0xb10ff95e0e9132f1L,0xf49e33b7045c8800L,0xdf0314f4842d3bdcL, + 0x55fb97a8f584e160L,0x0b923612e71c4d35L,0xdb8c2885ea2f1d23L, + 0x514e37d7dff21f90L,0x0a0cbe3dfd2278ecL,0x0000000000000081L }, + { 0x20daf0c8e84e328aL,0x9885850d6978f468L,0x8ffffeb098151489L, + 0x5bb7fc177a0b4d73L,0xfc401e51706e036aL,0x7016943c64c94259L, + 0xa8cdd03a570abcacL,0xb94b40df2520b74eL,0x0000000000000167L } }, + /* 15 << 56 */ + { { 0x41ec06596dd7d8c6L,0x8baeaeffb07d565dL,0x55f5138c26c9b544L, + 0x975e212788825ee5L,0x90422ca7c1a092e0L,0xa03de4e8dbde0554L, + 0xdf408882734e71a8L,0x80e01f63a10cc8e4L,0x0000000000000157L }, + { 0x7b4e0e130a65cb93L,0x1f3e7faec1e49a1aL,0x1b12fdb3dd71b3f8L, + 0x8d32d35f1253f42eL,0x38e09d674312f8b0L,0x9a727ebd4281b5ceL, + 0x0002358abf211bc2L,0x46c3dba13e61916cL,0x000000000000003cL } }, + /* 16 << 56 */ + { { 0xbf2097b9414d9815L,0xfa0673b3cda66dfcL,0x8c775677193b17b0L, + 0x07c8dbf6c5387e9aL,0x141de69d4c5da984L,0xf0650ac806a8b133L, + 0xadd766eed2b258c4L,0xa6f4276396303678L,0x00000000000001daL }, + { 0xcdbf1daa39c8824bL,0xf8f2cd8c5638cb0cL,0xd966c8e9b6d33286L, + 0xe4b63ef8c1331e1fL,0xac72d8e3fb5f843eL,0x110d0d4a8ff6b4b6L, + 0x0067fe1eacd5cf3bL,0x5eaae5b321c1e826L,0x0000000000000068L } }, + /* 17 << 56 */ + { { 0x3c2cc8401217a97cL,0xb50bb1f3c1c919f8L,0x5becdaec4470c37eL, + 
0x378cab9bceb1fbb9L,0x04563e25cc95f3f6L,0xae040e389d4a2100L, + 0xe66969915950151bL,0xa855c542c36aef82L,0x00000000000001e5L }, + { 0xe5c950666cd47003L,0xc602fbe4b6bae5a9L,0xacce809c76110dd2L, + 0x757b37d9047a58c6L,0x58c9dc97ef3e68d5L,0xa2dc7d9012714826L, + 0xad4e0ba11fbf2747L,0xe710d91de5af3928L,0x0000000000000084L } }, + /* 18 << 56 */ + { { 0xd79cd3bc6e819754L,0x7c18ef6a8809e3d0L,0x29468b115c990dd5L, + 0xed6cf22a8be7c0b6L,0xb62cab9a75ff6d61L,0x035cfbcc46116fc2L, + 0xc375d3ffcb5139a8L,0x9fab6a6207bedcb9L,0x00000000000001e9L }, + { 0x0b07578f9d888066L,0x9438e828693b78fdL,0x046f159b622825fbL, + 0x00125b1e0a349677L,0x408d9c746d8e1d12L,0x4e46f3fed0358225L, + 0x63dd41bff4448e2fL,0xc87ae9f74d54acc2L,0x00000000000000a1L } }, + /* 19 << 56 */ + { { 0xca6d413b739aac79L,0x5ca8d0f0cf84b822L,0x6ea753d2490fbae0L, + 0xd02d641bf68588dbL,0xdcf9aa5b12a69358L,0x3b3d2093c00dcf36L, + 0xaef49dbc3556af94L,0xd878e02a7ad78427L,0x00000000000001a4L }, + { 0x8f65cdc927f5b36dL,0x968c84e51ec6669aL,0x3a412c9d99424d9cL, + 0xbdda9213690b435fL,0x00899ee877958c53L,0x64032787de5424aaL, + 0x6d3df87b4261056aL,0x556eb68e62019e38L,0x0000000000000015L } }, + /* 20 << 56 */ + { { 0x797062240d893addL,0x73be154c32ee77caL,0x462c16d2021f4043L, + 0xe593ff234c08fc6dL,0xb17e52f03b138c7bL,0xa8ab2ad5e5fbe9bfL, + 0xcf64eafcb597ce57L,0x83743f38f970be21L,0x00000000000000fcL }, + { 0x6013ab2614c70201L,0x0c69287d9ebefca5L,0x255e3927d7113008L, + 0x5077b04f4d7558a5L,0x85c914fda5be2217L,0xc44bb4136d336d7bL, + 0x84a4f0027728a6a9L,0x9c83da4f0040ca25L,0x00000000000001bbL } }, + /* 21 << 56 */ + { { 0x3757eb12813cd071L,0xe2c5e0702f18fb8aL,0x3dfd08305f451a83L, + 0x91e430441b09aef4L,0x0b8493b2153d6dfbL,0xe5cb9f878aa9de2fL, + 0x5f4136b67e86e730L,0xde3b214ff8007c7dL,0x00000000000001b1L }, + { 0x6f3d65ebaa289400L,0x2937901f23fc2f30L,0x208a45403c97ad38L, + 0x496fdaf41d956449L,0x68d7b49d7b70e243L,0x1fe890dbad43f7c8L, + 0x6d5d15b6fb734683L,0xd8cace1d2636aeecL,0x000000000000007aL } }, + /* 22 << 56 */ + { { 
0xa221c321cfb59cc5L,0x54e6a28dd152e86fL,0xbe574fad4c73e2cdL, + 0x66a16063a28c6a34L,0x9d051f2ba6bf4e33L,0x6f6e57f0efef1f7bL, + 0x9f8530c7266ea965L,0xde64f62ffc3c5ce8L,0x000000000000016bL }, + { 0x101e26f9e249ae6eL,0x64b186185ccd4bd7L,0xbbe43dbfa725469eL, + 0x2a13bcaf3c77a158L,0x3f52b1d45196b990L,0x5d3d31009643175cL, + 0x0c45dd2949e58e34L,0x2376457f0c9272d1L,0x00000000000000e2L } }, + /* 23 << 56 */ + { { 0x1aa1200038b3205cL,0x76e66ea19d58cca0L,0x80c79ada0cc03958L, + 0x81bc2b5e01351773L,0xd3728abfbb0e9fe5L,0xf5ab4fb5c491587dL, + 0x87338eb582fd5606L,0xf6347c4826c78494L,0x000000000000001bL }, + { 0x8a791f669efdfe9aL,0x755e8267e727d49bL,0xa3099a447992d40bL, + 0xef46bdb01d15a101L,0x559174a23c94ac49L,0x584f8051ea2c9e4dL, + 0xf4e16886bf84b50cL,0xff78f54b1b77ee46L,0x0000000000000074L } }, + /* 24 << 56 */ + { { 0x7ac3de5ca9041d75L,0xf644635482759efdL,0xab1b9f174ade5840L, + 0xeee52981b8197804L,0x46000cbe38c3a5f5L,0x88230b1626431a41L, + 0xe28d80f67766d1fbL,0x2e75ed96eb167421L,0x00000000000000dcL }, + { 0xc2e83c5b0e7b8b33L,0x1066058ce284acc3L,0xaf2f0891593a5571L, + 0xdc3b9416f48d96e4L,0xb290b6d07c9c6be2L,0x52eb2fde7c9039b2L, + 0x3eb91e6fb68324a8L,0xb30518e936711c4dL,0x0000000000000015L } }, + /* 25 << 56 */ + { { 0x2d009d1f7e81379fL,0x1186c036eeb29927L,0x9c5dd22a91b3998bL, + 0xb6592abfab377cabL,0x244093f60dcb585eL,0xa4aab53d2afe72b1L, + 0xcb3555cdbf6a94fbL,0x4fb141b4a362f98cL,0x00000000000000f8L }, + { 0x4a846565f7b37ee8L,0x951a4ede2449b8c8L,0xd0207baa5b7a7cadL, + 0xf6d4d8d91d81b91bL,0x92e4989362f348efL,0x2d1344480ce09f2aL, + 0x1d44afc329e7a94fL,0xd86cd0c3d2df7d47L,0x0000000000000005L } }, + /* 26 << 56 */ + { { 0xbed09ef139dafe80L,0x29280156660a6824L,0x44826c00399c500dL, + 0x62a0b9bc916333c5L,0xf8e99a122f328b47L,0x5399ea024bc286c4L, + 0x41c108bcc314dc67L,0x84028e33144199b5L,0x00000000000000deL }, + { 0xdc9acfdf38f3e37fL,0xd25f15a1b6ebf57aL,0xb8cf0e9228aaf7a7L, + 0x3e4b2657ebf0dd9cL,0x47c13b14898b9c60L,0xb86aa4de88315b53L, + 
0x3c922479502a03e7L,0x12a71062d5664fb1L,0x0000000000000002L } }, + /* 27 << 56 */ + { { 0x0888c3373a129502L,0x58221c2e5b441f44L,0x492f61b92730c69bL, + 0x9ec3c2d586c89b52L,0x6c5376a2036b0dddL,0x324072e8e30189f7L, + 0x9627871a8f6dcce8L,0x2affe689f14c2a67L,0x00000000000001efL }, + { 0x7a6891020deb1c93L,0x4adb5b63d03180c0L,0xac46e5de2dab3606L, + 0x2cdcc3e0f809e0c3L,0x5eac59fbb6bcf717L,0xec49aba1d98d62edL, + 0x2c547f2b9cbb81b3L,0xfc3afcf962672e3aL,0x00000000000001a1L } }, + /* 28 << 56 */ + { { 0x56b5f75c7925dc46L,0xfa149311dd8ca6dcL,0xfca42ef910155bddL, + 0x661b558ac2779509L,0x330257621f4fc11dL,0x00a4a692ba594d25L, + 0x384f36c67892fe4eL,0x3559163a4b3bd1d1L,0x000000000000007aL }, + { 0x6e75533a4dc7b82eL,0xef9a1ec584f65759L,0x3b6598970198445eL, + 0xc1635aefa87878e8L,0xcded2de7b779f2fcL,0xbcce07ca79b30a66L, + 0x784f05e143cc83b7L,0x18fb4648ebaf4810L,0x0000000000000118L } }, + /* 29 << 56 */ + { { 0x21cca143a0849267L,0xdb3c09476150ab19L,0x913aed807d17af24L, + 0xb9d184be112ef198L,0x51e8658a87ad4afeL,0x3af8395f440d7c27L, + 0xaa056c70a4538d3aL,0xd32da7d8fb38fbd0L,0x00000000000000ceL }, + { 0xdd19261f7df2749cL,0x3d136e087f22ce77L,0x824d05c2e8e2900cL, + 0xbb9952149c7ed67cL,0x3d3500c36d5ca1e6L,0x3c05e9faa07cb4afL, + 0x3cdab734d4dc2a66L,0x8f0dbd5f524a3016L,0x00000000000001afL } }, + /* 30 << 56 */ + { { 0xde0a68bcc944ad6cL,0x5789ad9ef6a7c9ceL,0x5bd83d19ff4935c8L, + 0xa976293acf267a19L,0x04552718f1aef41eL,0xd87c58e4cdccd277L, + 0xd5d18bf72542f5a7L,0xb41bcf033a6a2a8eL,0x0000000000000086L }, + { 0x21e495d3188aeb3aL,0x158ee1fff0ac6ef0L,0x18e6446f09c4dffeL, + 0x82fc447d9c1afb5aL,0x84595eeeace56f3dL,0x94f6edfe590928aaL, + 0x2b0c90f9b997105cL,0x71ba70da5d064ab3L,0x000000000000016dL } }, + /* 31 << 56 */ + { { 0x6ce8c00009abc364L,0x4ec14c04f010a407L,0x45e5c5da8c10a618L, + 0x91bbf4e4ca43655bL,0x171e65a9b62c2cf9L,0xdb1915f3821cd7faL, + 0x75cf2fe3e5acbc70L,0x46e005431959d2b8L,0x00000000000000ecL }, + { 0x2a339ea5cc146e5eL,0x0d4b69e30a693102L,0x7b2707c6610642e3L, + 
0x5ed9f43767880ecfL,0xfa1b6091b769708aL,0xba717e325aa5501bL, + 0x6833adaf1c963c7dL,0xc66016586e445c3bL,0x00000000000000f4L } }, + /* 32 << 56 */ + { { 0x9917e8cc2c863225L,0xe627d938dff7a5fcL,0x62017450d3c9fa85L, + 0x30453775ce2573f1L,0xcaaa02c343e3b41cL,0xf541c0065901e5ffL, + 0x78ec0e91e163aa77L,0x1cfd0693f446bf70L,0x000000000000009bL }, + { 0x82aeb71678063f41L,0xc442f8a7861c6782L,0xccfbc9f3f28b51f4L, + 0x92a5f71127335516L,0xdb6dc76e23578ef8L,0x365972b3cfc25f57L, + 0x0dc48630afd7fcb4L,0xccd4f3de87ea6884L,0x00000000000001b4L } }, + /* 33 << 56 */ + { { 0x0cba78e07f180ae5L,0x651f442380de7820L,0xd2280fe6fdcee178L, + 0x0398d592f0604b17L,0x719a56a516c5f918L,0x521773981ba15826L, + 0x73e7c994b4cc167eL,0x46116f5351fc4cb9L,0x00000000000001a0L }, + { 0x39eb2793fe76f997L,0xae5381c07344404aL,0xbdb170ea3a99ef88L, + 0x4b807687b9364627L,0xda38d1e6010cb096L,0x7a26ade40e9e8c32L, + 0xdba2bc77b15795a0L,0x4a4ef3a9e390b4ccL,0x0000000000000107L } }, + /* 34 << 56 */ + { { 0xb822040321dd5804L,0xc94d9465a3678e1aL,0xd4f146062c4bff3bL, + 0xcd466a747b9ebea0L,0x2b6ea1b11a3915eeL,0xbfe9fa8d4c9d25b1L, + 0x5acc811179c0b546L,0xc015bce832ae8d9fL,0x0000000000000199L }, + { 0xb1b1a996c5d68750L,0x0e3f7bd1a3119441L,0xac55b06d80de3e62L, + 0x1901ae921cb02517L,0xbb05723529251d65L,0x84ab8976f8de5e88L, + 0x8b0306a18a2a5ceaL,0x9ff34fa9b109a4f7L,0x00000000000001c1L } }, + /* 35 << 56 */ + { { 0x8b2cc329e83b40b2L,0xd03981d20af83376L,0x223bb252aa06f709L, + 0x98fdf150d0ed1e1fL,0x7197d38267248d5fL,0xc87c56c008a30dffL, + 0xad81ab335715c061L,0xb4c884e24a323524L,0x0000000000000122L }, + { 0x715113b529703ca5L,0xb22e290ccedaf9ddL,0x627021563b1af469L, + 0xe0bade40092e1c63L,0x1e7acfe15a88c7d1L,0xcdc94e4fb93c02c6L, + 0x8053412b444075f9L,0x73d22f8ea1d504feL,0x00000000000001a2L } }, + /* 36 << 56 */ + { { 0x36a54e320063e3a7L,0xb6ab589b2e92d11dL,0x8472d62cadfa6a1aL, + 0x57c611004799cceaL,0x7643922d8a3d93afL,0x55c52a1e7e3deed7L, + 0xa0399845e3d1e892L,0xe9e618c644708214L,0x0000000000000035L }, + { 
0x0f929bf6c945e0b5L,0xbf9d77efcd8bcf03L,0xdb81f7486f13964cL, + 0xad6fc6071da1d2d6L,0x658cbe0e689fa23dL,0xaaa75375cf636410L, + 0x5c69a9f55f5e58b8L,0x518fcdbee31fb8a4L,0x00000000000001d3L } }, + /* 37 << 56 */ + { { 0xe2ad01a012b9d5abL,0xa94dc3c22971085bL,0x6355e2e3013db1cbL, + 0x91e8f97c89904bc7L,0x4f79feeb5e230144L,0xafa43327cbf85425L, + 0xc3326b4bd442e79dL,0x696348b814e996efL,0x0000000000000040L }, + { 0x4a58ba810613928eL,0xd30d25defaff6e4cL,0x6ad322500b0b681dL, + 0x79e4e5c4e8c851fdL,0xc90a7058e2a3530aL,0xe2f77504052a52a5L, + 0xe34ab1ae2475a042L,0x9cef5db6ac963f34L,0x00000000000001d1L } }, + /* 38 << 56 */ + { { 0x0b472c43ecfb80deL,0xb75578188d5196f0L,0x30b9f8aeed8ed1caL, + 0xa918b126b15b2bdfL,0xe178db2f9514c041L,0xeb5a0fde8071a8e0L, + 0x21a6b6aaa6e3fbd0L,0xcde45edad9a740a3L,0x0000000000000193L }, + { 0x6ce6ff4ec84efc29L,0xf7196b6a7acbfc4aL,0x5738f1534c99adcbL, + 0x0f8d6be935bb6865L,0xaed7c338f140328fL,0x3ca7470417870fb8L, + 0x09445978820d213cL,0x35cabb29e28e0ea3L,0x00000000000000f6L } }, + /* 39 << 56 */ + { { 0xb78f9b1e46e1e6d1L,0x2ed415ee865fb220L,0x3360c23105fb6fb4L, + 0xd9e41dc9b4123875L,0x850bc64de51cfcb3L,0x5a4028277fb29e1cL, + 0xa819c1459a1fd35dL,0x8963c9145adf53f3L,0x0000000000000096L }, + { 0x351539579dc592daL,0xced12343842c7d44L,0x45e5c8029b3ca1dfL, + 0x93fc7aba55fca7dfL,0xcb44a2f1565bfe7dL,0xeab3c41a6a6d828eL, + 0x832aaaf0b3280533L,0xc36ebe30002f19ecL,0x000000000000006dL } }, + /* 40 << 56 */ + { { 0xbfe65b92256f11aeL,0x41a8ca150644ccdbL,0xe72c186365b6247eL, + 0xe9f2d3429527c44dL,0xf6db950dd064e99eL,0x02e75e2bfa7900bfL, + 0x159712c560f397f2L,0x26eb5985287407e1L,0x0000000000000159L }, + { 0x252ae5169ac237efL,0xf57b24855909b88eL,0x78c4a6859abf3888L, + 0x0e57276f7fbe4495L,0xb9f442a4dad1dd50L,0x20323a507183d2d5L, + 0x1e676b45da92af11L,0x175b3098b5f33ca3L,0x000000000000015cL } }, + /* 41 << 56 */ + { { 0x942771e527b66afcL,0x5a76b2fdd50f0efaL,0x299b8398f6f37575L, + 0xb9e442799d7b0cb0L,0x7962c682564cf210L,0x70aea74fa2ec8efdL, + 
0x7ebb28461b348292L,0x40b56fbf19dcd6d3L,0x00000000000001d7L }, + { 0xe4ff5d4aa08f1cb4L,0xfdce86fb3ff162ccL,0x995a1749d9c055c8L, + 0x5be6cef5e3361d50L,0xda779c62528fda04L,0x0264ede22f552d58L, + 0x28357e96ad3823aeL,0xc574b6220958846aL,0x00000000000001cdL } }, + /* 42 << 56 */ + { { 0x4d949b43a2192eb6L,0xd0de4a428ff40937L,0xe301c8c698fc090bL, + 0x2165e3123a1432b5L,0x30a1e1eb1a548d24L,0xb66c9f969ea5f18aL, + 0x28de679479bd7860L,0xad6af8cee995d105L,0x000000000000017eL }, + { 0xe62d25e80f66e7fbL,0xeb2b047e88130c02L,0xe5efabe5f80b2318L, + 0xba1f6034e02a6606L,0xe48ec52ac80eff69L,0xd8b89963cd47e665L, + 0x2694fa9e78df8006L,0xd934e06fdc90bb92L,0x0000000000000104L } }, + /* 43 << 56 */ + { { 0xbdc431c9ad266526L,0xbcd5379c9e6ba1ebL,0xd8fc65f5826ce26aL, + 0x507c025156e89a30L,0x8ec5b4caece29675L,0xdd420cc204892288L, + 0x918fb43e1582ffe4L,0xa3d0b98215ec5417L,0x000000000000016aL }, + { 0xdeecb7356e2a93d2L,0x9def735bd30c86b8L,0xe0df13b92ae8f100L, + 0xd9f31ccf470824c5L,0xe80be73073dd956fL,0xe807bc69658cc359L, + 0xb8739dedee8aacc3L,0x55c2bcac185d5debL,0x0000000000000123L } }, + /* 44 << 56 */ + { { 0x62044fa7be3ae5f4L,0x5d17023b9ee07956L,0xb01411700a1c53ceL, + 0x390aa024b633da2bL,0xad7f59f5895bf18aL,0x3124a23cb468fe0cL, + 0x92ca9da8b12aa6f9L,0x25147c19ebb5363aL,0x0000000000000022L }, + { 0x900b43421dab5723L,0x0e396b3038ee6d5aL,0x1d3fd7ae1561252dL, + 0xd169e474281a619eL,0xf25b8b97bef82d20L,0xa77f259526ffd23aL, + 0xc4075d3659107aa9L,0x4989a721e23d327cL,0x0000000000000043L } }, + /* 45 << 56 */ + { { 0xeb1574d03f710952L,0x9657aa2874526b72L,0x7076e904776a3edcL, + 0x83f624e2036e6cf0L,0x270d20bab0e58a2cL,0xef81f06d5199fb54L, + 0x50f81b5e9ba0b9d1L,0x14e233d537d9444cL,0x00000000000001f7L }, + { 0x9ff63f3750de8a6dL,0x714682f14a57ccbaL,0xb16d90f38d6253f0L, + 0x2690aca4f1fb2279L,0x5c8e1c463fcd9343L,0x1f2f4c3f2b43dfd0L, + 0x9deb00529b4e03c0L,0x75061f03526d24e6L,0x00000000000000bfL } }, + /* 46 << 56 */ + { { 0xe2ca98a649f1acc8L,0xb46de54a52116d8bL,0xc5be41266f108234L, + 
0xb5e07998003f2803L,0xe387c00379eff438L,0x4c0287daef9ad35cL, + 0x0a8181cbd85d5441L,0x70ca63cfe9926489L,0x00000000000001e7L }, + { 0xdc2fe75c91ae2059L,0x4e23a3c7eb458c3aL,0x02922886913edb61L, + 0x66549c892805c886L,0x599148224b1cc49dL,0x3326135d3790bfbbL, + 0xaa62b1535b40d61aL,0x06f704773550a984L,0x000000000000011bL } }, + /* 47 << 56 */ + { { 0x23c79b50ac79f04dL,0x96ace814167f28e0L,0x5644752c2d99e5a8L, + 0x28bfad59c099f81eL,0x27250746e9970582L,0x210db573f528c364L, + 0x5b59afc5e8f782d5L,0x402269fb05ace43fL,0x000000000000018eL }, + { 0x1f133f478b618330L,0xb691d541312582e0L,0xf40d84c8a3f64f94L, + 0xb2ed7ae2d60b793aL,0x0b604ad005a966ceL,0x12809a7526260877L, + 0x1248f6b2686c8bcbL,0x4eea6c43474eb54dL,0x0000000000000045L } }, + /* 48 << 56 */ + { { 0xc1d7001b26e22b52L,0xb4b208962825905dL,0xa064fdcde739e7beL, + 0x83bfa334f2d62d9aL,0xa2baf3892758b810L,0x3a88f2e4ebd64633L, + 0xd621be3476dfd697L,0x7168c0779f4a85c3L,0x000000000000017eL }, + { 0x5b8f06f520f8f0eaL,0x95e44505cfacfc46L,0xe20192bd787844b5L, + 0x316468c44a04adc2L,0x30d71497d6774463L,0x7c826a925a606a2dL, + 0x22abadf981ba512eL,0xeb15b45fce26365cL,0x0000000000000146L } }, + /* 49 << 56 */ + { { 0xb9887fcaf7bbbaf0L,0xf0826aeca4ce1d88L,0x4d62ea62c4f7965fL, + 0x06f15abe449e26f1L,0xc18e11f2b31e5238L,0xf8b93bbc1af9d49aL, + 0xcd0a8fb808d073aeL,0x4b51841540d4dd9fL,0x00000000000001e8L }, + { 0x7374840c30998d71L,0x589382d436ccc41aL,0x9ca23a60306c2184L, + 0x176eee48b753b87fL,0x434f008845478462L,0x3fb180f1636c669aL, + 0x558742218a7ad6a7L,0x8abfe4a668f8bf1aL,0x000000000000015dL } }, + /* 50 << 56 */ + { { 0xd232ef02f6009ce1L,0x33e196f312d0c06cL,0x61b889c3d37ab4c6L, + 0x6994c1271c3a216cL,0x50bff9f8aecf084eL,0x7d2e7c71fa0d3536L, + 0x4d0c3a44f42e86b3L,0x42a1cf960b3e7d90L,0x00000000000001b1L }, + { 0x6bfc53d7f9bec891L,0x0969a0d9708ecba2L,0x57d04efe31b7bc78L, + 0x7ebd3fd128efd902L,0xa9cfa0e9fd6e7da3L,0x39fae95964c474b3L, + 0x6549f29eb68d9f5cL,0x3c570ecc13f84c68L,0x0000000000000060L } }, + /* 51 << 56 */ + { { 
0xbf9a21ef1634ceebL,0xb16b78a414c68429L,0x7ff643a098fb371eL, + 0xc733a0a42e44c567L,0x24e633f2d310aee6L,0x3ce8bdb316e3c03bL, + 0x3ed7a15b662d44e8L,0xa81ea8e22329d421L,0x000000000000000bL }, + { 0x28405c80d6491d1dL,0x6ac0cea7e069e536L,0x6d6d4d3e7e67cf0bL, + 0x889fe753240ea8e9L,0xcbbf7c5b0a89ac1dL,0x1dd88ea1a2477925L, + 0xefd5f0e148e2e534L,0xaf56b1c5af3ff0e0L,0x000000000000003bL } }, + /* 52 << 56 */ + { { 0x02d6a09b39435d76L,0x14607c4033261f8fL,0xb7bbc6828cfa6714L, + 0x24891fac3486f8c5L,0x940f9bea8592cea5L,0x20d420065230ad37L, + 0x3bff94e036b6e919L,0x596fec36468065a8L,0x0000000000000163L }, + { 0x1836fecec86ba807L,0xe1f9af2970ec6073L,0x51b5a102f4de5c91L, + 0x0108f1ed8eed7ffdL,0x22569348f45657d1L,0x59b9601720dbe0b7L, + 0xa90e49237730150eL,0x477531f6182fbcc7L,0x0000000000000106L } }, + /* 53 << 56 */ + { { 0x4e73f4eea189246bL,0xf9fecdedca87ad2dL,0x207ec7d5e2e3d505L, + 0xf37313baff1083e2L,0x16c97e9761bd24c3L,0x89474bb93845239dL, + 0xca1c54f442bcd760L,0x20cae9061413c906L,0x00000000000001f9L }, + { 0xa2333077c7397651L,0x5c211fa519423669L,0x559cd47daed32353L, + 0xadff408bedc217ebL,0x40cf1b33123dc0f4L,0x839ce0897e4a35f4L, + 0xff76ddbefb7762b1L,0xe8e61888a4957ad5L,0x0000000000000130L } }, + /* 54 << 56 */ + { { 0x591de5c6362c4edeL,0x39aa3a8cf936a410L,0x471d9875f45a68eeL, + 0xab848bf26417283dL,0x3930f64c17395a6aL,0x46c2c2ecd5c088c5L, + 0xc82eea0217d1685dL,0xd2578ceff9e81418L,0x000000000000000eL }, + { 0xd203425d8941c18dL,0xebbd3e9aa73385c8L,0x13b2699de2650901L, + 0x021e79314a90be3cL,0x6af0b6e9e7819b93L,0x2eb061baaa10e875L, + 0xdab14c0d7d693ff1L,0x23a83afc0f7ce5e2L,0x0000000000000139L } }, + /* 55 << 56 */ + { { 0x3415bc0daddd94aeL,0xb6da870e206a5091L,0x383aebe8951ecb36L, + 0xe2381acfc3cc4b83L,0x2c57fcd46077da9fL,0xec27dce3cdc2bfa4L, + 0x5fa457d7ac74831bL,0x70929b9d873146d5L,0x00000000000001f3L }, + { 0x2898bc157ff3cfb0L,0x933c69db1b1cd249L,0x21dab0321a6a3715L, + 0x06e0bc6b6ced718aL,0x93052d3510328c54L,0x8cc2fac1d73c7c80L, + 
0xcd2b155686936113L,0x458e24e07668b2bdL,0x0000000000000168L } }, + /* 56 << 56 */ + { { 0x5ac4879b41fa2753L,0x2b60d3a8345073f1L,0xe2046eb872eac59bL, + 0xf480bccdf197df4cL,0x326cfce9578d9371L,0x15c3eff387f19540L, + 0xf5d1f2f62d76b437L,0xaa3b9f284a2d837fL,0x000000000000004fL }, + { 0x3bb0f06972915337L,0x3514f6f95f8295faL,0x1dcca7e3156a036bL, + 0x68d2d0615d48ae2eL,0xb5731bd212ac786bL,0x4015ce122bd726a6L, + 0xd3b325b52f972524L,0xad89e2c715ca38b8L,0x00000000000001dfL } }, + /* 57 << 56 */ + { { 0x80f3752746ba3374L,0x17c3915a76b9250cL,0xa81ef86e5641a4faL, + 0xdb65b01ec2e3b903L,0x46b79c9acbaa007dL,0xf96657aafa03f7d5L, + 0x39c73fbcba538237L,0xe95777559318c4a9L,0x0000000000000063L }, + { 0x78d1e0b2fb143ab2L,0x93fa7bf4cac9457fL,0x316cc8b62c49a2ceL, + 0x5aabc3b2f41cd892L,0xeee80db1bc9a08faL,0xfb07f5c1b59f1a2eL, + 0x1d1ae1622a1ae2cdL,0xfb1c023c3b5e28b2L,0x00000000000001a0L } }, + /* 58 << 56 */ + { { 0x57e6838723bbb10bL,0xce43b3fbe2d50c09L,0xf1dfd466de95334dL, + 0xd33833ab00568c70L,0x3b07afa03dce59d5L,0xe06155f041d46076L, + 0x66345e81ec062903L,0x35831a3a7d9d7c58L,0x00000000000000c9L }, + { 0xcfc25d2564cfcde7L,0xb06f5dce33ced182L,0xe4c9634dd03004a1L, + 0xfb6ff3a0ae1218d2L,0xd789018b67a58246L,0xecb1e40866957bbcL, + 0x905b08ad2c29f673L,0xb1a3289d3755ffdaL,0x00000000000001e7L } }, + /* 59 << 56 */ + { { 0xdb31fac07977f851L,0x94f0265d50bac93fL,0x112b3aa3de5cc643L, + 0xbd801c10091773b4L,0xd39349f406356000L,0xb6d10739951a5fccL, + 0x67c4c2a346a64c7aL,0xbbff71099b914860L,0x00000000000001c0L }, + { 0x3bfe3e8b848ac957L,0x05211d3b5389375fL,0xeb0bc5c75c156ba0L, + 0x08d24e2a1a7a553dL,0xfa0cc235b328cb81L,0x782a58dc91f27c34L, + 0x149da3fa1cdf0ee6L,0xdab41dcbf1d7c685L,0x000000000000013eL } }, + /* 60 << 56 */ + { { 0x468816bf0e4b804cL,0xe3c85f32cf84abe3L,0xf2a460f32a6e257bL, + 0xfd519486a6979e17L,0xbc722a3681e12915L,0x43285362e2965c8bL, + 0xeb684e297dd12eb2L,0xcf0b3d2c51940f61L,0x00000000000000b5L }, + { 0xa9d02d65f2905443L,0xf19253da29be718dL,0x6fbe038204a5ffafL, + 
0xd4d42b3a5466364eL,0xf2575aeb4fb8b934L,0x56a35ffa88dd5475L, + 0x894d296f67b692e6L,0x50e6c388fb45781eL,0x000000000000018eL } }, + /* 61 << 56 */ + { { 0x352429236de91b7fL,0xe73222409cc8a649L,0xee8e66a463b2941eL, + 0xc8bd62c66e2c9a28L,0x3e82ac1bc43d158cL,0x2eb41b3b9be8efd0L, + 0x551eec869d585174L,0x258c356317ccd162L,0x00000000000001e4L }, + { 0xe63ed8577a7d717aL,0x0f09b874c207eb1fL,0xbb94042c0ecdee46L, + 0xd08478728f418f3dL,0x3eab958e700177deL,0xc326a47822aea03dL, + 0x22d01a0f37ccd3d3L,0x0bbe9ef8f9dda412L,0x0000000000000140L } }, + /* 62 << 56 */ + { { 0x0c949e8cc853e7e9L,0x52feaad8231a9c75L,0x382cfee233fddf9dL, + 0xc6b412401b081917L,0xbb2af113a5800aceL,0x203d3b3f62a15a88L, + 0x174b2602c32d6121L,0x32bc39cfd9a93228L,0x00000000000001f5L }, + { 0xf13bf17ed40e0765L,0xc54f2409c1eda9b9L,0xe824c8805d92029bL, + 0x43d79cbb1e13c2d3L,0x4b8b896c14d20cb3L,0x3c32a0d82559daa6L, + 0xdd2321a03cfe4976L,0x6dc9b1af4fd3ecceL,0x0000000000000024L } }, + /* 63 << 56 */ + { { 0x0b7ce8de2ef7874bL,0x7b4bfaa070157203L,0x4b6f5fcb1af1491eL, + 0xc0b9acac1e572d0dL,0xe4012cfb1d5fcf38L,0x80f2e86c0a1993d1L, + 0x4c7ae53b9d2105deL,0xabfc15200cf27789L,0x00000000000001f9L }, + { 0xb62c40f483f73444L,0xd3306a87826409c7L,0x9f04fded3ab05940L, + 0x3d7534fb2d9db0f9L,0xe36a557d5f5bff91L,0x3c009b9826b33300L, + 0xfee7231232a9d384L,0x1d83185a7c1210c0L,0x00000000000000b4L } }, + /* 64 << 56 */ + { { 0x72f88bd2a3b42a3eL,0x7cca9272c8287e7aL,0x527c4a5caad93849L, + 0xfb481c75da6615c2L,0x27ce59bf042b3497L,0x70b3fea29ef62e76L, + 0x025ecfc40d097f4bL,0x5ef137da607f9d21L,0x0000000000000053L }, + { 0x3295047616741f79L,0xeaac9e23a4e120e4L,0xe15f333776f4fe2fL, + 0xac402f2d8028f30fL,0x87a6b88b99c4b4d3L,0xfedd124cb0866e18L, + 0x07c5d4334aa80397L,0xafdfa6830961d56aL,0x00000000000000a6L } }, + /* 0 << 63 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 63 */ + { { 0x1f70b2980af43bd4L,0x9351c3c2d13901f9L,0xf702c613845ea55bL, + 
0x892be0152c193388L,0x485c40141e6adb21L,0xf394d50e9296f310L, + 0xda5872dfb9ab0f6fL,0xa10bd887f5d389aeL,0x00000000000001c6L }, + { 0xe268935dc8f44c3cL,0xaa7e595bfa72991bL,0x78c2a8e39bf7d8ceL, + 0x0caadde13b655948L,0xd9444be5c080228cL,0x4990733bf651b425L, + 0x7513085102578b0fL,0x5c375bb3c5635719L,0x00000000000000d6L } }, + /* 2 << 63 */ + { { 0x94a182e2689dbe3cL,0x2d8ee16602610256L,0x4d0cb259e6294291L, + 0x244db1200a7fcf2dL,0xb50d34b08f58f3afL,0x2ad4b9ec13275075L, + 0xa19beccfc96bfcfaL,0x1a5509da86f5baf0L,0x0000000000000185L }, + { 0xefe1469c5ac043e6L,0xc86b77217d273130L,0x2324af0c2606809aL, + 0x33a3324e21800fcbL,0x8a38f30fa6b0d999L,0x12db51d0d038b182L, + 0x53ca86ccb3490ff5L,0x2ecd7ecf7a935163L,0x000000000000016fL } }, + /* 3 << 63 */ + { { 0xc4bc947556f30775L,0xb8bffac8a7779bc2L,0x59c0b0079550271dL, + 0x68dc98306303b3f7L,0x4079b84d19d6cca7L,0xb3a87933f7ff41a1L, + 0x8417384b97ab2b4dL,0x795a4fd926faec00L,0x0000000000000179L }, + { 0xc72b8fd99ce51ceaL,0xc230b054cc876f6fL,0xfdb6c8fa23c093c2L, + 0x6c7406112bbd837bL,0x5f6bc8c543802bffL,0x4cfcbe27f9bfb3f5L, + 0x7ed94f5c4444552eL,0x12cd078f366d40dbL,0x0000000000000089L } }, + /* 4 << 63 */ + { { 0xf192b0164b374ff4L,0x3aaded48125fd529L,0x57484ac45dfa717dL, + 0x815b6549a4b4e41eL,0xe230ea493153d4afL,0xbd883b1277769a23L, + 0xae58965ead605e32L,0x4cc9cfcf77179455L,0x00000000000000a5L }, + { 0x5cfe2ea43f30741fL,0x79c512a52565e3d7L,0x9a4075adaea005bdL, + 0x6b2593277fca076dL,0x1ea80148166a3678L,0x25075d36d75fda30L, + 0xd9f2bba9ad53e052L,0x04fc6adc182ac606L,0x0000000000000090L } }, + /* 5 << 63 */ + { { 0xee4779be0ebd57d7L,0x459b071978264e7fL,0xc881006f4bb02fc8L, + 0x9379e60f2a5e085dL,0x4dbca56b92d5f103L,0xb12b712ee2f44c2dL, + 0xb00fd84a642c9631L,0xdec635e926d7fa3dL,0x000000000000006aL }, + { 0x6c7096bf5787204cL,0xff933f0d77eeab07L,0x98ce764279f79751L, + 0xa30afccb5d93fe38L,0x5019a10fa1509d52L,0x6da21e0612f60c20L, + 0x0caadc3c96ee3760L,0xc316231446e7b054L,0x0000000000000129L } }, + /* 6 << 63 */ + { { 
0xbb3cb4daaa283027L,0xb3284a114637a686L,0x2f675f378d4d1678L, + 0x4d2b46fe6bce4a50L,0x65e0d06d583ad60dL,0xa897d633525ea4b7L, + 0xe2aa9fcc42bb371cL,0x8f91d296dd500e47L,0x00000000000001beL }, + { 0xc43c6cffaf7965f1L,0x116c957ca63638dfL,0x80c1361be5a3f8a7L, + 0xa1f87abecc9582d3L,0xd47bc0102bef1ff9L,0xdb660935f3fe712aL, + 0x8cc56dcfcc584a15L,0x27984274a035f2efL,0x000000000000019cL } }, + /* 7 << 63 */ + { { 0xfee036b2d5a60b38L,0x6c09cbdd3de853d2L,0x11f2edd4d74766efL, + 0x19ec36dabb8e5e8fL,0x3755e6f8b103f6efL,0x79a6edc7cfd76631L, + 0x7f3d410265a8227fL,0xc02ea4d369326d57L,0x000000000000019dL }, + { 0x8d5764e32379c4a9L,0xdee4a0f9250aef31L,0x88e469b586f0044aL, + 0x4e83703b0ef683ecL,0xe5887ceab9688ea3L,0x6b42207428d29d78L, + 0x3a20510f60098e8bL,0xa6d929986b4eab5cL,0x0000000000000172L } }, + /* 8 << 63 */ + { { 0x803986670f0ccb51L,0xb87e1d013654974aL,0x7f58cf21b2b29ed9L, + 0x06c0e9aaa3add337L,0xf13b35d0e9d08ffbL,0xdd8bf44c96761627L, + 0xa4a18c14758a3ef4L,0x96a576dda0043adbL,0x000000000000013eL }, + { 0x2bde24f8632d95a3L,0x79f15ef14c524829L,0xaadd863e9bdaba19L, + 0xdde053f4a962b707L,0xc598a2de14258d98L,0x9fa5a19d061c235cL, + 0x0ed46510e8ffd32cL,0x2aea9dd1ef78ceacL,0x0000000000000185L } }, + /* 9 << 63 */ + { { 0xdefc05a810b7033aL,0x3f675458ac32078fL,0xd3ea130a9dda153bL, + 0xda427c6edbbec7f2L,0x10c3809b269c27d6L,0x6f00e4483acf442fL, + 0x00cd414cae989dcdL,0x33eb20b2f559398aL,0x000000000000013fL }, + { 0xd16d5178ee191966L,0x641a327ef65d2127L,0xac8d07fac5fe58aaL, + 0xf0bf06d10e428901L,0x65ea2147d6f1477eL,0xdf1a1634f78e78f9L, + 0x43cec4b2889a387cL,0xb52152850e161507L,0x0000000000000163L } }, + /* 10 << 63 */ + { { 0x2bb9d8b5b329f7d3L,0x7e561b23569eba9bL,0x65fdae9309b7bca0L, + 0xf4206737ef327916L,0x2963cd698f7b2113L,0x3b22a38a9cc9532fL, + 0x455c3790b31def2fL,0x9a6cccd5931e5346L,0x00000000000000e6L }, + { 0xe9860742981c49a8L,0xc6bbffefdac58cfbL,0xf28ca98c7dcbbfa6L, + 0x6a53c5d43e512f79L,0x072d38f9fee0577dL,0x75fc677e0b2260d2L, + 
0xe1ce6a188e97c4b8L,0x48308dff3d3b1e4aL,0x0000000000000109L } }, + /* 11 << 63 */ + { { 0xbc61c248776b9816L,0x2c83f888139741f9L,0xeab3396648a694d8L, + 0x6a7740beddf8a509L,0x3bde3e01f388b62eL,0x5c579737c76e98eaL, + 0xafea98992f10893bL,0x1d6486dafbf3549aL,0x00000000000001b1L }, + { 0x43087f5893143072L,0xe844a95631e5c89cL,0xed5c4e44bee070faL, + 0x7371101a889fd481L,0x282498090f890001L,0x4c8a8d3097b45376L, + 0x4fcaf87c54c665a0L,0x8685ba22863ed8b9L,0x000000000000019fL } }, + /* 12 << 63 */ + { { 0xd389b38cd19f496eL,0x9868052fdc520fb4L,0x698b397ebf6e877eL, + 0x7896e7ac15d39a80L,0x3eecc9b5057ce533L,0x00f6cfc7001509b4L, + 0x8c2c3d6ee01e25f6L,0xf64a3e270dd1056aL,0x00000000000000f4L }, + { 0x192245f8ba1bde97L,0xb68aa03741e87accL,0x849cb6e449d24c74L, + 0x93c36f752569de67L,0x93542a5630ad91a4L,0xa802f2bfcd021165L, + 0xf3eb185fcecd5cdcL,0x42ef43d9ae520746L,0x0000000000000185L } }, + /* 13 << 63 */ + { { 0x78a7620a8a3944cbL,0xca1330ad3a2c833aL,0x151fe934a9fff1aaL, + 0x03baba7dd19fba22L,0x8cd7ec0578147950L,0x8e217c558062827dL, + 0x91e43f9a80a596cdL,0xe63325fdf60dd85bL,0x0000000000000109L }, + { 0x3beb5eceb5446496L,0xdb683de1a5d6592bL,0x1e1c60087cfee827L, + 0x2c4433fbaf08ef62L,0x6818f3ec36270cc2L,0x03581a8683829e2eL, + 0x40161dd9a2a84e47L,0xfe30fa15a62fe107L,0x0000000000000013L } }, + /* 14 << 63 */ + { { 0xcc39400a70a75215L,0xd237b4077ebe4792L,0xb2cf5c3bd20c2507L, + 0xe7b00e11daa4c46dL,0xa576029d38cec7a9L,0x699005e7b05e1375L, + 0xb88d5837f1b051bcL,0x054f8abb44630f00L,0x0000000000000060L }, + { 0xe53a8d17da2bb0c3L,0xcfdd266cae7bdb56L,0xd9551cebdb11c9b9L, + 0x1a11ac221fc5b3d0L,0x63056e13e161a118L,0xdfd99567f76b5e73L, + 0x7f5f7283836f2217L,0x8323f67fac0c57e8L,0x000000000000001aL } }, + /* 15 << 63 */ + { { 0x49581733db3272b4L,0xae447ec9076cd8bfL,0xfa292064666afbe3L, + 0xf8194e0e317ff821L,0x87ab3e6df316b471L,0x2d909542058df8f5L, + 0x951e4970d7fd5afbL,0x073c56e8e8f2f73eL,0x0000000000000175L }, + { 0xcd8497e0c3995b6aL,0x76d443067eda0594L,0xc71be704d1359e77L, + 
0x3e29076941cc9d7aL,0x7762b707ad2c5ff9L,0x4208efa8d4102ac3L, + 0xfa15b04cc6d0e969L,0x977815e5ee7e56aeL,0x00000000000000b2L } }, + /* 16 << 63 */ + { { 0x2838cdbfeb5a169aL,0x25c826c4e1505d76L,0x2d5b0ff8c512faf2L, + 0xcf831e691e5cd6d6L,0x4540775caf5eda1fL,0x4f4514da5ad5605eL, + 0x8472b1ae2f87cfdfL,0x07c8cbcc9e338ce6L,0x000000000000014dL }, + { 0x096ab718d9243216L,0xe966635fe37cff51L,0xced8fb110586ebbcL, + 0x3216d6af86abe6f4L,0x37392a301b77f4e2L,0xecc903afb8ef62b3L, + 0x0453dc44fd74231eL,0x3cb3927b57895508L,0x000000000000013cL } }, + /* 17 << 63 */ + { { 0x6e902960eae03d4eL,0xf94cc9d57676a731L,0x0a624542451cd174L, + 0x702f8c7ff9a07db6L,0x9c4d40d28812a01aL,0x3fdb4c44d775f115L, + 0x3ac9ed82f31b60aeL,0x0c7df028f7bfbf86L,0x00000000000001cbL }, + { 0xf0269e95a5da962aL,0x3cd5a3532fa54061L,0x120017f41745823bL, + 0x8032a3c67de7a766L,0xdea8fbfb36927fd0L,0xc8a86f2deffef472L, + 0x1931f92fe94c1b8aL,0xd2aaccbcae43c384L,0x0000000000000023L } }, + /* 18 << 63 */ + { { 0xa1b0bbc3e372d394L,0x4eecaeca8044f021L,0x97636d0124c350e7L, + 0xf1f047ef209d0e2dL,0x0f922e78d12b0758L,0x8808519a01a2a08aL, + 0x6570af356b5dcc8bL,0xee31edf8b59cee8eL,0x000000000000003eL }, + { 0xd467ff1c9bcdfe17L,0xa6feffa02d58b7e4L,0x3b8f9b17271e5025L, + 0xbe2d94786c151628L,0x934cf55f75fa8b04L,0xd6217d62abf8e3efL, + 0x6f1625cc9841059aL,0x8d4b7142ed8bf5fcL,0x0000000000000148L } }, + /* 19 << 63 */ + { { 0x2f17878e04af92fdL,0xdf29ab114470d793L,0x62094a2a645c90daL, + 0x386b0df0e461f0d2L,0x0ba305e3fcfc889bL,0x01b6e611b9ce50c0L, + 0x396be840f25a09d7L,0x6bd838a8d5715b41L,0x000000000000018dL }, + { 0x60d5c8f971c336e6L,0x7967118885352eb0L,0x58de4fd6d579c51cL, + 0xf65c5db8b8e61555L,0x88023de715ccbec4L,0xf2334044e8662625L, + 0x469745190bbb953eL,0xe8eeca46d6f4b217L,0x00000000000001a8L } }, + /* 20 << 63 */ + { { 0x6bab39543f106abaL,0x3d9c29e352dc57c5L,0xbe339e28d50ff250L, + 0x247c8a49f08b8b6cL,0x79dd3128317bbe5eL,0x735c5b6dd533b321L, + 0x1374c3a28fd164c9L,0xce029be81cd71fb5L,0x00000000000001e7L }, + { 
0x0657b663c6aac6a7L,0x0dfd84e22f233cfbL,0x210fbd7bb2f9d0b2L, + 0xd7e7ca02a90dec0dL,0xc37d11213fa3b10cL,0x160276ee1aff3c3aL, + 0xd825f5ceca6d0545L,0xe03cdac46558c1bbL,0x0000000000000124L } }, + /* 21 << 63 */ + { { 0xe3bdc41f8a78618fL,0xe5e00b1c2fa238c3L,0x132f66010ad92caaL, + 0x35cdcd7606de8ddaL,0x78c2f93ab95776e5L,0x3819082008f8ed10L, + 0xa2862e2c045f708aL,0x5bd7b941acb45943L,0x00000000000001c8L }, + { 0x23fbe1be22514d9aL,0x7e20070b8cd4c4b4L,0xb77618475401c6d5L, + 0xa95163e18bd87a02L,0xc87858801cce9d98L,0x814bc82ea8f70e71L, + 0x166f6fded4ddb0eaL,0xc2e1b1800be17361L,0x00000000000001fdL } }, + /* 22 << 63 */ + { { 0xd238e451a7c101bfL,0x0bad1bd28a0e4552L,0x15eb91427e77916bL, + 0x9c8ef51649ebcc4aL,0xf6425c28ca085959L,0x498ce242c87d4707L, + 0x14b8121edfbf2093L,0x338d5aacdc3b4580L,0x00000000000000e9L }, + { 0x886a911f6177b4ecL,0x6d305ab15a7d1548L,0x6ec3be3475e8baeaL, + 0xcf1cb5a7891bdf87L,0x9aa5f7ec6088b030L,0xe261eedf83288f6bL, + 0x2db08f6444678f76L,0x8d912da6c1cc7f63L,0x00000000000000e2L } }, + /* 23 << 63 */ + { { 0x03241429b406a7c9L,0x1b3c5b0c0308dd8eL,0x27f414dbcb00e3ffL, + 0x8a9e850bba256af6L,0xd8f206fadb86c3b8L,0xaf051dbdec108e78L, + 0xf8093ab3972aaeffL,0x2bc37f41ac85f743L,0x000000000000006eL }, + { 0xf57594cbae5ca3d2L,0x0b140cbbbc1ec354L,0xcac08c4c7d8c40c9L, + 0x0266382d8f8f46cdL,0x9a78dd2907b8649fL,0xd8fe7b2347d25b30L, + 0x322a7bb1b13c050cL,0x7a03a032e5ce60b1L,0x0000000000000140L } }, + /* 24 << 63 */ + { { 0x0644bc15b320de71L,0x2ed296112aefad3fL,0x67754c4c88519d1dL, + 0x47de1837e5f01818L,0x8ea1dd7e63033410L,0xc1abbecd8f6b2c5cL, + 0xb06bd510c8ab4da2L,0x4e0cdecc5964e0aeL,0x0000000000000006L }, + { 0x7c3b12481ffdb678L,0x28ff66790e9911a3L,0xaf7deb99a718a3daL, + 0x391b96215e272577L,0x0f2789b0b4001e20L,0x60b4541686a4c603L, + 0x0c3941313a259848L,0x3218bf48aa7e7d38L,0x0000000000000109L } }, + /* 25 << 63 */ + { { 0x9e4832f7116eeb08L,0x7554d6af45cb254fL,0x7098d4b3fab6ca32L, + 0x994c923e7f403fcbL,0x265bea28516cf1e5L,0x927a73b63faae4ecL, + 
0xc69ac429986dbcd2L,0x67575e0ecdf65148L,0x000000000000009bL }, + { 0x71fc74abaa76166eL,0xb0cf3fe0f558c185L,0x9b68f71b7630d7eaL, + 0x78713c556bcf7e7dL,0x442d5e7f38f14777L,0x71c0d44b3002fd0fL, + 0xe3ab882351ea27dcL,0xa89c6ef9a03d2925L,0x0000000000000080L } }, + /* 26 << 63 */ + { { 0xb03563923e5a6e61L,0xb5e0e27873b8d05dL,0x53f7d377b3078789L, + 0xd56358e1c698706fL,0xa2674710505a1be5L,0xbc1559f4cb650c44L, + 0x93361f0867d11d3eL,0xd5d124ba63826b8eL,0x00000000000000d9L }, + { 0x943eb3571403f021L,0x80d125b3cf992a03L,0xb1365ff2d8bab8cbL, + 0x5eb0490498a1fae7L,0x4f728e6f8004beaaL,0xa4b1dc8074121d80L, + 0xb8b782b3c6b52616L,0x844cf275c3ab4b91L,0x00000000000001c4L } }, + /* 27 << 63 */ + { { 0xdb731dec2c7c6d72L,0x95be4c6d7253d7fcL,0xd983f26a3926cba7L, + 0x256d5d8e5a99093fL,0xd645f6ec8fee2c83L,0x76df91bfb562b901L, + 0x96cd6502753a97a8L,0xe53742c4a5a241d4L,0x0000000000000012L }, + { 0x5e920ba21d11162fL,0x628a198b1bae3cc8L,0x571e9f2b57d92345L, + 0x7a8271497c06194fL,0xddad8318036be033L,0xac129b561d5e7930L, + 0x89a5024057a2e5f6L,0x92e5f7ac3a385984L,0x0000000000000041L } }, + /* 28 << 63 */ + { { 0x255d6d1bf8682934L,0x6167be166fbe8832L,0x3f2fb997c331343cL, + 0x80c30a82bbc0455fL,0x6b06e6039604ee9cL,0x61433976a8a2c05dL, + 0x9b693e3ca6c73e15L,0x5643bc3ac7b6d3ddL,0x000000000000001aL }, + { 0xf362db1d072fc054L,0x95f7e68981452d4cL,0x14382bdc7e8044fbL, + 0xf0ccc6594e884acdL,0x9f4598920db303d4L,0x4494e0177763f7b4L, + 0x8d4de910b3422e42L,0x949b363a2b03d91dL,0x0000000000000174L } }, + /* 29 << 63 */ + { { 0xb20fa7f54065d8a4L,0xd5b8bde5f6cf1533L,0x61ca108443d18b61L, + 0xb4601d7fd5f9671fL,0xe56d7386bb8f433dL,0x29cab0089d364d92L, + 0x6ecb78fce1b4761eL,0x05c88ba39ae6c258L,0x00000000000000deL }, + { 0xc84ab45ea2dcaaceL,0x3eaa1e487c3d46f9L,0xfb29bfa87eed4017L, + 0xd630592c198259f1L,0xf88e6103edba3b02L,0x7e5463527779bed4L, + 0x66431dc5fdd7c079L,0x9c9edb200505eab1L,0x00000000000001a4L } }, + /* 30 << 63 */ + { { 0xd5f2127b4413c3c8L,0x53d19d1790c5ee5bL,0xf46a12c4bd0b0607L, + 
0x23ff83b852f21415L,0x345d3cc5564a2084L,0xdd29de25b8128778L, + 0xf22c6930511855afL,0xd4a6af3342541b83L,0x0000000000000067L }, + { 0x3ff378abc334139fL,0x0d042fa8c59741e7L,0xcc57395df6333e80L, + 0xd458b57146a21bf4L,0x7b5378f61b92954cL,0x0b6a333ea29d48caL, + 0x83ebfc9324bd0efbL,0x84a30cec1062509aL,0x00000000000000a4L } }, + /* 31 << 63 */ + { { 0x7364675a65f8a22dL,0x29dd71039c0e9419L,0x7288a9c2677d42bfL, + 0x1b1991ece9334a31L,0x4b82036d71ee3d46L,0x27597cb83d91b61fL, + 0x1f4334c21acd3c55L,0xdd32214062f2ff38L,0x00000000000000c4L }, + { 0xddf5a8caa81abb52L,0xcc1d2bd5579198bbL,0xc721ebe0c5d36fe7L, + 0x80f82284e4f0a67bL,0xa611478e97997e42L,0x36fdad39b81d30e9L, + 0x8abcdb957f9c8ddaL,0x81ddf6066c6cffd1L,0x00000000000000e9L } }, + /* 32 << 63 */ + { { 0xd8720c8ae96a67e2L,0xa646ecedd54e0f6aL,0x3936ce2dcbd31552L, + 0x6d44c225b35268c2L,0x56dc0d086168f13dL,0x2b9bbca1fb2c207fL, + 0xb4f0da3aa136c5a4L,0xed2cf79bf8216f89L,0x0000000000000141L }, + { 0x1eb0707d7ec6bceaL,0x341b0f22ebe7eed0L,0x99c3c50c37333b3eL, + 0xdcd5faebde463e09L,0xe688ed5ee481cdd0L,0x6fc12bd10fead808L, + 0x7b8a498329746a50L,0x2be1b92f829e8d3cL,0x00000000000000a8L } }, + /* 33 << 63 */ + { { 0x81c98ad1581c9488L,0x3aae9ba74c2faccfL,0x4d2f796dcc390584L, + 0xcb22c0644247e6c1L,0xed884751e19da72cL,0x4dd2400863d28313L, + 0xad0c15efb0c8a9d6L,0xff5e4387d0d17586L,0x0000000000000023L }, + { 0xc41ee78e2c12a59eL,0x8b5949a3ffb2cb34L,0x62a3917a30a3b1a8L, + 0x5e9cd23e2d50ea6bL,0x226184de89a1faf0L,0x3d73604ee842ca17L, + 0x72b01c78c7b33e65L,0x46bc0719126c1d15L,0x0000000000000163L } }, + /* 34 << 63 */ + { { 0x4bed8237777faa85L,0x48347d35048d0b1eL,0x7f0b38f476fcc2e5L, + 0xef6f31029486baa3L,0xb5f8acde91d438bfL,0x41a87e12a8526166L, + 0x8cc2ef135592881dL,0x8f99d106859fd440L,0x0000000000000045L }, + { 0xe6eac1d316e9fab7L,0xda9bb9ddb98856beL,0xc6a6ad2170c12aaaL, + 0x4e24a2b058c77b2fL,0x33432f40f95a8674L,0x021a71070c9020e7L, + 0x4089166d8650289aL,0x4d1986f551969a61L,0x00000000000001a4L } }, + /* 35 << 63 */ + { { 
0x9c21dfb230fb58e2L,0xa6f3914dbbbd435dL,0x6e602e2898109fb9L, + 0x1b700670b44ff97dL,0x7edfa439ab01fb2fL,0x42325f8d60a9be9bL, + 0xb5e0887a0a50918aL,0xcbcf143dab8877f4L,0x00000000000000aeL }, + { 0x173d8cfbb2bf045aL,0xf7ed171942d73eb8L,0xca200f4ec094493cL, + 0x9ad3c750b986ac9eL,0x11c9b6c837ae7c68L,0x315a7712db180137L, + 0xcd8c3dbe14ef1560L,0x35a53627c9a60bfeL,0x0000000000000029L } }, + /* 36 << 63 */ + { { 0x8bb1ae9afa1e7335L,0x472218bcbf806e06L,0x2b9d24030e0e14d2L, + 0xdd03847c275884bcL,0xa4ee2d9878f97e8aL,0xaf93d39acb69f1f8L, + 0xf3e76c60b8b834bfL,0x0b4f2c0473094895L,0x0000000000000060L }, + { 0xed08418036b33636L,0x81b18aa7f78d1671L,0xe20fd32389e80436L, + 0xf8c9f81cf1faefd0L,0x1f7ad3e2602d9f2dL,0xcbff2ab2648cee87L, + 0x4e7440a0f25ddd51L,0x2670e9e1dceab96dL,0x0000000000000193L } }, + /* 37 << 63 */ + { { 0xaa3d00eac058f210L,0x6e5ea70ba3c14757L,0x379d3b3b7e489509L, + 0x62f62a0692767b17L,0xf1539bc721b030b4L,0x4162495067544fc9L, + 0x4f5b019986666040L,0x1be80ab4745a7692L,0x0000000000000032L }, + { 0x6049ad0475117623L,0xe49e9c8781efb988L,0x90dbbf72b574d1cbL, + 0x27d065b3ece9ab4dL,0x8370a703e2632ad5L,0xf390eacbf0859cccL, + 0x4babadd0e117d800L,0x066b62d0cc9697a5L,0x00000000000000c3L } }, + /* 38 << 63 */ + { { 0x63c4d634e78a2da4L,0x4b7a5da294ad0358L,0xf41e93ee9966ada7L, + 0x12072c45fdad9880L,0x1418ca2416a9e931L,0xf74ace09b30a4eb9L, + 0x637d765850045726L,0xf3b20a5e78ae3400L,0x0000000000000157L }, + { 0xf15e1c7ab9e5db75L,0xbf1ff1c2bd8986eeL,0xdec28ef35b8b26b6L, + 0x7bf708f1dc427cd4L,0xefb80977c13928a3L,0xf4975bf5e4c8c4aeL, + 0x8feb6a29fe76a6bdL,0x695bb4dcb4eb10a8L,0x0000000000000193L } }, + /* 39 << 63 */ + { { 0xcde6ee654783776eL,0x47aa98dbab6a4a7dL,0x5c94835bc8032fddL, + 0x992ff3a1c6534206L,0x98b88c5e4ff7a8c4L,0xfa011fd93428ff60L, + 0x4788d51c15510bf4L,0xcacb9fde0719ed09L,0x00000000000001c3L }, + { 0x7d204f6aa83da259L,0x9e4d04ff36ac5825L,0x1928285183d984faL, + 0x29d49d8eb1488712L,0xf43e3fe22c533a1dL,0x826ce0162ddd30e1L, + 
0x2c7004d34084ac94L,0x88fe5d68d54b7430L,0x0000000000000091L } }, + /* 40 << 63 */ + { { 0x6e4b93549a207163L,0xe7ce12aea3fa19e2L,0x3b79d9c3731e7845L, + 0x22d9831ccd2ede26L,0xabb6705b86ca4cc8L,0xdd0920df29ddb9f2L, + 0x9d0f445b811862d7L,0x4166d6e7839180cbL,0x000000000000005eL }, + { 0x182711206b4e81f1L,0x4b6b68c349837065L,0xb7377b163263682cL, + 0x2e0fba0eb79cc7e6L,0x55d9ecf4c02e89f6L,0xe993faa0482c2446L, + 0x9753d5e395d1ab52L,0x73fe5af5c1b06cfeL,0x0000000000000096L } }, + /* 41 << 63 */ + { { 0x7cb01f2827560700L,0x3116ede7f0594295L,0x11714c3a769c3488L, + 0x31b80a2e8fbf02ceL,0x11cef04e5c6535dcL,0x64df55ff3e27c9edL, + 0x0d63a163bc8158feL,0xf9cbf1603b63a411L,0x00000000000000abL }, + { 0xe26c4abf3158ba4aL,0xdb117a38e132d2cdL,0x4ad696322a572d0aL, + 0xbefddee3a8b979c3L,0xbcee468784c32c63L,0x3f861ac5f38ca9cdL, + 0xe94a5b0f5d303be4L,0x60bf3ab155305eddL,0x0000000000000143L } }, + /* 42 << 63 */ + { { 0x2b73e62e977fc1ccL,0x33819c4b1eef1b48L,0x86c62fd2606d7b32L, + 0xb1156e8b9ebbdc60L,0x066d6f66b9e5df80L,0xc91948c566b577afL, + 0xcc7f76043ef1fbe6L,0x5c16641e68aa2e46L,0x000000000000012fL }, + { 0x2747d4b711e75c9bL,0x6b27519fd9bd64a9L,0x1283769a2b54c57fL, + 0x10e1164d081abde1L,0x5a1f76f9992f7ed2L,0xb6c588bf852850b2L, + 0xddb5d11939399542L,0x6a550ca3115876ecL,0x000000000000013fL } }, + /* 43 << 63 */ + { { 0xa47c585a016bcc9eL,0x7e0ec9f0578e48a0L,0x708dc0a8d8c93583L, + 0x6612567c04b5e0bcL,0x3ac1ff92dcf8bbb9L,0xf51ba067ac4f9eb8L, + 0xdfbec7c94bcfd749L,0x44bbd8642eb5bab3L,0x00000000000000e7L }, + { 0x25f2603535bcc21bL,0x060e8f62f03ee3e6L,0x15c57ec99e0479e7L, + 0x976e8a5b05f84775L,0x127c6d03c8c674bbL,0xc9e9f32f933c6b5eL, + 0xe405fea7628e2f5eL,0xd83b69a218dc1cf1L,0x0000000000000085L } }, + /* 44 << 63 */ + { { 0xfe64594aead54bcfL,0x698f1f89193023aaL,0xea6309e02517e1b2L, + 0x5dba33b1015a622bL,0x95a9c92cfbf4115bL,0x30c7972981cc106eL, + 0x05ae3f9d9e44aa71L,0xb996f804a23ddbcbL,0x0000000000000101L }, + { 0xe3d697de475da0b6L,0xce26df727a70a313L,0x057b0e9b55989f12L, + 
0x705ef3af732a2c49L,0x55b69e808c2b9314L,0x17f9042282897d3bL, + 0x235ae22e75a957a7L,0x77811795607c84ffL,0x0000000000000022L } }, + /* 45 << 63 */ + { { 0xff24c793f9b699caL,0x14b15d3217ea10a0L,0xc719f783343073bbL, + 0x2a83d8b90e93b5d4L,0xde503f82f5353f85L,0xbac075114f18ac59L, + 0x530d278c7f81ab98L,0x7f51762a1c619e74L,0x000000000000010eL }, + { 0x5897a7e714360ac7L,0xd69da38de3ff95c1L,0xcf0c8334bf9736ceL, + 0x6e465e9273bc4213L,0x950fdbd6a4e87fd3L,0x4bb07b3a47ca6e6aL, + 0xb9eded92744a7e7bL,0xfb882d485a7514baL,0x0000000000000116L } }, + /* 46 << 63 */ + { { 0xc25e05c36b05e0d7L,0x5a2b025d4da08ae2L,0x71b546980cb36f9eL, + 0x96bf67a4248d9978L,0x9a7bcaa587c7581aL,0xffaa9a7f39a6ff19L, + 0x425c17c27471755fL,0x250359a026062f38L,0x000000000000000aL }, + { 0xd0b2e9a20e795413L,0xbd4e31cb33853998L,0x8c4541ae95a4388fL, + 0x8cb64c8bc50dd060L,0xe34c811101ab0da8L,0x5de5d97069305f2cL, + 0x7a13ad595f5aba36L,0x9e2facc817de1ab9L,0x0000000000000086L } }, + /* 47 << 63 */ + { { 0x7cdf9204bd8cf1d7L,0xe3ad55d8bb0ef236L,0x15773c50555a9e3aL, + 0x46af0d3003688a2bL,0x221ee5d72fc9260aL,0xd1561cf90bcc7900L, + 0xeb1929a2fa485b0cL,0x13482477f9bbdb62L,0x00000000000001b0L }, + { 0x9432cb5a6ca8bf58L,0xb64c60ac6c5411efL,0x52c05411eb94e57bL, + 0x34597b1a6e3b2facL,0x7861df27595820b3L,0x4aff5520d02f000cL, + 0xd0d78d9d0b71a270L,0x015e10d18fe933a3L,0x0000000000000125L } }, + /* 48 << 63 */ + { { 0x0498365338587f45L,0xdf7ff2c77c9bede4L,0xb0158c72a21d0d97L, + 0xa774c9e3a5e5d3c9L,0xf58e7c02cb10150cL,0xb4af1ee79c24a5a9L, + 0x02e0a1d644ab20cfL,0xf24d2c5ada1b7783L,0x0000000000000028L }, + { 0x80bbf95278f7b5b8L,0x4c0bd396efe31505L,0xbcf19c3bec6d175dL, + 0xa8e820cf868e9452L,0x902114319cd29126L,0x7518c2f8cfd996fbL, + 0xf676e6cf4ebb311aL,0x6d108d7e7fac5d7aL,0x000000000000019cL } }, + /* 49 << 63 */ + { { 0x97aaaba1c829bb99L,0xf6860f8745bb55feL,0xf8b50fe71498e519L, + 0xab3d16e3fe8f0221L,0xcac909cfdcdb7323L,0x685be174503ac866L, + 0x97600f6f42b4888bL,0xe935129f3b2bde52L,0x0000000000000189L }, + { 
0xb9914e14c98a8902L,0xaf8252efd12e2742L,0xed19080a9e82bc5eL, + 0xeb729e01b2ce8f88L,0xad7c2b07fdc2d2e7L,0x714be551434c5683L, + 0x533f127a33d712c1L,0x5c1a7dc314b47fa3L,0x000000000000017fL } }, + /* 50 << 63 */ + { { 0x4291478195de7bd7L,0x19372e9158be52bdL,0x17fba77f92e5c6a7L, + 0x6374fd9b699a1dfaL,0x22a45a6bb5c76282L,0x5c9a9a28c720a1c3L, + 0xfb9dceee60825162L,0x69f9a5093ac4b11eL,0x00000000000001a1L }, + { 0x934410934bd1d68dL,0x9f75dc6d6ded82e6L,0xf936823f9ad5428fL, + 0xe87ec8b9c84feabdL,0xdac9bfc13c2f56ffL,0xa160399975f3b25eL, + 0xfa8751ec5ca58a84L,0x1a709ac5f6e53798L,0x0000000000000165L } }, + /* 51 << 63 */ + { { 0xf327fa804594c279L,0xb58ea5486215b212L,0x20e5b9a833e0ce1fL, + 0x3747da9a4a370732L,0x1634ee2e0777119eL,0x32df2dda5c08f5e7L, + 0xa5d66cb9f811ab09L,0xdf8c1dc9ed7de036L,0x0000000000000003L }, + { 0xc910aaf774efbe82L,0x20b0ebe46c378a20L,0x7542866526368ec5L, + 0x85dcb02d25e2b43bL,0x14b5055e26273949L,0x07415b0245387101L, + 0x0986b25d605380d0L,0xf0f61b04cf0def05L,0x000000000000013aL } }, + /* 52 << 63 */ + { { 0x8a87953f92591b59L,0x3143818d5a451f32L,0xf2e96f8b0c29d764L, + 0xff943ed94c40a8a4L,0x82967f842535730bL,0xccf1a40336dcbf8dL, + 0x7dc8d83c45f21a55L,0x0d1d08e3cbf08cb6L,0x0000000000000093L }, + { 0x5eee829d0ab0a1a7L,0xafcb11e44d5694e9L,0x4d87ac664b03e1c1L, + 0xeeeffa85094e51d0L,0x07e16ed7f957519bL,0x7a329adbbe6b8eb5L, + 0x56f27e425f41a65fL,0x749a394ceb0c920eL,0x0000000000000113L } }, + /* 53 << 63 */ + { { 0xac10b05febfab632L,0xe9db26eefb615921L,0x83f9274ff7b609bfL, + 0x208ff0fdb1c6defcL,0x25f97bd50ee1d15bL,0x0d1929a73bcd6f7aL, + 0x13414e340bea3ce3L,0x5a1fd76dac768695L,0x00000000000000f0L }, + { 0x52166f0da85b0ab0L,0xd7b84eb0d03e5891L,0x3d177fbb05391cf4L, + 0xeda59b5d0eff66c3L,0x4eae434e57c15709L,0xe2e28d1394652300L, + 0x9f6723518ddca2b7L,0x6b18b0d057928e33L,0x00000000000001d9L } }, + /* 54 << 63 */ + { { 0x692493e38e9e3858L,0xe3613ac3c54960efL,0x394b17dd14dee07dL, + 0x6dc33f9010f81a66L,0x1c2b9571fe76445dL,0x64f0d6f6fb0961bbL, + 
0xfedcf72c371e04d8L,0x0877291e9cf9ca3eL,0x00000000000001a2L }, + { 0xe33520e7f874564aL,0xa7acf4bf1026c741L,0x46569cf304030157L, + 0x89310d88c0980781L,0x1fe5287f0acde0c6L,0x3e05a033a4696f12L, + 0x1a5f3ea425cf5a8cL,0x4587f6f783c85990L,0x0000000000000076L } }, + /* 55 << 63 */ + { { 0x26047dcdebe585d9L,0x3abcde530ddc7b3aL,0x43231aa9e036918aL, + 0xf4316041818bb25dL,0xd95a25f65eb56f8eL,0x03d1ea7b84e16f23L, + 0x385f010d0e007937L,0x421c7ab1ce1fa2b5L,0x00000000000001bbL }, + { 0xea42f6aa9c67ceb2L,0x2756973ae3f1940aL,0xa4fb7ed908e9b02bL, + 0x26e6e78af5b1921fL,0xa9293c8b90f146fdL,0x39bd4ec3fa3d511fL, + 0x6a82d362a65253d4L,0xbca05eb41e0fdc37L,0x00000000000000ddL } }, + /* 56 << 63 */ + { { 0x10fc7b6fd90fa53aL,0x3e6ce96795fe03c1L,0xcc2308acf9b7b89aL, + 0x1bc9f27a33c58673L,0x1c12f0186273457cL,0x68e172f74c063ea0L, + 0x52f75bf1d34d34deL,0x0ef03bce65f6b2caL,0x000000000000000bL }, + { 0xd8cb54d3d705118fL,0x1e4b56503a7509c9L,0xcc35224e84c60259L, + 0x8ed6226ce578172fL,0xe6fbb97fafa5bb2aL,0xdd461323189d5d7fL, + 0xa4ff6f7a32109b05L,0x5896d4ed26f15e4dL,0x00000000000000f6L } }, + /* 57 << 63 */ + { { 0x05a13d26e82b26aaL,0x9be7e85d1f51ad4cL,0x64216a135bf8d231L, + 0x9a52f4d338918288L,0x811795296042f73aL,0xcf4531b88fb3e209L, + 0xa5ecc2b2b60e33c8L,0x4065785c53ac1b01L,0x0000000000000106L }, + { 0xc19366e2098430e8L,0xd01b9140bad7cc45L,0xd9cd202403d7ed4eL, + 0xfdc06de10880828bL,0x9dc1458082c50ff1L,0x050a0ad4dd5cb07bL, + 0x23c2c3e802e03fb2L,0x9ef4c1720f843180L,0x00000000000001cbL } }, + /* 58 << 63 */ + { { 0xf269b6ea5323d6c0L,0xdf260532c643e91eL,0xc1455524c2768048L, + 0x7b5d6da85f0207e5L,0xde9b50f074781d78L,0xd95862cbdcdf15deL, + 0x5d8d5fabb94c1baaL,0x6892741043e83ad2L,0x00000000000001e8L }, + { 0xf6e72c24aee1d9b7L,0x3326fcbb24b9bb0eL,0x3a749a424a7f74ddL, + 0x5d2578773471ef0cL,0x33ecdd54c95f4cc2L,0x52eb8a04991cbb7fL, + 0x3a73fead1aea5286L,0x6a0234c85d8e847fL,0x00000000000001a1L } }, + /* 59 << 63 */ + { { 0x3a9e952c73bc08afL,0x94816b86fd2ebed0L,0x308a88e4ad9f31a6L, + 
0x9a7dfdeed0600231L,0xe4f89ee9f181de44L,0x0a7df633f7c26fb9L, + 0x2c9ae6ae5f221c7aL,0xe47f3d2872d4115fL,0x0000000000000010L }, + { 0xbf708d7c085f506eL,0x7c4c215b9b26815bL,0x9176aca1c2ec9917L, + 0xa6a694bbe5176a22L,0x119764672c3a75ddL,0x8c4de6dc47e0fd08L, + 0x6111e87d0d1f734bL,0x8d1ded25dc07d865L,0x00000000000001eeL } }, + /* 60 << 63 */ + { { 0xfd833a94974ce9cdL,0x4ca65be246948f84L,0xb7ca6a79f17f102dL, + 0x0c2d75789b8d7dc5L,0xa344848c42df2129L,0xbc8ee0055ec488edL, + 0xb46597b0bc2053abL,0x5c865ead03f3e2ffL,0x000000000000012fL }, + { 0x276249288a899a69L,0xc02d2ee568d0b403L,0xe46c1cdd828b23ceL, + 0x8e52b84309c12d2aL,0x860ddf224f41e7ffL,0x077cf30319235b08L, + 0x7a4db503690d79ecL,0x13f69a12e8a29f83L,0x00000000000001e5L } }, + /* 61 << 63 */ + { { 0xcb347d5f91d9d6feL,0xd0a524b43b9ef3c1L,0xa9e6392c547edac4L, + 0x04d365a9b14bd9b4L,0xe5c90208db0aa5dcL,0x28ab9f56ad2db020L, + 0xdcb2fa5be7ec65e5L,0xed81f29efb0e42b6L,0x00000000000000f9L }, + { 0x4bc95c3197f4324bL,0x64fafe5f71476329L,0x044a41775a70b0cdL, + 0xf2c61da17ba19d9dL,0x983216f2da3877adL,0x83298d8d4b4bd3c0L, + 0xc7243bf461564560L,0x464b5bf41111eafbL,0x0000000000000061L } }, + /* 62 << 63 */ + { { 0xc34857014b40dfadL,0x6400778d0760a76bL,0x89f31e637e4a553eL, + 0xb98eb50e3246a962L,0x956d3e4e7966732eL,0xb7fd65f4820569a6L, + 0x35f492b77a5fd094L,0x079fd9be89a9e2d7L,0x00000000000000baL }, + { 0x2f9bb4decf21481aL,0x7303b3e8d3a12b6cL,0x500d6509ad398eb9L, + 0x4e8be72efe3a1a76L,0x8a82354a5745b6f2L,0xf037ebdaa20fdfe6L, + 0xe12aac60584f6152L,0x75883d79f01f9963L,0x0000000000000178L } }, + /* 63 << 63 */ + { { 0x32b6b535c7c043c7L,0xe41d0e05423daf2bL,0x2cbacde8c0f421f2L, + 0x11cd2d90fdeefc5dL,0x2fd90ed7e691e7edL,0x4bf72e93f5721f79L, + 0x764fb068602fe745L,0x21594eaa97cd2c33L,0x000000000000014eL }, + { 0xaeee0ca0ca8ea7a2L,0xe469cbb01485c545L,0x4c947ee0467f66fcL, + 0x92f9f6b3606a8aa1L,0x56783791ed86541dL,0xcdb60ee64c8d8313L, + 0x2f7fcb7560eca2e0L,0x0a847de3e7816be5L,0x00000000000001ecL } }, + /* 64 << 63 */ + { { 
0x27b472fa3d0e9678L,0xcef22b18f18eab4aL,0xc49e75cc5fde2724L, + 0xd9ec427a39f3c3d3L,0x240963ea07a9f9b2L,0x2b0301e0fc73761cL, + 0xf734c2f480d8c0e3L,0xb9f52a609948da76L,0x00000000000001c9L }, + { 0x6ef525e6fe70af9bL,0x4faf1315c936d7daL,0xfed0cb7aa1c67010L, + 0xb1c8cdfb650fc5bdL,0x643e77418ac141ceL,0x8ecffb683144c8a8L, + 0xf6ff17f5544deba8L,0xea30d6b2721a6d98L,0x00000000000000d0L } }, + /* 0 << 70 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 70 */ + { { 0x476395029a2936b0L,0x48a88079918b33d5L,0x453bc7931b067a87L, + 0x5fe40adc63e55fb7L,0x615677f03d2b9756L,0xdc376c92833e336aL, + 0xf7743da0b0b6c914L,0x780bc5a93effe23bL,0x0000000000000037L }, + { 0xe9c333804a1a9456L,0xda9c55a8eb2eb173L,0x022757865ed17864L, + 0x491f250816039d4fL,0xb1ac3d7b0ec8959aL,0x4d3855d3465a7151L, + 0x7285c2d418e0d6b1L,0xd1a5af1833eac3e9L,0x000000000000011cL } }, + /* 2 << 70 */ + { { 0x80877bfe331dd8a7L,0xb708de53dd15f09dL,0x19f93da02ae5d424L, + 0xe7e4ca9750064a50L,0x95ff7eafc76f7c7eL,0x5a6e49f06e0344d7L, + 0x1f016633a4144a34L,0xff32c2a16fe7d882L,0x0000000000000031L }, + { 0xc8c7b1826df27677L,0x33049c001751ad84L,0x7685641c109f091bL, + 0x0e2bef03fde67f9fL,0xfb7ba091da20f027L,0xcd7200850d2e4994L, + 0xf565899fbc584078L,0x465f703946520913L,0x0000000000000058L } }, + /* 3 << 70 */ + { { 0xb439f594f839375eL,0x273e0e12d6fabfa0L,0x4811f186986b62bdL, + 0x3460a7df29a07e23L,0xf7fb6f4c20aed738L,0xf26b2b9c97d88113L, + 0xa87f0ecc047ebd57L,0x0403df2b1a81558bL,0x0000000000000130L }, + { 0xe8071597f66ce34bL,0xd14095a79440f830L,0xe7924b9994488565L, + 0x972cc87937553260L,0xd33dd2db3c2f0eaaL,0x57e6830e9bf34ee1L, + 0xe2f328371d13ba62L,0x4f84bcb955174ffbL,0x0000000000000010L } }, + /* 4 << 70 */ + { { 0x519a595a5c6f42b0L,0xe39552a9a675acb4L,0xc993a489609641a9L, + 0x1427eb18cb9d6301L,0xeb102dd9fe02d86eL,0x86dfc2d5a4c15ddcL, + 0x0dbfd177b76f6b2bL,0xec31ea91b285289aL,0x00000000000000e2L }, + { 
0x46ed4b2ab85445ebL,0xd272162cc8124af3L,0x1b7e446d675589f1L, + 0x605b28069e56e455L,0x39f9b1abbd0d72efL,0x52d79f51793f00e5L, + 0x28d3e23b7978077cL,0x9759e0e64866c9d8L,0x000000000000010cL } }, + /* 5 << 70 */ + { { 0x5783df4e1036ae82L,0x96d3c8e7186158a3L,0x161876e639b3df61L, + 0xca9525e1b8b99336L,0xdcf0e3f745776a81L,0x1cbd5ebdcf6ba61bL, + 0x895ebb5a345d74a8L,0x9861a95fa29bb4f8L,0x0000000000000106L }, + { 0x59316637e824248aL,0x81f91fac0ec8a559L,0x48d8a50d1b2789daL, + 0x75627156f8298d6eL,0x1c340cf13f452c6dL,0x7e99c311d0221466L, + 0x57d7d1fef04842bbL,0xe0208d495f471f39L,0x000000000000006bL } }, + /* 6 << 70 */ + { { 0x04d40419725e8d7eL,0xd8a785c43faf2e8eL,0x7ed0dc16b456823bL, + 0xab8169b6f8db939fL,0x5066e969510cd7adL,0xa9050429b3cff3f0L, + 0xbc2fb7ff7450449bL,0xc550ea90106929f3L,0x00000000000000fbL }, + { 0x75cfb97c77f10a30L,0xdc20c3be0987d625L,0x69e0878cb18777d1L, + 0x2d77668ec850871aL,0xf2759bdbdcf84652L,0xf7efd592769eb8e1L, + 0xb4dd3aea69169a30L,0x18106739653dd7ffL,0x0000000000000053L } }, + /* 7 << 70 */ + { { 0x6f435bba95d13790L,0xc082ddb3b8171861L,0x28f5c4cfc3b9797dL, + 0xbd46e6f20fcbceccL,0x8a08c2a6fb265f73L,0x0c89cd1ab54ce3e8L, + 0xb0915e664d2eecf6L,0x4d3a71276c691306L,0x0000000000000154L }, + { 0xdcadc680181127fbL,0xd56dde12bec233c6L,0x77ec3c0ffb46e355L, + 0xb047a6a206812dfbL,0x8fee038a509e92cbL,0x799cdc0199465b9dL, + 0xf82664276ce79488L,0xbee7eeebd26ea0f8L,0x0000000000000128L } }, + /* 8 << 70 */ + { { 0xbb292bf5f98c20beL,0x3a3411a680e7f858L,0x4ee85c60884a2064L, + 0x8bf2f700d1ffb068L,0x28a61b4ff5514a5dL,0xef4280ba021ca558L, + 0xaa64507dbeae03e4L,0x8bf2aa0fd601d59bL,0x0000000000000123L }, + { 0x61ca18a2ee308bf1L,0x9e9dcccd55fddd4fL,0x858dd91faa4b0103L, + 0x21a4a04654338e37L,0x88237da980d564cfL,0xfa544159a0dc17b8L, + 0xf6e71432e684e7c1L,0x9b2ec5cb6bcb8393L,0x00000000000001d5L } }, + /* 9 << 70 */ + { { 0xf8184868d78b3258L,0x294a5c8693d8db62L,0x3631dd8ae94c1e33L, + 0x6086c859e22ade8fL,0x1333c8b0fad51601L,0x755a497081778bc4L, + 
0x9d0a7220adf97dbfL,0x9271364238802b70L,0x00000000000000daL }, + { 0x5c8e5b906601b869L,0xde3d0dfec943c397L,0x39d9f226d18fdcc1L, + 0x8c57ea34a90b2bf5L,0xa83a15756c4a7c25L,0x3e4fee5b3def7d92L, + 0x5027d72ad9fe323cL,0x01ec60b853dd5f74L,0x00000000000000ceL } }, + /* 10 << 70 */ + { { 0x8bcb3d9d1b2bf1b6L,0x973fa9e312796b3bL,0x8c312eee0826934fL, + 0x182e3371e7601ca6L,0x6ced3e75991abc97L,0x5b8ae056a347d5c8L, + 0x4808debf4983ea2bL,0x0011eae167510327L,0x0000000000000156L }, + { 0x15ce511d25441f8dL,0xf97a353ce97373ceL,0xf04be7106e289edaL, + 0xe44c1542149c56f2L,0x0bbd7b79d31e6fa7L,0xc7461c7396def27bL, + 0x4947e763f0b4134eL,0xb17d8168c56f702bL,0x0000000000000128L } }, + /* 11 << 70 */ + { { 0xebec93f04ac3f69bL,0xea341f190108e13cL,0x44c89c183b9dc7aeL, + 0x1b643dea7d219c86L,0xa0f8211f1b24f1adL,0x362ed929b660c2e5L, + 0xf4111c58accef904L,0x7d5ae557616b6ca3L,0x00000000000001c9L }, + { 0x5e15f25e64078f7aL,0xd5afe9e9dd0f5ee0L,0x494ccc3c481745bdL, + 0xab14448b8d7378d7L,0xe2d6f0d304da0eb6L,0x133ece3a8e9cff05L, + 0x26d60edf3571e036L,0xbbd2816fb4f9a6f3L,0x0000000000000125L } }, + /* 12 << 70 */ + { { 0x23c4d719a31b8bb9L,0x048abcac740f7882L,0xbd895a1c5ee72269L, + 0x95595e429077db96L,0xd01e2d4b49088381L,0x48570b7ff7e50193L, + 0xa8830d23e6f4ed20L,0xbc8aae5b9a45ddb2L,0x00000000000000dbL }, + { 0x527e8ff8970f294eL,0xd3f0cf5026c1d870L,0x255e637c69aefa63L, + 0xf332ed105bf085b7L,0x6a0bbba69520d99aL,0x46f6172c3bbf6757L, + 0x5f0b4e9f77664d47L,0x5b8c50a11e6ba9efL,0x000000000000019bL } }, + /* 13 << 70 */ + { { 0x9236c67ae086b576L,0xa790103d654d0bfeL,0xc8991f4701bcdc45L, + 0x6a6f3fb3bec6da2fL,0x01249352077f3e54L,0xc242ea70f714141fL, + 0xeb3a0089b7c19b57L,0x03647999992e47faL,0x0000000000000025L }, + { 0x8b46fc0338224530L,0x0349584a95b7d309L,0x86dd737f92514473L, + 0x3e5228f5a538c8c4L,0x184a5259939d9593L,0x4bb9925b0213865eL, + 0x908c3fccb5c7937bL,0xb776024601e2984fL,0x0000000000000065L } }, + /* 14 << 70 */ + { { 0xfadbffb640bf4cfeL,0x6354576d468d492eL,0xa417db805eb84c73L, + 
0xc5a35afd367becdaL,0xd167b100c1d6833fL,0xf181791bc862b1ffL, + 0xac0ff3d2016dded6L,0x29435dd2caf04f74L,0x00000000000000bdL }, + { 0x5caa8fc6fdd66ecaL,0xcaf04d02f73457f0L,0xe4738b2cd8d60dd7L, + 0xcfc4221aaccec9a2L,0xec1fc96e6d831b6dL,0x8661a6fedbc8cecfL, + 0xc3fe3bbe0060ed23L,0x24c065e7ff5c70dfL,0x00000000000001a8L } }, + /* 15 << 70 */ + { { 0xc66433dceb676b47L,0x05c45b2d6ed68884L,0xbec6dd9cc497d94eL, + 0x71567210fb86988bL,0x924292f876834001L,0x0d814f7c3e825949L, + 0x3738e741cd6e5f83L,0x320f4721372ab8dfL,0x0000000000000039L }, + { 0xbf2143f9dd94ecdbL,0x4f2a53a48df2f184L,0x5fb740b7b9c7e9efL, + 0x96aed28a2d5b4840L,0x9a359445fd4ed339L,0xb275af3abcf23a6bL, + 0x0449c11290242ee5L,0x06516db8a7ffada0L,0x0000000000000056L } }, + /* 16 << 70 */ + { { 0xee02879ba3ea1a16L,0x8e2314e439f3a110L,0x931dcab50671f5a4L, + 0xb3010e67fe8a9df4L,0x5df54e420f52a951L,0x0b5eb472530c0c9dL, + 0xfc19d87c18a07787L,0x60ed2c6ff0f2b9f8L,0x00000000000001b4L }, + { 0x6bed0b7e2c3d7f45L,0xd6903d7610fd3891L,0xcb4a9876f30f92b1L, + 0x328688dd25c03d48L,0x8f51283ccdb11750L,0x195f5a81b8d8e361L, + 0x4cf3580afeb346d2L,0x044cfa9425c261e0L,0x00000000000001dbL } }, + /* 17 << 70 */ + { { 0xf125d0eb580b2aeeL,0x3878efbce322b538L,0x2faff7abfbc577d9L, + 0x911a569672826ae1L,0x217136025b759fdaL,0x874e78b16c83f073L, + 0x8357b9eb1ae1f09dL,0xc50cf6a266f5e089L,0x0000000000000041L }, + { 0x7c5ba64930d641b4L,0x4e0c4810d87b5271L,0x7de6d695aa6c6d4eL, + 0x48585b5d9e6e0dccL,0x27e52cc9733b7f52L,0x44f93094118055e6L, + 0xc466de5e79e243a3L,0x9b120026ce5a2675L,0x00000000000000f3L } }, + /* 18 << 70 */ + { { 0xa379e5e5451e1063L,0x839bca6f1cfc15bbL,0x76267c6eeeb1f167L, + 0x6bb1f5c8e8de14b2L,0x053a50d80a056b25L,0x67e18b5c6046ef57L, + 0xc508121660b6d2acL,0x97efffed56e951e5L,0x0000000000000189L }, + { 0x6fe2249a0a6237fdL,0x3e8fc2c9af92fe3bL,0x93406ede77987ec1L, + 0x4a6f3ffcdd289c9aL,0xe6d23748bee59fb5L,0x1218f5958261de30L, + 0xcc8cfa5bd3a0e00bL,0x3658c52f485e6937L,0x000000000000016bL } }, + /* 19 << 70 */ + { { 
0x9588721c38f24c62L,0x75f78ee0b63f6799L,0x966fb95973e53b96L, + 0x6898f5efedfa6b1aL,0x5d4be54bfd90c78bL,0x4a9c2f701c06c9a4L, + 0x14cbd88183649de9L,0xc1f0d4edc04aa071L,0x00000000000000a7L }, + { 0x523d47a89250cc0eL,0xf2bc8db83a37d239L,0xa41c054f9af865f7L, + 0x86e0c8282af3b998L,0xea4726e56c6e21eeL,0x9d8a2148c0a499d4L, + 0xe590538e9cb7dc09L,0x74c7dd3bd2d8393aL,0x000000000000005aL } }, + /* 20 << 70 */ + { { 0x4143662dbe2d233dL,0xdaa51ea937b9c343L,0xe4543a36e39801a0L, + 0x8f043e35c1319e2eL,0x7a0f8df8504b47b3L,0x0c081ef2d4d60dabL, + 0x710081a899486cd4L,0xd2e4ec67a36e4207L,0x0000000000000116L }, + { 0x55c7a9fe07d56e26L,0xe01c071f037f6067L,0x6c4b2bad496b19baL, + 0xe511cb4ebff6490dL,0x0e086c9385868504L,0xac978aca2db3b875L, + 0xc24e564e28a8e768L,0xdba8099b3b1f3412L,0x00000000000000e2L } }, + /* 21 << 70 */ + { { 0x1e036478c5e524ddL,0x7482522f06751ae2L,0xfdd65ecb4d805e43L, + 0xc85188213c81a2c1L,0x4351af46208510feL,0x20258b4286654055L, + 0xce94ad2436951413L,0x51719c791fa571bfL,0x0000000000000186L }, + { 0x4160e3cc57cb1d62L,0x54f91d695c7ca415L,0xac962802c7c230eeL, + 0x474a74dd38772ba5L,0x07dc8e8dcbfb771eL,0x4bbf1d327faff162L, + 0x5c6df969a5e951feL,0x8faf6e8b30aa1ccfL,0x0000000000000124L } }, + /* 22 << 70 */ + { { 0x2ae2176e4a7aedcfL,0x6cc99ac5db4109f8L,0xafc603ca9acd7a99L, + 0x5709cd2e0292cec2L,0xdc3bbdc4a767e68aL,0xd1b99d15ca6414ccL, + 0xbdfd63d8b90ce4c3L,0x5a277d71758432cdL,0x0000000000000165L }, + { 0x57bc026809f79161L,0x8069381b75174677L,0xaee30d96471d5392L, + 0x5b411edd0ab268faL,0x5c6780935ee8258bL,0x16dd3897aff1f327L, + 0x6bd497a4b7be8300L,0x0ed094006a9ec135L,0x000000000000006cL } }, + /* 23 << 70 */ + { { 0x8935e366d379c4cfL,0x3ac951791ba6b75cL,0x0ee860648518d548L, + 0x9ad786622b6b6891L,0x986fd8162db27dc2L,0x94cf26ad2da61998L, + 0x97182802f34138fcL,0xe6a82f6cccc999ecL,0x0000000000000059L }, + { 0x279ba13f96ed64d3L,0x66e45c5f8c942f2cL,0x1e723356b167bb87L, + 0x63392b6ace4614e6L,0xb157e406828ef176L,0xfbdeff2043a70a11L, + 
0x26398beb39d70349L,0x84fc3685130bd235L,0x000000000000003fL } }, + /* 24 << 70 */ + { { 0x06845f761c46e0a4L,0x304f1a22bf614354L,0xf1ef1477999719c7L, + 0x077cb0a94a2082f1L,0x88d79bc1c5ce4148L,0xc0da1dbebec656ffL, + 0x5b78daea71ddae3aL,0x6216ba84ea9a43f2L,0x00000000000001abL }, + { 0xdead308ebea49092L,0x857aa6bd5f4f2d7aL,0xa80d0eeedf4f49dcL, + 0x81f3fbd4612b71f0L,0x8feb7e2e009cb72aL,0x149df0698880b33aL, + 0x49e5030ac70e9ef9L,0x7fdf363de32d7d93L,0x000000000000018cL } }, + /* 25 << 70 */ + { { 0x4b7b9419cca1b866L,0x1b91f7a8ed4b0024L,0x292b214b3f190795L, + 0x4f0f9a9468187149L,0x6af57b920991d891L,0x62be4e9781380070L, + 0xf9631fb59570bf10L,0x85e5e54a46791c24L,0x000000000000019aL }, + { 0x0d1b6e6f58d272bbL,0x2dd54725026260b1L,0x52925be520caa63aL, + 0xd73b7bdf49bcdf40L,0xbf521158f9da7059L,0x2d260f094a257601L, + 0x8711caee7733680eL,0x01fa1a04c910008fL,0x000000000000006eL } }, + /* 26 << 70 */ + { { 0x5fc62df164ca38aaL,0x6e65ba75928404d0L,0xfac5532c04b905ddL, + 0xe7e788ced028cacdL,0x6f6d46a1e4ebaaadL,0x612bb1ccbaea7105L, + 0x234e28db202ccfb7L,0x9368d2b838a2bd0aL,0x000000000000014dL }, + { 0x8fc9d15599616067L,0x8d1b2d5f8551e5c2L,0x86bef02384a895d8L, + 0xe771fa9da6dc3808L,0x7aaebdff2dbeb156L,0xd8179f04aa61893fL, + 0xe3a11d5ab4df5a7eL,0x83babe1fee003cd5L,0x00000000000000b2L } }, + /* 27 << 70 */ + { { 0xbaa5e960356b7671L,0x69cbe8949a55c547L,0xea21f0e7d010da2eL, + 0xa6458bae24057f33L,0x685e19ade119c12dL,0xa4a4bf1cd1323547L, + 0x638bc3c57ea2692cL,0x1b23941f787fa55fL,0x00000000000001f4L }, + { 0x07070fbbd655f411L,0xcdc85f1b0a078b8bL,0xcded78e989c7f01aL, + 0x8e56f279561248b4L,0x27131c4543017398L,0xd85568439a89e7acL, + 0x840f778f5022acd5L,0xd02560ccbad33615L,0x0000000000000132L } }, + /* 28 << 70 */ + { { 0xdc8418b0430aee87L,0xbd5f89c0fe56a51aL,0x10db29c4c631bea3L, + 0xa03fbcbc0c973806L,0xace383ab0ab83b29L,0x19f1d9ba5aec5994L, + 0xf36d16c245cb06a0L,0xa80a5ef4e427ea44L,0x0000000000000104L }, + { 0x8c77115e67c4bd41L,0xbc0d9d752edce105L,0xb4d58d35e1a157deL, + 
0x0564c86be85ab91fL,0xd0bdf26d3cc536ffL,0x4b36e529237657ceL, + 0xd108d7f75c27949bL,0xca0dc963ad81e019L,0x00000000000000d5L } }, + /* 29 << 70 */ + { { 0xdf088fed5170a20bL,0xa9a7752101752a4bL,0x7ad0d5a5d8096500L, + 0x40031b9258232679L,0xf714c7503413e6bcL,0x9869755a1fe03713L, + 0x6e2501ebb8c82fd3L,0xdc0984c442530088L,0x0000000000000016L }, + { 0xbd92501c05c25c04L,0x1c5321cc9ec4c868L,0xfb1f693a4a48a478L, + 0x86d327ca3b87d350L,0x46be9803e525bdcaL,0x1ec5bdbab274abccL, + 0x83c080f9369d1df5L,0x4f700c9cc46b2610L,0x0000000000000149L } }, + /* 30 << 70 */ + { { 0x2f91efc7c0526cd7L,0x798751b06bcf03f2L,0x2e9a2b185b1d1b15L, + 0xa5df63b07585e989L,0x0f7f20cbe088785eL,0xf7e41109c3e810eaL, + 0x6f2e6b0196ddfd16L,0x7c815f673677adf5L,0x000000000000012cL }, + { 0xc88f54441615746aL,0x7c59e46e6b14f304L,0xa4d9adb5c4343abcL, + 0xba341145932fd2e2L,0xa8ead97ec577c110L,0x72c03b1efc69b4dcL, + 0xc7cbf9dac2418ecfL,0x585777da2a60ff3bL,0x000000000000009cL } }, + /* 31 << 70 */ + { { 0x355b900311214b41L,0x5c4c60458255d77eL,0x653841db02befdccL, + 0xf0dc18ec82060478L,0x041dbc6fd8b5343aL,0x2ab668cdcad75821L, + 0xa8219ba431403e5eL,0x2187bcd7dd6f1818L,0x0000000000000057L }, + { 0x3a187181672ef3b6L,0x32aa9ece8bbe1871L,0x42db253f9083e47dL, + 0xaf661bbede04d778L,0xc7d30363b6e2c3a0L,0x883a3cf7c303c0daL, + 0x2e743c812e09eda8L,0xb8a80c00d43eb9a9L,0x00000000000001d6L } }, + /* 32 << 70 */ + { { 0xa8aece33c6467674L,0x89ff0d8f1843299aL,0x59adbaf13ca3a78eL, + 0x196374f4b9a4a75fL,0x789f72316ac10a73L,0x448193fe379c704eL, + 0x65c6e5794fdf5217L,0xe6565a0b57174e77L,0x0000000000000001L }, + { 0x509a5be95b7a3aacL,0x5ec7dfd35b281d20L,0xfd549cc3ff94ffc7L, + 0xf29fe9740cd3bd2dL,0xd8a4f3af708e35b7L,0x45222e3bb52edcd2L, + 0x896b102acd710014L,0x452cd4f4e63ebf74L,0x00000000000000e8L } }, + /* 33 << 70 */ + { { 0xe1cc6844d76a7d21L,0x98a653c0aad01f39L,0xa8e0aa6f5cc3db7eL, + 0x53e176c8616ef3c3L,0x2fcd40c1ecb97fe7L,0x979560d1dc9765aaL, + 0x8ceb932ec9497178L,0xfdda2a9f528d5fbfL,0x0000000000000046L }, + { 
0x5d78055191f09fb7L,0x5025b276358205e3L,0xa2ebb84ce669a482L, + 0x0a0299fb3630ef08L,0xb1b479c07a91a3e9L,0x90ec5d7d5a165b78L, + 0x82e7a594584a646fL,0x393648e146cf895eL,0x0000000000000126L } }, + /* 34 << 70 */ + { { 0x4122c7785767b754L,0xdb5083e1fffbd4d7L,0x602665453533b469L, + 0x729a29a76027f62aL,0x9d5e8150d18a8518L,0x99983cb81c736f6dL, + 0xeee827892fbcc4b2L,0x48c41e5e31554af0L,0x0000000000000062L }, + { 0xed41a057380bd093L,0x90fc9595fa2649adL,0x200199ad591d2e82L, + 0x8936742d4c2cb7bcL,0xd0a3fd6e36575e21L,0x76e227053f305fa9L, + 0x7bdda0df7a21b3afL,0x143dbef0c67f4dcbL,0x00000000000001d1L } }, + /* 35 << 70 */ + { { 0xb5f11cf2ba68056bL,0x15cbc5b04de81e37L,0xd4417ca25cfd5b38L, + 0x5bfb146339a246f9L,0xb74fd92f7cc33e94L,0x7f04e99306ac2e09L, + 0xa79523fd32ded24cL,0x546f77e4f9bd012bL,0x0000000000000169L }, + { 0xca1194a897bb2761L,0xeb8c23d390384797L,0xae9b4cf2ec702d04L, + 0x7591beee789790d9L,0x40a5a1fd3a51f2ddL,0xf696422591d9c91fL, + 0x943b41c0b539db9bL,0x4bef1a61aa810609L,0x0000000000000084L } }, + /* 36 << 70 */ + { { 0xb89c242a654f4e39L,0x3a242406ac7df59eL,0x351db3bf11cc09b6L, + 0x337ce2e5b5cd27daL,0xb911a8c8941f351aL,0x1829610f4f4c663aL, + 0x8c1e50449abd904dL,0xb2a4aecbbe384923L,0x000000000000007bL }, + { 0x20f4b1d2485a76b0L,0x946f7c9f03d1b899L,0x7ee2ae32c4080be3L, + 0xd8926ff9ae4c9f69L,0x29dcb8a9752ffbd4L,0xa19b5f862d627aa0L, + 0xf21a0cf6dbad059aL,0xf209613aeaf6e53bL,0x000000000000008eL } }, + /* 37 << 70 */ + { { 0x77b7a695addb51a9L,0x371624600758a0dfL,0x6366fadd9b96c964L, + 0x9620762a8a257da9L,0x4cf5a821b73d4d9dL,0x885a02e7a5a44883L, + 0x2efaff5be9e6dabcL,0xe56434986c631f4aL,0x000000000000010eL }, + { 0xbf8c22605af0c963L,0x38af35636c1508fbL,0x90e5c18cd3e73bcfL, + 0x45ae790f15381143L,0x59b3919ca68915fdL,0xdeece0122f3f3dafL, + 0xfe50ee7cdf4846ecL,0xe06d6ac82b4d8de1L,0x0000000000000042L } }, + /* 38 << 70 */ + { { 0x4f86e374d4ed697dL,0xfd6361cb5404674dL,0x3db2dbac05238633L, + 0xeaf6a3abd21266d6L,0x9f326b1f6a6ac16eL,0x68602e1a98c82d68L, + 
0xf07a98f68c974427L,0xbad6f88e5e76b4d5L,0x00000000000000ceL }, + { 0xd1694a3fa0041699L,0xac0ea49e1561ebbdL,0x9b0d19f405ab2755L, + 0xa4f3fc8b7cc61e69L,0xbef82a8875e5fe87L,0x725fa4412cf49dacL, + 0x2bd557393e82ee1eL,0xae5fe7a67ebae861L,0x0000000000000005L } }, + /* 39 << 70 */ + { { 0x9ba09dec8ca53aabL,0x77869c0e2e5633a1L,0x4461fc7c445d39e4L, + 0xd6d9b5a15d1700e4L,0x14500eb6bbbe1927L,0xccc39a2e8b1d7227L, + 0x24697b146d4210ebL,0x5f09bd32b1242c25L,0x0000000000000021L }, + { 0xc8a50fa9ad88b85fL,0xf50a6eb907748b9eL,0x8135162693366f48L, + 0x56d7aa91bdb9d82eL,0x03d6cc9b76c2a5ddL,0x6aa01804c25383cdL, + 0x0827cdc98f69ded1L,0xfb2e0e58afb7e2acL,0x0000000000000024L } }, + /* 40 << 70 */ + { { 0x3c4abf3fd97e4127L,0x4d660aa326a7685cL,0xc97e6b3ef7a49e47L, + 0x7b9d1badf34336a0L,0xf7dbf5ad2d068101L,0xb0467ec242fece7dL, + 0x73d37b2b1e537f73L,0x057949157ad06ac5L,0x00000000000001dfL }, + { 0xe8ecdcf643f6a0ffL,0x972197753cf6cfddL,0xc01f32aa931a36ecL, + 0xed5da14457db8ac9L,0x94cc6cabbc5ffc23L,0xfbe877fa7d5d27b3L, + 0xe1c5975e5e6428ebL,0xa2193c9c5a0daa17L,0x00000000000001a4L } }, + /* 41 << 70 */ + { { 0x011b7a689d61071eL,0xe35a59a67bc5c1bdL,0x4b057690da770dd1L, + 0xd5a013dbbc56db57L,0xeece6246f79b0f59L,0x5518d020efdf202eL, + 0x8df9df69a3d6bee3L,0xbda52c0199387789L,0x00000000000001e1L }, + { 0x1c5105e0e2d9d626L,0x9f7cd19de1a5379eL,0xa53dd35bc7f9f7a7L, + 0x18b24218ce4fa266L,0x3e96fdc091047332L,0x97b94516870935f2L, + 0xa433f8ee4468e1c7L,0x4870ae15d77235fdL,0x0000000000000121L } }, + /* 42 << 70 */ + { { 0x7f759aad1e4e55d6L,0x1b6e4c24207d1d3bL,0x12b6917f6a8f4624L, + 0x746b53df3e128c2fL,0x527ebc86d5a0eb44L,0x65d4cf7354880c3bL, + 0xe52f9c5ebc2f92c2L,0x243b18aaa7581d5cL,0x00000000000001ceL }, + { 0x1f18adb3d81cca4bL,0x08ef8756ddc57750L,0x38cec13ab0ed406bL, + 0xdbece99f1f99966aL,0xf86d0ec07285ef43L,0x04a1c0d00f13fcb7L, + 0x5a7dcc74169a4c21L,0x8224bde539fd0453L,0x0000000000000039L } }, + /* 43 << 70 */ + { { 0x3f137f59354edaf3L,0xcf81077943db0014L,0x11befe5055d92f75L, + 
0x03b4beb76cfede5dL,0x142107251b39d04fL,0xba238693ed8e6364L, + 0xd3fb6113eeec60deL,0x88ce54c975c482ccL,0x000000000000000cL }, + { 0x8b56584bc7c2449aL,0x737e2a1969e7b7c6L,0xb58a7dbac7c387c9L, + 0x82b6e6dd3480c33bL,0x0cf346f48d2ea041L,0x37f1d4d6f60831b2L, + 0xaf853adb2b91d254L,0x122fe99374182acdL,0x0000000000000132L } }, + /* 44 << 70 */ + { { 0xdb55c6c2986f7001L,0x67b6c09795cb3d1bL,0x8c985426d2166c0cL, + 0x59026d57e1c9bf0dL,0x59a7419071c85698L,0xfd2dc29c95831354L, + 0x78a61f5c43d60779L,0x09fbd320b2969ee8L,0x00000000000001c2L }, + { 0xba4292559295996cL,0x76252a2b15de5fd1L,0xd2cc5546092dcfa6L, + 0xe91692386e887ebfL,0x4f4495125fc232c0L,0xd0c857cd21b85119L, + 0x2f36082f35fd0670L,0x1be3d45ba37cb8f5L,0x00000000000001bbL } }, + /* 45 << 70 */ + { { 0x25daa7796cfea33fL,0x93c7759086e6a405L,0xcc1ccf6884a1c535L, + 0xfeeab6205ed4d313L,0x6950b0c08a15da19L,0x5b4db037675f34acL, + 0xd84162c5a5cc3525L,0x912030b7ec7e21adL,0x0000000000000078L }, + { 0x86c56b4f19587a4bL,0xb15a58ba8f6920d2L,0x779c28482105317dL, + 0x5e6d47915859b5ccL,0xa7fc961e970b44dcL,0xcc0bf4dd02b1e83bL, + 0x523624b256135690L,0x3f667b5f02443fb1L,0x00000000000001afL } }, + /* 46 << 70 */ + { { 0xc1d83a2638e205b2L,0xfe5221c8d7d0c2c6L,0x6bdb4ac3c1a769f6L, + 0xe4bb2703001f5426L,0x8938ae06f0e37a2dL,0x0f684030b78134eeL, + 0xc37b7d9287aeb54bL,0xca52792c4d47a2bdL,0x000000000000011dL }, + { 0x6ecb81f1b4c2b8c1L,0x4b8c56014b97345cL,0xb37eac649a59f0d9L, + 0xa66ad129a90a37c7L,0x0f2869f7c18ae478L,0xd96066b5ba43a3d7L, + 0x0cfd6f7c091f5184L,0xf30d4f40005d8ddeL,0x000000000000016dL } }, + /* 47 << 70 */ + { { 0x8d6966bae1015a09L,0xdefb5a8cfc2b390eL,0x34a1b39c52254d4fL, + 0xdb691d108958a3faL,0xe17c12d40f1291afL,0x0ea4f61874a2bd6dL, + 0x64d7a420721c9f32L,0xc520a90d468a8bb9L,0x000000000000019cL }, + { 0x811ff4142196746fL,0x6c84aabaec8df85dL,0xf0a1815c502c2e9aL, + 0x2e613f1cb74e586fL,0xdfa1ddace514289dL,0x4de976896069c2c1L, + 0x42a9730362662f01L,0x95584b0f209c9f1bL,0x0000000000000161L } }, + /* 48 << 70 */ + { { 
0xcb0dbd8c81e4d356L,0x292afafd4e0178cdL,0x3a7d9941430adf54L, + 0xfb7dbe480c8f7a12L,0x3191467eb36cc72eL,0x77375b0ae1c3bb00L, + 0x4f6d8307a22d64d9L,0xdd46b8bbaf0baa09L,0x0000000000000131L }, + { 0x07b578c9007dcc0fL,0xab944686094e6e2aL,0xc6bc5ecc762c8082L, + 0xf113fd5e9044d50eL,0xf84f6c1a3048cdafL,0xb3d315cfaf0b90f9L, + 0x8cc36b9430fc0e21L,0x5af00bf1a2975344L,0x000000000000013aL } }, + /* 49 << 70 */ + { { 0x0204a7ed30e14cfeL,0x5c1b090c936eb829L,0xe2bdd0f52ba5fc20L, + 0x2a73a40606bd827bL,0xd292485433abbeffL,0x8878d661123777d9L, + 0x14dfbc3db90150c1L,0xa843edd44ea45920L,0x00000000000001c5L }, + { 0xf21e1fcc3beeb206L,0x2fa375889c6f5b91L,0x974397a83ab64565L, + 0xfd643b96d524eaa6L,0x0c4a87767802c959L,0x66bd68b3d2e8b462L, + 0x3b3d987b5d35d420L,0x1f4edb49c91abd9aL,0x000000000000014eL } }, + /* 50 << 70 */ + { { 0x3b0bd56922fa69b6L,0x6e75e34a36478f37L,0x0a1ffa483270c064L, + 0xf278b5c8cbfa92c8L,0x03e1bfbfa2b9c974L,0x427d4a8c5815805bL, + 0x70075d82b9a9f3c8L,0x1f9ea976a8d98663L,0x00000000000000f3L }, + { 0x13a1debcb67afe56L,0xbc746aced4f90c05L,0x26c5d6f8a280bbb0L, + 0xd0247b69581ca4b4L,0x37e29a53be4842a8L,0xb47c7fa39ad28343L, + 0x2054355f055f25aeL,0x92c89f4292dd61c7L,0x00000000000001f0L } }, + /* 51 << 70 */ + { { 0x160e975afc9057a1L,0xdeb93807786662abL,0x9aa06f5246488ffeL, + 0x8a968143b1e1b1c7L,0x234c6e2387bb6c38L,0xe5f30ad246087620L, + 0x6a37229c0f760c37L,0x38bae97e1a50ae34L,0x00000000000001eeL }, + { 0x1326f1c92b609a2aL,0x9fd27c710b12c5f1L,0xf8ab5311a97362f2L, + 0x67b636400430e4faL,0xdeff5367d8964eb0L,0xb2cf669aa1dff262L, + 0x48a55fe4bc0c0282L,0x8cfe7c6a6cdb5eadL,0x000000000000014cL } }, + /* 52 << 70 */ + { { 0x64900ab5e105ffc2L,0xb3e0a69fd00c2920L,0xfe3a57e0acdc7d5bL, + 0x50017de528a32d2cL,0xdbe2ba7e5bd24047L,0x8f337130884e4863L, + 0x79022d566b48a2eeL,0xac5c8a0b0c2f309aL,0x0000000000000186L }, + { 0xaab8b2cf84bfc6c8L,0x50877d98c9fe9744L,0xde69bb18384d5471L, + 0xe88ee61e438c52a3L,0xa76f2afccce637ebL,0x75b5620ad56fd7bbL, + 
0x56f3f8f0d04fbc41L,0x394174bf023bb654L,0x000000000000014bL } }, + /* 53 << 70 */ + { { 0xd023ab52c6bea32aL,0x6c51c915513932d8L,0x5aece30ad133acd1L, + 0x6fcf4832b981cb16L,0x8b2e8cad9ec1f734L,0x8ab4c75ad10ea732L, + 0x2a044ea51d74bf98L,0x73e479194c492848L,0x00000000000000c6L }, + { 0xe7a5d2cd98038ea5L,0x556046e204a82c25L,0x25d91782b242bacaL, + 0x51f3e0ee1c690836L,0xdbc96f2e730bec27L,0x70830a8102ea0f79L, + 0xc1c16df146e8fb12L,0x2ab499df30403687L,0x000000000000018bL } }, + /* 54 << 70 */ + { { 0x158102961f3b5f22L,0x0488e5021393cea7L,0xaf3e259b2f7dd31eL, + 0xc4a20bf305698181L,0x4f6f54d7c0ab5d89L,0x5057c46df04298f4L, + 0x9e02e2ef67d898beL,0xc88dbe64a3b4ef38L,0x0000000000000124L }, + { 0x3ab2db8ba57e0f38L,0x64b8bc290e21d3feL,0x8d667ea9125f5c6dL, + 0xdabc90e93b7d1513L,0x4e4bad620a9eb460L,0xe51a1df4ef2b9108L, + 0xd19961243d204330L,0x05d1c570c95c650bL,0x0000000000000143L } }, + /* 55 << 70 */ + { { 0x0c0baf4e9e1ac5acL,0x6560212c36c092b3L,0xfe47d88b97f69772L, + 0x60ccdb14e9dd8621L,0xd01c9fb94799fbc2L,0xf0eed0a832103864L, + 0x5253a877d8a18225L,0x98d6acb8b7b95f7fL,0x00000000000001e4L }, + { 0x2648e152738b06ddL,0x906b0951d8d81a05L,0xf65a31390c1f0749L, + 0x4c6f7f0c228e993eL,0x828fa196a36a2aabL,0xcb565b672d28a2c2L, + 0x56253a6472e6f445L,0x64f614b086c8a38aL,0x0000000000000138L } }, + /* 56 << 70 */ + { { 0x3a67b300e334c8abL,0xc08d0ff9984aa5b5L,0x8d72ed49cd8e1509L, + 0xae856df59b2a8535L,0xa34ef023c0cd980fL,0x29c83e84c243d7d2L, + 0x8ecdc4d8411fad2aL,0x26ab853b89eddc0bL,0x00000000000001a4L }, + { 0x3a060b7bbb5b50a7L,0x62adcbe78ddeacd4L,0x953850e464c7dc2fL, + 0x4ec4a15c4e7bf3a3L,0x1834b3ee7d5b2569L,0x729a63cc470a84c0L, + 0x9c80233ffa5e25d1L,0x7da5b6c70ec91aebL,0x00000000000001e8L } }, + /* 57 << 70 */ + { { 0x8ea14527251ffa02L,0x2bf1e67547d87a1fL,0x7b27c81091ba2e1fL, + 0x104c606b9b71cd04L,0xbcccdb93d614f681L,0x7acc330aacd80ec7L, + 0x6ae973894a27fd8bL,0x53827c1b27ccc900L,0x000000000000008fL }, + { 0x4d95d08cb8be4cffL,0xe400db2af9ffa040L,0xa0141a32b1c11971L, + 
0xd126f1474397353cL,0x566cd105b5f4819bL,0x270fb6709b0536dcL, + 0x23af7e24bab1fd28L,0x9710b8e7df1db272L,0x000000000000007cL } }, + /* 58 << 70 */ + { { 0x8dc226ba4ad8bba5L,0x33460d86e63da7dcL,0x77a4605dc88d1aadL, + 0xed5b10f1ea39f371L,0xb70a14697ed77a64L,0x0e60327c61d9bd6aL, + 0x0783398bbf619adbL,0x3ec7209535f6c121L,0x00000000000001b1L }, + { 0x13dd11d6e87d8781L,0x0511b90514684b3eL,0x84726e9723c18f1bL, + 0x29ac0d6d26942427L,0x7d1e62d4f6e29d25L,0x2135742a96de6bd7L, + 0x496a330ced5c26b8L,0xc1ece08a9387f81cL,0x0000000000000060L } }, + /* 59 << 70 */ + { { 0xd254b41b30e4ed89L,0x78502bb1706e5eddL,0x0f80dc97668407b3L, + 0x29261f750c1adca1L,0xc8e281d5d1026701L,0x92b4da71f0a77f80L, + 0xbcfa39e991c34f07L,0x59668cfc83cca319L,0x0000000000000039L }, + { 0x92ab845a79387e8dL,0x59bad0ced71a614cL,0x0388357983ae2d62L, + 0xcda51bd9b1b30c96L,0x81794eba23f2a56fL,0xd5783d4ce9a2644cL, + 0x8c083a7a6e365e0aL,0x6bf3855df6e86aefL,0x00000000000001eaL } }, + /* 60 << 70 */ + { { 0x2edc22f9503dc30cL,0x8e15166ba819de2aL,0xde92a1c90881ff55L, + 0x5063c292425bc484L,0x7265e0a25894ffddL,0x29a93e2bd01afeacL, + 0xb10aae1e4f850b16L,0xd136e09e5bf5f95fL,0x00000000000000f3L }, + { 0xb339031cb292040aL,0xe3a653c6c8872fbeL,0xbba8b2a4e3b091d0L, + 0xde53daa4755f2616L,0xcbc9a13169ff06f9L,0xd7d107b972b153a9L, + 0xbf3d962eed2593a5L,0xf9c71c781eb4063aL,0x0000000000000119L } }, + /* 61 << 70 */ + { { 0x54e878315b00173fL,0x9491c63fe8310c5bL,0x224a487d193c27b7L, + 0x37533a42f6a4c698L,0x2f6a7d23faa79af9L,0xc21f52e2a74900adL, + 0x310382fe6980e5f8L,0x61df1496b9570f7dL,0x0000000000000086L }, + { 0x9519fbd250fcfb88L,0xe3699589db1e40bdL,0xd08ecf3c5d58a256L, + 0x42ca58a17f189165L,0x388309fef63d209aL,0xa9954b554f9fea80L, + 0x0e725ff40cafcd97L,0xfda100dac558c042L,0x000000000000003aL } }, + /* 62 << 70 */ + { { 0xe8fe168dfd2310a4L,0xf898b948a4b90dceL,0xf56e896e16d40ff5L, + 0xb637e62886078426L,0xf8260b3ffb6bd016L,0x8fdde3e65f7d485fL, + 0xf26d3120f6ff0bccL,0x234769001b371153L,0x00000000000000faL }, + { 
0xc48d9d588885aa1bL,0x44b751afc5bcc29fL,0x0e786e998d1dbf86L, + 0xe7b9a49131eb141eL,0xd729dba6c2bc12c3L,0x6feb96ced668fcd3L, + 0xfe0d902270a496e3L,0xfbdf5ea4ef9ce528L,0x0000000000000170L } }, + /* 63 << 70 */ + { { 0xc62025d1d8d1bed1L,0x83a5876e694e25bbL,0xcd4bdf56f844819bL, + 0xbf1ff474a318be3bL,0xd44a10d049d8a390L,0x01e2b1518c6e34bdL, + 0x148d248d646df4a8L,0xc5c4aad079925c4cL,0x00000000000001ebL }, + { 0x5a268e4ba03ccff9L,0x2d03661edb04c064L,0xdfcf0af9ac3e2387L, + 0x90b2da8ddfc0530cL,0x9bbf27ecac30685dL,0x3c286c0f4d93abbcL, + 0xf8cf7f6864a024fcL,0xff5929d6b416d6d9L,0x0000000000000129L } }, + /* 64 << 70 */ + { { 0xc6999604bdf87653L,0x5383b379b3f0b8c1L,0x98c6ac3d0bf0435cL, + 0x2b04984e433c527bL,0x5308793c02073e7fL,0x4897091de1ef3fa7L, + 0x4fea6f39bb6b4777L,0x5ad4c5cc472023e1L,0x00000000000000e2L }, + { 0x0db5541ae6cd3ba8L,0x5504d5218c0bac0eL,0x32f61c5ccfac5e4cL, + 0xb8e644b58538bd21L,0x8517c72e51fc9949L,0x8d47a70cad6f4e8dL, + 0xb158e06317ab479eL,0x8c565f7b74f01418L,0x000000000000002fL } }, + /* 0 << 77 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 77 */ + { { 0x4dc47d473ad158a6L,0xaaf743f315320816L,0x4ecb999de6c5537cL, + 0x4daf6b4ee63c7840L,0x4bf5f19b02bc574eL,0xf65d174caf39b238L, + 0x0093f58bd91f67b5L,0x8ad3a5b346d5e8c9L,0x00000000000001c3L }, + { 0xbb99652c22171340L,0x4da02aaa3f1193a3L,0x7a73a7371776161eL, + 0x831def3aa5ff856bL,0x5f6a4f56da00dc5bL,0x2a23f5009ec3a028L, + 0xce2fd53b7cb32472L,0x894514811bb02b47L,0x000000000000012dL } }, + /* 2 << 77 */ + { { 0xc75e1cf3dbfed7ccL,0x2af884ee092e1835L,0x1e303d715472bf17L, + 0xb1ea537be7ad90beL,0x7fa6a69cc4d8b1cfL,0x5b5318ee1b57f5b0L, + 0x60e617587ccbafcbL,0x5832172a9c4acec8L,0x0000000000000168L }, + { 0x36bae29b9f2f8613L,0x9181194ad464a557L,0xf74c13fe52ff8f2cL, + 0x88d40dff0860d712L,0x405a03bb6c86e898L,0x61f01f2fdc243178L, + 0xea9771e091b667f3L,0x1db5cff3b26bd16bL,0x0000000000000141L } }, + /* 3 << 77 */ + { { 
0x2366b92a98d9df56L,0xa626f80c44eface5L,0xae61b15e9040c6eaL, + 0x1a44a0e218d1d16eL,0xc6f45f75c7cf836dL,0xe0057ed6c87c35feL, + 0x568b05b9b88d323dL,0xfbfcae7a9a12dc71L,0x00000000000001feL }, + { 0xec63ad133dcc4d25L,0xc75d539ba54f349aL,0xeddd4102c5ea1e5dL, + 0x9028eb89a867f628L,0x2d3b3fd042846e4fL,0x23190ead8ed269b9L, + 0xa55122ea67d610dbL,0x83603cdd637c60adL,0x00000000000000faL } }, + /* 4 << 77 */ + { { 0x73f3a17d4787129cL,0x6ca94151e8c8b154L,0xae7257f060011616L, + 0x5d72f548d4459499L,0x11739701c3af5f62L,0xdc2f39ee9915b9b1L, + 0x1d6448e76a3a0976L,0x1263eb6a9216a12dL,0x0000000000000001L }, + { 0xbc6e933bd3c200a3L,0xe4ee3dd9bb9a9ff5L,0x3772c8f553b7bff8L, + 0xbb6a11b8482ad6bcL,0x59bbe81bcf1dd575L,0x2c9ad563891128dfL, + 0x4b190e9390470495L,0xb40f60c5d58762deL,0x00000000000001e5L } }, + /* 5 << 77 */ + { { 0x8c814a121edc8a1cL,0x7b6a7bf4e7395565L,0xc868143978ca6525L, + 0x57477d25fae6014dL,0x8d407fcfb71944fdL,0x4bab62eee154d68eL, + 0xe0db4fe2db1a8e36L,0x8b5a0a9e51e7105dL,0x00000000000001f8L }, + { 0x3c6ba57aeb49cea8L,0xd7100fde6fa16f5cL,0xbc73dcf2580249c3L, + 0x8fa02f243f4bc342L,0xb32061c81a285d16L,0xb348535c499f8f5eL, + 0x95cd0cbe63dc0bc7L,0xd1a09b04920c26c1L,0x000000000000008fL } }, + /* 6 << 77 */ + { { 0xab913473c18c21fcL,0x9a8d50e85a3c3b57L,0xa3f98027ab4214baL, + 0xff00eda9ed6199deL,0x7e5255d83ff38cd6L,0x200f60a17976e4e5L, + 0x2145dc0f6604bcbcL,0x26506027e30d5f5dL,0x0000000000000075L }, + { 0x74c1c25830bf2169L,0x0636d007d332125aL,0x5fae19411ab021b9L, + 0xa2e53c56b7779050L,0x0f15f2a1fc02474bL,0xf32c52daa91a9c2bL, + 0x016b2c4e09af347fL,0x3b98d8d9c8b5752dL,0x0000000000000195L } }, + /* 7 << 77 */ + { { 0xbfd693ce14dca7e0L,0xfca53d1fdf7201d6L,0x10ef9c293f39346cL, + 0x2f007c8150335445L,0x1cd56f642fef4d80L,0x727adb3f179a01cbL, + 0x043c868d1216b55bL,0x1725267a8b7e4431L,0x00000000000001ebL }, + { 0x08f27860c03b7717L,0x46184e5ad56540e6L,0x9d4413db0cd006c8L, + 0xec1262b6c613918dL,0x6dd0eaa7719b6e5eL,0x04a254658cea469dL, + 
0x66575d7d57fe33b5L,0xf2df873dc1eba9ccL,0x0000000000000104L } }, + /* 8 << 77 */ + { { 0x620aff9a81128a65L,0xa463ffe7d6a9d2c1L,0x2206277add4810efL, + 0x413edd73dd0687caL,0xf4b12ec9249cef0aL,0x14dd6665c4b165cfL, + 0x0cf86a23c5c070c3L,0x42354fe984755f2cL,0x0000000000000131L }, + { 0x6f4741df2e9c881eL,0x2656911d43f26343L,0xebae8ae41c82e8e5L, + 0xceb1ca37f3900776L,0x07f2f8fe16721332L,0x59265673aad4911bL, + 0xe8799eabfc3bda3fL,0x55805723fd073355L,0x0000000000000088L } }, + /* 9 << 77 */ + { { 0xc1702e2d7ad5f4c8L,0xf923f2bc5a1339daL,0xa595a3bb549b6f65L, + 0xc610545ee1dba535L,0x7f51f3d42ebaad6eL,0xce7f8bcc6c9cae0fL, + 0x688732ab3d8f57f7L,0x2804a3276493278dL,0x0000000000000117L }, + { 0xfd4db14748347a3fL,0x6fcef6816be9c766L,0xb94a0653e9cbd7c2L, + 0x1642e8b6693b1957L,0x9d6c17e5b68dad23L,0xedc75322b0a1d8feL, + 0x6061c9696d7dbd33L,0x898893654e198b67L,0x00000000000001e7L } }, + /* 10 << 77 */ + { { 0x9c3c0e612af820a9L,0xe80ec4172b23949bL,0xfd6741edf39b5cc6L, + 0x7508d2c569195b07L,0xc5b0b05bf66670e1L,0xc3a6eff7b4fbe0faL, + 0x6bcdfee8439d44f0L,0xae90d63b912965c5L,0x0000000000000197L }, + { 0x7959bec9ee9e6f8eL,0x59f50c0987483ac6L,0x5deccebbacc28474L, + 0xd832648efced11d1L,0xd59cde8c46abbea7L,0xac18a27605da98dfL, + 0x315ebf629282c511L,0x67cf83eca91cc95fL,0x000000000000009eL } }, + /* 11 << 77 */ + { { 0xb25b845fc9f46a97L,0x5f2a1efdc18bd95dL,0x454be47ef18f3fc7L, + 0x0f14f6605543dd92L,0xcce66ca31e88d28eL,0x7de0041d6ad77c03L, + 0x0bfa9ef1875fe7a5L,0x459043152b7c660aL,0x000000000000011dL }, + { 0x5d7c18371811451fL,0x43278c38b3d82a76L,0xc1a74c9387525910L, + 0x6426e55e01983c0aL,0x9ef7fff22afe0faeL,0x54814953d94166c2L, + 0x356c3af7244a9fb1L,0x5b0b753ca6be34beL,0x0000000000000063L } }, + /* 12 << 77 */ + { { 0xeb936532eba11d2bL,0x5a99185776dfa0d1L,0x557cb975f1dda3f2L, + 0x31a06276bca5b098L,0x43b4ab995e00fd06L,0xdbca3db3e6b05745L, + 0xf82d9a4dfbd3914bL,0x9df9a855d9583413L,0x00000000000000ebL }, + { 0x0c6f9f1ac17aeac3L,0xadb82b657aa9309dL,0x5f7d18d0dad1edf8L, + 
0xda0ccd7d6f7a4113L,0x21ff3dd62e459a42L,0xef0ced7a4b586ff3L, + 0xd3899d53cc45432cL,0x06cb34b4415e1762L,0x0000000000000095L } }, + /* 13 << 77 */ + { { 0xd87d4bad1db8928dL,0x6b3b3e07435b5743L,0x4d2de9b2332129d7L, + 0x69e49dacc8cf4e98L,0xf69c8eec8eda24cfL,0x0957359a143e9d82L, + 0xb85860d9519c4f29L,0x8280559d5dfb9972L,0x0000000000000126L }, + { 0x3d53979179a086c0L,0xa2641be22809fd90L,0x90c6e7b0d3b9b9afL, + 0x219465d05d6d7e22L,0x6faef26d4f1805adL,0x8ec78f8e9ea67504L, + 0xfde34e6960c2bd44L,0xada7e8b46e85d7c5L,0x00000000000000c0L } }, + /* 14 << 77 */ + { { 0x09710a2cb143b2feL,0xe8a5da4966ee5443L,0xd0c100132f12b4d5L, + 0x5e7b0e772671b1baL,0xb4dddf2e8a6348f0L,0xeacee7a2c4ecac65L, + 0x19a1c51d04d317f5L,0xd5075202f7bfac9dL,0x000000000000014eL }, + { 0xa82630e6c4383072L,0x739a02360f9afb1bL,0x7cd580240782c13aL, + 0xd5dca3e5269cb563L,0x534d1d4469d39b18L,0x8a99a7eee725e8c4L, + 0x4ae1a734e09b12d0L,0x96b8831a4511fd7dL,0x00000000000001e3L } }, + /* 15 << 77 */ + { { 0xd348f834f34ecfb3L,0x82423bf3a2b5a7e6L,0x11e41b02c531994cL, + 0xc4141466202d71c6L,0x60d3db1693b2104bL,0x2b9cc9bbbf1488fcL, + 0x820dd17464971852L,0x255efd4eb83dbadfL,0x0000000000000017L }, + { 0xc2d71a64d751248aL,0x2088c20bbb351370L,0x5fdf0cf4c33df9a9L, + 0x71e307bbd6796dcfL,0xecff035ef166c04cL,0x8f6ee686bd58429dL, + 0x51a81bd8027186e0L,0x52e8524b22705c39L,0x00000000000001e6L } }, + /* 16 << 77 */ + { { 0x2b14935a7ce70edfL,0x1d88d30ee3c6f5a6L,0xaa6220de207e5973L, + 0x692b3ff064b5e637L,0x04c9ddc8ff0eb748L,0x9e5ec39e44978142L, + 0x9cc06266fa674339L,0xe0d13dcddd3e430dL,0x00000000000001edL }, + { 0x74604722f38bd4ceL,0x473e4d3549da70f3L,0x1228875507d72efcL, + 0xdf89a5050a7521deL,0x674176564c1a6866L,0xe2c330233f7a0eadL, + 0x5e100468a7474dc1L,0xa47ca1a5dcc02d45L,0x000000000000001bL } }, + /* 17 << 77 */ + { { 0x6ec465e156e2014dL,0xe11db5d88be7c4bcL,0xe280c2ef46ee44bfL, + 0x9650200bfc287502L,0xa623faba21131932L,0x883521a67c64c58eL, + 0xf6cf505718803a4dL,0x3466bd19ec4cf07aL,0x00000000000000b4L }, + { 
0xb89009ef651d7f32L,0x97e0c0afa7012731L,0x5a66579cdb06f6bfL, + 0x326054c1ae4ef154L,0x6701b07d9709ad6aL,0x754acc2876647a83L, + 0xec5c1b4194a4d548L,0x7da690fff296788aL,0x00000000000001e7L } }, + /* 18 << 77 */ + { { 0x89971aabe09fd32aL,0x8863882406d544adL,0xc2ce75b757b06c62L, + 0xcf839aebfde27541L,0x236be270534a25f6L,0x22e7c530c753df84L, + 0x8c9277e537d2ea4eL,0x48d1f633ebc61e9cL,0x0000000000000160L }, + { 0x89e3f1eb69876684L,0x90c977e026a0ed1bL,0x610a0a8534a0698dL, + 0x12b6bc1933cdaf22L,0xa40068414f80ee0eL,0x91146c1f7b956979L, + 0xcd77ddc326655fa5L,0x3f480d870f745a46L,0x0000000000000061L } }, + /* 19 << 77 */ + { { 0x40b96c1c3a259f14L,0xe7f40ea079e72202L,0xe8313e54d7267895L, + 0x173ce8f7559cafffL,0x494a51b3a6c1b23fL,0xc696bcceeb58815bL, + 0x170010c8e0dc01fdL,0x01086056d3602b64L,0x0000000000000061L }, + { 0x410f23cc8a1148f2L,0x9c1f4ffc71449f8eL,0x1b7c1d63e39b69fbL, + 0xb3c0a279581a2ebbL,0xf3a475ed4e37ab82L,0xbae74b39c0ce76a6L, + 0x10059c6f67feeea6L,0xf4b5db802b5f3fc7L,0x0000000000000039L } }, + /* 20 << 77 */ + { { 0x899ff5b3a39492e2L,0xba2ed9c090fec192L,0x1f3f792e373ec69eL, + 0x58892019804a999cL,0x2decbbf65680eb65L,0x4c4d1fb1f1991cdaL, + 0x81c7cd2da9bff737L,0x641d5365db793801L,0x000000000000010bL }, + { 0xd1a3e393e7b02799L,0xca23126f920e1690L,0x16229cabdae08e97L, + 0xe02535dc0dc624e1L,0x7f4a6c2707229ca3L,0xd00f68a7ea546fbcL, + 0x14c7a9060bcb65fbL,0x18a29801a778cdc2L,0x000000000000012aL } }, + /* 21 << 77 */ + { { 0x88ef7214a340a2bdL,0x6c21447fb3711634L,0xeda5e1cf59a1a9edL, + 0xec843ced532a2068L,0x5d030969a721eaa9L,0xf848b622c5c12f80L, + 0xfdbbe02eb3b5f479L,0x5a4a7e27ecfb32aeL,0x0000000000000152L }, + { 0xa0ebecc73bd579c4L,0x947cc8931ae87971L,0x55a335a5f997b3f8L, + 0x0fa52370c742c232L,0x04f557068fe480bdL,0xd5dfbec56e89bcf3L, + 0x68094b4fe7e79004L,0xb930cb0577e383c9L,0x00000000000000e8L } }, + /* 22 << 77 */ + { { 0x404b0ca7ef717037L,0x086a03c3afacef33L,0x1a82f5347d458148L, + 0xe4380258397e4eb4L,0x1330d8d4ca7d334dL,0x1838b028ce1374f2L, + 
0x368e12f620c20e72L,0x5a1a189b19279435L,0x00000000000000a9L }, + { 0x938d0621ba67ae4dL,0x8edee31826d4e2cfL,0xf5476a05b5445042L, + 0x32fd912047791f6dL,0x9b896d1d278da749L,0xf0647a5b8027024aL, + 0x3dce74aede2c779fL,0xd238785c70d0322bL,0x00000000000000cbL } }, + /* 23 << 77 */ + { { 0x1389bd572c5be10bL,0xf417b76d6f40d705L,0x43eb4f0ae6bbc695L, + 0x83ee3dbbbceede7aL,0x3b7944776a96d4ecL,0xc8718559cc6adadeL, + 0xa5a422fe93cb083cL,0x88f55bde65b67cc0L,0x000000000000007bL }, + { 0xa38582d46eabed01L,0x8555c12c141ce97cL,0x8af7c38b358fc420L, + 0x40639ba9c43f8fcdL,0x187da0e6ebd2a1e3L,0xface1ad56d325bf5L, + 0x9f44ec9fbbc2d694L,0x9029fab4d125a7f0L,0x000000000000007fL } }, + /* 24 << 77 */ + { { 0x79b40005d7c3e500L,0x99957461a7cbe47dL,0x1dba911d53a81eebL, + 0x50049c2a54cac621L,0xc3052ce65c8458c0L,0xd4e4cc285cd9822eL, + 0xa64bd2ba9c89113bL,0x616f47f324a9a360L,0x0000000000000000L }, + { 0xb309b38536bb95e6L,0xf7808da5eeb3af75L,0xc9eb3a68ffef855aL, + 0x0d7c5382da981f49L,0x2176630e5a29fe14L,0x7c59c56f4b6dd847L, + 0x363fc08115c998cdL,0x28b9018e1c0e00d7L,0x000000000000009cL } }, + /* 25 << 77 */ + { { 0x2573417e81c83b0aL,0xdd9e807e048ebc99L,0x82a6c4a81b29f10cL, + 0x7d10a230142588d2L,0xbfcc4299646d2ff1L,0xcf6ec008b3d474d4L, + 0x4739d5a10fc69418L,0xc40c43ea6653b7ffL,0x000000000000010cL }, + { 0x35cad063fec6bc85L,0xf71acf741da4918aL,0x1cab540ece2afbfaL, + 0xda8b2b24c98eb950L,0xc727151dc45eb0daL,0xff80e6ff5e8f915aL, + 0x61ece0f0f43fe366L,0xea66de22262859ddL,0x0000000000000128L } }, + /* 26 << 77 */ + { { 0x474171efa97a4763L,0xd93201a034920094L,0x6481f28d5a35265aL, + 0xe3e4d437b7eff28fL,0xe9af6e1aae3a1e45L,0xc4aa6f1cbd9522f4L, + 0x7c03335bbc9780cbL,0x96d41e4a321d6c2dL,0x0000000000000174L }, + { 0x5f26167549113aa9L,0xaa8b464d7b5a62a4L,0xa5a46db0820a01c9L, + 0xc7faa813d78edb2fL,0x930e2c550d6a1d85L,0xa9093a617de2a013L, + 0xa127661d71d619aaL,0x54cbb5a5f1c8ce41L,0x00000000000001e9L } }, + /* 27 << 77 */ + { { 0xb2ba328e784b5740L,0x3aab9cb34128749eL,0x28f63dea025efe48L, + 
0x50ac057754239c62L,0xd69d52d61945325dL,0x30729b23d6e44079L, + 0xb53986e0ba39b326L,0x00cee2b7000d74fdL,0x00000000000000b9L }, + { 0x4c6c851b9dad4796L,0xe5e9073bbb3f588bL,0xb353a8be7a520bcdL, + 0xf3e17105f9f53a85L,0xf1f9092b4ca27a37L,0x98d205f5591535ccL, + 0x36d397df9b0d9312L,0x274fe8314e3ed5a2L,0x000000000000010bL } }, + /* 28 << 77 */ + { { 0xa5c21ee422b55a11L,0xb3d53f0aaafcfa04L,0x801275fda03eadb9L, + 0x1d198806804399caL,0x90ebcc340a2e19b3L,0xe6e38f0aaea736abL, + 0x708a52b84a9edc51L,0x2eb66e95fc7ffffbL,0x0000000000000178L }, + { 0xd46591fd9e589f0fL,0x4f0d19c86d5aa7b2L,0x06709841bf48454bL, + 0x9ebeefe12b45c9c0L,0xf0a115fc6fb45000L,0xff84e50179ebcb41L, + 0x0ec79d93ce8d1212L,0x5566d34415ee3721L,0x00000000000000e9L } }, + /* 29 << 77 */ + { { 0x1ef4d33308cfb94cL,0x7441eb2b997fed75L,0xf5168da9af996143L, + 0x81618ae829a3693fL,0x19744ee0f0b96e6bL,0xb965ef0bb2fd289eL, + 0xe5e5dc6b0cebfaefL,0x8ff8be97049bc826L,0x000000000000010aL }, + { 0xa8eefd0dcb1a9859L,0x51a376b22ac2b4a8L,0x21bc8fa24625bc78L, + 0x415780cf59ca09f5L,0x91b61e743291ae38L,0x5571371deabadbc8L, + 0x03af77636156dc1eL,0x71baf8d9a32f5f90L,0x000000000000008cL } }, + /* 30 << 77 */ + { { 0xb983e450215de93aL,0xf9a04f4d388b1042L,0x4a5d19b5a992494dL, + 0x6daa0b565edd34c6L,0x47c234303575862cL,0x3088d50b4e9d944dL, + 0x02b04a29ada9072cL,0x78027f7acb9e0062L,0x00000000000000f0L }, + { 0xa0826f19f446b373L,0x7d93fb0f81263a01L,0xe6921097c78ea270L, + 0xfa538ddd2aa5c2f9L,0x01c031423df7010cL,0x955fa08be237455bL, + 0x056bc335462e5d09L,0xf4253cdec6b5db6dL,0x00000000000001f8L } }, + /* 31 << 77 */ + { { 0x7817f55b58d0551dL,0xb14d44372fba3e21L,0x0957c7c6213afec8L, + 0x16af19829c3d5a98L,0xce97f84f27ee4bc9L,0x9e67f2ecbb61232eL, + 0x21a4a6a3aacf3cfbL,0xaf2c734abe76c0daL,0x00000000000000d4L }, + { 0x3c25948c614d7f45L,0xe38052af2070cdd5L,0xa1eee814c0addfb7L, + 0xaddf7463aef0d0e1L,0x881ca72b6f18263dL,0x3ded9d2a4ed2eeddL, + 0x1fc7d77bd7c0367aL,0xd72254b3b3ebfcfeL,0x0000000000000047L } }, + /* 32 << 77 */ + { { 
0xc9d22d532c51ab39L,0x0bb42c882f02748eL,0x576c89c847e18934L, + 0xf34cdc49b9513257L,0xaf97df129ed107a2L,0x860263daeead8f82L, + 0x6147d39bfe11c289L,0x3b94b3ebf41158f1L,0x0000000000000113L }, + { 0xc4c84d7cf747d887L,0x9a54f1a2f71f1726L,0xb4a79290daf5817fL, + 0x33dd5df87893855dL,0x567a1293f6953979L,0xf0c76bb60eac5025L, + 0x863fc87abb498d81L,0xf5d4e58ee9b3c11dL,0x000000000000016aL } }, + /* 33 << 77 */ + { { 0x3b17d547448dfb53L,0x7f2cb0729f642572L,0x63e29455c76a69d7L, + 0xca61b766bd3e3646L,0x37a6dd73810708b2L,0xe1a14fd91c905068L, + 0xd44e8f8c80ac4b2fL,0x835bf4e52af9b43eL,0x00000000000000b3L }, + { 0x8c346f71e06e063fL,0x1a328b150b7d8d8cL,0xbaf6a09807bf6a99L, + 0x87e133f9596da0b9L,0x0e597178cf3e907dL,0xd5f05f8ddf233a55L, + 0x309225a26b463c36L,0x300bdf0f0584aa15L,0x0000000000000053L } }, + /* 34 << 77 */ + { { 0x4e7a75991895dfa6L,0x47a75f95bdba3effL,0xca07fe2d3f9a18a0L, + 0x5980f02ac1d1c586L,0xbbb9347f47e52186L,0x64655f6ac4d3c668L, + 0x49b744d851222ef4L,0xff1a52afbcee213eL,0x0000000000000046L }, + { 0x07b176399164551aL,0x8670c0eb724fe5fbL,0x939b844418827401L, + 0x0057903fe2b0b7e0L,0x3715250a6bd60303L,0xfab5172cd457ae91L, + 0xfe0fffd5cb98bd63L,0x2442dd530529b127L,0x0000000000000045L } }, + /* 35 << 77 */ + { { 0x1ff6afce70614995L,0x8ec5c3c42d35576bL,0x3db78d6bf19c93b9L, + 0xb687f566561b78bfL,0x9a0059d21b471120L,0xd2335ec12a6d841dL, + 0x51f487eb03bae0baL,0xe0cddfe86348a091L,0x000000000000017fL }, + { 0xe81c1483f1461684L,0x37d83bb3baeed32dL,0x5d8cbce8fa24c9aeL, + 0x14b9a08bc19282f5L,0x0ffec978bc0d960cL,0xe3453f9dcc202f77L, + 0xe258d82ec95021acL,0x0b7e13ad3cb3f629L,0x0000000000000166L } }, + /* 36 << 77 */ + { { 0xc7ac11e626f1454fL,0xc3c07fb2b35c3517L,0xb3250791aaa6cc48L, + 0x386fdd4b80913dc3L,0x9ceadabacef02e0dL,0x8ffbd36041b82b0cL, + 0xa419d90fe5459cf5L,0x96b655a4d2be1233L,0x00000000000000d6L }, + { 0xfc4d23c02f0b4251L,0xde6584339c166c63L,0x5d2a686fc992b364L, + 0xebe9eab62aec7318L,0x1f276b32205d70a4L,0xa86bf8dbe3ad7c9fL, + 
0x94a38b3aee6e87b8L,0x60dafef41af5d923L,0x0000000000000175L } }, + /* 37 << 77 */ + { { 0x5a3443b09f246493L,0xf304564dbcd26507L,0x0e97a06aa7092182L, + 0xc3db692d8477c80fL,0xc5947c4347f9b7bfL,0x5b16b09bfcb8be69L, + 0x6d68633942bfed11L,0xb2e2c5a703585ba1L,0x0000000000000102L }, + { 0x0c92464dce2a9c87L,0xdd3ba87f5a22f114L,0xe91630bee7c79285L, + 0xec9d61b81671061bL,0xb124aeea3135b566L,0xdb2e1760b108a419L, + 0x8dc457368dccb7e0L,0x395f8c1269f288caL,0x00000000000000d0L } }, + /* 38 << 77 */ + { { 0x5b44bbb6e8d951b3L,0x3fd669c6eda76243L,0xf5060d8492a121aaL, + 0xa3cdd2388ff0d7c2L,0x1fd8c08b68451b81L,0x0d473fa8947d2bc9L, + 0xb84cfb1558ecd9f0L,0xc1d74eaa5861f3a8L,0x00000000000001b4L }, + { 0x8992dcd5a7e3fd83L,0xaca06108361d9bebL,0x6e8a01451eea9b43L, + 0x468cbdff0dec9235L,0x13f19bff1f3f0571L,0x1c521a953db7e61dL, + 0x8315bb22648cc46eL,0x7a4b2e18e8060604L,0x0000000000000155L } }, + /* 39 << 77 */ + { { 0x5aeb03ff7d3ab094L,0xfda8c273a90d5cc9L,0x6883bcf2ddf59c2eL, + 0x6496e8504dec3d19L,0x356eab571c428360L,0xf15edcc7aa376191L, + 0x751db33905e75700L,0x90dae8887250d302L,0x000000000000000bL }, + { 0x11679572d6180ad9L,0xfed6254809ed164eL,0x14598065c141b48bL, + 0x1c1f0baca4575d4fL,0x207b297aa286b951L,0x971052a3ae4c9f63L, + 0x38ca88212a097f14L,0xe505b1ec718e620fL,0x0000000000000119L } }, + /* 40 << 77 */ + { { 0xf02ebe9596f52213L,0x2aba2dfc62f9f1e9L,0x7bbdde57934ef9f0L, + 0xd5c7882ea417af58L,0x55cdfe89dcdf7815L,0xf79051b18470a435L, + 0x364da0aa7e8593e8L,0x02c3d160975a5f81L,0x0000000000000034L }, + { 0x59493183ccd37734L,0x085a8cd2e645e991L,0x2d54d2668c4b4a86L, + 0x96cf84cf7b4079beL,0x4abade40ba683f56L,0xfb29c00bb5457b07L, + 0x6ed5fd3592ae7f9bL,0xb1f1a25fb5a9d039L,0x00000000000000caL } }, + /* 41 << 77 */ + { { 0x26cd556b3921e1a1L,0xaea7a4d9b1f5f62fL,0x8be062eb1ad9028bL, + 0x99c738b3f3c0f0ceL,0x5db59317f043ae79L,0x30903c23fa93058cL, + 0xe075745250dcde94L,0xb63ee08f5d35faf8L,0x0000000000000130L }, + { 0x9dade46088d5a5a1L,0x7f687f6624dc764cL,0x224429986af08677L, + 
0xf6ffc5179857465bL,0xdd1d92cd16eeb385L,0x3b27c4363a9ab99fL, + 0x36fc22217b450819L,0x8a2ae9917c077c4bL,0x0000000000000126L } }, + /* 42 << 77 */ + { { 0x6edbc169d3c026efL,0xb990e974b3656143L,0x82dce5f11052b46cL, + 0x522b454f346588e6L,0x87be4c0a2f04bf25L,0xc013f791d83edef8L, + 0x208f49a8736ff68cL,0x00ae3843281e5af5L,0x00000000000000b8L }, + { 0xfb7c1540b14834ccL,0x7578c1c9036b2491L,0xb1328c4174f9c4d3L, + 0x9158cc04bb0a5630L,0x95d413897757dcf6L,0xe6be1b4f6bdc01a7L, + 0x92930721197a54abL,0xf8cbb795262c5de9L,0x00000000000000feL } }, + /* 43 << 77 */ + { { 0xd4e40c43d91f2e22L,0x760551b0ea9f06dbL,0x7271357be6424d8fL, + 0x1c139ebfd700c632L,0x36ffaff59c1ca226L,0xcc10334cc07ea685L, + 0x066963dbb8b2376aL,0x38066959a91e1636L,0x000000000000006fL }, + { 0xd27f9dc0aa8a23deL,0x9143dd55b035bca1L,0xd21f7d664d70575dL, + 0x3e86e93ad29defadL,0xff32fb430d7d088fL,0x268353357ceb5006L, + 0x9696d3a595ff6981L,0x6e866b5b44250a28L,0x0000000000000156L } }, + /* 44 << 77 */ + { { 0x351a043f060f4eebL,0x15209879d5b3bb63L,0x9698170c9178b96cL, + 0x5b90ee14085b927aL,0xc8993568b5f2f8deL,0x9b5e29d34847abb4L, + 0x46be1cebefd9a245L,0x8b4d783fe64a4af9L,0x00000000000001ddL }, + { 0xb7bef10844ca314eL,0x26be580eea2bd1a1L,0xf57173d964855abcL, + 0x21d2e1ed42aea843L,0x2a75fcb632f0d49dL,0x13316023b1c84fadL, + 0x45734d1e7a1b533dL,0xb1a134dd8eee750fL,0x0000000000000050L } }, + /* 45 << 77 */ + { { 0x2891a9cc118e30e2L,0xcaa1b23324d29c24L,0x299600b7614d15b0L, + 0xd3fa214bb2b193beL,0xabd77d87402684f1L,0x8c0b18c2fdec80f4L, + 0x977833ca8efc68d8L,0x80752a69b3a99b6dL,0x00000000000000c1L }, + { 0xab8b54aaaa4faf4bL,0xe58674b91485e938L,0x70e4907df9448c62L, + 0xb74aadeba5f687aaL,0x994221ada0ebfc28L,0xa2e0bbac8df568a9L, + 0x66e22982e1f06342L,0x800f40d53e73b5bcL,0x0000000000000053L } }, + /* 46 << 77 */ + { { 0xad24f8630abc1fc6L,0x04be06226625e9ccL,0xa0fd69eda80ce1e7L, + 0x3f61e48b44c92416L,0x2ff89da5ce72dfa6L,0x462258a109268f2cL, + 0xfb125f58b4b7fd03L,0xf72a865567c58ba6L,0x00000000000000e1L }, + { 
0x4db84d835c451179L,0x88ccccc63b02f7d2L,0x03ac849dc542c185L, + 0xc20bc4c8b38b637bL,0x8076a9b8cdc5a419L,0xc712278bb3ddab32L, + 0x3cf2df106e8e74b9L,0x7b177eb9486b20b2L,0x0000000000000105L } }, + /* 47 << 77 */ + { { 0x8afcc24d382f5750L,0x3acddfde54812d70L,0x7299dc77cd9c82edL, + 0x1642c13900c140feL,0xa778b51691c0c3c5L,0x038b15eb2502380dL, + 0x0762404f0bd24e3aL,0xad1e2552ce726c20L,0x0000000000000196L }, + { 0x20a92f312523da44L,0xd1023bc8c536bc84L,0xe6b5f3a86c43897aL, + 0xadd1b4f1b3437151L,0x5687d3eb1a97933bL,0x07c6421383350406L, + 0x56874e419e93c582L,0x6bfc781c1f59dab5L,0x00000000000001a8L } }, + /* 48 << 77 */ + { { 0x6503932f0c7723e0L,0xd185af25a8934788L,0x5bc1660c792232f4L, + 0xcfb04aabc2ccd476L,0x59e84fee16844fe7L,0x736c2cd809c31ad5L, + 0xdf44215499058585L,0xf3b79464c3f2b46eL,0x00000000000001d3L }, + { 0x282464c7089e2e4fL,0xbe9d0bfa17d8eee1L,0x7934aed1fcf3e876L, + 0x76465104d4e606b9L,0xf114efb4fa4412cbL,0x8f9f32d2f1fc8480L, + 0x2879c7855c340a29L,0x3d28e8a7d570bcefL,0x0000000000000178L } }, + /* 49 << 77 */ + { { 0xe39823fd528505acL,0x8ebc47325c5f2cf1L,0xc44463bec2949fe8L, + 0x85161d86cce2693eL,0x07a25ac07ee556d1L,0x4f25af1952e857cbL, + 0x11c601ec54424593L,0x73ab0d66312c6359L,0x00000000000001b7L }, + { 0xa5a485e638d15a2dL,0x508d127b1473315aL,0x77f80913ce227761L, + 0xc5ab5ba6f973549cL,0x2feded3f26e06fadL,0xe3e8a9421884393cL, + 0xd10318ee59a646a0L,0xaf1ac30d53726ff1L,0x0000000000000152L } }, + /* 50 << 77 */ + { { 0x100b85a9dd94b4d1L,0xd8daf57a6587d8ecL,0x9310171258ae0fcaL, + 0x0f97b4f8f2987b29L,0x7a3e6862af6fb771L,0x3a285868b8b39256L, + 0x00d36274ee37b5eaL,0x52b11d2e10720476L,0x000000000000012bL }, + { 0x3fa5ea33242d8351L,0xf9d559b230f44661L,0xd2c61b6cb62b0b61L, + 0xc316078cfe0480feL,0x0ad7a72e4db4ea51L,0x624045e0f8bdda6eL, + 0x30f27b5ad9c158a9L,0x94b07694a3b997c1L,0x0000000000000152L } }, + /* 51 << 77 */ + { { 0xc2965b4b16ac65feL,0xc4b7d9409ab82e5bL,0x229ff0cff7ea0055L, + 0x40efdc6369eaa5fdL,0x93c5764644112d4fL,0x0b25310e2715e10eL, + 
0xd6a9efe5c9974136L,0xedacc9706ee8d832L,0x0000000000000046L }, + { 0x893377613727b7e1L,0x88c850afd078941bL,0x58756ece7b823796L, + 0xde7bb167aafaba1cL,0xb057cc537a7fc085L,0x83b15ebba59b9fbbL, + 0x1319c88bc112199dL,0x74f8df4e55023975L,0x00000000000000d7L } }, + /* 52 << 77 */ + { { 0x52c39bdd29c86c06L,0x36c4a7422e6da397L,0xb429fe2916c08eb5L, + 0xa8cbbcaa5f62d74cL,0x49c149eb40b83aacL,0x38128a270b423a47L, + 0x947c4abf4f3121f9L,0x0b2c865c2ae48961L,0x0000000000000096L }, + { 0xd489f8b216565942L,0xc94dc15e2e387adaL,0x7ecdbea39ec45872L, + 0x122026994189738aL,0x57deb4ea1268576dL,0x59dbccf1d8ef43ebL, + 0xdc0d07a5acc68febL,0x519e8d571a01e26aL,0x00000000000001d3L } }, + /* 53 << 77 */ + { { 0xaf380ca366cddf42L,0xcac3e386a4625641L,0x6366ffed6a06256bL, + 0x9803be38a63544acL,0x75a7a48c4235d59aL,0x880b86944b0fa3b7L, + 0x105f4598e06fd240L,0x861670560370cf92L,0x00000000000001c6L }, + { 0xded452e494949561L,0xddae0321d7dd2556L,0x9f81b561326f74c1L, + 0x12b3e9632604eb59L,0xb7f0a72f30a9e3ffL,0xdc6d097e4df8dc6aL, + 0x5b82f64fb2ca84c6L,0x605abf186073c250L,0x0000000000000052L } }, + /* 54 << 77 */ + { { 0xcf8532fb7c6c5928L,0x809b06af68ef4167L,0x1e3f716e69dca4f9L, + 0x605a859dd655961bL,0xc5414e946c1180f7L,0x864afda1026b8a2cL, + 0xd181e149da3fadbaL,0xe7be322d8fce2449L,0x0000000000000142L }, + { 0xc85d4ffa5670ff12L,0xf7e91b0ec3a84a89L,0xdf19ed5befc145c4L, + 0x206c72965a8c3c4bL,0x0b97942db93b820aL,0xb1b890b7a51b824dL, + 0x013d153d20dce1a9L,0xefe9ded346743f8aL,0x00000000000000a5L } }, + /* 55 << 77 */ + { { 0x88d2dc674477ec08L,0x80ba5fac1411b3c0L,0x969b5230a98dfe57L, + 0x752ad8d4eeaf4654L,0x6a922cc35f3d4184L,0xc523a34808e7a943L, + 0x5f764b409cfe4145L,0x7395772bc03b9f6cL,0x000000000000012bL }, + { 0xa990597615682164L,0xf164f513939f3d99L,0x30f6e9db7016f097L, + 0x8890d32051fe1e18L,0x3bba183346117780L,0x70ac1f57c19f9604L, + 0x8457dd06d81e63e5L,0x79f823471aee9177L,0x0000000000000076L } }, + /* 56 << 77 */ + { { 0x8acd524bd326ffb6L,0x4a1091a39e731029L,0xb056d998600d299aL, + 
0xfb695c26b78ebf7fL,0x2b83e30c73cc7f48L,0xf2364e614bcb6645L, + 0x6a707650837638c3L,0x6d1652bf21528a26L,0x0000000000000034L }, + { 0x353dc81f6d2b8fc3L,0xfd3003918627a2e4L,0x5f7c25651179264cL, + 0x69856301af46d535L,0xdfcd6cbd16f35cbcL,0x5d2268fb4fe07645L, + 0x2ca719dd29cac6aaL,0x9f8fcb728cbe9556L,0x000000000000017cL } }, + /* 57 << 77 */ + { { 0x4f2db278e6836ef9L,0xdc9db67431f1f845L,0x41dac672afb53ff4L, + 0x1f39842ea4d18cb8L,0x832a0120c8d7d103L,0x5af1307130d09edaL, + 0xba18cee34f80aef4L,0x919ff54e9b1ece22L,0x000000000000004eL }, + { 0x9e47ee63129039d8L,0x9b52315e4fcd8e90L,0x38df4512cbd444d5L, + 0x35db7331788cc2acL,0x50dbadc2e36c9026L,0xc6ef1044bca03510L, + 0x79e05a990a970df8L,0xbdd712ff5af0d25aL,0x00000000000001beL } }, + /* 58 << 77 */ + { { 0x9aa54906870f00c8L,0xeda2769c2f3b15c0L,0xe0532ca76d46ab6fL, + 0x9c0906af876b4061L,0x1bd77842076041c8L,0x6174e11fd89f7021L, + 0x9d479b3b9bd405a2L,0xf4f7c9ebacfbc196L,0x0000000000000019L }, + { 0x7913dcc655e1cddcL,0x1aed0ee4b8d46e1bL,0x15d4d50ecd4a8a46L, + 0x19f9f35e2c6ab887L,0x1572156140028363L,0x87e4c807d98c50bfL, + 0x3c2ea1ef5dd21087L,0x7f39af3bfc7cfde9L,0x0000000000000065L } }, + /* 59 << 77 */ + { { 0x0418f880e7228e86L,0x02a31e0bc2425928L,0x1ba3e262cd64b30dL, + 0xc90cd1893cb75cfbL,0xdda6046a6b60d4abL,0x700f6be365d4ae88L, + 0x6c19cceb2a55c16eL,0xdda5c411dc3d25b4L,0x0000000000000088L }, + { 0xade4b36ac8dae67cL,0x406b38c3a42ae110L,0x2ba70cf3e640d5b3L, + 0xdb2c34c2e6b4b6d8L,0x2e0c90d8bd73609eL,0x6d1be3ccade63bf4L, + 0xda9a3d0bb0ac8323L,0xc42d60d0d2f72670L,0x0000000000000076L } }, + /* 60 << 77 */ + { { 0x302ccef76dec3c88L,0x04db4fd6110918afL,0x895be3981815e18fL, + 0x87d89080690de8a8L,0x95d62d3304a44f0fL,0x206a7909fb546063L, + 0x327de8996eea1c3fL,0x729f6291fe7aa87fL,0x00000000000001e0L }, + { 0x0efe87e02a49d439L,0xccc22bf2fdf9dcd3L,0x22e02cc613639aaaL, + 0x9502c35f9ad086fdL,0x7d3551898b574050L,0x1ecc10ff410cce0eL, + 0x8f538a0a134236aaL,0x8f60cd8c27d66de9L,0x00000000000000a4L } }, + /* 61 << 77 */ + { { 
0x55645b80c16661c6L,0x948e917c7d0ee031L,0x3eecac38b7e15502L, + 0x0d8e9f161640fcf5L,0x43c6dc1921b94ca6L,0x6e812bbb6a097c02L, + 0x2d3f616b6e983e37L,0x328a6ab8353feb24L,0x00000000000001b7L }, + { 0xdeacf95c5372c6e4L,0x16bf649897e667ceL,0xc73d49047d89c4c0L, + 0xedddbb70621449d8L,0x55eda935acd93cd5L,0xdb12b4afb06b3addL, + 0x2c09fe7ab0dddaedL,0xb5cdc9e307cb7ab9L,0x00000000000001acL } }, + /* 62 << 77 */ + { { 0x63077120aefe3471L,0x6afdb40a16520c6bL,0xdefe75d8e10ffec9L, + 0x61bb8618aba587dbL,0xe787478faa110a6aL,0x660b61e0adaeb173L, + 0x9c2ab425be6b827eL,0x05e45ab3bd4272fcL,0x0000000000000105L }, + { 0xfc20b8cb722ef6c6L,0xd78e5751dc4821b9L,0x6cebe18b1f7e4a73L, + 0xd82a17b7b3b3665bL,0xda96aa6da35af6cfL,0x7b5b59476f5ff66fL, + 0x43ad4a772606eb76L,0x358dcbd77d164e28L,0x0000000000000173L } }, + /* 63 << 77 */ + { { 0x980490160e955688L,0xf9f19c4243d2e2e4L,0xd2c0104dda5c9863L, + 0x700371f62cdd8df3L,0x5d7f0fa834cf1b7bL,0xc54ce1a18d7b8a16L, + 0xc4ba22b8ecd80872L,0x31c353dc77828799L,0x0000000000000032L }, + { 0xffc587a418dc7eccL,0x4973b0e2389fb0feL,0x65013621b185b1d7L, + 0xad6003d1a86d72c3L,0xe78484f1bad2e076L,0x20f1a5678d9f7dcfL, + 0xc351ed478c3d52ceL,0xa6280661ec7a67eeL,0x0000000000000146L } }, + /* 64 << 77 */ + { { 0x20084293eea8d666L,0x01afa6ae12780dedL,0xdbce851a5b79d11cL, + 0x280a43a009b895b3L,0x95c92e1a77c59c03L,0xd99606376b9ec1b8L, + 0x08c0bb007beace14L,0xd0ba7943eb76c9beL,0x00000000000001b6L }, + { 0xa14d2a3f15a51540L,0xfd25ffcb7211ba73L,0x020efe82cbeedfacL, + 0xdc5b6591aa5301e5L,0x25d3413fe1708393L,0x9a3fff34b0a1e5cfL, + 0x1892f1b7d310f9aeL,0x3ec31f629df3548dL,0x000000000000017cL } }, + /* 0 << 84 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 84 */ + { { 0x541bdd795338e99eL,0x2753b15c2656702dL,0x360263f1a7f6af6eL, + 0x42c9778702adc827L,0xb4b4a66527abff88L,0x1f9dfe514a9a1ce9L, + 0xf1fa56ac48ceb12eL,0xf0e55a366d0b967dL,0x0000000000000116L }, + { 
0x7ed35bc349642f41L,0x69c8bc4622ee3354L,0x528466231911836fL, + 0xdb193bafbf4b87e2L,0x427ff5f44209f1e3L,0x4152fd13dae27044L, + 0x3b73f059a1f0ae7dL,0x621ace506f1503e1L,0x0000000000000196L } }, + /* 2 << 84 */ + { { 0xceb3d6d8ecc914faL,0xaaddd75d326df942L,0x8475ee47391b67b2L, + 0xad4cc87be0591e3fL,0x1aa4c9fd9129c3b2L,0x52172155525f0054L, + 0xa32fff27aeade029L,0x550ac763217c0ea7L,0x00000000000001dfL }, + { 0x3d4bbc04df12e80dL,0x82d0b17e8d5ad5baL,0x7b059d1e1e38ba38L, + 0xb22b3beb0f9c9ad6L,0x75948e7725833548L,0x15e78db4b7d8216dL, + 0x3c573681c1697cc0L,0x02dfd8b537f9ed90L,0x0000000000000172L } }, + /* 3 << 84 */ + { { 0x72c5cf1e83625a05L,0xb04221efd0b73c77L,0x52de6bdddcd8527bL, + 0x73e03488d491416eL,0xcfefb1b130e7b551L,0x872c5b20813a17faL, + 0x75f43fb9685a3923L,0xf057cae66960bcd8L,0x0000000000000119L }, + { 0x84e8d67954becb7eL,0x47d5e5da3afdb96cL,0xf0b5d59b04476ed9L, + 0xb2d7daee4c2984a8L,0x07fc2e0e986271e7L,0xa9895f88b3300786L, + 0x8f8fcd5a53a88830L,0xe9446200452d1a5eL,0x0000000000000181L } }, + /* 4 << 84 */ + { { 0xeb05b269a9e110faL,0x3dc96e495694769fL,0x22d5dcb190129a2aL, + 0x15b02839ddcab3beL,0x2784efed9819a2d9L,0xc19a373a83ba587aL, + 0x0ecfe7e631ac78ebL,0x6b7d9a50b6580d76L,0x000000000000009fL }, + { 0x20cf011e22e1f23eL,0x2bcd70c671dec689L,0xa4956ecdb968b2e9L, + 0xee6224fd8c68507eL,0x2aeb7d0c6e7c15faL,0xf857d1a55e1c74a9L, + 0x4f5eb8c6298d033bL,0xfcda7bdada7ef5ffL,0x00000000000000fcL } }, + /* 5 << 84 */ + { { 0xf21724305ce20879L,0xd48233b602995c9bL,0x5da336e0d39d6149L, + 0xc516d0131a37df28L,0xa7098d962b3fe1c0L,0x65cee9a96a3099b8L, + 0xd1df9d2e320323a0L,0xb1c49334cfad65d8L,0x0000000000000024L }, + { 0x3bbc258e902dd9f8L,0x13861a7c0e9ce909L,0x0f0497f50f7c4ab6L, + 0xef96a255ddc4a4caL,0x4655fd6414a185ddL,0xa983915491345882L, + 0x96c039266695b74aL,0xd88db243481da372L,0x00000000000000d0L } }, + /* 6 << 84 */ + { { 0xabe2205b68e52960L,0x773afddfa062a50eL,0x8c58ea01f1cf4a46L, + 0x3e88e23dc76bb8d3L,0xbb3b294bc501e0aaL,0x42cfd6220859b18fL, + 
0x8c6dc95f16286e18L,0xd4ba253e0ecf00feL,0x0000000000000071L }, + { 0xc3970825eb3340eeL,0x4f981608b3ad699eL,0xffb5ea24c80bc4deL, + 0xb774f894604dbb78L,0xa164c4cb1c947619L,0x92bd21e5cf2b764bL, + 0x4b38a590ecca13ddL,0x1a47520a05cb48a4L,0x0000000000000166L } }, + /* 7 << 84 */ + { { 0x85e007a105e31f06L,0x8b43c7defdcaffd8L,0x53d13f277e4ab187L, + 0xf7d22a394d4e71e1L,0xf1daecf88d0a8b1dL,0xe51e2f3a90c409cdL, + 0xb84b6e0b46d43496L,0xb70b148ef5e87710L,0x000000000000017dL }, + { 0xfdcd92b7e45e1097L,0x6c5fa06295da5ef1L,0xd2fa00bb7bac30eeL, + 0xaada82e4f49d2088L,0xf8c4fc0f56827ba3L,0x04ef216b207c79acL, + 0x3a3a685138449d34L,0xf0ad4b5191900ce7L,0x00000000000001aaL } }, + /* 8 << 84 */ + { { 0xb4c52f11f3d7286aL,0xec47cb5cbbf67a41L,0x89d3382eaafb10a4L, + 0x4afd876806222b90L,0xfa16b295feb5416dL,0x0946f9d45d095b7cL, + 0x18526b28ff975e3dL,0x259cec2fb2f1c609L,0x0000000000000104L }, + { 0x545642a062e36127L,0xc3e4c6f71c38e6f5L,0x1d0e02c41bf9d6c4L, + 0x8a7a6fb6483ba43dL,0x7104aee9cf390b9cL,0x4acdccdf2578e2fbL, + 0xc17af6df73b3feb4L,0x2fb789d77977fa95L,0x00000000000001d6L } }, + /* 9 << 84 */ + { { 0xa14396d3e9cd61e9L,0xe5b79eec037dc92fL,0x88217b170156582fL, + 0xedc457471f70551fL,0x60be827f32d048d9L,0x2fe4c45edb6b91b0L, + 0x8bc71163ba4dc597L,0xe3fc3791936e1f8aL,0x000000000000002fL }, + { 0x8dfc5ec30139f399L,0xc93f22717334f267L,0x9b8d510da10066dcL, + 0x8a9d400ab7c157a9L,0x5e83d95d3ad20222L,0x2e244da8483db80dL, + 0xd434f50e97e427ccL,0x0c98d8fe73195f8dL,0x0000000000000014L } }, + /* 10 << 84 */ + { { 0xbf53d7cafcc691daL,0x6b8fb94f57f9d8b0L,0x8124062527343963L, + 0x98cf3891d6d682edL,0xf0ae9654119fed89L,0x61f5810a32803c8eL, + 0xda1c6f26ce561b4aL,0xa23e3c857b478bc7L,0x0000000000000048L }, + { 0x455e708f3530d96fL,0x38d3928e003d010bL,0x289f06c4764bfcb5L, + 0xcccf4fd96ecaae0bL,0x76496e196b09aa3eL,0x13cdb1b5f2f24787L, + 0x1040ea015837cafeL,0x869751f81de1b6daL,0x0000000000000027L } }, + /* 11 << 84 */ + { { 0x573811f6ad9570b9L,0xbaf2ec126ca614d7L,0x6f4a9a0bb735cb79L, + 
0x850e4b736f9affb6L,0xe399dfb43b760ac1L,0xa7759b8e5cee0611L, + 0x58d84ae60722de06L,0x5ab2ed5483dbb9feL,0x0000000000000148L }, + { 0x98c24006df14f9e9L,0xbbd0130f0ad7bd90L,0xee55b08dc3c91b77L, + 0x16a18d954f6934d7L,0x5f191a312deed126L,0x776bcd3284897d4bL, + 0x83b4d25b7b0cd708L,0x6e6aa60cd9f50beaL,0x0000000000000181L } }, + /* 12 << 84 */ + { { 0x34383bb055bef397L,0xc14c123a43be84dfL,0xa0fae432057eb68fL, + 0x5c6c07b24903c2d8L,0x3e78de9d32bcfee6L,0x3d2d16af93bcba15L, + 0xafb2db59c206822eL,0xbbea1116a332e64fL,0x00000000000000fdL }, + { 0x20075ed100c6ea12L,0x2b1d0a044e4f3d33L,0xf3bf3ba82c5370c4L, + 0xe1675545a4b86668L,0x609432c3bcc6dc6aL,0xc09a32e36777d69dL, + 0x9248e09861f37219L,0x32650569f539662aL,0x00000000000000eeL } }, + /* 13 << 84 */ + { { 0xe82b6739d225d7c4L,0xc69d44ec0e8b47daL,0xd842c960d69237a8L, + 0x6ed0097ad24a796eL,0xef96b040030e4149L,0x55c049ac212e51d1L, + 0xcce35f170c44bb8bL,0xbfd67f2971c94884L,0x0000000000000196L }, + { 0xf02b65df4755c1c8L,0x3330ff77a8144225L,0xb4d9365aa39fb2b9L, + 0xd5d31cf295c66174L,0x7597fd7decc0ca46L,0xe7565e2ce11870a1L, + 0xc709bc5da36c5100L,0x4d319121639f0a7cL,0x0000000000000144L } }, + /* 14 << 84 */ + { { 0xaeb813e3c79e8798L,0x01ceafba48e50c58L,0x974bdb02569d8013L, + 0x83c90f107c7c5033L,0x2d6e1617702dce8cL,0x55eec88c3e07ead9L, + 0x9f9bb723106b9a4dL,0xbdbe0818e2b351c3L,0x0000000000000084L }, + { 0xaa512a7ee5058362L,0xb288f28904c001daL,0x0ceed00ca713504aL, + 0xff656d2de24f8ee1L,0x637ae9543eb57ca5L,0x4a21412e7342f010L, + 0x205cf0c2a9ce4de1L,0x4780b6a3d59aee6dL,0x000000000000005cL } }, + /* 15 << 84 */ + { { 0x83994e557fcc0c7eL,0x887856631c37f01eL,0x88a9a5679f196624L, + 0xcd828df25b3fc857L,0x470f8623581344f9L,0x85cf12a6768e83a8L, + 0x5d11327b9718a861L,0x9a1045d76e713452L,0x00000000000001c5L }, + { 0xd4c452e4711f46b1L,0xdc41b91e12fd1e23L,0x845897e431aa874fL, + 0xcf51cd7e8cd6171bL,0x1825bd254f183e74L,0x361a0e53b5da3216L, + 0x16349308601e0405L,0xe3a40682f094f144L,0x0000000000000187L } }, + /* 16 << 84 */ + { { 
0xf762eaf77b602085L,0x3b6e62332d470147L,0xe1028d76d3cca3e7L, + 0xf3616dd8ccf39be7L,0xa35c4cc108e8f10cL,0xc413db107cb4b321L, + 0xeab81774c3249f4bL,0x3e7bdea7be83bc78L,0x0000000000000134L }, + { 0x86999f159d38d508L,0x0c9a990afb204d42L,0x27c4421dcb616f5bL, + 0x2cb6590d46ae31ecL,0x1a33f6ab309acf22L,0x13d99e2a80320210L, + 0xa13731b3341d1667L,0x7e0baeba4bfcc116L,0x00000000000001f0L } }, + /* 17 << 84 */ + { { 0x09ab412d6424425eL,0x74cc553061d3c0abL,0xa97378272d2c78c7L, + 0xb25da089e684f1f2L,0xcc4de4d2f911ef5fL,0x35b774b0c9b239c0L, + 0xf155c51653f100b2L,0xb58b7edbfde0c23dL,0x00000000000000c8L }, + { 0x176534ff99e35e49L,0xfdb72d0f3f22064bL,0x96fce9811926fa09L, + 0xcb7d400f587ef357L,0xe45d45ab0b2988b7L,0x7c8cc19e1cbac1b2L, + 0x8e43cd5f0a3cb693L,0xb06b000d3302b5a0L,0x000000000000017bL } }, + /* 18 << 84 */ + { { 0x544bbffad1aca0f8L,0x27a01dc60e8e570bL,0x23f27a75fdaaba6eL, + 0xf3a345d98626fd81L,0x439bd947a89f1553L,0xac968c8688113865L, + 0x7aa489c6aea28591L,0x386be8175e2e0423L,0x0000000000000198L }, + { 0x8a5f011f50c91fc9L,0x1f8371bf1c27ba7bL,0x7e54d98356505a0bL, + 0x108343e521aa0a75L,0x291295d453e9e9f6L,0x4398536d23f6347bL, + 0x864de21b44ba01efL,0x58d7f468737319f8L,0x000000000000012eL } }, + /* 19 << 84 */ + { { 0x90da648771065501L,0xcaa29c6b26a4582cL,0x9e4edc4a47ed26adL, + 0x42003fc1576cca06L,0x9781c36fa7d795cbL,0x61aa367436397bcbL, + 0x5a283a1bf9903003L,0x9e80ff413adf36feL,0x00000000000000c5L }, + { 0xf5ff812916855bc1L,0x65046899a776d1b4L,0x221fd17941fdef50L, + 0xf19ba04539dee7c5L,0xe6df5dec0fcab146L,0xcf68fd0a805416abL, + 0x710f16e2e4dea8b1L,0x703229d7af3ae211L,0x000000000000015cL } }, + /* 20 << 84 */ + { { 0x8622dc3ffa286a36L,0x51b25ca952dcd8b1L,0x3dfcd6c4753d9d21L, + 0x491384cce2aaad12L,0x016b19a77f8167ceL,0x8c03c919e7a09ab6L, + 0xc7ded8e3e8c1f60dL,0x318a4556c45fa24dL,0x00000000000000f2L }, + { 0x93decd4a3ff218c4L,0x84efb5a7f04927d8L,0x2f9ca93e1cdd14e0L, + 0x77747b0197364d94L,0xf0555c73f6ad16c5L,0x7fb0cf2f141826ffL, + 
0x7c2e2ed7b21f5339L,0xabcba46bf76ea20fL,0x00000000000001b8L } }, + /* 21 << 84 */ + { { 0x01a139a713943e96L,0xa3a5224c0be158e2L,0x25b4c06101cb5d09L, + 0xd4f1a98e51e99014L,0xad1aad0e8cd226c8L,0xe791062d81749b08L, + 0x6ecee7e89a223687L,0x4f3163daa063dc05L,0x00000000000000d8L }, + { 0x7f1940fa1f3aa8b7L,0xbedaf19c58b91285L,0x4c309e63f113920aL, + 0xcce150924a8a95e9L,0x7e8c2c756d1f1185L,0xeb5093d138e24ee2L, + 0xbb1a329aa0414d8cL,0x7521eff1265b457eL,0x0000000000000085L } }, + /* 22 << 84 */ + { { 0xdacb242420f495b0L,0xab9079b8e08cfb82L,0xdc0a584e764487f1L, + 0x541bbdd56d17b2b2L,0x75e7c8239b665315L,0x9e5256ca8b3cf073L, + 0x53ce698ca963c000L,0x746f0c775ca2949bL,0x0000000000000027L }, + { 0xac741a8b7915623dL,0x64426fbc4a5edc48L,0x8a52fba3aaef5c08L, + 0x3c353caa91018453L,0x1e2985f81a96fde5L,0x200f91cc90733d41L, + 0xe60794477db133faL,0x274563f14eac2054L,0x0000000000000180L } }, + /* 23 << 84 */ + { { 0xd573dd7a12d8cd30L,0xdce706a9ec2de865L,0xc9b322f1a4ab8aabL, + 0x0ce9268098773392L,0xa1b40d60bf5196e7L,0xc5cefc2d898f2d1eL, + 0x4fe80b90c18c6f17L,0x87eb7298e4fe5bcdL,0x0000000000000181L }, + { 0x1038d18b1c63bac1L,0xff56f0c8735b6d04L,0x0f9b85f87624d96bL, + 0x8d0da28ef1d48f88L,0x3cc190897e1f8316L,0x75114fea47058d44L, + 0x8f7130d118e224edL,0xa0f4114bbb6215f2L,0x00000000000000a2L } }, + /* 24 << 84 */ + { { 0x5dc39d107da4f65cL,0x0973b4af52e2bdf1L,0x2e35974b195c5d3bL, + 0x2321d8c5de0856b3L,0x9b218d518e19f2b6L,0x414f483cb9eba23dL, + 0x5eb4d875d87c2c14L,0x8c3693144becdcf7L,0x0000000000000180L }, + { 0x8ce9961bd78615e3L,0xcb4dca5b7877078aL,0xa082b950067faf84L, + 0xbe4ef854a1ea4fe8L,0x1a746d028ebd8cd5L,0xfcd8a6be58ced7feL, + 0x5afef0183e2b39bfL,0xfd07234d1eb20debL,0x0000000000000028L } }, + /* 25 << 84 */ + { { 0x5f8ee39ad41d6132L,0xdc76ed945f63e2f3L,0xc24a406ab107431eL, + 0x8b30660eca996341L,0x5673b95c20747fc9L,0xe09168a8b0bed38dL, + 0x6120f80719a7ab41L,0xaf1f3376e6b66ed6L,0x00000000000000a2L }, + { 0x7c535035491f6023L,0x7d70019de7f908d7L,0x92f8ca9996e33046L, + 
0x18c5df345fc7f094L,0x38c7e1eabe02b829L,0x7b6ee9b4cc301d3bL, + 0x55f41778b14d398cL,0x52a275ff5b3f124dL,0x0000000000000081L } }, + /* 26 << 84 */ + { { 0x48df4f176e539400L,0x6433abfeac7ec090L,0x9c935918c9474b64L, + 0x472fedd0406d83e6L,0x42cc03c72592c93aL,0xff2ae77db510b8b0L, + 0x22fb8dc513ebd07aL,0x3f093c2be656d363L,0x0000000000000016L }, + { 0xc46f526fd725a2dbL,0x95cef0f32be69c0cL,0xb941c71b329e02ecL, + 0x37a8cf5fe0fc49bcL,0xe5d012a888a9e033L,0x0a90a0c07d701e97L, + 0x4d8ce592bc2ddfcaL,0x4698c015e01f5349L,0x000000000000005eL } }, + /* 27 << 84 */ + { { 0x97ca07ca64725780L,0x901c76f4cdf4cb59L,0xba0472400a8dd50eL, + 0x5a6ffd1da8028447L,0x438d0474cd8e128fL,0xf92f83db9e7e41c5L, + 0x9504f799ad95ab9dL,0xb2df6f6578f9ca93L,0x000000000000004eL }, + { 0xae2d32fcceeb8c6aL,0xd66683f2eccb6ad1L,0x2132bf4fd0b462abL, + 0x65d7f312152b21c8L,0xc5abfb1aace7ab8bL,0x3c88dd8282ccd06fL, + 0xd1916201e023fe4dL,0xce3e0f140f882c2bL,0x00000000000000beL } }, + /* 28 << 84 */ + { { 0xa1a2d265436f1191L,0xa63ecd90846e768bL,0xce361cce96880f0eL, + 0xbb06470312cc9bb7L,0xe58b4cf9b6d5d885L,0x711f490d872f791bL, + 0xa31d70221b61790cL,0x1c0231435e010dedL,0x000000000000016cL }, + { 0xc4833f45b9f2ae47L,0x39556e38e6c5aad5L,0xe7889ba4714e6359L, + 0x7d4fa851edaf5d7dL,0xa860fdb59de3fcb8L,0xc4a44f663b213dacL, + 0x843b20da5b662002L,0x0e4569247cc1f364L,0x0000000000000194L } }, + /* 29 << 84 */ + { { 0xa1f3ae570844c579L,0xff79de39a6263761L,0xc2bc07f0a1ce54dfL, + 0xf6f36b5bc7a75e1fL,0x37ed63e547649754L,0x0bbee79b9e40b603L, + 0x3111770ad1396489L,0xa0be70b7655f8f80L,0x0000000000000118L }, + { 0xf396b09dfffbe440L,0x5c64b53ee98d45b7L,0x42aa83ff599b7ea0L, + 0x4130842825c6b232L,0xdf4377f764153f28L,0x7253ee06ddc00b84L, + 0xd6e3139b30384ae9L,0x47a15b0b5bbb7451L,0x000000000000002bL } }, + /* 30 << 84 */ + { { 0x817f91e777960e81L,0x16ec79224e9251d1L,0x20edc99036eecb38L, + 0xd1287d7e6d0c4b7cL,0x9a98e31842255d2aL,0xc5d8fdee1cbf6b61L, + 0xf4aa4eed4b8c971fL,0x506ed9b587a96b0cL,0x000000000000000fL }, + { 
0xffb1cc83853daa7fL,0x650ab9962aa0d0a1L,0x1d1a02515ca4f753L, + 0x7e3dbe541252c13bL,0x256550cdffc132b9L,0x0c01f3a681cc2164L, + 0x174f7145eb5741f8L,0x6d134e21b5f4b761L,0x00000000000000bdL } }, + /* 31 << 84 */ + { { 0xb2113429b557c0e5L,0xa95168cadc18b438L,0x64839ff62228b40bL, + 0xc0d6a854c63f11a4L,0x80502e786ee1d154L,0x872c5a6d432e45d0L, + 0xeedcdd06c62d1651L,0x8ecee7ea2f605a64L,0x00000000000001caL }, + { 0xa5dc211b2d708b77L,0x4e92269ac396cd5cL,0x384e400c93709f53L, + 0x48afd1bab372eb3eL,0xc189796872262e2aL,0x0fc769ec4a02e904L, + 0xbee107dfe82e06b6L,0x47b15a79d838f656L,0x0000000000000012L } }, + /* 32 << 84 */ + { { 0xb6675f53da1ef419L,0xdea5203bad0b5824L,0xf882dc2fcfff1ea2L, + 0x8ef992e408eb5b24L,0xd80e2ecfc3d2321fL,0x4ca7becca5f96675L, + 0xe2ce41b01e1a9196L,0x9b3d81319890d917L,0x0000000000000094L }, + { 0x85c66efe87c9c810L,0x66fc21bdf644e538L,0x2b90cdb5e4835d87L, + 0x9a60f2140f91258fL,0xb60eb76484bebf7eL,0xebe2c36f96b0614bL, + 0x95acc92792a11210L,0x31101643f35ccb23L,0x0000000000000101L } }, + /* 33 << 84 */ + { { 0x1bb898741403ae70L,0xb71332db93c5c5adL,0x8d376cac631d9843L, + 0x6a64f3d5a0d90788L,0x65f635c0b16fa167L,0xe8b3efe222000a88L, + 0x4c11db4472cdd206L,0x64fd7c85794b4affL,0x00000000000001e6L }, + { 0x360185c79563ef6eL,0xedeb5d8659aa49f9L,0x012d2b3e6927b587L, + 0xba435a3d7c25729dL,0x222cea98cf3ce39eL,0x8fa5ad445dd816f1L, + 0x4d2112b530d8033cL,0xb60209556011d8d8L,0x0000000000000099L } }, + /* 34 << 84 */ + { { 0x63f01bdaa71c3878L,0x3d4afb465cf66f74L,0xf9583ec1678f8f6eL, + 0x9cbcdf8b63daaa0bL,0xf342f44f57521779L,0xf8840fe19fc0e1bdL, + 0xc9eaf6ad31105808L,0x33c51a393f6d43e2L,0x00000000000001b6L }, + { 0xab9720cfd10fa72dL,0x8df3a2db5c0d0c52L,0x99fcecd7c197aa7eL, + 0xea53f54b37c93ddaL,0x6f185d4115d2837dL,0x19f0b7534b1e1f08L, + 0xfe2fb6302131d7f8L,0x72d9a3561a76d7ffL,0x000000000000019bL } }, + /* 35 << 84 */ + { { 0x437f79d375c6b34eL,0xc0e3f217b9eb1ad2L,0xe876fb20b70a0c11L, + 0xf769ec0a5ab5e17bL,0xb15cd92e0e3e8809L,0xa0487df77a2baf4eL, + 
0xcc0959837cf8334eL,0x1d8963d1c44ce5deL,0x00000000000000eaL }, + { 0xea4053f0120c4d48L,0x15f63fd5cac7a63aL,0x6a390224848ad40aL, + 0xec32269d76a755f4L,0xcda608f2eefe1c64L,0xdcb61661d558b4eaL, + 0xf98391a0dc807487L,0xe633fa599e31b651L,0x00000000000001bdL } }, + /* 36 << 84 */ + { { 0xb964e249e324fb45L,0x968e1e694d8f7829L,0xe883e2ce4313e21bL, + 0x62f2f3a5c0e654f9L,0xb2d921e6c6129771L,0xbb10882246ee4248L, + 0x27fb3a768f8b0e62L,0x3a49d6a6d36ce785L,0x00000000000000c0L }, + { 0xad32676fa616ee2bL,0x73b1140bc4840882L,0xf1464ae8588be8beL, + 0x24d0cf2d0a9bb743L,0xeae9e1c230c88289L,0xd8f78a5b642331edL, + 0xd5010f57537f40f2L,0x0a936df89609dfcfL,0x00000000000000ccL } }, + /* 37 << 84 */ + { { 0x97f48719ae3ce48eL,0x54690b991d17ac8bL,0x5c9c2f04b5e64622L, + 0x9e62f3506da3c286L,0x153fcc9a51798c01L,0x9d36b1d6f1674c75L, + 0xa7cef42f10977d7bL,0x00610128c0bbe827L,0x000000000000017eL }, + { 0xbe9358d9aaef8892L,0x53abaf01b2af87ebL,0x2b4001b86d6cb1d6L, + 0xa14df2ed1faf6b9bL,0xf88044d032289473L,0x83b6b3499918fd58L, + 0x52115a9d8fb5d528L,0x9acd77c8a0bf9026L,0x0000000000000191L } }, + /* 38 << 84 */ + { { 0xc91a51c7040a8d8fL,0x3cc49d9661a7ef84L,0x80b29df6ee19d30eL, + 0x5846acc4c76a6aa3L,0xf59e5c8b0f6e605aL,0x59cc663742788224L, + 0x2573ab2034f423d5L,0x80ee46c160e18d6fL,0x000000000000017bL }, + { 0xe7ec97098046e552L,0x492ef25050f412aaL,0xd4bcffad10a7a752L, + 0x6508108fc376b144L,0x977ff961ba934045L,0x07d99b2e5dfcae1eL, + 0xc4fed2e4aa955090L,0x294c7669e1d608b7L,0x0000000000000067L } }, + /* 39 << 84 */ + { { 0xccf1f6c5ef4f2f8bL,0xacbd47025a3525d9L,0xb3ca26135fecadcaL, + 0x46fc46a340358ec3L,0xd3e61faeea406aaeL,0xa7abaeca6d70ad8fL, + 0x53468d6374115258L,0xb276835206ffccd5L,0x0000000000000000L }, + { 0xa424a15acf32d90fL,0x28766c229c8153d6L,0xbf06c575395ba3bbL, + 0xbd027833e7a31dcdL,0x12a012a9eefa9763L,0x951bd91f08eeecf7L, + 0x1b2a8bbf81cc733eL,0x46edd7f005bc357dL,0x0000000000000198L } }, + /* 40 << 84 */ + { { 0xe4e1851f8482e5f3L,0x41c94a2d3be00950L,0xbdd52c7712e434aeL, + 
0x985367cf5700e59fL,0xc9368d83bd1a3ee1L,0x7be0cf37f24fd5acL, + 0xad4cb8dc8c53e814L,0xfa2bf5414d59b923L,0x0000000000000100L }, + { 0x205f070765c46cf7L,0xe87fc3e236c06ff5L,0x40686149364e6680L, + 0x3e7a4dba44aea566L,0x1a43cd99743158d2L,0x8b6d491ef63802a0L, + 0xd4956a9d57ffabb3L,0x0416326261950885L,0x0000000000000181L } }, + /* 41 << 84 */ + { { 0xb1e98d6fc192467eL,0xed8333a1b0944aa3L,0x306ef165d1c49f1eL, + 0x707bc71d81d6b7a9L,0x29be1c1371882a29L,0xcc9c8eea35825332L, + 0xfa4ea1ed6d1089d4L,0xcef289b15f949cc6L,0x00000000000000b5L }, + { 0xf99439420c8b490cL,0x2a4684215ca1924aL,0x5ddce0e570f4aec7L, + 0x4b380387ab2f1d5bL,0x9da44a9eea16b553L,0x8f497cc8d5cc8e78L, + 0xef3ebf1567627483L,0x5686bbe0bbfb1b54L,0x00000000000001b1L } }, + /* 42 << 84 */ + { { 0x4662d28ec7e6847fL,0xe545ac9ad8ddd128L,0x2172153149f08bf3L, + 0x88bbc0079365959aL,0xce2248bb82dca136L,0xb666d008f10e0af2L, + 0xd415bc8db5c51813L,0x3651b829b851a49aL,0x0000000000000011L }, + { 0xe1ba9a43aa6302e4L,0xfb903793436b8d1eL,0xcdf4d3f7f178a0e6L, + 0x628bbcbbdbcf158eL,0x18e2270de95d3b9bL,0xa5a9618a9f902344L, + 0x41faa409a5c86b2bL,0x8332a0d2a1a1c2d3L,0x00000000000000c4L } }, + /* 43 << 84 */ + { { 0xe886e4cd45872d57L,0xee5bd4078124f950L,0x28bbc75fd3ba8e8dL, + 0x3fe2689b2ad21b8bL,0xf73d1b63843923c9L,0xe845cc510bcd7064L, + 0x6744555d4cf3485fL,0xd3ec166fa5a608c8L,0x0000000000000090L }, + { 0x1b78f5d6c66341adL,0x6652224f03d0da0eL,0x85b069c5afb3147aL, + 0x60fd70524a920fedL,0xb521aa799812ba7eL,0xada36124a15040c2L, + 0x2c982c738205d28fL,0xd4e80bba57750491L,0x0000000000000005L } }, + /* 44 << 84 */ + { { 0x5010064a8e955d63L,0x11caef9eaaaa54d2L,0x3de4eae733f1f9a9L, + 0xd559a26a9a5be2dcL,0xfa61002e0022eaadL,0x156c011675f376cbL, + 0xa67586806f193ebaL,0xf92ce94d44b2579eL,0x0000000000000191L }, + { 0x083cc46c2fb67bd6L,0x4f3c6ca21f06d1adL,0x0ee38f3985b65aa6L, + 0x093adf465c42e7deL,0x31fbdf11aaeadb6dL,0x6be3fc78b115c404L, + 0x7e385a3a7716f591L,0x18bda694c482d126L,0x00000000000000d1L } }, + /* 45 << 84 */ + { { 
0x62bc238b9350ca2fL,0xab8c8c907ce8d14bL,0x330fffc7cb6951fdL, + 0xadbdbdf726228babL,0x97da0a346d86e130L,0x7fec5623a25aea42L, + 0x2922441ffd4335d9L,0xab2cd1dd1abb6d5cL,0x000000000000002aL }, + { 0x7d759a051a8bdc24L,0xddb3079c7d82c77bL,0x3eb72f6e47e40f15L, + 0x486385847b2f8e23L,0xe112ce8c04043e37L,0xb48be1b38da1281cL, + 0x0e10355b67f8bfd0L,0xf2b0c5434b82b796L,0x00000000000001eeL } }, + /* 46 << 84 */ + { { 0xaa7425c422ecf2b5L,0xc695477f535d1924L,0x49eaebfa73688248L, + 0x5d1daa50958195b3L,0x5e4b153e7c018a3dL,0xe42017076ee32e65L, + 0x96e3679190564352L,0x65162c18dfee3262L,0x0000000000000123L }, + { 0x128ef8610f1e66b1L,0x0b3b7f96243b98e6L,0x69252a134db87938L, + 0x6523a4f880af7609L,0x2883c39d8576bfc3L,0x1dc4d969021262edL, + 0xf1d2dc8acd5cecd3L,0xdcd1e118b56a9e1aL,0x000000000000007bL } }, + /* 47 << 84 */ + { { 0xe7bfb96f27c1c719L,0x1a7c15ff79d8affaL,0x5641d7e5ee475f3bL, + 0xbd5cbcc3a00e54e0L,0x18dc810eba44c745L,0xd7a3f693c8b3b3dcL, + 0x225735667779731dL,0x305ba162fa55fbf2L,0x0000000000000119L }, + { 0xa47c5ff2e6ca3cbbL,0xe4d73b0fbe4bf508L,0x7afb6b99d95ffd74L, + 0x0acb7b5955f22144L,0x162205d8205205dfL,0x8650751dcde671e0L, + 0x34f91ffb549b2867L,0x7dd0bef98cf1252bL,0x0000000000000047L } }, + /* 48 << 84 */ + { { 0x8f345bc8d78fd6f6L,0x35446be7bc603416L,0x22a05fc392954e8fL, + 0xcd51695bde517ca3L,0xd452c3c1c30b42e4L,0x71f0be42385847bbL, + 0x3e5604dafa66294bL,0xa32470c791efa7abL,0x00000000000000f7L }, + { 0xbe46e43f247c00dcL,0xa464ee494fa716e2L,0xf16522fa81328c41L, + 0x495048b6187556ccL,0x71224071c1682e5fL,0x5e33dedfb9f18963L, + 0xfce493c999a767d5L,0xc7f53c566ffa81f2L,0x00000000000000caL } }, + /* 49 << 84 */ + { { 0x7557d129fa582630L,0x0c29419fc3a8f93dL,0x05d98c11c43ea10cL, + 0xa42d9960b32cd279L,0x5477eb119d8e50e3L,0xd9df006a04037414L, + 0x1a8f5bce799cf5b7L,0xe5f831b3973a4d2dL,0x00000000000000b0L }, + { 0xb71509b84f43baa6L,0xde79572a94216d36L,0xe4e1219ff8c907f2L, + 0x718153f00c58e3d5L,0x375bc748b0b9c967L,0x74542658e9866665L, + 
0xbbc3f48935637c07L,0x7be87a6602bef3c3L,0x000000000000009cL } }, + /* 50 << 84 */ + { { 0x6f3a54f7faa6af3eL,0xb6b5c7a3b40d2a61L,0x74488f8b7e5e78e1L, + 0x9785934f32458d93L,0x816bb076497bb349L,0x402cd0102bab1095L, + 0x645a00a73952775dL,0x81c168da32ccaf83L,0x000000000000001cL }, + { 0xf349c1a8e63682c1L,0x05b8208312f3fdf5L,0x4a66e016a3c212ccL, + 0xdf57f4624c8e1a06L,0x0a58dc1f3ce46f7bL,0xc2cbccbdbcb4ff74L, + 0x4181af1fcd9816e6L,0xa74ac7b2d9bef6cdL,0x000000000000004eL } }, + /* 51 << 84 */ + { { 0xf4c94110e40800bcL,0xe7fdd5f32c1adf77L,0x34cdd805433705c0L, + 0x1e052bace29c6896L,0xac99d1e357668ffcL,0xf2493acb108fe93cL, + 0x2525654594876b05L,0xb5bdfba4243ab702L,0x0000000000000043L }, + { 0xca1a99e24e61dec3L,0x3156650fcadd4f7cL,0x154d58e7316df624L, + 0x168f675ec69f07dcL,0x2b6132aff1f6e22dL,0xcf26d9cecaa1da1eL, + 0xc90ebd6649614b29L,0x25bd2d23c33f111cL,0x0000000000000104L } }, + /* 52 << 84 */ + { { 0x0b53f0b81b4d52d0L,0x8c4441776ef01b39L,0x3bd2523e3ae80d38L, + 0x9bc7180022780af4L,0x790c175dfad470e3L,0x6b0901ca7899421dL, + 0x02f1fae7f277dd5eL,0xc3f2ff0fa752993eL,0x0000000000000053L }, + { 0x6a5fdd41467789ecL,0x008e446af6c45a22L,0x29b94285a8a2549dL, + 0x97b6f905b790912aL,0x813ee7d71d18e889L,0x24ef40cffb27f9c7L, + 0x17804ebd2e5b2ee9L,0x46e274b1a23b11aaL,0x000000000000009fL } }, + /* 53 << 84 */ + { { 0x37405c0f2ec88247L,0x0efb4320bcd3980cL,0x6f5208443300619dL, + 0x55ddd732bc2d59fbL,0x9492d50639fafeb5L,0x497dfa7083d6b3b2L, + 0x256fc9c3bff9aafaL,0xf098b2e3bc00553cL,0x00000000000001a0L }, + { 0x614f8b947c52e7e2L,0x2a37ad1ffc24205aL,0x6789e9ab7cd65901L, + 0xfc1affa3cf4a3893L,0xe85ebf5395afb652L,0x1f125e08d894d2f9L, + 0x84ff44210581d1a7L,0x94d9ad851141226fL,0x00000000000001daL } }, + /* 54 << 84 */ + { { 0x2a6c39229fd5362bL,0xe3f57b2c9603ea6bL,0x9bf96cfc06b377a1L, + 0x78441c5bfb189eb4L,0x92b2f03859b24728L,0x36df97db7aeaa65fL, + 0xffaf8dc7577bb75eL,0xfdf8a723f94a0cd7L,0x0000000000000054L }, + { 0x9d98b4e63b18f662L,0x006a23929f5527a3L,0x146d79edb9fcb789L, + 
0x1c2e41541a688f00L,0x9c3ad38a03723314L,0x20db0b042792f661L, + 0x3f8e5a9ad44e279cL,0xc1759eb7e4234ee0L,0x0000000000000146L } }, + /* 55 << 84 */ + { { 0xd93f2eb26a5a8f34L,0x03be88bf736696f7L,0xc567bc9fa6421a9dL, + 0xd3de6d94de629f74L,0xc0c799756b3804e1L,0x915c3c2d9fb8218aL, + 0x37a5348070dae219L,0x6846e38d019dc6dbL,0x00000000000000beL }, + { 0xf7e5952acfdfaa59L,0xadd136aebd5931efL,0x7d46541132cb2671L, + 0x185c32a081655330L,0x3c4b5ce10349dbfaL,0x1c3fed832ecb307dL, + 0xe46449a989c05184L,0x675cf8dd123d9b59L,0x00000000000000ecL } }, + /* 56 << 84 */ + { { 0x1278fa814bccde89L,0x5f87ab65ca1aa6b6L,0x3224aa7276b5225dL, + 0xf6eb6a973ea5228fL,0x54144015f78bb113L,0xa045a86710626f8cL, + 0xe3ff56bf38d5584bL,0x74caa5e96ff5aa1fL,0x0000000000000078L }, + { 0x8caf4a080806bd7eL,0x8362ba16c2f33680L,0xaa6661c891a5125fL, + 0x62bc9088a5e3e1a5L,0x5290b3f327286edaL,0x4af9e00621622922L, + 0xea41b46caf9f424cL,0xfeb0036d7f5a2f0dL,0x000000000000006fL } }, + /* 57 << 84 */ + { { 0xb092feff1b9c9b5aL,0x85d4a394ef27381bL,0xc9ae229257c413efL, + 0x81ef8394c16afc72L,0xba0aabca6f16377fL,0x973f71e7685374b1L, + 0x138a6e2ee8d2affbL,0x15c3a93400cff0aeL,0x00000000000000a1L }, + { 0xa37a19670e094a1eL,0xc8f1b7699121d92aL,0x1556bdb35570ebaeL, + 0x713b4c060db754c3L,0xc864a6fa87371dd4L,0x80539a886f6d0e2dL, + 0x69cdcf5f5dce8380L,0x175bcb246ce73030L,0x00000000000001fdL } }, + /* 58 << 84 */ + { { 0x463bdf0350cdc663L,0x00f4f1ac34ae8e83L,0xc4553a54d0dcb3a3L, + 0x765bdbb63611767cL,0xf523682af9a08ffeL,0xc9e2adb2d0c9ea77L, + 0x6dc611956b5ff81bL,0x45281bf570e2e065L,0x00000000000001c6L }, + { 0xef26494f7be2fd61L,0x06bccc4c1e97e9c4L,0xaf0deb0092a75c16L, + 0x4e67e0eaae8a4e84L,0xbb994c6fb27331a6L,0x9d9431e64250cc74L, + 0x1e6013cbce2fbad8L,0xfe2c5db4c8934179L,0x000000000000012aL } }, + /* 59 << 84 */ + { { 0xbf7ce9203eaeb879L,0xe4adb4412790ca4dL,0xaef87b4077d14c85L, + 0x9db9212d3d01cc39L,0x5d1080dd2db945cbL,0xd85c0a46a0ec741eL, + 0xa0db567c6f98c8ddL,0xbd5887fda6c88bffL,0x000000000000010dL }, + { 
0x3d55f50a2ce32631L,0xbe582d2664339e56L,0x4e2f8bd7ab5210f9L, + 0xffe002bf641284e2L,0xa5b24d9844b55b21L,0x35944f8f399a9eceL, + 0x78b96886bc8d155aL,0x6d2b068993da1b6cL,0x00000000000000b5L } }, + /* 60 << 84 */ + { { 0x752f2383f47d22d9L,0x4436f59ab058e8a3L,0x30d780e7be6327e8L, + 0xf7e393b5f00c622bL,0x46d05f6528533fa6L,0x9642c8a0e01903bcL, + 0x99f5c21312b7afdaL,0x3b740059a89e1edaL,0x000000000000009bL }, + { 0xdf0a0c70304f33d6L,0x458c5da833038214L,0xd2045ba526c1e684L, + 0x217fbdb9379ac190L,0xd94eb4c18e21ec0bL,0xd12612ebc3ace041L, + 0x5e37a0e595fe0e88L,0x8c588a87aadc9beeL,0x00000000000001c8L } }, + /* 61 << 84 */ + { { 0xb3ad16e907cffecbL,0xe901374a7619fc54L,0x65759e1cf2eee444L, + 0xe4070ff0f73eefeaL,0x6d124a8bfdf9f9a6L,0x9c8abaeafc9dd073L, + 0x9b32d3d77a8681c4L,0x3f11eec648f8390bL,0x00000000000001cdL }, + { 0xb0168093d60307c2L,0x7c6c4d9c939fd410L,0x56673b7ed7727cfeL, + 0xa94cd30a12c5f670L,0xed7ebb3137b094e2L,0xff2c49e15fa3a378L, + 0x484ca05395672337L,0xf003dfa5bc56bf71L,0x000000000000007bL } }, + /* 62 << 84 */ + { { 0x838aaa72aa1a8108L,0xcb69952741314f71L,0xb86e9b187229923eL, + 0xe4b3edd49f464608L,0x8771316c1b071ed2L,0xb07e1b084e5a8628L, + 0xc26653e1c612209dL,0x4b3bbf396bb8d7dfL,0x0000000000000182L }, + { 0x893144f03ca1a824L,0x78bd4476853ad53aL,0xe55634ba95927c46L, + 0xfff6f90115cb52a7L,0xf4277aaa05f678e5L,0x6c3177a1ec5503b2L, + 0xa0cf20337725d926L,0x7509b601bfb02022L,0x000000000000001cL } }, + /* 63 << 84 */ + { { 0x3c71f272f99ccff5L,0x248c71670baa49d0L,0xcb6b725da2ef88aeL, + 0x4a1a6badb5a5e92bL,0xd7a8a6680140c680L,0x07311eb78037e44dL, + 0x0eabe9b35a276617L,0xa7103657db352138L,0x000000000000019cL }, + { 0xb1c0fed884d7f562L,0x9983df5d634efe2aL,0x9ba4a7697ef18903L, + 0xccf0889b53fb1f2eL,0x8330a65c863a63e2L,0xc9763fe49141ee1fL, + 0xa9145ee450a81aeaL,0xe21cc944bd46f62eL,0x000000000000004aL } }, + /* 64 << 84 */ + { { 0xbf73b24cded2eb86L,0x102900b33ea89845L,0x1d88c5073857cdb6L, + 0xa939a76b9be255d7L,0x28d517522ff653b3L,0x6d8170467642df2cL, + 
0x148b45e157e353b2L,0xd565f776b34f0f19L,0x00000000000001e5L }, + { 0xca5185a2c4b5de1eL,0x527acebfeadf8e0aL,0x652b994b86479736L, + 0xe252209ad2f97a05L,0xc9b07fc9d6960578L,0x479ba949d264f983L, + 0xc80d32b5e8d9b938L,0x59280d580a6a7f28L,0x0000000000000177L } }, + /* 0 << 91 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 91 */ + { { 0x2db7055db286bac2L,0x0fce34c5fe2d537aL,0x8addcea5b95e9476L, + 0xf1bacbd41a7c7ff2L,0x28b7f0c9203fde3aL,0xdfa6cf793e844a62L, + 0x0107e9039627dc6cL,0x053f2778413fcc52L,0x000000000000011dL }, + { 0xf67e718bd10f90a4L,0x7dbcf1e00bc29398L,0xfd5e65d19130e9c3L, + 0x6149b3969c518e5bL,0xfb574bb4918e2279L,0x6c67563fc513415eL, + 0x60b90d7aba1b801aL,0x2239b9dfaf900381L,0x00000000000000c8L } }, + /* 2 << 91 */ + { { 0x2d2f9e08a2abb00eL,0x4c8c679d17648c6aL,0x52e2e9bab8520197L, + 0x7470a3cfe0898e48L,0xbfeb2a9190617e18L,0x247ea702f44de1a6L, + 0x7baf95b27b1604fbL,0xe9e81b78020c5856L,0x0000000000000159L }, + { 0xdeb81330da533799L,0xdc4de9f13996b5beL,0x6a8b7630d33f04b5L, + 0xa4554acb2bccad83L,0x7974bd472a7f5d61L,0xc481a752b01696d5L, + 0x6dfbefde8ca48da9L,0x2fda492fe7db0c27L,0x0000000000000053L } }, + /* 3 << 91 */ + { { 0x9ade14635a0a982bL,0xd31440afc586e5b9L,0x07bd1b34749ffc4eL, + 0x2f9bed8ea9f71f98L,0x73de5d52add7cf9aL,0x1b55d53a31a718f9L, + 0xf06707b8cde59432L,0x247e5a8deaf860faL,0x00000000000000d0L }, + { 0x76dcbffdf5131ee4L,0xc17d25f7d4e3d0eeL,0x195daf924a96ace8L, + 0x342d659305b1ca63L,0x4675cf349b40dafeL,0x1855efc293b0ec70L, + 0x94aacae1f878f477L,0x818b2f1a524aa9d8L,0x00000000000001faL } }, + /* 4 << 91 */ + { { 0xa2dadbb4cd046713L,0x63eac83e81afa390L,0x0d85b1ab422bc0bfL, + 0x9dfc0092d5895af2L,0x125b2e6c87ee0edeL,0x962988829eb070d9L, + 0x1a1178f85b0785c6L,0x9335c96bd4486f80L,0x0000000000000029L }, + { 0xe747e4433c5cb794L,0x50803223044fd597L,0x4bf3bcc02fc59de3L, + 0xd9f4dc3244245ab1L,0xbedf6330d2dd61e3L,0xdf8110a904d7c5a7L, + 
0x852f2205bd400d1fL,0x4595afaf9c6e9cefL,0x0000000000000111L } }, + /* 5 << 91 */ + { { 0x72eb4fb66672a844L,0x2ed80354a581749cL,0x456d603fb3a028e8L, + 0x82bb6fec3703c343L,0xec001da6c2967450L,0xe6930d3c01cb4b41L, + 0xd3a7073dfc88dab9L,0x28564308bae83616L,0x00000000000001aeL }, + { 0x7a6b143eef89965aL,0x3d75a3d3e25394aaL,0x7185b011164ed20eL, + 0xf6744be64334a31dL,0x9715be2f8a86350eL,0x8d3ad1d585ff2738L, + 0x1081a847a2b1ebffL,0x7918917b3d13f45cL,0x0000000000000030L } }, + /* 6 << 91 */ + { { 0x1f6fad01e435a290L,0x2233fb1ca1797bbcL,0xe5c86b07adec0e98L, + 0xa6c3e3b32c38f3f3L,0x148c7cc4be4d76ecL,0x9a269e568f91b601L, + 0xdd4fde651872ef0fL,0xac81696c8aa5366dL,0x0000000000000123L }, + { 0xdd10f5bfc06df67aL,0xe02f88cfb5e3d640L,0x67533defb63366e5L, + 0xb81b500dd4f7515aL,0x258048297ac6fdeaL,0x05e776b30c92e736L, + 0xd366e4abfdda7ba5L,0x1c88e25338a7d99cL,0x0000000000000199L } }, + /* 7 << 91 */ + { { 0xa8dc56599f91d149L,0xa923bb49026dc899L,0x2690fd103064ff52L, + 0x5633caf40d3450c7L,0x923c18b90f268931L,0x7e333305cc000404L, + 0xe4913381c2160055L,0xc8b61c0db37bdb04L,0x00000000000001e7L }, + { 0x8f3cfed118432691L,0xf3409d8e0bbd5e86L,0x8094f20d1f240ab6L, + 0x8ae2ba7484473b65L,0x509f55533d950c04L,0xb377553f306e84aaL, + 0x32863abfc776e80fL,0x7de67f5d788698b4L,0x000000000000005fL } }, + /* 8 << 91 */ + { { 0x9b40a31f03d3477bL,0x57e0e72e400294feL,0xe8aa7d3fe3e8af40L, + 0x6a3d54748dd4d7c0L,0x564564fc65e1bac8L,0xf8584769108c5fa4L, + 0xeb211eacdbbbb595L,0xe06b04dc24a6b7f6L,0x0000000000000197L }, + { 0xc7689ffd29bfb3d1L,0x7aee57f372378c9eL,0x007255d96efe861bL, + 0xb8d3f65f6cfbbfabL,0x44629a5fb6d3923bL,0x49e47cea97373f3dL, + 0xccefc3e7e5d385ccL,0xcccad6e68461478dL,0x0000000000000061L } }, + /* 9 << 91 */ + { { 0x65e1029251b47bc2L,0x050f0ef396e11799L,0x05ca59da588c0f6dL, + 0x3fe6edce2ff2979fL,0xf5d22ed3902d3268L,0x66f1fad3053dcf39L, + 0xf59a2c579c249cbdL,0x376c29df17ad6077L,0x0000000000000132L }, + { 0x023fb75627e9548cL,0xeca768ffcc4e8a89L,0x9751524855744e88L, + 
0x62133797b724a018L,0x047efbc1039f2eb8L,0x12605556a4f29411L, + 0x25f2a0fe026f882aL,0x58e45392f069b7f0L,0x000000000000018dL } }, + /* 10 << 91 */ + { { 0x8980df2d3699e35fL,0x9116ddbe7fee846fL,0xe8e6e54efa3d46b8L, + 0xb6c3996eb65bc109L,0xe549c8eaeffaee6cL,0x9553bb7b31410debL, + 0x3900d1cca660056eL,0xea4c30ef15f7dbf7L,0x000000000000018dL }, + { 0x0bf375e11b976fc5L,0xbfa9c68017e1cbe9L,0xd9c860a1a7478ff3L, + 0x3412562269e4e8e5L,0x0e98341ce1ff1512L,0xd320c6fe76eb4630L, + 0xa06cec281ea3e4a9L,0xbb00b6a08ef84307L,0x00000000000000deL } }, + /* 11 << 91 */ + { { 0x892e36ce5b432ad3L,0x65dcf3b180d272acL,0x4a5861edd686b1c7L, + 0x313b59e35fca28acL,0x1db0e208a29011f3L,0x4b91de51b632d0b2L, + 0x024ca341ce943710L,0x6427e1ac2e5db5ccL,0x00000000000000a9L }, + { 0xd6513c981850c1f6L,0xb93d2ced05479ac3L,0xe186562779609143L, + 0x8cfd054ed44fec67L,0xe60e2c83e580c3d8L,0x17f8ddb76f8b3a07L, + 0xc6deddcec0273a7dL,0x8020047a079b21a6L,0x00000000000000a7L } }, + /* 12 << 91 */ + { { 0x475427960f5bfa4bL,0x30d431dc7949b183L,0xf8988f286f450425L, + 0xd6f6e60dd3778a2cL,0x08719f9364d83e77L,0xc77516d1cb7840ccL, + 0xafbcd88077b2ccb6L,0x7037603dbfacc373L,0x000000000000008eL }, + { 0x7d1f72796e42ce5bL,0xc9c7aa48535c241fL,0x117dc1334fa7bc3eL, + 0x85e9129b83ac9251L,0xb0237b53de78fbb3L,0xe95c6512ab35c8eeL, + 0x8ba19c451f97669bL,0x9b9f0c8a7aab8881L,0x0000000000000127L } }, + /* 13 << 91 */ + { { 0x007dd941777f4caaL,0x61a0c071d02a6bbfL,0xc0651514126587eeL, + 0xee66732f97cb295eL,0x9c72c2da783373deL,0x530a0bde961281efL, + 0x595096121d87a2d2L,0x3c9c4775fe361e36L,0x0000000000000144L }, + { 0x190552d422a5f596L,0xee643d7b2beba873L,0xfb0ec2637529f7c0L, + 0x4aae3efb6677edb6L,0xeadd8fea23ac4ebeL,0xcc44798f791f6241L, + 0x69c5d73d0d94c9e9L,0xda1bbd06d7a37d3aL,0x000000000000008eL } }, + /* 14 << 91 */ + { { 0xc184af99246bc98dL,0xae516351e0904d25L,0xda6cc6c8a4fa486aL, + 0xfc9ef8fe33dd0f0aL,0x8981f399afd708fbL,0x655302032a22f104L, + 0x5a8d2895890718a9L,0x7f48ddf88c71713aL,0x00000000000001f0L }, + { 
0x2e16fac769a4fbffL,0x1ef4ff56c3abf535L,0x8f332c083aee6084L, + 0x312a01982b2ef3e8L,0xb53036685ba34302L,0x728716190f62a886L, + 0x9f6ff887fcb4c8bfL,0xa86874a9c3743566L,0x0000000000000180L } }, + /* 15 << 91 */ + { { 0x146f727ef7c671dbL,0xab579f7e0aef41c6L,0x7e4ba80ab8fe8727L, + 0xeab360c1711b17dbL,0xb47511fe2b3966d2L,0x62e7f396f8e65dffL, + 0x3d7a6cc5ea306211L,0x1f4b50346432ae78L,0x000000000000002aL }, + { 0x3aa75ca8e91c698dL,0xd1e99a5bf01868a2L,0xcc1cd1bbcb461835L, + 0x5c3cec62d521fb7aL,0xad85cc2cefc4153dL,0x9f85fe129a592b99L, + 0xb9e983d15f43c9f4L,0xb9597f273e56eaf6L,0x00000000000001b8L } }, + /* 16 << 91 */ + { { 0xc8be45a27fe18629L,0xa5c577aaaf2dd971L,0x6e3a67f061570e7bL, + 0x2cfc162946c8f13dL,0x6a05848ca2754906L,0xfd319607d0da74f2L, + 0x0941d89d65b65761L,0xe3d2c4005821ae08L,0x00000000000001d0L }, + { 0x3664ae19a334b91eL,0x21285e386768d183L,0x0914545399758724L, + 0xd3debfa9ff52fa6cL,0xa1d9cb9ad614ce9eL,0x9c371626452e7305L, + 0xcb2d7e9229277a97L,0x13171ad82f307a9aL,0x000000000000010eL } }, + /* 17 << 91 */ + { { 0x2734f7ee0da19241L,0x55c59dd0eea444d4L,0x41e48f0d6be546beL, + 0x2d64bb8923f87d2cL,0xb0e56af43a6b2d00L,0x3393eb7224e16c9eL, + 0xf318640a2c65adb4L,0xaf23c472de501421L,0x00000000000000f3L }, + { 0x67c51d3ea5bc2d44L,0x4d7c5ba436622944L,0xb46b82ed7d4c50ffL, + 0xe6e6322325bd5c42L,0xd6c35c121416508cL,0x8897538f107cf747L, + 0x3117daae4b44b6e4L,0x6f2813a36be5042cL,0x0000000000000068L } }, + /* 18 << 91 */ + { { 0xa810227e65dad623L,0x01fcda5624662210L,0xe8543511a9154721L, + 0x2914e67ce47aeec9L,0x85243b1645a2aec8L,0x1055599c4db1dcccL, + 0xd8c2e07a3d1ee37aL,0xb240748691ff64f6L,0x00000000000000e3L }, + { 0x89d53742c47ad646L,0x887493ed21d95cc6L,0x9ec6e14c252a6effL, + 0xdeaa3eedb7c7af4dL,0x0b5ceb7cdc2be798L,0x95f753c3e8559739L, + 0x3ca3a7c8a188513cL,0x4f9912cfdb1ccc8cL,0x0000000000000085L } }, + /* 19 << 91 */ + { { 0x8d3434eb19505b84L,0x75856d66d34ed7c1L,0x2729b5cb9dbea3edL, + 0x01f91132230db209L,0x9b6962d157a8fccaL,0x506af49aacc7b180L, + 
0x29049874256b28d1L,0xcafca60eac95cde4L,0x00000000000001f6L }, + { 0x80e8ade36c3f0557L,0xd1f00017614e9fb2L,0xc24e87cdec075c35L, + 0xf5c0c19b0803d1bfL,0xc8256d3c56042212L,0x77309bac1e9e447eL, + 0x41174c583958b51fL,0xb48e28ca88a66bf7L,0x000000000000011bL } }, + /* 20 << 91 */ + { { 0x66b11de35097807fL,0xd97001688956c6d6L,0xb44ab980f789112eL, + 0x8d84c890d8065a6fL,0x68bd1eb836556220L,0x2d78a2f0600ab712L, + 0x877475e699e8428eL,0xbaf046b5bd40b22bL,0x0000000000000054L }, + { 0x406ca5189164af2bL,0xcfdd3ad4600c8b76L,0x8b443270cb90ec32L, + 0x9f021a66e20d198dL,0x507f551ad2177737L,0xf1edd1ff8e49231aL, + 0x8f30cab6007eba51L,0x7b91be67d8676f16L,0x000000000000001eL } }, + /* 21 << 91 */ + { { 0x8135ca7369086939L,0xbf17399a0e19ce3dL,0x73f0d2c021d13a4eL, + 0x9e4c35ad3729460fL,0x323cde3e58ba36c1L,0x4e3c7f3ec77b972aL, + 0xffa35319fd8afb3cL,0x36171d68c30e0685L,0x00000000000000e9L }, + { 0xea5e35682a5c8115L,0x6c46f6048b0154e2L,0x7df48b8177c0512dL, + 0x9fde5f493a09ececL,0xb1b004dbc5c11a60L,0x0c476649dac83f4cL, + 0x1a15eea3cbc8f2e0L,0x993e1a33ee486143L,0x00000000000000acL } }, + /* 22 << 91 */ + { { 0x2bfa1554b203d524L,0xc6d86d8c64dafb80L,0x26807c023ee93e42L, + 0xa384b2f0de86dc52L,0xc5816a9f5725df01L,0x89bbaec758fbaae1L, + 0x2f452eb60957c4acL,0x119f0291913c1a72L,0x0000000000000121L }, + { 0xf173df1d2ef523f1L,0xf7491b475c934cd7L,0x83698ba3965c47e2L, + 0x7c88c285287406c3L,0x241e36fff3d293e3L,0x43c728798f6a23afL, + 0x0f6571eafb1ed946L,0x5a627fec5514fd3cL,0x00000000000000f4L } }, + /* 23 << 91 */ + { { 0xe8dcffc39c33a23cL,0xeec6350e83037ff6L,0xa373f6d772695216L, + 0xe4df9cb0099d5a16L,0x0173befd9af7d58fL,0xab067d55382cc41bL, + 0x3d34f02eb8a58041L,0xb209e9f323f980b7L,0x00000000000000fbL }, + { 0x1980fc9941c0bd9aL,0xa60001622a593908L,0x2a1c27e0070bfb85L, + 0xcd18df8fce5c648fL,0x288f117d5cd7f21cL,0xcfc5495f8bc00633L, + 0x39b2c847ff32ad8fL,0xebde0138e4bf5e9aL,0x000000000000010aL } }, + /* 24 << 91 */ + { { 0xec04ae998e006e2dL,0x923ac4fcda7c9902L,0xe91a96c89ec9a28aL, + 
0x28004a30377c556aL,0xe82f82d4be756bbaL,0x4e9c4655155143b8L, + 0x667c4acfadd3ed92L,0xdb590aa6c2d95a2bL,0x000000000000014cL }, + { 0xcb7f053132237847L,0xa06ff9854e110073L,0xba69f4b935676e46L, + 0xcadc7250f1a9b32eL,0x10cc7f2281f0bbbdL,0x5967c22300cb192aL, + 0xc93fa9eb9b9c2e28L,0xb1d3b84416699f51L,0x00000000000000a3L } }, + /* 25 << 91 */ + { { 0x90d4d5b4123e7bddL,0x05b161ba1ac34bc4L,0x1298247b24947c92L, + 0x914a1d9003dbb06cL,0x8f033b39a0a6e2f3L,0x625982a2e7492fa1L, + 0xe46b4b027b73d40aL,0x8b9a26c0eef66bdcL,0x0000000000000008L }, + { 0xe4754290054f7507L,0x13a6f03cb0598a51L,0xece11d437539cf78L, + 0x3b7853f2ffc61cf0L,0xf7ffdf3512e72e37L,0x1c67b7d797dd7ff3L, + 0xc653bc89ecdd5b21L,0xd831303a1ec35d8cL,0x00000000000000acL } }, + /* 26 << 91 */ + { { 0xfba33000f6b1ab74L,0xa9e1f2f50c96eebfL,0xe84f0eda5c33000fL, + 0x33789a12153679c1L,0xda4629f589209ee2L,0xce3785b20e22fe7bL, + 0x53183b466f0c0ec3L,0x6fe1779e5285b4bcL,0x0000000000000056L }, + { 0x1eaf8ecca5d768d3L,0x224ee37310c1561aL,0x519298e8fc7af9f7L, + 0xcdca25634af7c25cL,0xf4096cb4d862bfefL,0xd49b6f4ff5223cbcL, + 0x678d2e9f1ac83c79L,0xaadf01b78f1ddc04L,0x00000000000001f6L } }, + /* 27 << 91 */ + { { 0x75ef961ad84fd84cL,0x0012c3ae9bfb608eL,0x5a1f3b50c70e020fL, + 0xd81bbadba2abf2c7L,0x2869fa049c0e2b8aL,0x4ff238ccd4d55cb5L, + 0x11663a7ff9176721L,0x60c84c2b1764febeL,0x00000000000001b2L }, + { 0x937b3b185215da65L,0x81b22f969dc29782L,0x7c0c13b1cbb740acL, + 0x830290ae8e2779a6L,0x27d939bfe3e00368L,0x0e2ecf5b729116afL, + 0x69e0ac2bcdb96f4cL,0xead47fed70704210L,0x0000000000000168L } }, + /* 28 << 91 */ + { { 0x27690556b119ec98L,0xa07fc4e8b33cdd83L,0xdd70a4e6959d2560L, + 0xc723b197a824df32L,0x387f408b6b104364L,0xe36f86cbda9c0a19L, + 0xee5df4d2ef6744a9L,0x8c583a3ff56ce630L,0x000000000000000fL }, + { 0xa32cfc657cb1662aL,0xa698c83930e77387L,0xc3eacb751453a17bL, + 0x86e25cca10ce2530L,0xbc4ec77ed137a2d7L,0x4c6746ca235dafc3L, + 0x8b168ec927d5122dL,0x0c75daef3d1a30faL,0x00000000000001caL } }, + /* 29 << 91 */ + { { 
0x5cc5c9119b2cccf8L,0xc8000ca65c1cab93L,0xd80663f5fef5ea9fL, + 0x62906368e60a72beL,0xd85016621556648eL,0xcc2b6f93bb48531eL, + 0x21a9b53d7fbf49e6L,0xddd378ac7b9c185fL,0x00000000000001e9L }, + { 0xcb7033f7f7fc334cL,0xde062112a5b5d78cL,0x509a02a1701bf69fL, + 0x64339bc557e490a5L,0xd4bb88c029ce5872L,0x20944f831dd17464L, + 0x7d5c190934197eb0L,0xbcf57d5faaad6be3L,0x00000000000000e8L } }, + /* 30 << 91 */ + { { 0x3bb43beb00ef2a72L,0x0ea2ee348a65bed6L,0x4a07c01a25cf2864L, + 0x81f50428def2d374L,0xb5b902f731d5ce65L,0x740f1ce1478326b1L, + 0xaf7a25606905c052L,0x4e6e4f6e1fc70939L,0x000000000000008bL }, + { 0xc32ef5c1b1edaa65L,0xf0ab55ac28e6d983L,0x954900d5708aceb7L, + 0xefd9f5c13a326bafL,0x05fd87d8be1c4709L,0xa40702b17b6cae96L, + 0x9263a19265344d33L,0x141dbc82576f073dL,0x000000000000001bL } }, + /* 31 << 91 */ + { { 0xb15e9545d626ae06L,0x3a0722a1ab0b410fL,0x832cba6776772e76L, + 0x015d5fd9bc34d2f1L,0x067102cb87c77de6L,0x0469b3fc490d84b1L, + 0x5e2f5a08b3462225L,0x352eb90a0d1096feL,0x0000000000000113L }, + { 0xf9ef4a525930c83aL,0x5808cd0ba38b4104L,0xdbff69203cdabd8eL, + 0xb7881613d88a14eeL,0x79c0026b1b096991L,0x5e69a28a2c82ffa3L, + 0x607809fee1524e82L,0xe0f5ec4acfaec5c0L,0x0000000000000187L } }, + /* 32 << 91 */ + { { 0x336b06222feda8acL,0x4658ae220c0b391cL,0xe12f058f1f2119d0L, + 0xa1f96c94495525a1L,0x32efc7aecd7d9f17L,0x34e4fe2abea7a5ebL, + 0x0efd24927135c5f5L,0xd7a04eb2b83241ffL,0x0000000000000036L }, + { 0xf11b8e769ca40933L,0x1951cd54d06857c8L,0x0e9124f629f13914L, + 0xe6ebbbe33d67d0cdL,0x5483a2bcbf16f9c2L,0xd4e4ea52255ea8caL, + 0xee0db0b5734eef50L,0x54434b5116207775L,0x0000000000000017L } }, + /* 33 << 91 */ + { { 0x553daedb5944343fL,0x3606ec07d4d0ffc7L,0xc6524155ca243c3bL, + 0xd58cb6991527de09L,0xeede8aac13bfd2dfL,0x21ba74aff923c663L, + 0xc82663667eb66df9L,0x5afc421916d74e8bL,0x000000000000012fL }, + { 0xe4988134c59c0bfcL,0x068d052cefd2d387L,0x44132ab7b193eae2L, + 0x1debbc87bebb0f7cL,0x2e1126dfd9772c79L,0x865dc53f3bbb504cL, + 
0xe1701501b85e7361L,0xe2a6ef6ee258a516L,0x0000000000000094L } }, + /* 34 << 91 */ + { { 0x76fd2af6bbcf1ef0L,0x74f728d1582e6b95L,0xcb8aa6bfdda9b1feL, + 0xb9f03364c5708bf8L,0x84ebfa3ca5d5e4d0L,0xb007dc5e583e8a91L, + 0x746afe5e82e7bb71L,0xe320d41037085932L,0x00000000000001f0L }, + { 0xd70e0d63282273ccL,0x5cf2e2c3305f484bL,0x04768257c5138617L, + 0xb603b06f9f72b38aL,0xbc2f8d4d238eeca2L,0x16fd9071880b51d8L, + 0x4c13664ff86838b2L,0x7de53d218a6ff68bL,0x00000000000000c7L } }, + /* 35 << 91 */ + { { 0xd9d533d44f5adf8cL,0x3734d7f8177b8731L,0xc155a11279082d46L, + 0x9fee9bee55c670c1L,0x4792e233bb026778L,0xbc36eade50fe0facL, + 0xff70c0e73da20a58L,0x417610c538fdb880L,0x0000000000000192L }, + { 0x05cfaa1554127b14L,0xa92f93dee146b94aL,0x80d3ad15e5af320dL, + 0xf6fdf30cf9b93477L,0x56e1e73f8912b2aeL,0xbcde8088a81d4249L, + 0x8711ca7358fe2d0fL,0xc2c9cd60328932f2L,0x0000000000000124L } }, + /* 36 << 91 */ + { { 0x657b00c3c6659533L,0x7c4ebb89d4f40b8fL,0x240ac0f607952574L, + 0x67788f2a2d1103acL,0x04a55a0c15dfac39L,0xe516c9830d854801L, + 0x25949bd2eba13079L,0xa603149e1cc2ef88L,0x00000000000000e2L }, + { 0x394021ec0c1c2efeL,0x06c411c6ad56f16fL,0xed5735a23c85892cL, + 0x744c27e3a7749e09L,0x48ec0665ae7e3e20L,0xf9b4163bac7bd334L, + 0xcd5581551a4bbacbL,0xd6b52b5459cdd2baL,0x0000000000000167L } }, + /* 37 << 91 */ + { { 0x39a2354ea18a42f9L,0xa6316bc8cc4b9ce8L,0x45cd304831be097fL, + 0x250cf0da411e55eaL,0xaec0f4a08756aa97L,0xbbed2c68e3cd749aL, + 0xdc1516bc6023d468L,0x2031a0cf013b7d7bL,0x0000000000000198L }, + { 0x66bd2998907d5626L,0xf84be0f5ef405159L,0x47913cd8a13eea10L, + 0xd98e273609412514L,0xd07e8ff1d7eeebc4L,0x2ccf81bb692ebfb6L, + 0x98259e9beaa3282cL,0x1e73d6761979dfddL,0x00000000000001dcL } }, + /* 38 << 91 */ + { { 0x17915cc92fd4fbafL,0x5ccfa56f987a738cL,0x9322bbcdd4f04e2bL, + 0xd11dc12a3d80faf3L,0x8a5b1f959751eb1bL,0xa0944deec74e364aL, + 0x7830f950bf5583f7L,0x3ccabd2290302a78L,0x00000000000000d4L }, + { 0x97af459227945905L,0x3e6475f8c89e8478L,0x07a75003061caa42L, + 
0xf307f63fe96c6ee8L,0x116b590fb3ac7eddL,0x8cc5129fcc572d4eL, + 0xfbb1956538070b86L,0x142309e12214d88dL,0x0000000000000151L } }, + /* 39 << 91 */ + { { 0xe87d85e1849d8d67L,0x67f27415f71ba46bL,0x2a469ef7261c4c7bL, + 0x42b25fbb196b3a59L,0x4a06c13c64ab6781L,0xb5fb64097145eb2bL, + 0x2ef16fec84d837e2L,0x1336ee80fc35be92L,0x0000000000000126L }, + { 0x0950419772173df1L,0x08ed9987c1ed4d44L,0x60f415854ec72dc9L, + 0x1e96109cdebdeb7cL,0xbc5d5e01d4e266dbL,0x865f2779b97437abL, + 0x60aec81e4603b596L,0xbdff4a3579f89a7eL,0x0000000000000133L } }, + /* 40 << 91 */ + { { 0xdaf096439768c17fL,0x3fea42473fefbb34L,0x6219c7a10b58d304L, + 0xb1d22efdcc27f099L,0xc40b3e4087610d7dL,0x5130d3dd8f8e950cL, + 0xc50fef134b6433abL,0x8694a6fbe79d8b49L,0x0000000000000034L }, + { 0xbe5028acda708745L,0x573354a3d2e9e2b9L,0x65023a39bda28cccL, + 0xfd3f235151c040dcL,0xe8674a7754e038e6L,0x6c2a5e75ebadf602L, + 0x26ebf6e9a9330218L,0xe564a069001c8135L,0x0000000000000022L } }, + /* 41 << 91 */ + { { 0x93b89ac719647427L,0xf2e9800662f60cd8L,0xd17928788b4a860aL, + 0xeca75f8aa6f7f822L,0x553e987fa08afc7aL,0x68c7eeb375168c32L, + 0xe2d7c361f02866d8L,0x4cf488d01f457ef8L,0x000000000000019dL }, + { 0xb02e2aee5dd3a10dL,0x399ac0b774d7f78fL,0xeeb5d6194dadb32aL, + 0xe9749e2d31475847L,0x0e2894577dbb6ca6L,0xb1a8de40d01c8c73L, + 0x111bb1a717e9a87cL,0x38016c50525524c7L,0x00000000000000d3L } }, + /* 42 << 91 */ + { { 0xf2f5b47cd83d5980L,0xa9d9084742d37e32L,0xe0e804bb8fe68776L, + 0xdc4f8950138c0caaL,0x7eff3e0f5c3e9d96L,0xc0a1de28825548b0L, + 0xcb0ce3648a12ad2dL,0x3467ec580d44b1bfL,0x00000000000001dfL }, + { 0x80a72cd7353e4e27L,0x3d8c7245c5047f28L,0xa74798df7231688aL, + 0x99e24af065d80630L,0x5a653dea8475aa27L,0x89d84371a44391b2L, + 0x2eaf73f22f4f0562L,0xe1628cbcc9be07b0L,0x00000000000001b2L } }, + /* 43 << 91 */ + { { 0xf84751465841f56dL,0x5b972d1beaa26045L,0xc1bb3bb69326720cL, + 0x656b3cc491990d51L,0xaa493b911aa9c0a5L,0xd12902d4f2af0e57L, + 0x873e1f30136832d6L,0x7f93fed865bab8edL,0x0000000000000150L }, + { 
0xef43b48fff75fde4L,0xd7cad9e14e9e8062L,0x5f0fc4bb495e772aL, + 0x685fa07c58df814fL,0x6b8d8ef7088c5dfdL,0xc2ecf7f51436df3eL, + 0x3c2b71f5f99a9736L,0x0a2a30a45b65c55eL,0x0000000000000172L } }, + /* 44 << 91 */ + { { 0x1fe9426b0a6c6d9eL,0x8a955028e8e6c259L,0x6575bc53fe2d3113L, + 0xaf4a1328b7cc80aaL,0xe3b2d1e559ce1d34L,0x555642307da20bb6L, + 0x450e75e56f0a3e73L,0xf4462fa27ff87e37L,0x0000000000000146L }, + { 0x0c211f3a58e16248L,0x902c342f4506b24fL,0xea4e8fd0a68aa254L, + 0x7a8a1409258fdf8aL,0xd77bc05e1304fbd3L,0xddf4d3af3e566510L, + 0xca6b3c5a7fb9c9ffL,0xaaf671967fac362fL,0x000000000000011cL } }, + /* 45 << 91 */ + { { 0x8f3d956e6b9e05ddL,0x8e7c2babc7ff7e65L,0x5a75719253ae6a00L, + 0xc96d8b795b54b6a8L,0xed3e7f1c5b7d7d00L,0x228f166f5ceb2b6dL, + 0xbfb91eee09dac21bL,0xa8279317dbe20b62L,0x00000000000000cfL }, + { 0xccd5ccd6ba990e97L,0xb945ae888236c13eL,0x01c61e4b7ec5ab76L, + 0xa390fe0b1315a1d6L,0xbb6f26932a162becL,0x782df1974811290fL, + 0xa16250e205a4beb4L,0x8a94424936670e18L,0x000000000000010aL } }, + /* 46 << 91 */ + { { 0x08104eead1978abcL,0xc4aa64182869f184L,0x1d3bdf3040a17f91L, + 0xd385a51c24b0f6d7L,0xc07b19a5e867b991L,0x91a34606528814b5L, + 0xb4c41a54b65a49e7L,0x9babbcf59df9f529L,0x00000000000000b4L }, + { 0x764aa96de287026dL,0x5314fa4f90cc1767L,0x8d66884a1c955f29L, + 0x5fbfb4e9877d0860L,0xb389496c4518d3f8L,0xa40da285971965e0L, + 0x92289bd13d3104c7L,0x5467d571480627c0L,0x0000000000000004L } }, + /* 47 << 91 */ + { { 0x73b067663d3889f2L,0x9cfaaec7975d71c4L,0x32dbe1cbaabb2914L, + 0x95c1be2fe4d3f5b9L,0x9f043bb55a9d6ef0L,0xd8fd47fa2124525fL, + 0x2ee657cb218e24aaL,0x53358623f62039d7L,0x000000000000005dL }, + { 0x1c9f2390e488690fL,0x74350646284b571dL,0x62b8f014eecc8839L, + 0x0d0727a2cc3602ddL,0x9a4a864741b32344L,0xc53ceac84c506cfdL, + 0xe07215a5fbf809a8L,0x74ccdf9bf0d0c843L,0x0000000000000185L } }, + /* 48 << 91 */ + { { 0xffeaae90a5f79719L,0x6e6df606d3492f7dL,0x2c2bb9c576eaa27cL, + 0x33d699cda76e4fa4L,0xfc90add1bcbefbc6L,0x2c7014b254672c91L, + 
0xea55e101b935deeaL,0x12f035bb8a9ea2a3L,0x00000000000000a5L }, + { 0xa3e9e470a478a531L,0xd90105eb1992bfd8L,0x8c0b466dde6e1978L, + 0xaf1b491584e1c6f7L,0xca957a0824931507L,0xc5b18ccd670262dbL, + 0xc8b50dc0da787fb6L,0x087dea9c7d5253ebL,0x0000000000000119L } }, + /* 49 << 91 */ + { { 0x06a9262ad8cffaefL,0x0202733a16dcad26L,0x889461d885e4b6a7L, + 0x8aa5ee3ebf2b5a1eL,0x209afe0dd59e8c5dL,0x6fbc3bfe9fb34eeeL, + 0xa2cafea7854d31b7L,0x40dbcb67b34dea06L,0x00000000000001b5L }, + { 0x38c04e9a6fa0ebefL,0x7e538c4aafa0dcadL,0x7bda0e2a29d75198L, + 0x6af9b41944d6606eL,0x35cba42c03f90cb3L,0x7067e8fb0ac1b1afL, + 0x97044878e1e90e22L,0x3e5ea9f8d802e89fL,0x0000000000000041L } }, + /* 50 << 91 */ + { { 0x510da10c610f1026L,0x3905429820278fc2L,0xec273624264701f5L, + 0xa28eaba19f852f1dL,0x3a48f9a649452affL,0x80200f98532be9b8L, + 0xe1fe9faf3e0fd8e1L,0x9016f5592aed9512L,0x000000000000011cL }, + { 0x15a0b2f34d16a4deL,0xdf45147fc711f66cL,0xacec295a2e9d2971L, + 0xa363ba454d608ea5L,0x2be77f31ef707d72L,0x71e2efaca70f5e07L, + 0x8874489778c61141L,0x7088d771f36cb684L,0x00000000000000cfL } }, + /* 51 << 91 */ + { { 0xa15af0a98612eda3L,0xd20f6eee8cfdf251L,0xd13f13187edb43d9L, + 0x12c8caefd20f1911L,0xf718e3879d2dddffL,0xe5476ac9ae1d8619L, + 0x46996c8c6abdf7a6L,0xaf747be0d673af48L,0x00000000000001e7L }, + { 0xbca4d3b4f681818eL,0x97d89e7e1dc67c61L,0x8fe1d23272adad93L, + 0x0c3d0e0aab0d1120L,0x80755cb5354d0bbaL,0x8227290c29941953L, + 0xd6b2bf435bdc8c57L,0x72b415c4cc5efbb9L,0x00000000000001adL } }, + /* 52 << 91 */ + { { 0x9840fefb94eceab4L,0x3f2df52cb4ab428cL,0x5d01c629dc912e83L, + 0x69fb1a87dccd56f5L,0xae7a79433c692deaL,0x62cfa5b6348795b4L, + 0xe0b621352bff3017L,0x1355d15a465aa055L,0x0000000000000031L }, + { 0x715572c92f17bc87L,0x3d0761bb5408ce13L,0x32e1ebcc740d844eL, + 0x503de141845bcbc7L,0x5f5d73affd491083L,0xe357fd9f459050caL, + 0x1b55a4836d811ad1L,0xf503dd70e855671eL,0x0000000000000152L } }, + /* 53 << 91 */ + { { 0x99472f10e9570837L,0x59fdf7f0a7537eccL,0x6163b2fd4bf81966L, + 
0x9cb1ce6d58669895L,0x5f1f0238bb6928bfL,0x87b6a8471944b59dL, + 0xa0dfa5cc4196844fL,0xd89a1cde26186526L,0x0000000000000184L }, + { 0x0601560613c8ebd4L,0xeb16ead091204191L,0x46cbe8543c28c13cL, + 0xd000b7abdfb0d4deL,0x1726c9a74ea29c57L,0x0f193196c781b82eL, + 0x1387110693994983L,0xb50560b19d7bf34fL,0x0000000000000168L } }, + /* 54 << 91 */ + { { 0x163701bfea642575L,0x0a6a2f5d2e678618L,0xe4c9b9f5804587ebL, + 0x6f8e5064ccc8fb0bL,0x4b69a7091c635a34L,0x5e86bae9b96732e7L, + 0x07fa20d37c643344L,0x77f6268664b84c35L,0x00000000000001d0L }, + { 0x06a5c9c908ae642bL,0xb472c3f14c44fcf4L,0xe52310eac2df057eL, + 0x72597d099a3c2a5eL,0x953d96935b215b2fL,0xe9a9436c573f77fbL, + 0xf577c9c6334b0b7eL,0x9dba2bccbce93fb3L,0x000000000000011cL } }, + /* 55 << 91 */ + { { 0x770c5fece69da7a9L,0x19f64e37a77befe4L,0xd3c78be1c75aa586L, + 0x9f4fad22a4140e40L,0xdacc138f56ea04c1L,0xcf2449f983fdff97L, + 0x7459e685c65440d0L,0x86ec5d4b46bcfca7L,0x00000000000001acL }, + { 0x6741957f376dbad7L,0x7a7f41f3d45e0c7fL,0x1ffdb37d40041998L, + 0x3df73b0673f33365L,0x7ef0296328023845L,0xe58c3a849cc74753L, + 0x4d442f576df77c6fL,0xe0517e5e00f4cea8L,0x00000000000000c4L } }, + /* 56 << 91 */ + { { 0x46d19e84e4ffc94cL,0xb0c6a22b46ac3defL,0x78b32cb951b0c5b0L, + 0xf3a9d47ca4d6703eL,0x564725a602e1858eL,0x962820cdda51085bL, + 0x07079fae5d66a816L,0x2975644b7efdb35cL,0x0000000000000067L }, + { 0x53e0fe0f027e7d4fL,0xe7919470fe2f6b5cL,0x0029df1fba6e0852L, + 0x4a8c72c2cb9b0c0eL,0x8e94526c4398ce0cL,0xe5535ec6945ea9caL, + 0xfed0915990686396L,0xce1523a9a17486e4L,0x0000000000000175L } }, + /* 57 << 91 */ + { { 0xb6a22a0200055b41L,0x1530d3c2738d7765L,0x74b844494b19e498L, + 0x7e8080589e7bef67L,0x99f69a43bffff80bL,0xfdb0e8b1b027fd95L, + 0xc5bc0fd033b928baL,0x6561d98bd273c3e1L,0x0000000000000124L }, + { 0x3b992ebc4c8a05b1L,0xd151de1554ce8100L,0xbb63a62b154cbf46L, + 0x3374fb329ce29809L,0x13994e41708b1aceL,0xb37f9846587049f3L, + 0x6b6708e773cace40L,0x25ed315a8dfbe7acL,0x00000000000000e7L } }, + /* 58 << 91 */ + { { 
0xc6f86aa312eaab15L,0x3f4fc2334051be9cL,0xcd591d72db49c31cL, + 0x65ba21ffd2100c28L,0xdced627f512e30dfL,0x9c948bfd52d495f0L, + 0x3b246e9ca178617cL,0xb7e99939f772b804L,0x0000000000000085L }, + { 0x3ba75fc152793a53L,0xdf8f02a9c86c7a0cL,0xb7861f9ac573bac1L, + 0x16b41667a3b76aa3L,0x069da96a345dfe76L,0x1df94b267585cb88L, + 0x8b57ec9d4f2f7645L,0xfaa8230851320177L,0x00000000000000e0L } }, + /* 59 << 91 */ + { { 0xd8ac0bcf6c9950cbL,0x316eeb3196787697L,0xe1b0fb38589d27eaL, + 0xcb9d58fbbdfd0018L,0xfd39a23040bd2016L,0x47ae06bd222818c2L, + 0x7ca37a695d2a87c5L,0xa8a39329d2a51e11L,0x000000000000005eL }, + { 0xf1e98f9d31797318L,0xfeb139037e1cb61dL,0x7d84a9664aca85beL, + 0xe2dfd634b1056464L,0xf62a4661ae33a99dL,0xcc522a0cc86d2097L, + 0x8800d70dc3d29ab3L,0xfe0e17145aa40b80L,0x00000000000000f8L } }, + /* 60 << 91 */ + { { 0xaf525dfffd31cac1L,0xab4dd60075e22b8aL,0xd6df54601bcfac1dL, + 0x83d0b758ea498cf2L,0xc6c8a7db725379ffL,0x926a186bb282eee0L, + 0xb0e0b072c1496c2aL,0x8f159a3e48668171L,0x0000000000000101L }, + { 0x4494dfeda5edce74L,0xb5dadc35e28d5661L,0x320502550d978436L, + 0xaa350e02ace0dc5dL,0xff8c6c8dc85e3d76L,0xadd61548038e9206L, + 0xc5b0eeaf48b9ec2aL,0x3f1e7dc7c2645b2aL,0x0000000000000125L } }, + /* 61 << 91 */ + { { 0x911b6ec2d4d191d2L,0xa0be596398bbfebdL,0xcfc1858fcdb33929L, + 0xe838e59fcaf4b07cL,0xc3cf06ff253f9c18L,0x96d4c2242cb76433L, + 0x1bf6b527db17c2efL,0xdf9282d82792e421L,0x00000000000000b8L }, + { 0xd9b60a5fa94190e4L,0xa82f09cdaf66cfaeL,0xb65d0c46c4e5935aL, + 0x65c1bc0a65ce2172L,0x533f9f19629364c3L,0x522db0fc51c1cd20L, + 0x1038f65b19332e86L,0xb64007f864003cedL,0x0000000000000094L } }, + /* 62 << 91 */ + { { 0xd5070a5ea979804eL,0x1fcc05daa28853d5L,0x7537020daeea2123L, + 0x33583d857d3940eaL,0xdb4854fc6a16ee2aL,0x2fa7b6c674cb45f0L, + 0x94b299c273a0bac3L,0xf5bfbdf0066135e0L,0x000000000000008fL }, + { 0x7700d5f6ff090f73L,0xf186cb1328b231c6L,0xd080b318eec40e57L, + 0x9acfb30b5e6eb82fL,0xdfa57c03550c698eL,0xe32e90ca75f80094L, + 
0xe4f0efa25d0871b3L,0x2c4e1314ff1ae785L,0x000000000000019fL } }, + /* 63 << 91 */ + { { 0xa1cf6e98abb1ce91L,0x94357a5ece988fa5L,0xce23c897403945dfL, + 0x8926b4a89ae2d087L,0x39d0ec46294d25f4L,0xd571fa4d0340cfb0L, + 0xd0389f1b25552596L,0x31c3d45476b870adL,0x00000000000000adL }, + { 0xa4be459fc1acb2bfL,0xf98de31e4912a631L,0x5058dbc3f8e05adfL, + 0x30b64887ecf93058L,0x6f0ccdd7a4a98333L,0x4f036282a5e29a50L, + 0x141c5a024774ab8fL,0x5b11663b58682707L,0x00000000000000ebL } }, + /* 64 << 91 */ + { { 0xe67ad2a7a597628aL,0x424dfe8947deb6b6L,0x42669bdc0ecaca8eL, + 0xae7a2f64d86f6549L,0x3858f56b652a2e24L,0x8e5611a650291825L, + 0x21c2061120feb545L,0x4e550a9ecd63163aL,0x000000000000005fL }, + { 0x4fabbb26b3b110bcL,0xebe74805036de290L,0xde88d2d633789748L, + 0x56a1b7b8dbf7865dL,0x0646d53de8707cf0L,0xcfb67fae64108a0bL, + 0x04f1401ba32160eeL,0x5998552d5f462a78L,0x0000000000000056L } }, + /* 0 << 98 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 98 */ + { { 0x80e86d53083e8647L,0x4c143f8de6bf14c4L,0x98965bebd250ddf8L, + 0x66ad16a097f5f7ceL,0xf3b79e6a91ca08f9L,0xc8478bb25555ae35L, + 0xa3b1085a3048210bL,0x400e0a3fc2e9f129L,0x00000000000001f1L }, + { 0x73dd8aabef069132L,0x4aa4c72566348212L,0x44b84c9436f78446L, + 0xf835da5068c9b6cbL,0x66b8e1d9a3536e11L,0x29ffbb214f4541d4L, + 0xcdb5baffa43e5d3eL,0xca8fca6dd75f692aL,0x00000000000001d6L } }, + /* 2 << 98 */ + { { 0xbda14abf4d9b67c0L,0x54478dfabf9ac0b2L,0x08943aedca457b40L, + 0xd82f3d75f18006c3L,0x434d99a148f5e818L,0x600c40e3c5422ad5L, + 0x8a6294594361f89dL,0x25fd287b9c176983L,0x0000000000000003L }, + { 0x5465deb69d02479bL,0x61fd5b4facf06848L,0xbda65f8547787599L, + 0x1689cdce23280fccL,0xb2efd35d62e25557L,0x19fd60739c9f12b4L, + 0xaf9a39433a87dae1L,0x20dd9c0ac5a84a92L,0x0000000000000101L } }, + /* 3 << 98 */ + { { 0xd8764ed5adab4cf4L,0x6a3f2257baf7618cL,0x6607661ba03bb43aL, + 0x16f0ad3d41029908L,0xf8dadf979b47f97eL,0xe690674d9dd13ddaL, + 
0x8f5f3c154180317eL,0x9694476452c697a1L,0x00000000000001b8L }, + { 0x9e9715e5f7894511L,0x43c263b702f78d2dL,0x31b78c2547ad4de4L, + 0xe2f6454ce10ef4bbL,0xd0f816ad15787104L,0x169f4e00bc5c632cL, + 0x99c89fa79b77e99aL,0x7c758a0eea13b439L,0x000000000000003aL } }, + /* 4 << 98 */ + { { 0x067f879fc7734385L,0x787c7f18b4a51794L,0xa269c44672d9af69L, + 0xa4498e7387d71897L,0x28e3d84aa91814acL,0x8caf2f61f62fede0L, + 0xad36da315dc202bfL,0xfed8bc0fa4689189L,0x0000000000000068L }, + { 0x84a0854296aa1400L,0x871885f1cd72b4a4L,0x00954202e9d3027eL, + 0x19b350695c86a302L,0xbe420b636cfc52a2L,0x8b75228287175b19L, + 0xd1811b8c9a22611dL,0xf9c23d803e468601L,0x00000000000001d2L } }, + /* 5 << 98 */ + { { 0xfd9470df9bb055e9L,0x356be674b8a9cabbL,0x7ceda113f1c13a27L, + 0x2a717f78d70ae9b1L,0x971609234ca55928L,0x7f9e4e226e14f3f2L, + 0x5b8a03155a589e18L,0xf8c780b940b09acbL,0x000000000000008cL }, + { 0x2fee3ccb74d98575L,0x2d33151fb892607bL,0xf0fdedda3a2a25c9L, + 0x0c82e2d3124e2527L,0xdab009109bbb329bL,0x402e27ea52798e10L, + 0xd0009cb2f6d91403L,0xeb90c221ea21c5d4L,0x000000000000010cL } }, + /* 6 << 98 */ + { { 0xbb7c3bbf98814afdL,0xfa3eb481e568d703L,0x1b8f1e0658318259L, + 0x44c2c4a0d9df9cc1L,0x9906fec621cb9541L,0x4513855b90220398L, + 0xba8e04c7730f3d49L,0xf6f844eec5039f3dL,0x0000000000000062L }, + { 0xff651f70c32b230cL,0x2bf8f30c4ce8747eL,0x2748f82ddf061f06L, + 0x9ebe9d519b73f4e2L,0x428724b7b60edd59L,0x88ddeb09c85196ecL, + 0xe9a64c7a9ed85523L,0x2465d7a62e865fdfL,0x0000000000000099L } }, + /* 7 << 98 */ + { { 0xf8cb5eb1fe3b6e1dL,0x8200ee3a92dea299L,0x8cd62994aae59107L, + 0x2953835d17e38995L,0xebc59a3c6b24fdafL,0x847a4f3c89e2b86aL, + 0x68f32c1a505225e5L,0xd9d254f8ac4ce49cL,0x00000000000000e5L }, + { 0x357de1596d60adcdL,0x7ec27f7d8d7d0d59L,0x43af31858acd0224L, + 0xda2541a71f551347L,0xa13e7bac81ae12cfL,0x4952416780e2588fL, + 0x8b716928caeb93b5L,0xe32a7fc61642f543L,0x0000000000000146L } }, + /* 8 << 98 */ + { { 0xc52699b9e6a0bf64L,0x3d1f9b860d0e9732L,0xa14ca004e0882130L, + 
0xa69508493d669257L,0x65c264d6d76816d4L,0x594a5d7cba522818L, + 0x8e44a20532ad6596L,0x74445203093718efL,0x0000000000000189L }, + { 0x63b108c628885dfcL,0x7ddd44ccc31cf288L,0x82b2cad4b5267f9aL, + 0x5b8a5f7b449e0cc8L,0x24a85f05e8bbb4a4L,0x694cdc1ca0eb93a8L, + 0x58943b3455741ffeL,0x381df171377ad2a2L,0x00000000000000adL } }, + /* 9 << 98 */ + { { 0x0801f029405ebc29L,0x416cbd9c9e0c3db2L,0xf0fcb43499f8654aL, + 0x6ab20655c0f9668dL,0xc4c78a18877a85deL,0xdfe8ab138d16c153L, + 0xdcd5e7c1183c66dcL,0x2f1f7dc246c3b605L,0x0000000000000173L }, + { 0x1cf1a9f24a25993fL,0xa792f66ce5a76bd6L,0x124befa3e06def72L, + 0x4b2b2ef9d85dc995L,0xf1b45b1645b0c395L,0x7b35ab2fcf5ffc87L, + 0xb7b346b5d34c4e26L,0xb00503fe39b8fa4eL,0x0000000000000013L } }, + /* 10 << 98 */ + { { 0xb5491319e335e6c2L,0xc892ed96b5f0342cL,0xcc4a8411e412e065L, + 0x2f8b20e3fd229e7bL,0x9ab982f9843e35f8L,0xf904caa67a728b27L, + 0x283c27cc8360e327L,0xd3a7f980e08c96a6L,0x0000000000000049L }, + { 0x11de6dcbf507feb0L,0x9033db6c32233dabL,0x39e46408c4586261L, + 0x0c31a9624bae870bL,0x5f833a79cfa69865L,0xa4e9e08b019af648L, + 0x928769ade156f8d8L,0x9ce74709c4a99988L,0x0000000000000150L } }, + /* 11 << 98 */ + { { 0xb53d5e9b4a51dfcaL,0x7b60443da3894de5L,0xe25e2bf7b1515643L, + 0x3486b05885451eeeL,0x2c0827a4d806fefcL,0x6cec283cdbe198d0L, + 0x589f669bf950d73cL,0x2988117f1a6cbf5dL,0x000000000000015eL }, + { 0x6f8031cd9cf3edd9L,0xf0ec99d2e35d3239L,0x06284ed6d954a869L, + 0xdb1fba06265e04bbL,0x94f3d6a2d08e378eL,0x5a060f9e778b4464L, + 0xbd9c9b10152f5a69L,0x7045807405281905L,0x000000000000004eL } }, + /* 12 << 98 */ + { { 0x13996f30e402d747L,0x2524a83bebc31a51L,0xa41699b795d7ebc0L, + 0x602112c7ad867499L,0xbf81b788d43b5a3eL,0xf3cd28581c736ceaL, + 0x01a6df580ff424ddL,0x6e527daa95500baaL,0x0000000000000171L }, + { 0x1d966ecbd1b77ff2L,0x2980d1c923985bd7L,0x20a275a635732ba6L, + 0xa1b81f99430369d0L,0x6052b0183c0a4d36L,0xb4fa4d6c8a790b12L, + 0xf4ee3079353165bcL,0x79ef46797ed670deL,0x000000000000002dL } }, + /* 13 << 98 */ + { { 
0x7ace245a07c2e7e9L,0x64cab9a80fd5b38cL,0x6a04658d9c5cd391L, + 0xc6e7b879e6b8f8d5L,0xbc208a42349b740dL,0x67f77d263e4deb3aL, + 0x47b6aeca8c7f3b39L,0x74b9557530083c67L,0x00000000000000d4L }, + { 0x780d1c78d729d124L,0xaa4b4b8bf3bd1ac3L,0xcb0e1707d5afa9bdL, + 0x336ea478e41831fcL,0xb34289ae0e1e4c73L,0x3533cae21e38d267L, + 0x50b48d18bd7e62d2L,0x419bc9e607476b8eL,0x0000000000000006L } }, + /* 14 << 98 */ + { { 0x443c1b80fa605db6L,0x57287045107c6a8cL,0xe9fd39204bfa9bb8L, + 0xecb90ec960291f2cL,0x78b1fc15087ea126L,0x3caa818986592da6L, + 0x3a2abd00dd8c6d5aL,0x885c9499a771ec5bL,0x00000000000000a3L }, + { 0x15c5483b4ae49580L,0x66b59335ec88a215L,0x5486fe34207948b2L, + 0x1a41ac9453866679L,0x9f568c63de3497c3L,0x6b2cc6b5ee1a320bL, + 0xb2e2cc04d12d68ddL,0x3bcd8d271b2a518cL,0x00000000000000ddL } }, + /* 15 << 98 */ + { { 0xcc3f7489019ade2bL,0xefb874571a7402efL,0x073176ddb31586dbL, + 0x062ea41b73784230L,0x688de8033a8fa379L,0xe281a7a7a2e28e00L, + 0xa232a142a323d618L,0xd84cb9002792d666L,0x0000000000000143L }, + { 0x2f1e3f96de5d6ab9L,0x9fad76a0b1a97255L,0x0db69c89649a5f4bL, + 0x3a997973b899710fL,0x75ec0561af8157c2L,0x62753eabd270655cL, + 0x497669a80b46d35bL,0xcffb44c44b926857L,0x0000000000000084L } }, + /* 16 << 98 */ + { { 0x84b4e6f6783099afL,0xa94ca2294dc772b8L,0xcb768aa65ea29d0bL, + 0xd47361170fcb316cL,0xc411b7b79775774cL,0x410c974c96ac3f48L, + 0x1663c40aad24a12aL,0x87f1508da2a503bdL,0x0000000000000161L }, + { 0xecd265ced518bbbaL,0x8d77018ddcc67981L,0x1e9c19223ed1a18aL, + 0x5484ed8427440d05L,0xd319640942d659d6L,0x82ce3a7181354397L, + 0xd097534de7d7dd79L,0xda9d11cf084658f4L,0x000000000000006eL } }, + /* 17 << 98 */ + { { 0x2a6ad43f50e3eb59L,0x735dd00ebc32b62aL,0x317f6f679e89b620L, + 0x7b9e260c36e98778L,0xdd338b8e168d53a4L,0x83c196436f9a8038L, + 0xff1ec28cb607e41aL,0x2d7cc7618f05f0a6L,0x000000000000005dL }, + { 0x4276274d347c52ecL,0xbaff740b14ce5d7eL,0xb47b6a4167d528d1L, + 0xd0c678f7962850ccL,0x4891dea22b049133L,0x1786e620ada076ecL, + 
0x09451905486b08f6L,0x948834d92bbf984eL,0x00000000000000a8L } }, + /* 18 << 98 */ + { { 0x92cfc98dea9c994bL,0xcfc7b48217371932L,0x820a2d553180e799L, + 0x0a164a5a5b0a82c4L,0x45ddc7fd7a87b97aL,0x582e845754b2116cL, + 0x371ef68d8f9eb2cfL,0x127d572a03067708L,0x0000000000000069L }, + { 0xde83da9cc02d06c8L,0xc262d58529c5e4bcL,0x32c73d8f44046239L, + 0x2bf12411bebd50efL,0x7a2729e160af2df6L,0xaef5ebfcc859d25dL, + 0x05efd7c034c0edc8L,0x89adc752185901c0L,0x000000000000019cL } }, + /* 19 << 98 */ + { { 0x485e2623be1f51beL,0x143ebff4f8173134L,0xa688b45a819e5154L, + 0x67fe58f8eff4fa2fL,0x449394c9d4ce70a0L,0xc37b8a8d12dd3462L, + 0x2a10cdc1217745f0L,0xd6435c4c17a00c1bL,0x000000000000015eL }, + { 0x3222b4a2ce699e69L,0x9473d565ecbef61eL,0xa183e92e8db063eaL, + 0x2f2df05d20ff011eL,0xd201a960916ed069L,0x6af80e44c72fff72L, + 0x994817c219a2150aL,0x63e8bea770fd901bL,0x00000000000001b2L } }, + /* 20 << 98 */ + { { 0x9886a22962c16459L,0x0e0236afe5aa41c7L,0x468d4c3a61fef10eL, + 0x8c3171ad3db4d163L,0xea4180801022bf50L,0x29c61bfddd95500dL, + 0x79073998fc1086d4L,0x18e62735d880bed1L,0x000000000000008dL }, + { 0x3772a3a6bab5ed67L,0x59aeaa70c4a82be5L,0xdefd15693fa10eaaL, + 0x60cf6bbd3d7a410fL,0x9c3ce822061382edL,0x4b45af6448a0b43dL, + 0x4c7b5a07752c5323L,0x638f2663cfd710f2L,0x00000000000000b9L } }, + /* 21 << 98 */ + { { 0xee1e449ab498ff47L,0xbe4cfc2e82915614L,0x45947d849510e9cdL, + 0xd413618b021fadf7L,0x896a62bc9cdecbe1L,0xf4113eb9627f1028L, + 0x5dd6e91605aaea6bL,0xa52dee6d9a1ac809L,0x00000000000001a4L }, + { 0xdf07521a7863cc22L,0x5bdf617356d0ea3bL,0xec462a8693dd110eL, + 0x4a53f3c18d68aa25L,0x45919920a7d4b28dL,0x2d2d42abfbc5e401L, + 0x2158573907fa46d3L,0xd5715aa9b937900dL,0x000000000000017dL } }, + /* 22 << 98 */ + { { 0x759c736628c6b666L,0x68d5cbc1f38bb6acL,0xa6acf7fb07508ff3L, + 0xdb68b1c90d31ee2bL,0xfc8f402a071bbf2dL,0xebed7bdc838e87a0L, + 0x3441693f9465d8b3L,0x6732878d1dcb390fL,0x0000000000000129L }, + { 0x358cd92b170795c5L,0xab88a0a20a185fe1L,0x9901aeb8c4cc8c02L, + 
0x4b763c08e85250e9L,0x29f2c8154625a096L,0x153b9ed3b3124358L, + 0xb8af8770a4d48d39L,0x8e3217113164955dL,0x00000000000000a0L } }, + /* 23 << 98 */ + { { 0xccdfacd34cddc5baL,0xa06e3fe25047a9f8L,0x81206e0e9d8c2652L, + 0x75167c3e88419d67L,0x3c0442de1db10edcL,0x019f18ac0d0d486aL, + 0x56c7baeb64efbe20L,0xc7ac08b0f7c220bfL,0x0000000000000062L }, + { 0x07ea8fcd9f040b6eL,0xf02de9fda6b37d8cL,0x93fd1121da55f26fL, + 0xd0393a45090ff3e0L,0xf492651ae4b87e78L,0x4bc0e5b5eef1cbf2L, + 0x6f560378d51bb3b7L,0xf471b347c5efaa96L,0x0000000000000175L } }, + /* 24 << 98 */ + { { 0xb6d0eeb7510be54eL,0xed352173060b7322L,0x0f581a2183364ea6L, + 0x78ef831ef59ec71bL,0x09a472c9a055e4b0L,0x0a529bcb7d2404dcL, + 0x45f10074b461331fL,0xf80627f369a5a576L,0x000000000000006dL }, + { 0x12cf73e37362082aL,0x7cb89bc91d6278a0L,0x2b0c5312b9f399d8L, + 0x4403d4fb21e7ef6cL,0x34dd11819b453f15L,0xa38c5e981ae02a19L, + 0xa1ed1aaddce1792fL,0xab8261dba4d71c9fL,0x0000000000000036L } }, + /* 25 << 98 */ + { { 0xfad65c13dac2b992L,0x762f983d5304c00cL,0x4c7b49b7d2da32e3L, + 0x4c4122a83bf0d5adL,0xe3afec2564f9af81L,0x933f38f305217cbaL, + 0xb213496e96d7241bL,0x469b9aad37ce2c05L,0x00000000000001b7L }, + { 0xd105c543c1166f98L,0xff7e04a6a913acbeL,0xf07ebe34252a9c03L, + 0xcc7a8b753e7df88dL,0xf14f0a16e44bdc1fL,0x684fefd718c1bb4eL, + 0x498ab8c0925b286aL,0x598d3f5808702939L,0x000000000000002fL } }, + /* 26 << 98 */ + { { 0x06c90f137ba2cdc1L,0xb427ace8f70aa58dL,0x33fed4366970546cL, + 0xaf8ae02b0d5eafa5L,0x74b6f45d390ea792L,0x121708e487c81e88L, + 0x323a3672d186ce90L,0x1a66c0a48b4e7832L,0x000000000000006eL }, + { 0x331064266beef707L,0x722fb7844f59525fL,0xa73cb5be473798d4L, + 0x2bebfc77f44d5281L,0xf397282673030f11L,0x89c0d9dad93404e2L, + 0x4e76c1852e5aa5fcL,0xe95f8dc3275485ffL,0x00000000000000d2L } }, + /* 27 << 98 */ + { { 0xa925539c3ede62d0L,0x195ef41b574551f7L,0x7049c9c1f270aea1L, + 0x746d0dbf486bb267L,0x3cfb69b104d8eafaL,0xd0a86d9e8cb4df64L, + 0x8abf12841dbb9b19L,0xe7d381d1d3b0e425L,0x00000000000001f1L }, + { 
0xa469a46539ce8ef0L,0x63cfb5adf537b1feL,0x2f84fd9dfb96b734L, + 0x3ddae07e2ef3ff5eL,0x8be74870350b4402L,0x0e014d43098f11eaL, + 0x2862b3b50f0784b0L,0x097303411327f2c9L,0x0000000000000102L } }, + /* 28 << 98 */ + { { 0xc9eed4f2599a7682L,0xf1ade4e4a4f4f82aL,0x7479dc8b6ab6d04eL, + 0x2da874dc271e42cfL,0xc700ca719265e2bbL,0xaf88ba91255fdcc6L, + 0xf68c29705705e0ddL,0x6ee1135b229fdc50L,0x00000000000000fbL }, + { 0xf4bd32a2a1a31961L,0x4e67eccfc4785c20L,0x8c0cd85d17f740e3L, + 0xf6930e3ad8c21194L,0x2c9d9c7376b4c919L,0xd32de819e69ea290L, + 0xc1028dd33724a066L,0xfd3f06ee7538d3adL,0x0000000000000054L } }, + /* 29 << 98 */ + { { 0x616e5cd45bf0af55L,0x6b79cc4dcd382cd3L,0x661574af66107910L, + 0xf7ad62be9aea6fa1L,0x6a47e48e8c0c9cdfL,0xaf5aca071e6d464dL, + 0xf180a69f8925c182L,0x282864092b7ca708L,0x00000000000000acL }, + { 0xb6bdf74fa48967ecL,0x95d640c0484ff456L,0x942032b4db3ef654L, + 0xd12caff283c4f9e7L,0x946903ca1b8fa837L,0x1cd9b1463e8e7338L, + 0x7376fa9dd4c41057L,0x50be41a35e26c299L,0x00000000000000f3L } }, + /* 30 << 98 */ + { { 0x152c38fcd581471eL,0x05b8e79160b4b12aL,0x5ff32ad285168011L, + 0x43ff030dbf64b3b6L,0xcb660ffbb119ea1dL,0x0d551ab6abe7c1cdL, + 0x7c012e803b76feb4L,0x6cafd8b9ffd0b552L,0x000000000000005bL }, + { 0xbc486f8d855d1dfcL,0x8a6440ef381a9c23L,0x5e97a549d9d1b832L, + 0x351cc9264d4208d9L,0x2c905f05f72a4413L,0x0feb2643c61c13c8L, + 0xbbebfa77d50dbd43L,0x3bacdd79c7cfee04L,0x0000000000000141L } }, + /* 31 << 98 */ + { { 0xa489600cd04efabbL,0xb1913b347679d8faL,0x071f212fbbe3b326L, + 0xa78728838b2e519dL,0xde545995b4e503c1L,0x5e5c3df6f58f6c94L, + 0x8ab21347bd887cc7L,0x894be454e5529c65L,0x00000000000001d1L }, + { 0x5628656d77eb1957L,0xad52a8d96df565d1L,0xa83f387c8e0b8311L, + 0xbddb40c83f3a1d96L,0x1f91eac2cc121356L,0xff612889d0bb2745L, + 0xa15437c096e19a86L,0x6ae3410f4efa63fcL,0x000000000000005eL } }, + /* 32 << 98 */ + { { 0xad3570f4d827a280L,0xdb625d88b2f60f8bL,0x81bd93b7ae94d18cL, + 0xc2fffe8af3ae3394L,0x06b0ec262e4a72b4L,0xf1285a13c7728dc7L, + 
0xd97dee85a4190a48L,0x945e4869d4b0f305L,0x0000000000000023L }, + { 0x44587f935c577abeL,0x256b56424ddd2f17L,0x36e73dff0eab2bc4L, + 0x6b2b5f3ace0f4471L,0xce1f2f0fa25bae4cL,0xcf10076029866124L, + 0x0ec9b4d25ea0d400L,0x621b68580adece0cL,0x0000000000000162L } }, + /* 33 << 98 */ + { { 0x33e194dd9a7e3430L,0xe9d5ab561284bb95L,0x9c067ead0e58900aL, + 0x05355bd3e1af14c3L,0x9a3892ac06815865L,0x5a8c70c331c62938L, + 0xd8e522e3afee9136L,0x50e266510abe2cc3L,0x00000000000001f9L }, + { 0xd8ab109d2469ed04L,0x90fc703dbdad4fa4L,0x1d0882afda97fdebL, + 0x60f8b44acdeac89eL,0xc497d8d5af9d0bf4L,0xe30ef07a66044397L, + 0x614c26190dbb6093L,0x8eebc530ea3adb8fL,0x00000000000000e1L } }, + /* 34 << 98 */ + { { 0xfc84c7d966cf468eL,0xb05444b6a6dd0c36L,0x6ceefdc9cb75ae66L, + 0xa16407e1fcd19989L,0x0e4d008e3ea55530L,0xe2e44180a8796154L, + 0x2c96ee812bd73378L,0x84c750f4a6a094e2L,0x0000000000000046L }, + { 0x3ced13152f93d6caL,0xbc1e7b5cb492a410L,0x4e0238df9daeead4L, + 0x946fe9921e47d1a3L,0x6b3827027ae7ff3fL,0x305d7c604be9a4c8L, + 0x5b4a7ad35a3421c4L,0x97dd360169cf2602L,0x00000000000001f8L } }, + /* 35 << 98 */ + { { 0xc7c08962ae59aa33L,0x65e33dad2f839e55L,0x751e8014c7cf5accL, + 0xdce80c8b384a7c9bL,0x4ed68aa2155876dfL,0x68f1fd1e6a12109fL, + 0xe478bfc5df171beeL,0x25a6d4efa0d40af1L,0x0000000000000127L }, + { 0x3d08cf5216e520a5L,0x0e0c219aec295ca3L,0x4a11607f58c573d4L, + 0x2e94a059027c52aaL,0x5f86333bf001ea33L,0x1d5390fe8d19323aL, + 0x530581f92030a6c7L,0x13436ed1984258a6L,0x00000000000001c0L } }, + /* 36 << 98 */ + { { 0x1ff09f33cae6514fL,0x191fe11b261fb47dL,0x97e72c53bdf0b228L, + 0xf81cdedb8caad4a2L,0xaa5f28367542e922L,0x090df178d68e225fL, + 0x9adfb6202c8431d8L,0xb8f9bea70f010ff8L,0x000000000000016bL }, + { 0x78fc8973f422e828L,0x8496b0b5d149e76cL,0xb1f0911a565c8f4aL, + 0xe580f18ca79f3220L,0x8d3b71356fe124baL,0x8911601d673928b4L, + 0x193c9fa9e5209d66L,0x988483f1bea837ffL,0x00000000000000f3L } }, + /* 37 << 98 */ + { { 0x2c6f263b3fdaa30bL,0xc469da4af8289eebL,0x126c0a3e4f2905c9L, + 
0xac4d69276852183bL,0x9f76c56965b948b8L,0xdd3297e86cc30f05L, + 0x83cca68afee0c55aL,0x7062249b13d1eb8fL,0x00000000000001ffL }, + { 0x66cca2f1488bb713L,0xe4fe61c013ce959eL,0x10ef025c5bd7aee9L, + 0x651aede32c4e3aa8L,0xe233186271ba632fL,0x91887184db1340c5L, + 0x5cae47ac291e4167L,0xbd1dc2ac61e04921L,0x0000000000000062L } }, + /* 38 << 98 */ + { { 0x9365bd5f6e280862L,0x3eb1370af4e5f679L,0xad83574dac728e89L, + 0x6011dce778ccbe43L,0x51010f61d20a552bL,0x614eaf6c6c29a90eL, + 0xb1d5f33caea2fcb2L,0xa001efcb12511f6aL,0x0000000000000114L }, + { 0x769ae8486be363efL,0xed582b28c1195a6dL,0xab11de93a3d3cde9L, + 0x6e467f11acd39daeL,0x0439b529ba6cd7bbL,0x71019d73fc6195a0L, + 0x9693ad57c81639e0L,0x31a722a10ed42ec9L,0x0000000000000068L } }, + /* 39 << 98 */ + { { 0x0c5c60509cd7678cL,0xac3eeaa649215514L,0x1d124f7ffad388f4L, + 0x1aed3c0e6d9c49aaL,0x7cb12edb54a3c297L,0x7f1e0889f2432aefL, + 0x62b2bae3fd1795fcL,0x48e20ba944caa18aL,0x00000000000000cdL }, + { 0xb1a6ca263b7036a9L,0xe53c3a75af94680fL,0x878870b5352a936eL, + 0xaf3c86ea55fc352bL,0x60a91eabd9b66071L,0xa2c246a297c722a4L, + 0xe1f2588cdfe226b7L,0xd1671ac1fc2267c2L,0x0000000000000079L } }, + /* 40 << 98 */ + { { 0xf9bfe015460032b6L,0x6410756336a6c19aL,0xb0712a3510d65254L, + 0x3c97ae7d2a9946dfL,0x28e961edbf77c7f9L,0xf937f17cbc8537a9L, + 0xed718f65057512d9L,0x7ee133ff8bc561a9L,0x000000000000003cL }, + { 0x675476a99e3441e0L,0xf8abe77c9eeb9df0L,0x0ce788421a10b4ccL, + 0xa6f28b21793079e4L,0x01c63657267d9dcdL,0x2de2e74e702b40e8L, + 0x28620c5a25617600L,0x7505685e5d4d9809L,0x000000000000008eL } }, + /* 41 << 98 */ + { { 0xc77c81dfb49ef4f8L,0xcc7a1f775aa25efaL,0xdc450c19b138a6f6L, + 0x05d2d28b8a09e2e6L,0x6c631d9e49886053L,0x0fb35d84bb82d381L, + 0xd8a0a353fa26cdc8L,0x8b49879a9a4448dbL,0x000000000000001aL }, + { 0x6d91d316fb8aa58cL,0x933d09813affa02bL,0x6f68fd557c5a5a04L, + 0xcad6f83f21ede5f6L,0x364829e50302650aL,0xabf09253bc326cd4L, + 0xfc23db4b75573e98L,0xe6ccc3f67891f2f2L,0x00000000000000adL } }, + /* 42 << 98 */ + { { 
0xeded14459eb444e9L,0xd1605d6d257afeb5L,0xd9c649f54a25d9c4L, + 0x8f148eae369dfcd0L,0x72c632dc79197e8eL,0x55a15a077cdbf577L, + 0xdd50e58248c28164L,0xd9e03b72971e416fL,0x00000000000000d2L }, + { 0x63443c908a063207L,0xceb2fadf26fe1696L,0xfe06b1df27b56c4cL, + 0x75978d3adc74026aL,0xdd313d3424c9b16eL,0xd8d800e0eacc75c4L, + 0xdc863b4b595b73daL,0x638f474f0a6c15f6L,0x00000000000001b4L } }, + /* 43 << 98 */ + { { 0x4e30388a3fa31d74L,0xcedb2dfc4960cae1L,0x744a7745f4f45ce8L, + 0xd742c0017186bc80L,0xcafad4ad164ca760L,0x396ee3d486f8d7eeL, + 0x1084ed389b54c6bcL,0x289c4f8e0f7a5b68L,0x00000000000000b2L }, + { 0xf6b9e59783ea21b9L,0x3135f10773dd812fL,0xe1012d9a9ee85a75L, + 0xc9a47983ba06a432L,0xb237d66881581dfcL,0x1b0225611e35da83L, + 0x837cc40f94261e64L,0x11f933f5fb3344f9L,0x0000000000000161L } }, + /* 44 << 98 */ + { { 0x5cfc2f27690b6bf1L,0xdc6e6043f33ba977L,0x39dc991e82d11ba7L, + 0xfb80c94e2922c70dL,0x393dcc3ba5cdeaffL,0xd44d9c7ca687f07dL, + 0xbd25ebab33bc5437L,0x9a389625d71c4392L,0x00000000000001e3L }, + { 0x0ac41a2f228f0787L,0xabb96a60cde860b7L,0x22cd3ce604c04d14L, + 0x157669c6f65e7b0dL,0x0ef03f6167abced9L,0x9d2cd63a814717ccL, + 0x3991c34977387f7aL,0x3c1b38a58a80a7f4L,0x0000000000000033L } }, + /* 45 << 98 */ + { { 0x051a1d25a03c152fL,0xde506b47f51d02c1L,0x6a0458d855e35f7cL, + 0x9df1cf180c96a540L,0xb658cf9b47a847bdL,0xdb2efe9ce8d42a41L, + 0x8e2290741219314bL,0xf8fbac85a1cd9697L,0x0000000000000143L }, + { 0xfdf09168226d0a89L,0xb58b9906a82a9965L,0x88fdad1ff0b79bc5L, + 0x573b2da29ccc3019L,0x4c2a220d8173e9ccL,0x060bc07731f682e6L, + 0xdf135af96e7e3dfaL,0x78c191ca507811ffL,0x000000000000004bL } }, + /* 46 << 98 */ + { { 0x75627d858584bbbfL,0xbe77f77ec5670f07L,0x86ed433624e2e6cfL, + 0xe9f725fc83e2e6e6L,0x14b4015ff8a3131bL,0xb1211805c61a5f71L, + 0x024152443d5b62ceL,0x1aa1a84fcf15a0dfL,0x0000000000000171L }, + { 0x36f8293435d927f8L,0x22e3300715513cadL,0xc5e42e5783d8b14dL, + 0x33e90ea35251a8c2L,0x64c1eb44d96f4b8fL,0x52f2ec240f126f58L, + 
0xef84f773b2a8cfecL,0x34949d2ed9d11c70L,0x00000000000001eeL } }, + /* 47 << 98 */ + { { 0x7ec007f751d62408L,0xe1669c21119dc50aL,0x8260e2ecf101ad5bL, + 0x6a6062580c5eae7eL,0x221708bd91ce06f0L,0xdb91beab76d79e37L, + 0xbdfb4ec33adee1b3L,0x61d216d1feb7511dL,0x000000000000005dL }, + { 0xf77d7881483fb649L,0xf16f36a8d54d0788L,0xece1796dbda356aeL, + 0x79f49ef894859937L,0xc4397e207982f311L,0xc98b6e076ce8f498L, + 0x6c9e0cfeb80ac218L,0x485d36a17b24e50cL,0x000000000000017aL } }, + /* 48 << 98 */ + { { 0x95f7fb41da8576c3L,0x42901717d151bb68L,0x7a0277d38777c80cL, + 0x6827807b5a429477L,0xb98e1de8941ef5ecL,0xadd50957575465fdL, + 0xd436cda505f5b417L,0x6d30bf0bc232aaaaL,0x000000000000012dL }, + { 0xd4b799a488873a5dL,0x2200c6d4d60dc1d3L,0xb784a12b75c91d8fL, + 0x6c8aa91f09fed40cL,0x48db563f5720cce1L,0xbb0de5c0f50bddc1L, + 0x4aa1fb064c4bc117L,0x905d18a31cb0da02L,0x00000000000000e6L } }, + /* 49 << 98 */ + { { 0x05ae51c09ef391c0L,0x6da01b3ebbec8ce9L,0x352303693ed0bf8bL, + 0xdde6b42cbd380ef8L,0x306e895ec96ab6ceL,0xcf75a84635797d57L, + 0xed402b30cc298916L,0x66ddc1ffcf95180aL,0x000000000000013eL }, + { 0xc472f66dfeb4d388L,0x446f756911b1a667L,0x4ebb535be578315dL, + 0x54f8ef708b039744L,0xf6b95976b877b79aL,0xcc4283c1f5430e02L, + 0x393ea2f9e1778b25L,0x26d55bfec9f45a41L,0x00000000000000feL } }, + /* 50 << 98 */ + { { 0x5617c67b09834aaaL,0x1520f46508ecedacL,0xb331bf9d63dc2045L, + 0x1633f5f6de5be628L,0x959dffbb65391a40L,0x53450d3241b0bbc4L, + 0xaa6442cbba6d681bL,0x7e6d47c0f07b5808L,0x0000000000000095L }, + { 0x78def4619649ca24L,0x970eeaad9fd68eb9L,0x2f170dfd1f670a41L, + 0x8c28d6fdfad9c708L,0x46892517acdef254L,0x0ca7991fb22ff7abL, + 0x42ef276428c61efaL,0x1b1b24d9879002b5L,0x00000000000000e9L } }, + /* 51 << 98 */ + { { 0xf6465253c4c46cabL,0xcc3593d92fc27318L,0xd17ca4d778519ac0L, + 0xb34dfb3947d5bfbcL,0x4fe4682e89d6216dL,0xdbe11575a53af405L, + 0xb622af037d96d9d0L,0x27441f0161f9b532L,0x00000000000001c3L }, + { 0xef5e7b08d1742a09L,0x77b47c7bf740e0bcL,0xbbb5e859b3c2b8fbL, + 
0x676b7939cb40d08bL,0xb98d654932da9e1aL,0x42546a9dceb3d8d7L, + 0x0f1e38ea5687f118L,0x357ea1d50f472bbaL,0x0000000000000101L } }, + /* 52 << 98 */ + { { 0x85a59a49b81f2d5fL,0x5ab98b4cd12cafc2L,0x7be9e55c9036f3d4L, + 0xb6644d14ac17dda1L,0x0d6caf4ba23f330bL,0x960e78ef567899fbL, + 0xb6ec1c3f4bedf73dL,0xbe7e5caba971c6b3L,0x00000000000001b4L }, + { 0x20cb3a6a66a01413L,0x1fc8c49d71dd7853L,0xc6eea4c1d0f467cdL, + 0x72adc39ac442dc91L,0x1349ac1b9fd5a2e0L,0xa58d769226814c74L, + 0xbe23da1602685093L,0x411491b34f2278a1L,0x00000000000001c3L } }, + /* 53 << 98 */ + { { 0x5df613d86eeb2735L,0x9adffc0f3bab3590L,0xa9e579d5fbb0951cL, + 0xb2f2301d93eb0acaL,0xfd90af4277018d78L,0x5fb3dfa8d26e5801L, + 0xb5f4a0b58db45b65L,0x5152a2c96bff3350L,0x00000000000001a3L }, + { 0x3c3d0aae3bbe30c4L,0xab6a28c32b64812cL,0x552a8e1fd45d39b8L, + 0x00f9ad121a9ee27cL,0x0985dcca840dbe6bL,0xa511b246f8a3d008L, + 0xbec2ee441ff2d83bL,0x53a67bbe0e46b345L,0x000000000000001fL } }, + /* 54 << 98 */ + { { 0x63e9d7b2f8ccf1c3L,0x158001879260eed6L,0xa687c4759a40eaa7L, + 0xcead499e587304a0L,0x0ea47fe6fcd6e4a2L,0x1fd51b69b04a24daL, + 0x5519ae53a83160ecL,0x4f7d9444ab88175dL,0x000000000000009dL }, + { 0xcd73bd2d8fd64c7dL,0x8da0c21e69451d1fL,0xeb66fd3fc0f3a5ffL, + 0xcb267f27e9499043L,0x407f43f7df79a3d9L,0x000a877adb145264L, + 0x056c0e2af3952498L,0x6f893940b83e7528L,0x000000000000008dL } }, + /* 55 << 98 */ + { { 0x42495d2779a151e2L,0x040725e19e477378L,0xc4d382f1ef2c6b94L, + 0xf34dd70c2b5bb0b4L,0x3d2740c94a5150a8L,0x53de504865e4ddbdL, + 0xb110cd91f58703deL,0x5c3abd4e167ffc8eL,0x00000000000000adL }, + { 0x136129e5607e01a6L,0xed84c7926c809502L,0xf9c736f8a3f3ceccL, + 0xc43308d14cf83eeeL,0xc8ae4fca644d91efL,0x3b58a62bc25f1045L, + 0xdddbbba6a80de8c2L,0xb2c329cf2b831b25L,0x000000000000012aL } }, + /* 56 << 98 */ + { { 0x414530555ab463b0L,0x545b0b693248cab2L,0x25ec52d5dd9b920fL, + 0x73bf68542dcdb1ceL,0x8efb2a6d02bf830cL,0x794ab4165ea6b254L, + 0xa4422d12df2a60f8L,0x78bd1b524e9d5a0aL,0x0000000000000026L }, + { 
0x3f341acbb43f9e41L,0x4a73e42a6991d638L,0x596cfd672d29b6eeL, + 0xc9184fe2940dd578L,0x283a30954a2883edL,0x0626dd7aa58bc7ceL, + 0xf2d6c6ef401334f3L,0x55b107493c254bafL,0x000000000000002eL } }, + /* 57 << 98 */ + { { 0x2d9295c1f2bf5edcL,0xf764d4c5cb524dafL,0x2ca917e19800cf00L, + 0xda3da70db6d553ffL,0x6a14076bcce4eb3bL,0x1bcbf4e8ddd246c7L, + 0x653ed1fcfc906223L,0xc0ffa99f49e555d1L,0x00000000000000e7L }, + { 0x6e75a577914f520aL,0x043dd7d0d4070fe6L,0x58ddc37ff47638a3L, + 0x2d28267b70ceddeaL,0xe1c529280b9f91ffL,0xefa6ec2805e983baL, + 0xdd61a80670d85f35L,0x9226446bcd48aa34L,0x00000000000000d2L } }, + /* 58 << 98 */ + { { 0x00afd4965d8690a8L,0xf86123ff5f81af58L,0x92791fc1d2e32baaL, + 0xfb0517927d442591L,0x05d935c115baf7c3L,0xaff80ea31ee6a681L, + 0x4c82f39458a15599L,0x691aa0055165d86cL,0x0000000000000160L }, + { 0x8fc906b0da216432L,0x2553176a31adbbfeL,0x0acd0c27fa180f5dL, + 0xcc22f6a295ca0db6L,0x2aa225bbd4f8515bL,0xcad879abe8685485L, + 0xa7889114fb4b2f2dL,0x5e2f58e958175c94L,0x00000000000001dfL } }, + /* 59 << 98 */ + { { 0xa3fff0780481f1e4L,0x2c928413de29a775L,0xdb08b5d3f7168a7dL, + 0xb2f81d56633213e1L,0x9928e38b33a95654L,0x42d16f450c473c01L, + 0x0662ab7be4c7ffdcL,0xbea6e39ef5484520L,0x00000000000000b5L }, + { 0x34cd343237cd50f6L,0xb969f3cd85686cceL,0x10558cab3e15b35bL, + 0x87aa8ec532f13ad4L,0x706b9ee99311191dL,0x38adfcb860ba780fL, + 0xa1485d172515550eL,0x2de9d6fd18b92a8cL,0x0000000000000052L } }, + /* 60 << 98 */ + { { 0xdcdfedb147c91a15L,0x89c1654de8003763L,0xeef03c09e074525dL, + 0x1a75460c0b7979d2L,0x58881343fed5ae1fL,0x3fcd3227ddb9340aL, + 0x1e4db699885bf042L,0x2923f9382340afaeL,0x0000000000000067L }, + { 0x934ef610cce2e800L,0x8db3f34bbfb68efeL,0xd78192d6c8df9008L, + 0x94929564d3fa0c49L,0x2d0cdbabfb20d848L,0xbe73f64fcd307b38L, + 0xbdf434f3183096a8L,0x009a7ab97bb336bfL,0x0000000000000096L } }, + /* 61 << 98 */ + { { 0x51482c31c0279b71L,0x5a64d764fa114064L,0x1cea19dc613f02e4L, + 0x584b4754ad92a890L,0x41d258fefe288772L,0xa46f97e2712b9833L, + 
0x6834b1b47263bd06L,0x2377b6a5e840922fL,0x0000000000000085L }, + { 0x785b453e05243545L,0xf7cef5b951e1376eL,0x0b4baf26bf6985d3L, + 0x1b19968160b01ea0L,0x8c67d48322d1bd0aL,0x44534e2069d7265dL, + 0x5c66184f4d1b7631L,0x80173aabb4f2f684L,0x0000000000000139L } }, + /* 62 << 98 */ + { { 0xb2f73e97c2f18c9bL,0xafb9e244bc189020L,0x7940431bb4f03f21L, + 0xdf6d7a8da1743efeL,0x34b51488b2d1f43fL,0xa4c57f09da0bb60dL, + 0xca8497729e7a0788L,0xabb13fae3c00114bL,0x00000000000001f3L }, + { 0x82b4d795bc829c42L,0xf9f1dad153a68a72L,0xf4dd335759991d6fL, + 0x1602f217fff13f30L,0x55af8ec3e9568101L,0x36f1d1f1147a832eL, + 0x9f1dd9d2408cae6eL,0xe5d03de373b26e6dL,0x0000000000000193L } }, + /* 63 << 98 */ + { { 0xf3dd72a10f87eb25L,0x8ef640f8237745deL,0x49c04801fc90b8afL, + 0x836914f5bc493335L,0x3fc6bc7c66e73cf5L,0xbec3517371e8f965L, + 0xec11f247478c734eL,0x1806c5d2f53ad410L,0x0000000000000058L }, + { 0x24f77c0dee27650aL,0xf19cd730e89f447aL,0x2d4d5ff8af16422aL, + 0x4a5a3cf9b38edb7dL,0x4476596d9a775f5dL,0x2fda6cb49e5716c4L, + 0xe4926cdd84e5e040L,0x54c354ba388b853dL,0x00000000000000e9L } }, + /* 64 << 98 */ + { { 0x5d3f00ecbecff63cL,0x280fc963671a2835L,0x27cf9990849ad2f5L, + 0xa1cfa523b27c7f2dL,0x9d9a6870b8278b74L,0xd6176cdf9ee793b2L, + 0x9a98521ab31e9501L,0x4422e43e7436e438L,0x00000000000000d9L }, + { 0xda4f7464daa8dc76L,0xa5b83150919e1298L,0x00d3948d81c82c13L, + 0x95def32463724cdeL,0x9b57597b7d5a1038L,0x42d9d64e57432b3aL, + 0x9807d998936e411cL,0xe68eefacf1e57507L,0x000000000000003eL } }, + /* 0 << 105 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 105 */ + { { 0x45d90cf8876b33acL,0xe53e1a993ed58f0bL,0xd5d181f549d916b3L, + 0x1b1ef0402ec09be2L,0xe5787176b2113b57L,0x11d02e708073044eL, + 0x9d1c19e7a2ae38ddL,0x2662d4943634f6faL,0x0000000000000130L }, + { 0x454c8a736bd6208eL,0xd2447cefae37911aL,0x56ac592e69b8c5aeL, + 0xe7f564839b615bfcL,0xfac066e1bbe7fe62L,0xb2ac3ee1b7777b32L, + 
0x55f8653315114adaL,0x15cba83e46497cc9L,0x00000000000001efL } }, + /* 2 << 105 */ + { { 0xc06a64ce981fd289L,0xb8dc2603d11fcd02L,0xea27a922770d2eb2L, + 0x83c64076da8f19edL,0x9f6d1b650ae6a311L,0x977277bf73fa8721L, + 0x7115ef98cc0292d8L,0x93237ae7d2e0c67eL,0x0000000000000069L }, + { 0x83481b595c151734L,0xba00bd740ac1576eL,0x48f33272f178d4f2L, + 0xe09fde3816492d95L,0x876124dbb9dfce3fL,0x27e7b1e503fe728fL, + 0x79347e4604f98f99L,0x63ec8c5d48dcf0eeL,0x0000000000000149L } }, + /* 3 << 105 */ + { { 0x82a80d88b23b9243L,0x1a102766d91555efL,0xdf3cec50f069fa76L, + 0xf6c125294319ca89L,0x9a11b37a0330eea5L,0xa5a659a902c12564L, + 0x3e01e4311612fa73L,0x85580747e35674b7L,0x00000000000001daL }, + { 0xfa500e83b5a20999L,0xb539909dcc336ec6L,0x76aa969e7ae2ac1eL, + 0x2144206ce0a9712fL,0x095fec7e3ec387d1L,0x5988797252c7840fL, + 0xb61a7c61262f5f93L,0x75bc25e7a76b83b5L,0x000000000000016aL } }, + /* 4 << 105 */ + { { 0x327d64092ccbb0daL,0xc5750f6adc39da33L,0x8b9eebbf7383f8f4L, + 0x9ebb5a8e256c4edfL,0x7513f7c0a917a2aaL,0x31eb5a3dc4824bd9L, + 0x2d6a4cfc2ed4a7f4L,0x9ee354aae0c9c30fL,0x0000000000000179L }, + { 0x20eb09c9cbf2d15bL,0x4574d31c2547c112L,0x76543e3dd38e79adL, + 0x1d35b8874edc05c4L,0x3ac58dc7aea4de85L,0x9aa6fa0aaf5715e1L, + 0x8869704eb163a66fL,0xc685ea7ad14001caL,0x0000000000000083L } }, + /* 5 << 105 */ + { { 0x5b6711a5052b505aL,0x3b32ee5adbb104daL,0x3a59883e14432cfaL, + 0x57b6d6bc88b7c74aL,0x363dc2628d5582a5L,0xbe6d3bf58928103eL, + 0xfffbeb0963b06525L,0x29acbf58fac9b5ecL,0x0000000000000002L }, + { 0x78696df41ce15975L,0xa9732060c78337eaL,0x288fe1958aafa892L, + 0x5de9cd9132a7be5aL,0x2f1baccdbec8ed8fL,0x88131f8db1f0d997L, + 0x8d15c4049d123bf3L,0xf460d41630a1544aL,0x0000000000000029L } }, + /* 6 << 105 */ + { { 0x95524dcea3342331L,0x2ddf9b2f0e413b88L,0x342e0ce86a03fbe1L, + 0x8e2dec3eeaa3999dL,0x3ccf191b5ca80ad0L,0xf93ebc99804b9c29L, + 0x97d83573f2282b48L,0x51f33e7359ecb135L,0x000000000000004eL }, + { 0x789b5cfc7d53f035L,0x5ff3ae9a209b881cL,0x09af30bd80dcd35bL, + 
0x850cd0aa799b2059L,0xb660d065e2642327L,0x9d9424520f5134b0L, + 0x1fd8897381ab299bL,0xe126dbe9b6a2ba25L,0x000000000000013cL } }, + /* 7 << 105 */ + { { 0xcc6292a08a547e2dL,0x9a76a8e180565c69L,0xda4d2475fb616140L, + 0xc9cd45238b5a04c3L,0x8313b6c0554879adL,0x9a103a9522038226L, + 0x4e5db49dd06a57e5L,0xfc49d649d1f333efL,0x0000000000000063L }, + { 0x529f97e82f30ea09L,0x06692dc9416e0148L,0x534c9f6ef6dcca0bL, + 0x26f9f8409d575d04L,0x93f66c9743c153d2L,0x776b12bd9d1b22cdL, + 0xa44528e4b4000757L,0x738ecfb9881f46c4L,0x00000000000000e8L } }, + /* 8 << 105 */ + { { 0xbee094232a7f01fcL,0x75176a16998af9a2L,0x13273068fbe89a7aL, + 0x3e74aa6f13c42b6aL,0x7da022be579c61b0L,0xaa759216ff23e10aL, + 0xcf7f7d1afd852641L,0x0e9aaf1c1090b6c7L,0x00000000000000dbL }, + { 0x483b1f8c6d9ebcadL,0x25f3aadc987b2c42L,0x83a7bd03f70950d0L, + 0x67aefd6a14f250eeL,0x6e90c4f990070d75L,0xb9d65432035fdd1eL, + 0x190281bb2ab23f7eL,0x36008cb1b69015daL,0x0000000000000137L } }, + /* 9 << 105 */ + { { 0xca31701dc0f403e8L,0xce036ea893374a84L,0xf15352701872a713L, + 0x346ccdb0e92111e7L,0xd0427b8ab3ed8f01L,0xa165cec95fea17ceL, + 0xee36680a6bff1155L,0x0db2ff965a8d9ad0L,0x000000000000004bL }, + { 0x8f2638947cf7d4fdL,0xb1a229c024f483f3L,0x141a47114139f5c5L, + 0x4449f744e2a1f981L,0x1450ba3101c7860aL,0x5b968d93857f6d90L, + 0x63ac13fa1646bbaaL,0xb43e9791d771bff6L,0x0000000000000110L } }, + /* 10 << 105 */ + { { 0x105147eaa3ef560bL,0x4a60db0bdc1823adL,0xd8e3108b48c7d8d2L, + 0x562883f0a2aa9b3aL,0x59413cccf07a672cL,0x8d5b9b0ad952cc65L, + 0xc63448bef4ec1122L,0x82790e4d35ea3ffcL,0x000000000000003bL }, + { 0xb50f1a1a88531051L,0x5529d01f18f9cd8cL,0x57f285f266427c66L, + 0xc98f9e0abe17e614L,0xa773fa6e3bd58a1cL,0x8598347086cfbc69L, + 0x54cba22bdac04edeL,0x847b94b086e3311eL,0x000000000000015bL } }, + /* 11 << 105 */ + { { 0x1d61cce6ad9b2624L,0xc78885f8427ab78dL,0xd59f99b14857d152L, + 0x3346b89ca5824a84L,0x8430b19997613060L,0xf59e15a866033997L, + 0xbc442c8ea4ba1ed0L,0xb74f9fca46645f9cL,0x000000000000010fL }, + { 
0xe65a7edb5a48ff11L,0x145010e14866254fL,0x041641c5b4e7b068L, + 0x54f8ae8efeb7efdaL,0x506d0109d08412eaL,0x4aed71b9d1b1a35eL, + 0xfcdcf29c36c821e5L,0xc9b518d3da012ad9L,0x000000000000015aL } }, + /* 12 << 105 */ + { { 0x2dc395a5556d2be5L,0xd27a811d2fe83cf2L,0x915fbe80e070e5d3L, + 0xc3e38ef0c80a61afL,0xc1b529f6b988671fL,0x74b7fce60412de40L, + 0x8ff8c2c5a6ec7d2aL,0x7d3093d743346692L,0x0000000000000032L }, + { 0x6d6fd108ef669f24L,0xdb66a863e60d43d3L,0x2cdb23a2da77cc60L, + 0x6baec01bd73a2bbfL,0x4ad344c27d5fe4c5L,0xd6cf895c9f1df645L, + 0x9cabb9379501481dL,0x3f57061a600abd58L,0x0000000000000042L } }, + /* 13 << 105 */ + { { 0x9c26b76ab37d43cfL,0x4ff11777927c55b2L,0x585193b49f3f5d94L, + 0x3d85adc371de2d3fL,0x76d195fa70a2a2beL,0xc91acdf60f514a93L, + 0x921b5a28d78a46abL,0xe46c201728239dc2L,0x00000000000000e4L }, + { 0x6d7969d8d3531ba7L,0xf409eb62ecfbe5eeL,0xb31fe091d2c08a17L, + 0x250b1f2efecaa70fL,0xfc73982e66500ad3L,0x4dac9fd1ffab2445L, + 0x507eaccaa3931986L,0x83eb25409bbe5225L,0x0000000000000105L } }, + /* 14 << 105 */ + { { 0xb9e3b8bb17a515bdL,0xf8d68dd2d396d767L,0x7de89508decadc2aL, + 0x9831907c75695a61L,0x4c5f2cc7a3b7d429L,0xcea07ffc4ee09e20L, + 0x309f2c3fef0d7f4aL,0x3227ec7f9643a1feL,0x00000000000001dcL }, + { 0xf8cd3e8373ee6524L,0x5570129e98f4749eL,0xc8a020dfd9f271b3L, + 0x0ab6677db2fbafd9L,0x0a0168838b084241L,0x466644d787d85db5L, + 0x3d631143fb15dbe1L,0x698e8237c6afa7c0L,0x00000000000000a0L } }, + /* 15 << 105 */ + { { 0xfb9c0840bf144a2fL,0x1fddb4d78eea78b8L,0xcd5da1617a68c123L, + 0x57bac9f5ce110d74L,0xbc329c6f726a02baL,0xf2e4e02404b8b550L, + 0x88018b6a9f558689L,0xd1eb211aa0192931L,0x0000000000000066L }, + { 0x5501bc7802a25d57L,0x5eabb394b7c28b33L,0x4a322d1b7fbda862L, + 0x67f3f67e6314f639L,0x06e2768557f63f41L,0x2d1fa432b4657646L, + 0x0338960d3950136dL,0xaa94f24fd48716a5L,0x0000000000000188L } }, + /* 16 << 105 */ + { { 0x80d935b163b7a2ccL,0x3315375ed6dc2acdL,0x400bb7948b895a51L, + 0x35962ff9b28f625aL,0xd75e5cd7ef66bec1L,0x3bea41715157464bL, + 
0xf372e7c8605c7eb3L,0x3741fbf0e3301a57L,0x00000000000000bfL }, + { 0xa5388110d0c6184bL,0xcaf6355ef89ed74aL,0x4e170981fa93c1eaL, + 0xea60089b0dd9b47fL,0xa27efd8b3b9cda73L,0x5852497b5ee68672L, + 0x60f0dd9c45bac304L,0xf444a8f03048cd59L,0x00000000000000baL } }, + /* 17 << 105 */ + { { 0x95f255bb5c07340dL,0xf8de4c0b69160158L,0x234d0988543e6e26L, + 0x728fe0b5fdbbdcdaL,0x28415205dc5479d1L,0x0d10dabb4737ebe0L, + 0xe66b187c8bcd92a4L,0x7c1384f8268d40e2L,0x00000000000001b5L }, + { 0x8f6f9b23c6f33f05L,0x84188aaa402cf04dL,0x0c34d78fd3d3cb54L, + 0x99b6220be5a78cdbL,0xca0404e0f3f4a122L,0x3e0f1a94d9b7dbc7L, + 0x69544dcc1e434d32L,0x3c5e1f066cf599e2L,0x00000000000001a5L } }, + /* 18 << 105 */ + { { 0xd5c3ba5d9b7426e2L,0xbf007ff534f50a36L,0x1f35505edd9b1ae0L, + 0x4207411bf1170c6dL,0xa092286e1fa9f8c2L,0x8561f6d09aa935faL, + 0x25f6a210887aa36eL,0x1326468d916cf990L,0x000000000000014aL }, + { 0xc5b4aec75f4c5f97L,0x19078493fd51578bL,0xcce5c1b5d27f947eL, + 0x555a000791d55544L,0x673149265ee062edL,0xc2e2b9bb30473d65L, + 0x84a0705f0c4f654cL,0x2e7ddefcb53d901dL,0x00000000000000cbL } }, + /* 19 << 105 */ + { { 0xad0f207da13d6874L,0xaeef4cf48cb34cc1L,0x9d7d25e0cda81d1cL, + 0xe8c11d3a47b69696L,0xdcacc0c193463587L,0x8da07ec922a666bfL, + 0x586f6831dd7d59bcL,0x98e125d5a4e67363L,0x000000000000015dL }, + { 0x0f9878144fc1e663L,0x739c449ae54e65abL,0xc2f151cddacb89f8L, + 0x54c9468cfda8de8bL,0xfe379ed859e9db53L,0xe53c1246de0d711eL, + 0xc72d6669b716288dL,0xd98eb6947ff434b8L,0x0000000000000197L } }, + /* 20 << 105 */ + { { 0xd8828453ba55c50eL,0xfba129a6011f25cfL,0x92b111d214212513L, + 0x606c4cac10dc46bdL,0xa2bdb94ec971ab1aL,0xef64fc272ed13cfbL, + 0x4e6abf3b6d95da9cL,0x893ad58b6db4e176L,0x0000000000000093L }, + { 0x8aae0314242e9cd4L,0x7d6d2214e64a78e2L,0x03a80376d3b2f48aL, + 0xdd43f77b18e8cac0L,0xf140eb83098210f5L,0x0a31f295b84aa3f7L, + 0xdc0dce3edf6e0c08L,0x69d9d0ad1f6094c0L,0x000000000000016eL } }, + /* 21 << 105 */ + { { 0xed2e0249efb97167L,0xb89824df19f6e8d9L,0xe23ede3948475753L, + 
0x70604a1063c1dad0L,0x722e21c1ccd7cad5L,0x6df0e5f2a10d5e50L, + 0x61ca16329aa48e17L,0xe65eba88f9569945L,0x0000000000000023L }, + { 0x70f4a192cb5b92ebL,0x2b9a208a8900f126L,0x76409cc9f97972eaL, + 0xf41dcf4ff70e601cL,0x2650a3476f181642L,0xfb733417318b0fe1L, + 0xc6b04e56b5a047eeL,0xca664142806a09c4L,0x000000000000002cL } }, + /* 22 << 105 */ + { { 0x1ac10389b68f339fL,0x91b38759ed194f5eL,0x3914798fba6b7970L, + 0xf3539fa888adc703L,0xafe49a1323a1eb1cL,0x07391e320e9a2944L, + 0xfe950d80c196e2e6L,0x757a6aec3d5aad1eL,0x00000000000000bcL }, + { 0x21f49f512bc1cf8cL,0xdb4a00d39672f74dL,0x1c85164cb58d71b2L, + 0x8dec2f612cd5ed70L,0x88bf4b050eed1979L,0x33896b492dbd7dd1L, + 0x0524e9cf77fa12f2L,0x42e60f7f839d35a8L,0x0000000000000029L } }, + /* 23 << 105 */ + { { 0x491564d68731bfb8L,0xe89203481128cfb4L,0x0026a96d348cd681L, + 0x58193c9ea0af729fL,0xa3e1089947a22bb8L,0x074a8f92dcd1bfa7L, + 0x45cbe7b054946516L,0xfea57561cd7b02f6L,0x0000000000000069L }, + { 0xb19a61bc10b93767L,0x2866d9cd9dcdb1b4L,0x8f540d9d3b401ae1L, + 0x196d3843ed89cc1bL,0xfaa6336ea19e3aadL,0x1132d0261b294d32L, + 0x9aff85ed6a0b975aL,0x11e2111302584ad0L,0x000000000000003fL } }, + /* 24 << 105 */ + { { 0xcacf976d657d30afL,0x8d99f439e599602eL,0x89b7ba5142c60609L, + 0xbe1fd86127695726L,0xd1fe4929cf48052eL,0x785c994775a3938aL, + 0x89c0d82562f5496fL,0xcf585a3f24237d10L,0x00000000000000eaL }, + { 0x3d2d602df48c877eL,0x56732dbcf094b87eL,0x5603dd5c4c825b63L, + 0x41a7454849d45408L,0x573734f5ddfc690bL,0x5fa016412c2993aaL, + 0x1bd91fcaaadfe3cdL,0x3cd67d47be83c452L,0x00000000000001d2L } }, + /* 25 << 105 */ + { { 0x1003b83b3cc92301L,0xf5e76d65f8f493d0L,0x2ce44912fd07df7aL, + 0xb2781226fa8fc5caL,0xc1188930371bb7b0L,0xd903037daecaa2f6L, + 0xb5a0c7b572ffff20L,0x250bc26809e28877L,0x00000000000000daL }, + { 0x51246eff1d835cf8L,0xca86aff7e2aa17cfL,0x4a286343ed95465cL, + 0x761b330a2e0ec7c4L,0xcec609e70e238a29L,0xaf57e17f2ded7b72L, + 0x783fee52dd97a8e7L,0x9e70a6bac9b0e6e1L,0x0000000000000148L } }, + /* 26 << 105 */ + { { 
0x9ba159e917acbbacL,0x21d2cd40b3b2d4f8L,0x19d30414cf0c0962L, + 0xea38bfa1e46a922fL,0xbe6961f2463d079cL,0x811dff90c6bf5348L, + 0xd96c494b785c3396L,0xf3ad523ddcd944e8L,0x0000000000000066L }, + { 0x634d8e7d206d05a8L,0x15b16c22a1324639L,0x16b8add8e59be967L, + 0xfeae166da509f7ccL,0xb9c7e3979f5a873cL,0x7bc71c573ef93a82L, + 0x6b70e12d6115406eL,0xe22f4254de6bab3cL,0x00000000000000adL } }, + /* 27 << 105 */ + { { 0x7552422040c553faL,0x8b57e7c008e5d334L,0x69dc672bb0576f54L, + 0x93f4183b389374d2L,0xb75b0d40cc422c64L,0xb44b0e56287a6c2dL, + 0x6409c0e89c25d3baL,0x02c259d8c06b946bL,0x0000000000000089L }, + { 0x84719a851b45cccfL,0xca07d64720613663L,0x04055dbca18c0ce1L, + 0x2769ca934c88632aL,0xebd3eba9ae212295L,0xbbcca701d7d19150L, + 0x7890908d5520d207L,0xbe9223c03918f231L,0x000000000000001dL } }, + /* 28 << 105 */ + { { 0x7681afca91957c78L,0xd0e5379bf37ff077L,0xf5d60a4494adc198L, + 0xdf6fd776d87fe24aL,0xe361851fd13e39d8L,0x2513ccb0576ba50cL, + 0xfa50610afb934e52L,0xbeb4cabb8dfdc65eL,0x00000000000001b5L }, + { 0x578b16474b080c52L,0x5c27918e45a7bf77L,0x8eb701fd5007f0c5L, + 0x98bbf9989230a8a8L,0x82cf1074790f53d9L,0xff6b6226a375084bL, + 0x09a928db2eb7289aL,0xc28d4dacfe5e2315L,0x000000000000011aL } }, + /* 29 << 105 */ + { { 0x31bc319cc98a9c37L,0xa0bb2283a3984dbcL,0x3f301a6b54cd9423L, + 0xac47b1b806b84615L,0x26450a5582d1dbffL,0x1d5657363a97de2aL, + 0x47aed4ea512c76a2L,0x7ccd33197fd528d3L,0x0000000000000055L }, + { 0x2164253f5696d45dL,0x5e0bf77bb4f480d6L,0x1abf68e6540a1dfdL, + 0x17538afb9e1e823bL,0xa0f9656e310ac00aL,0xead8fe6396b4c493L, + 0xbb21e9ed4430ccc4L,0xe750b446f8db037dL,0x000000000000014fL } }, + /* 30 << 105 */ + { { 0xfba9eb2d3cec0762L,0xa9fcc5c16e39d300L,0x49efec4e5d9d4024L, + 0xf0de3c99a14df0e5L,0xf1a1eda4cd197e7cL,0x23c79138c8fa57dfL, + 0xfa27666efaf7ab50L,0x88c563d41b17e0b9L,0x0000000000000103L }, + { 0x354d24b33bec9b99L,0xa6f3879ea3b30a27L,0xad3cbf31c4a341d3L, + 0xc39733b81873136bL,0xb0e58f73d36a0e8eL,0x6de55e1efb08244fL, + 
0x921fe1304a5d4a2fL,0x2ab1ca90c634beb7L,0x0000000000000176L } }, + /* 31 << 105 */ + { { 0x07c0f5899a41407aL,0x9c8bf2b39bbbd8e9L,0xa5b4bcf8e513a7dbL, + 0x85e61af7f2979de9L,0xf4ccbb625ca009daL,0x41d81be066ef1639L, + 0xc7870e2a74a14becL,0xd0817703e04c1567L,0x000000000000000fL }, + { 0x2be84421f2970915L,0x59794c74f26c5082L,0x8883a0bbf491677cL, + 0xcdcb38a31f01db0bL,0x7ba2101e6fba9ddfL,0x9228e0674b0b0f97L, + 0x0000e6124d5c084cL,0xe7be273d6bd21734L,0x000000000000016cL } }, + /* 32 << 105 */ + { { 0xd15ea6b41f8670abL,0x1957a0f1199b1007L,0x113aff43e3efadc2L, + 0x07c606090fdc6abfL,0x93e7ba310bb0014bL,0x0f6b7e135d34945fL, + 0x2303a74aa9f86051L,0x6c9fc83a312cf1b2L,0x000000000000017bL }, + { 0x5588cca5e501aaa7L,0xef0d3bc16e7ac7bfL,0xbffcd55147339a1aL, + 0xf83b4cb8c36eaf74L,0xa4e70ee39ed93d5fL,0x0972ac415c249069L, + 0x2e9968feca09bde3L,0x1b5b37298c5aec39L,0x00000000000000c5L } }, + /* 33 << 105 */ + { { 0xacc96c98b769fd56L,0x86d507327dae0910L,0xa1dd43651386165eL, + 0x38e6d2a3f3213894L,0x890e1328f8b379aeL,0xa6200559306788d4L, + 0xf88c4751a7def4f1L,0x6975c2db70bc1914L,0x0000000000000100L }, + { 0xa8f2f1c9b892640cL,0x5b1ed01e21f7b44bL,0x13ff919bbacaffcbL, + 0x76bae5427271c130L,0xf6daeb9faf5a9725L,0x9da7b38033ec07e8L, + 0xa5cf0ef503585e6cL,0xf79529ffe6ef4b51L,0x00000000000001a1L } }, + /* 34 << 105 */ + { { 0x1f7b4389bdeef89eL,0x6789ffd1066ce776L,0x671ae6869d25dc8aL, + 0x6e4eaae92a888ac7L,0x232dbd9f46a4af36L,0xbb17127ac6fea211L, + 0xe34ac986ff807938L,0x0222d0b4132b4057L,0x00000000000001cfL }, + { 0xf1511b8e8217c466L,0x04a4581414fdd478L,0x49147df5a79487d8L, + 0x9bd381fa23c6e0fdL,0xd91d805a9f74cf82L,0x0ab490b37a8b86c7L, + 0x090cf1b26d582a67L,0xc64ddf5627fa5841L,0x00000000000001c6L } }, + /* 35 << 105 */ + { { 0x3bdf7a2ad1814226L,0x07c5dfc8f027968dL,0x4bb29e991211439dL, + 0xf7c1b4f71caf12c0L,0xd7b807fb79f9fde7L,0x938bf15f5fb1dd69L, + 0x073eebfd1780e837L,0x642b138cb0faac2bL,0x0000000000000168L }, + { 0x56b0d340082b362fL,0x0bfe8f67f85cc2dcL,0x50c288955c32b783L, + 
0x1e53f45c7b898114L,0x71b9ae7056cc9e6cL,0x56b5107c72099c86L, + 0x63311d09311f3509L,0x78acea51d6a5f3d0L,0x000000000000005dL } }, + /* 36 << 105 */ + { { 0xc2089b19fcc9c03eL,0x089cb861b3c3b222L,0xc98b822e1ce7d1e5L, + 0x3dabb2044977f564L,0x227cb87d5ea70d29L,0xdcafc8a47069304cL, + 0x20d80ad6802b1253L,0xca26c7426b904cd6L,0x00000000000001b3L }, + { 0xb9ce07cead8419aeL,0xa9b14ff2a9544909L,0xfd3d940bb7a73089L, + 0x5fc97e71d3d01c28L,0x84eadd2847850cf8L,0xb54183bb27f823aaL, + 0x557fd65129af6bf0L,0x180b581acc18b8a8L,0x00000000000001f4L } }, + /* 37 << 105 */ + { { 0x7a4cba6bb978f541L,0x3e6f49205343823bL,0x2bbd7ba217fbe204L, + 0x4026c8212de816d0L,0xc3b60a1a0e9b6bf8L,0x9ed6b7df684e996dL, + 0xf50e40abe73868b2L,0x10b2f15cc492b090L,0x00000000000000daL }, + { 0x221216615aab5f3eL,0x8e9e9c682529ac88L,0x12a17b7723cdd031L, + 0xef53fa6250d3b5ecL,0x2d09807c8fad5c6dL,0xc40e0cf6487138ddL, + 0x52d5db3b323038f1L,0x1425a7359d09f1aaL,0x000000000000019eL } }, + /* 38 << 105 */ + { { 0x59fed1c491a11ed8L,0xfe263c0a3180d98dL,0xdcdc10e42d368fcaL, + 0xaa2f0a1bef1e9926L,0x768984895745f2e5L,0x48830803112ada7dL, + 0x4c63137696c9e854L,0xc0e129f08250e11dL,0x0000000000000191L }, + { 0x457dd4a21e6bf787L,0xecd443ca026b528dL,0x4a02a60ad84d6716L, + 0x5a2a31fee3f215d7L,0x05c6ea2ef4789397L,0x78bb4bc49df48571L, + 0xbd322028c263a45aL,0x57bc9ae162fc82e6L,0x0000000000000131L } }, + /* 39 << 105 */ + { { 0xa51538ca93f73b65L,0xa209bc3c23ecb9fbL,0x5f4b4dbe6590de4cL, + 0x410efe51eba978b2L,0x2c2cab5d0159bea6L,0x506278d2a9e69f90L, + 0xcad07d66b89db270L,0x5b59625e22847a48L,0x00000000000000abL }, + { 0x4c305e6bb8cea678L,0x5842bcd09909f9a3L,0x53a5a114477599b2L, + 0xc7c5a7e8a735de6aL,0xec49f912c9df0d78L,0x794e2c2d3be4756bL, + 0xdb7b920d68824272L,0x3c4823e0e7d2549fL,0x0000000000000156L } }, + /* 40 << 105 */ + { { 0x8ff38aa4093edcb7L,0xc3c6b459e8f5564fL,0x613e5157b8935ffcL, + 0x1eba7a2ae10e448eL,0xcf8d70c9fc31845bL,0xe1e4d5bb77f5a42dL, + 0xe61bc59edd07816dL,0xa0928cb3e85fabbeL,0x00000000000000c4L }, + { 
0x12e7717eacc0393dL,0xca036b26540be4cbL,0x4f59050e36dcf32fL, + 0xdd2e7ff9234b425dL,0xbd17895136b8d833L,0x132d27fef989d479L, + 0xc2049ea7ba09ef6dL,0xb526a9b5c83a3b7cL,0x0000000000000157L } }, + /* 41 << 105 */ + { { 0x06a4efb20228649cL,0xdffc2bcda7942aedL,0xcf2e51715b4eb8e0L, + 0x399bb5320052238eL,0xbc53cb0c46e3f1d6L,0xa020f3e430a71ec3L, + 0x340c96716ae10068L,0x0bcd58b7dc04715bL,0x00000000000000f7L }, + { 0x87deb87d8f9d8244L,0xc510965fb14747edL,0x5e084f544fa3df33L, + 0xdf3d534d10cd6916L,0x15345bf7a9379b76L,0xa6d306f97dedac05L, + 0x18243b39dd131822L,0x12fad40f9d9eee60L,0x00000000000001d4L } }, + /* 42 << 105 */ + { { 0xe47e92bfdad2a018L,0x7169fc3557faecabL,0x824b95d921861e70L, + 0x3691cc6191077fa6L,0x39b9bee4de1dab77L,0x3166f0042a4c87b1L, + 0x6279470b574cb51fL,0x0fd510aa9eac155eL,0x0000000000000040L }, + { 0xbe71e07d80d4bb34L,0xf09f4e405a9bdb09L,0x7045942d1a249979L, + 0x603c2c25ed241724L,0xa6fa617216c91e0fL,0x9fa0a49fce6e8b62L, + 0x9767b1486cd9fcd8L,0x223901649b1d52e6L,0x00000000000000c5L } }, + /* 43 << 105 */ + { { 0x2ef5152be65ed57bL,0x7cc90f473bb1fd66L,0x98f6873244e66f0cL, + 0xa3447ae327bd8ce1L,0x324c3ac08f22eaccL,0x838a435f0397222dL, + 0xc63bee579b0ecdb1L,0xd0bc1d6a9d2d789eL,0x0000000000000015L }, + { 0x1c6f47cf4202af3aL,0xe847a6104c99a557L,0x9cd3807e4b94f1f3L, + 0x703f916b8a2806c1L,0x8fb83be08cdede77L,0xb07b30bcc62bf497L, + 0x90f4da36187ed83dL,0x035fd29aed3131adL,0x0000000000000157L } }, + /* 44 << 105 */ + { { 0xb259c83324eedf44L,0x2037b6fb73545c4dL,0xe126c4834c157523L, + 0xf80292adb36b003fL,0x4ae3fcd9772a9a08L,0x424a61f919be1da2L, + 0x2e2b901d8026443fL,0x879864f1dd59dc90L,0x00000000000000d9L }, + { 0x4b838953b15eac94L,0x0c5fb88ce1a57ca6L,0x9a09e28ada88728dL, + 0xa80a4a9b6853a1deL,0x46a596bff0e07484L,0x6511ec6e7b61760bL, + 0xd2f6b328571859f8L,0x5e872f813cbfbf41L,0x000000000000018cL } }, + /* 45 << 105 */ + { { 0x3011bebc26674da8L,0x667107b215be6300L,0x0c1b7f26f8a583b2L, + 0xa10eba84d1d1ee29L,0xd03a1e2d00f8d4c1L,0x20d5861341f576b4L, + 
0xae64da886806cd41L,0x75f1394a49176ef4L,0x000000000000012bL }, + { 0x5e06e1f20154a8b3L,0x8c54c4c780905a09L,0xed75aad004b741beL, + 0xc7be78f5779f5177L,0x1976c894ae9becfdL,0x20b11c80c14943e9L, + 0x6155f1fdebe9faabL,0xbba7d4a7a2d16b16L,0x0000000000000015L } }, + /* 46 << 105 */ + { { 0xc96ff6be3e035793L,0x8883f0151793d415L,0xc4f3ef64d04363edL, + 0x93a75f7f73f1283bL,0x9614a3999f7ba92cL,0x061bd7e241b95c66L, + 0x887792ff1c2276a0L,0xa80706d28ef5b98aL,0x0000000000000063L }, + { 0x2d7720a8b322dfefL,0x7623a0674dcabd9eL,0xc6936cd944e66fadL, + 0x9841e95ee16d312aL,0xe3e195b9eb94fa29L,0xf3edafaa2c284fd7L, + 0x7aeee8c08211a418L,0xebec3d3fb6305911L,0x00000000000000ffL } }, + /* 47 << 105 */ + { { 0xba2c92f5f2b0af81L,0x0d64e1ce1aae3f08L,0xa1f9d651cc19d12bL, + 0xc6f6a4bb78a15858L,0x12f3f8828d55e390L,0xb2791f72dc4b6dcdL, + 0xf51d8c9b758a15d5L,0x0f712d967cb6c682L,0x00000000000000aaL }, + { 0x42b424f386a49713L,0x63664f53b65c98caL,0x09fa6b470cee5673L, + 0x066107f5de3c1fedL,0x0797ca8db363b845L,0xe5df7026114d2e2dL, + 0xf827b59cf67fd6bdL,0x657c8f1421876f0bL,0x0000000000000112L } }, + /* 48 << 105 */ + { { 0x68a3040e09eb3233L,0x0a44434506fa4864L,0x579edeff83af8af8L, + 0xd619358709f49bafL,0x4597975c33f9ee19L,0x5318a888cb85cb9aL, + 0x6366031f423ff919L,0x32fa03eef3ef6e44L,0x0000000000000072L }, + { 0xadc2b71b8fe0bdc4L,0xa4b86022245354c6L,0xbd96981bed7a1ca8L, + 0xaaf0bf088d27b9f8L,0xcb56e8f255ef3926L,0xee69ee6af868426eL, + 0x04146106114f6b0cL,0x5bd940000b8d34dbL,0x0000000000000154L } }, + /* 49 << 105 */ + { { 0xbb9ce2cd216785d7L,0x6542eac454fec1d6L,0x26fad4f1cd6bc203L, + 0xd35862595dbdc5edL,0x9549ea9fae3698bfL,0x8c354042fe0d57b9L, + 0x9548e67f2676cd04L,0x059aec2b22edf4dfL,0x00000000000000cbL }, + { 0x21fe898ad316063eL,0x78930f796d38b86bL,0x72064b45d1154023L, + 0x3184619fc2fe7c4bL,0x9e47fa4ed2cc3286L,0x99484c69731d0886L, + 0xa10f25480273326dL,0x84c2f78729efc551L,0x0000000000000133L } }, + /* 50 << 105 */ + { { 0xc677d129f82dd083L,0x8a9a603584a35d1fL,0xa0e725f2f7707c51L, + 
0x0e67f36673314f89L,0x5ae78b3962c79980L,0x57e1ea302a48db75L, + 0x1c77997bc8cb73f3L,0x69c6ee32c1a61b9fL,0x000000000000019aL }, + { 0x53abe39156756339L,0xeb1cdabe4edc4cd8L,0x15f70954026801e1L, + 0x9295a5207fe8de52L,0xb6eda43869a0fb2eL,0xd9e6e518510522afL, + 0x121fd3cf4e8e7655L,0x0b587ad6ddc0367aL,0x000000000000000cL } }, + /* 51 << 105 */ + { { 0x1c0ee685d50f1534L,0x9abb1c5827e3c974L,0xdc73da165ae04b81L, + 0x0bc4a178fc2d401aL,0xd5c269e4ba33ef71L,0xf858a1fc996faa33L, + 0x859915cd45b85f00L,0x7c507b4238b33ae3L,0x00000000000001f1L }, + { 0xbde761b6490593a4L,0x388f79d436f6b62eL,0x6129b78b1b33fc5bL, + 0x04a3b9f5b8eafc22L,0x4a718e577eab0b39L,0x14ea35580398ea2fL, + 0xd737bd1bfca0213eL,0x34c27b9174332a05L,0x0000000000000185L } }, + /* 52 << 105 */ + { { 0x84ace35ba08df37aL,0x8f2e690a3ece9e6eL,0xe2649ed7bd6ed0cdL, + 0xc2618e7d5b70fbb4L,0x3f072e8727f675dfL,0xfa17717f88590abbL, + 0x6aa5119dc257a2b9L,0x016cbe362f48d5c6L,0x00000000000000ecL }, + { 0xf7d3a01b29117b53L,0x3e544e8558d4d8f2L,0x63bf0dc8bba83f7cL, + 0x341d35ac60f1bfacL,0x84e37ee48ea4bbc4L,0x981744d7192d6dc4L, + 0x7b559536ca34c6e6L,0xf32ddf2b067ad38aL,0x00000000000001d6L } }, + /* 53 << 105 */ + { { 0xecf882c7ecfed198L,0xbe88ee265c47e839L,0x6c267c95a60cc381L, + 0x285d1e767ffdbff6L,0xee34af6f6e349fd3L,0xa331769599f4ea4eL, + 0x5f87359375d39105L,0x3559f31e8c9de520L,0x00000000000001fbL }, + { 0x61d45de2d0a8892eL,0xbbe42953bd8a2c6cL,0x7792057fe3e3ec8dL, + 0xd040df71566078d8L,0x8852496ad470a52eL,0x4b6b08597ebff173L, + 0x1195102db36b2801L,0xeb28646305df9bc3L,0x00000000000001e5L } }, + /* 54 << 105 */ + { { 0x6e8ef1e946b46e28L,0x7cfd8d1f86249c66L,0xa8f6559d7a5aa3bbL, + 0x2bee79a6f7abc1b4L,0xc8286725b48f60e7L,0x2b883127f717fbddL, + 0x7ed12c1e2d4ef9e3L,0xfc910265e008750eL,0x0000000000000024L }, + { 0x56569b61212f5d32L,0x97ddd0d69d4ab1c5L,0x639c33f149ee1f05L, + 0x97d1ba29b2886916L,0xef869ff04b553ea9L,0xaf504911b88cbafeL, + 0x524feccab9efa22fL,0xfa15902b321d6755L,0x000000000000014eL } }, + /* 55 << 105 */ + { { 
0xdac797ffc2df83f0L,0xca75939070f59d50L,0xc53a3b9d8dae80d1L, + 0x4ee2534321d73857L,0xd458295ea7ae4212L,0x5b29d98f30385f03L, + 0xda3843b910deef6bL,0xd148ab67448387b3L,0x000000000000009fL }, + { 0x68ca489bfa10caadL,0xbaf2095ba1ccac6aL,0xcf4d1eac05a5bc7eL, + 0x67866117ff5f28e5L,0x62d6cf59182d4a00L,0x96353d36ae15e91cL, + 0xf5b32693175f7fd0L,0xa160046d521eaa66L,0x00000000000000f4L } }, + /* 56 << 105 */ + { { 0xfe88213a38540b67L,0x16613cada30b9044L,0xb06dce427861d61fL, + 0xfb5c9ffde2b9cbe1L,0xae4af79ba5ae6125L,0x56e69b5a3ff351aeL, + 0x68de3b3f31df9832L,0x8d2c2960fa3fc1cbL,0x00000000000000ceL }, + { 0x979c3dd49fd40f91L,0xca979b58b5640389L,0x855dd28e836ea10cL, + 0xbf40ee0e17f1cc25L,0xe02bbb2f5fd556ccL,0x6aadb11005f32f3dL, + 0x26bf5c119366213dL,0xc15f02c49a1ca27aL,0x00000000000001b0L } }, + /* 57 << 105 */ + { { 0xc2cf8600f06edb9bL,0x74745d77824ec696L,0x2a502a21bce3ad49L, + 0x0e2fac9ccc8c2f94L,0xb6c43b6ab26006bbL,0xf76dbb35c74f3d21L, + 0x3559f25ed6920ba5L,0x0203fe602af91aa6L,0x000000000000016eL }, + { 0xa1ee163458415056L,0x49c48a0274df3e92L,0x8a871514959f3e11L, + 0x2a50833460c01400L,0x06ab882ff3c1f3f9L,0x788fd2f2f3f8a3b2L, + 0x8d9f63abe5d2052fL,0x6f9136c2baebe4e6L,0x0000000000000102L } }, + /* 58 << 105 */ + { { 0xad4aeaece0792955L,0xc662f8dc6c8cf64bL,0xf96c715cccf91d5eL, + 0x798040378fbd7f45L,0xf0f3ddf401cb31dbL,0x0448603b334e8ec7L, + 0x1c9e8c7be737fc3cL,0xa9fdbafdff78b864L,0x0000000000000112L }, + { 0xd2c6c2de72547858L,0x82dd7a88ff8f4f45L,0x55db641e9eab2f08L, + 0x7243199dd73bcc8eL,0x55e708b46a907e32L,0x361af050db188207L, + 0x4e21c1e461f3c0edL,0x948142fe7cd3c766L,0x000000000000013cL } }, + /* 59 << 105 */ + { { 0xf937b0e53e950ff0L,0x6e24b4e5de551199L,0x9190ae60d524b733L, + 0x23890323e070cdbfL,0x3b61f098514e259cL,0x63091050729f0e0aL, + 0x74ce4e07d2ded515L,0xec98c9a7417d44baL,0x000000000000012fL }, + { 0x3ada2c8f63dc0f50L,0xe80a5a8ce2593386L,0xa39d716914c9dcbbL, + 0x084c3faf0c4fdcd8L,0x8833788bc79dcb54L,0x852a9f9b6aaccf73L, + 
0xbe2be436ab0ef855L,0x8e522348aebf47baL,0x0000000000000147L } }, + /* 60 << 105 */ + { { 0x413a5117a2262a1cL,0x29c97c05795db169L,0xa03d9861d7ca7976L, + 0x8428c9aa730d1caeL,0xe2922ccf6fb7d613L,0xb8a33ca521533014L, + 0xf8c059a418266bd3L,0xbc7db2234ebc4a35L,0x0000000000000007L }, + { 0x6fce14a2f2d17d69L,0x91920c6844b28892L,0x9066e8d13c3cdef6L, + 0xf90115f8d5c6e3fcL,0x39f91f2fe9ce6701L,0x5ad4bcb4a1f30378L, + 0xc6b0e8f1aa92e892L,0xf2ab25c70c0ef133L,0x0000000000000036L } }, + /* 61 << 105 */ + { { 0xd256b02a6cb68a4cL,0x2b16306d2ee6b938L,0x4886fd97cad8da2cL, + 0x9a5accad3825c96aL,0x772d7b13cfe2cae3L,0xe16361a620e8843eL, + 0x32473b0333448c28L,0xf1a37e18be23247cL,0x0000000000000040L }, + { 0x63ebc83fb66042daL,0x583f691176c07055L,0x4f7630ae6a3fbd5fL, + 0xb785702efb0af367L,0xb7c837a554d558ccL,0xdbf9820a000c0c3fL, + 0x32b10b9d6233b57aL,0xc811d02fd02a7cc8L,0x0000000000000179L } }, + /* 62 << 105 */ + { { 0xf67675f773b899a6L,0xae60c0d9aaf10a99L,0x1090ce72648df5b4L, + 0xe325ffed3b186a86L,0x403f48e80c01ed9cL,0xf7453a5696c2e25cL, + 0x5151f743bd8e41e6L,0x76cac7110aecca2bL,0x0000000000000147L }, + { 0x5c51bf550e29a85eL,0x6c7fc654143718f3L,0x8bd8eebdb31a4a9cL, + 0x852d2032b22e7b3eL,0xf6cd67263b6c1296L,0x8f10ff0f4ca1aa08L, + 0x7ec8838028830ef4L,0x765904f339ee7926L,0x00000000000001c9L } }, + /* 63 << 105 */ + { { 0x2dde6e928637fd54L,0x0b87d325ba84f1feL,0xfc08553ab4af92e1L, + 0x7dcf26d634d231aaL,0x94da1a1a6fab3060L,0x33fbca8958b11020L, + 0xc68e11f0edc2b3bcL,0x21a56c3647f05ba9L,0x00000000000000deL }, + { 0x9daf1e861bb9fa34L,0xcc4688c2d6e87d28L,0xd32c7df423e13585L, + 0xb2b03cbacdd2746aL,0xc5c3af6b73eb2f89L,0x11fddbb38e2796fcL, + 0x3212dc8e0fa04a64L,0xc6628b6fd92a5784L,0x00000000000001d7L } }, + /* 64 << 105 */ + { { 0x671757a6e2adfecbL,0x22eb6b422ba58ff0L,0x845ad5524e6e393aL, + 0x3dc6f7cb60fb46a7L,0x9240526b0a84396fL,0x13c8f57c56152715L, + 0xab02366c6c45e7a9L,0x6d4f07bead159399L,0x000000000000006eL }, + { 0xf69fdd7f07f6e883L,0x946b2efb3f45aaa4L,0xc3390ff156806edaL, + 
0x22764fab21d28bc8L,0x363b41d10c20eb57L,0x7d693bb1f6e0fe71L, + 0x5ba2e46a2816dfc1L,0x53c3d41ab0f7ee72L,0x000000000000012dL } }, + /* 0 << 112 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 112 */ + { { 0xc2d7eaa1c6884ce1L,0xd7b35c4f9faf6322L,0x8d1a1d85337ea884L, + 0xbcc6a8090f946ebeL,0x45eab172754fb8d2L,0x1e0b251e91dbd926L, + 0x4b61112afdce8503L,0x80737c4915fa4759L,0x000000000000000aL }, + { 0x1973c88b40260e22L,0x83078cb7e48dca69L,0x4815d67fb199e3ebL, + 0xf57bde1f7b4de5d3L,0x88a1ebb5e139dc10L,0x7c20e7c66689dd21L, + 0x40cba34680884875L,0x5c82140239171e05L,0x0000000000000062L } }, + /* 2 << 112 */ + { { 0x922f12dd4e187fffL,0x5edc9b962faa9f8aL,0xb7c218f8c854c83dL, + 0x3dfcbf3265052d62L,0xbd809261053d38c2L,0xf871b4be31d0c279L, + 0x6db21156f30b4a34L,0xb50605148db71e6aL,0x00000000000000d9L }, + { 0xa08ce4a86bedc91dL,0xdf89cac0a0bae827L,0xc035a514aa3f520fL, + 0xd5e9631fda672398L,0x8a5c34867150a93dL,0xe176d6c30d1a66e5L, + 0x1aebd73874a2379cL,0x42d11fe6279c3eb9L,0x0000000000000139L } }, + /* 3 << 112 */ + { { 0xf2937edecb4c6645L,0x1e8dda6d1b68f3aeL,0x25a8cbca5c45eae7L, + 0x6f07c48eb1a328e4L,0x473a67acef4b3917L,0x5e4eeac1208c0836L, + 0xca67351bec92f538L,0x34a8f9f8177cf537L,0x000000000000015aL }, + { 0x7d7f6d7ab9a3216bL,0xa4ad0bab0ed36d7fL,0xd58507f3d4b646cdL, + 0xbd275213a2950840L,0x00f031c076c25f08L,0x300e0a0c3f46258dL, + 0x6ce16cd6a03dfbd7L,0x35e0211d40c879f4L,0x0000000000000001L } }, + /* 4 << 112 */ + { { 0xa033349762383e65L,0x4f94f82b681092c0L,0x583b450378a19ca7L, + 0x81c69ed050e7ea14L,0xbde43e882042e0a5L,0x752a9d3e6a9f7938L, + 0x865464d9af321417L,0x5bcc326fb5794804L,0x000000000000000aL }, + { 0x7714afcab958f657L,0x3b927db46e725914L,0x9ef1925c7fd9a9d8L, + 0xed13fdfdbe7e139eL,0x5b57aa066e80a43fL,0xc9c221be810f9412L, + 0xe6536f3a04ec7d73L,0x77765db5db4d13e7L,0x000000000000019bL } }, + /* 5 << 112 */ + { { 0x21fe770f84924113L,0x2c5d06120ab300cbL,0xe1699e81ed6c7197L, + 
0xdec38f45f5948795L,0x79db54b83a48da6cL,0x95f9d633b3f3af61L, + 0x3e4e733da772f809L,0x48dbe53a605e477fL,0x0000000000000142L }, + { 0x5ab3454e86f2714fL,0x87d83de7939b3218L,0xb980fbc096d16ff0L, + 0xba3cfaaddd96a283L,0xed41324416c3b69fL,0x57968830968c773eL, + 0xf315b38ced4d4dfaL,0xa013acc5a24cfb0fL,0x00000000000000caL } }, + /* 6 << 112 */ + { { 0xd064803e24e3c650L,0xa90109303b9aec0eL,0x407b393219f6f56cL, + 0x045d7e95d143be9bL,0x2933b6be47a9533bL,0x01b01ec62bc7f731L, + 0x723abd9a043e4c5dL,0xcd8a1bfd25c4e8afL,0x0000000000000095L }, + { 0xd719eded6d8a2f90L,0x08b7c6af8b5c2378L,0x027c6cb7c01df5dfL, + 0x1c020ce0155cbce9L,0xc3aeed933deb94d9L,0x2cae43c7a6f7fee3L, + 0x54637a4a7512ef37L,0x3b64366081582bd4L,0x00000000000001e8L } }, + /* 7 << 112 */ + { { 0xc6d7698f7e59dde8L,0xb1ab83ac08aab860L,0xe1972059c7fcac6bL, + 0xde4446ae656119bdL,0x8dfbbedef9f8d976L,0xe3b8e6d2009be348L, + 0xa8ead6ee974cb79fL,0x4a566503b7a415bfL,0x0000000000000002L }, + { 0xd661701b24c0d839L,0x6a3200d111c338cbL,0x5016ba3237f3769bL, + 0xf4396987a00f1d08L,0x16abf9062add49aeL,0x17dc53e804f3dbd4L, + 0x9ee15f5354a44690L,0xaaa77f7ea27e4c96L,0x00000000000000bfL } }, + /* 8 << 112 */ + { { 0x86dd8f04def62f7aL,0x42cd01b9cc9e5065L,0xd50b855643275c45L, + 0x404e04a59814155eL,0xf7e62f98266b665eL,0xd68e806e86eedc55L, + 0xb23e4bf34332001bL,0xe0b405b2366d78c2L,0x0000000000000150L }, + { 0x6e4c1160d49d6c3bL,0x10c209647ce72d17L,0x0c0ba5325475d05dL, + 0x77d8dc831523a4f9L,0xb053a8be5693cdc6L,0x5e0897a3e947e953L, + 0xde13f4517899d5d6L,0xbafca001ca78c763L,0x0000000000000013L } }, + /* 9 << 112 */ + { { 0x668d132f5602a1b0L,0xbc4a769fef7a02a1L,0xe67a3b7a4126a867L, + 0x6bcb37d5a775d18fL,0x719b93ef11901e4bL,0x3aaa34d531f7386aL, + 0x7f81f95156335bb0L,0x22ce53aa1d9a8fb5L,0x00000000000001bfL }, + { 0xb901f8b5580d76d3L,0x9ae57d49a4d6e345L,0xf7af2dee4e5b689dL, + 0x5d79160110e321e9L,0x43f66981301f6090L,0x8835a85f23c4f0ccL, + 0xdab2de3b083b5b68L,0x7fc613f51e97c0faL,0x0000000000000051L } }, + /* 10 << 112 */ + { { 
0x9655cac34d110319L,0xf55e56d996e35209L,0x6f375ec27c15d50eL, + 0x2d38334326e468ffL,0x943096d75c103732L,0x4b30c0d40b0e523aL, + 0x4169da7ced307fccL,0x5c1e47e9da5da806L,0x0000000000000158L }, + { 0xaf9ea9b5700e6489L,0x05d34fdbb2f6bfc8L,0x3b2dc12594068bd4L, + 0xb568253751b3f965L,0xc1a3271e109ff65aL,0x79ffae0e4b377ba6L, + 0x5e0149cedbfb4a77L,0xdfdb05a969f94a1eL,0x0000000000000143L } }, + /* 11 << 112 */ + { { 0x98cffe72a8e16494L,0x990dcdef990f6ae5L,0x2ffe758f7718e6acL, + 0x08fffe7f94b86a5aL,0x29ec8e3824f249ecL,0x1115cf1e702e2efcL, + 0x42d3b998ac7863bcL,0x9de49c4ba8469112L,0x00000000000001b3L }, + { 0x23c141d47f0f461aL,0xd622ebb51a75ea51L,0x671dac9b98084ef2L, + 0x930fc09d8fbe7d8aL,0xb8ef755e8fa1e70eL,0xfe65e8a389920cf0L, + 0xe79b849e876f0425L,0x1c6ba4df46c016e1L,0x0000000000000130L } }, + /* 12 << 112 */ + { { 0xe8f06c2f869db704L,0x1225fe8e1be3a442L,0xe40e87fdf139548aL, + 0xafff68201a7dfd4dL,0x883cbb604489813bL,0x21f07a4520c07c1fL, + 0x037847dd761bfeb8L,0x2879d201fe9d73f3L,0x0000000000000098L }, + { 0xcc3e104798fd5063L,0x9e3888796f0c1179L,0x6f8e0ca8f2e65336L, + 0x51d688edaf342355L,0xfef444c177e831aeL,0x34b47f9f18abf899L, + 0x4f7c42ecf9d15d64L,0xfe977f078747ba4bL,0x00000000000000b8L } }, + /* 13 << 112 */ + { { 0x1d7948e7e31a2875L,0x305e549d55f21221L,0xaf6d2fc5ebbf39ffL, + 0xeabb12c80ffed2feL,0xaf421cd07b32d227L,0xf8226502a9bec07cL, + 0x065359ac149ef0f7L,0x9e5e784e4f1cbce5L,0x0000000000000027L }, + { 0xe64bdf95e8995641L,0x4f00ac4c4e803ed5L,0x04ac0dec6b51176bL, + 0x6de249642c3fe476L,0xf765f2240dfd1e8dL,0xdae016d744709cc0L, + 0xdcaa2f3b8ee0b68cL,0xae531c980b6bdaa5L,0x0000000000000192L } }, + /* 14 << 112 */ + { { 0x586d8be345624df6L,0xc58ca225de489ac4L,0x2131279186184b44L, + 0x1bacffb7566e654fL,0x795c06ee841818e5L,0xea81a1f7a64fc54eL, + 0x4202ba3b7bed4199L,0xe8e7eeb7fe735c4aL,0x0000000000000195L }, + { 0x3c3260519c58b094L,0xd321218c31dda195L,0x2bf7a3196e707d92L, + 0xcde93cdbd88f1ad2L,0x80b6b9f4ceafc8a2L,0x8ddef6d7edd98905L, + 
0x999d8539ce356fbaL,0x57048edffb834c3dL,0x00000000000001d2L } }, + /* 15 << 112 */ + { { 0xb4c8a95f54f6c3c2L,0x75cb6585b4bb79eeL,0x42acf9813af10b62L, + 0x390e90f9b1fbe61eL,0x8b38992a73c905dbL,0xeb8eeaa4b2772547L, + 0x50f226739fb26d56L,0xa7fd3621bc8ff539L,0x00000000000001fdL }, + { 0x9f5f71c047cb14e9L,0x57efde78a4eea6f5L,0xffdcce979720279aL, + 0xf2602f0c42407354L,0xc32e952e4f8047b7L,0x6bef44fb120ea438L, + 0x64d8e221d5ec1b83L,0xd99027b78ef102a3L,0x000000000000003bL } }, + /* 16 << 112 */ + { { 0x8002b6d63dca9409L,0xd31714c1163b8124L,0xca655b42720a124aL, + 0x88388dbe9596636bL,0xa321822dc0b7dc61L,0xcdd952c85824e60fL, + 0x79ff44366cb4916aL,0xa115847e5f88dab8L,0x0000000000000009L }, + { 0x1eaf3d400aed0c43L,0x7a7196f7ea2a20a4L,0x4312341497781948L, + 0x856126d0e88d93fcL,0x96dac31fd5710224L,0xc23548f5243e6395L, + 0xf351df8b9613a79eL,0x0dae5d50626fb1fdL,0x00000000000000e2L } }, + /* 17 << 112 */ + { { 0x9e8c65562d81c8fdL,0x361ab7401015a8f8L,0xd8bacdbc343077c0L, + 0x37cd82a04634dcddL,0x0977ad1e940d6c70L,0x97fe72e327c38270L, + 0x412f62f6a07167acL,0x08deab9b451a09bfL,0x0000000000000150L }, + { 0xdd02b8d3abb38852L,0xef1b8f1d41ed4b96L,0xbfd538b1da6597c2L, + 0xf98a304e10475511L,0x232d62f619d54040L,0xa2b919809183ec5cL, + 0x5ce198ce8f3f79d5L,0xa5c7192628c2e16aL,0x0000000000000028L } }, + /* 18 << 112 */ + { { 0x8aaeb6bb3f28f069L,0xe57bd85a41cd19f6L,0x7a7033fbcdd073b6L, + 0x5fa5d883af974d72L,0x034fe2f8317d5354L,0x022dad78dc1c3c86L, + 0xb115c5fcdaddc33bL,0xd20ed9cacf71be7fL,0x0000000000000076L }, + { 0x906767d94036188eL,0x3c3cac57a74401b6L,0xb26d32b46272050aL, + 0x01cd2c3ee4d06b8aL,0x7caabb449244b30cL,0xa4c1874d25c6219fL, + 0x53a70b98afbb215cL,0xe357a7f7f2280543L,0x000000000000012aL } }, + /* 19 << 112 */ + { { 0x21419b4f117d5755L,0x6af1ac64475ac8a6L,0xc1226e159cb49eebL, + 0xd61aa69c2d3f8486L,0xaacb9140fb60ad9dL,0xe0fb2170b59dc623L, + 0x5aac6818eceaecdfL,0x1e88ae0f5cdaceecL,0x00000000000000fcL }, + { 0x823f91bd24c8c72eL,0x3f419586c2b074dbL,0xec995125d3c89804L, + 
0x8c98840af3104c36L,0x90445c3bd88cb43aL,0xe9d1da4f814367c8L, + 0xf5edf338d4636152L,0x316aba3a39ac293dL,0x00000000000000bdL } }, + /* 20 << 112 */ + { { 0x1ef1df604dd7f14bL,0xc190c8b79206dde0L,0x8c0095679fc144a7L, + 0x0ca2f30a0f11e938L,0x3f06119ad15a3b46L,0xce68675d1a911f3bL, + 0x3e234a0e7dd215dbL,0x909d38cd6028bdf1L,0x00000000000000f4L }, + { 0x49b323bb7b25c4f0L,0xe0a212449f498976L,0x8ffcd809bf2a0459L, + 0x1a6890a9cea1e7daL,0x6e2e2b99ab0cff78L,0x650e3170c5d1287eL, + 0xb5248d4d8ae5d083L,0x0951c926d7596441L,0x00000000000000c0L } }, + /* 21 << 112 */ + { { 0x9bf0fe1536b5757bL,0xf078da917322f698L,0xed973489b52119b0L, + 0xa22cb0b80dff82b2L,0x8f83dba01da278f7L,0x15a6cc1299da0c4fL, + 0x2f67fdfe5e334bd0L,0x25ab5ab602e8e1cdL,0x0000000000000125L }, + { 0x269f859aa8d8e447L,0x575d716c1b24a12eL,0x729ae77eb95f04daL, + 0x4372f9de30e0ed29L,0x673c0bfd398cfa49L,0xbe9ff4fca3ec6966L, + 0x5ce7e5e3d01c14e2L,0xbec8d2f406373e24L,0x0000000000000040L } }, + /* 22 << 112 */ + { { 0xd93727e0486bb813L,0x04283a3e1fbd99dfL,0xdc4788c99b75b637L, + 0xe6851f5a32167da9L,0x998f7431fbc4ea2cL,0x7a9c33d1a9183057L, + 0x6b3473245d20e7e2L,0x18a168600b90c80cL,0x00000000000001ffL }, + { 0x38a94d64bc888e85L,0xfe7b79ac9df19c05L,0x00bbefebf654ee0bL, + 0xebfaab4e7bfe85a7L,0x48deed60b19482e5L,0x287c06898db1c527L, + 0x5a6b27ad4c8eb494L,0xf68182ae0d447f2fL,0x0000000000000118L } }, + /* 23 << 112 */ + { { 0x4b816e241c2457e4L,0xbb437cd3f149da13L,0xe0be68f94998c797L, + 0x883b58207dbbacacL,0xe1da5fd525b58074L,0x77d77fde69592848L, + 0xd1f2bc60c4c21c04L,0xf2bd7b785acbe370L,0x0000000000000114L }, + { 0xbea3154637b024f6L,0x7f15fc8a9fbcaae2L,0x1b56ad116021c3bdL, + 0x9356fdf096b118b0L,0xb7865be1daa0804fL,0x3a2ca9e99574d0d5L, + 0xaa5adb0f5f999f14L,0xc25f9c38ebde48f6L,0x0000000000000049L } }, + /* 24 << 112 */ + { { 0xb4c7662650ee9086L,0x2d9cf5f46a31d89fL,0xff6bfb32dff60978L, + 0x5176dbe145a61de7L,0xd00d55447d0750dbL,0x0d83b80c85c9b411L, + 0x2dd6470906fb9605L,0x69b0c1333d535903L,0x00000000000001d7L }, + { 
0x94871938210a0c39L,0xf8331fb90c93f6efL,0xf7ee035f253771b7L, + 0xbfbf3ac00f1e874bL,0x6d02567e077969c4L,0x29a752a783d55d94L, + 0x4f69563f088bff67L,0x7b1161060b77d6f1L,0x0000000000000034L } }, + /* 25 << 112 */ + { { 0xba6aeb1a33687c9eL,0x7d52e286f08ec854L,0x53187e3da1008096L, + 0x6cc41c9a2b238ff7L,0x5c95188b399bf250L,0x4864754ddbe60f4bL, + 0xd266be6d3c9440abL,0xe59c36d4977447cdL,0x0000000000000128L }, + { 0x7c29f965bea1476aL,0xb8fec71e7624ae90L,0xfa1cc18fde04ae65L, + 0x00fa80b93a6b1843L,0x8ebd2e7305e7520dL,0xfc12b6a59d714810L, + 0xa219d25f3a532a09L,0x1121083c45999cf8L,0x000000000000013cL } }, + /* 26 << 112 */ + { { 0x137ada30d2de83eaL,0xda2153422a878fcfL,0xa6a5cee60e171f25L, + 0x3f69086f2dd4c3c0L,0x86088100fb43ba86L,0xef809bbfbb0ccb0cL, + 0xd79c4869c4d677f2L,0x2b2bdbb20404add2L,0x00000000000001aaL }, + { 0xdafc91eb22edf44fL,0x79590681f4d70677L,0xac8808fd3d9bfe05L, + 0x2b0c8d47afdedeb8L,0xc767c9b30f819111L,0x26d2b198b54b2fdaL, + 0x17a4d748425ecf19L,0x3d553a2e27bbc127L,0x00000000000001d7L } }, + /* 27 << 112 */ + { { 0xaf086404ae4f64fcL,0x24ec81035e4dac4cL,0xa4251f19f4188728L, + 0xb90da169ecde3424L,0xee6c23889d75d76cL,0x5cf9b73e303f7e0cL, + 0x4f0d6e9e45d8766cL,0xcbbe41aa6734beb6L,0x000000000000006cL }, + { 0x11f1929f6460b3c6L,0xbddb7165e81dc98bL,0x0dfd5fdd725622c8L, + 0x7c4b0cdc2c8090abL,0xf2605abf9966a482L,0x5ef91579fab0a8e3L, + 0x334d92ff7a5431ecL,0x5440c23516dc21d9L,0x0000000000000086L } }, + /* 28 << 112 */ + { { 0x9fa60a6af5784616L,0x54bc84246184acc2L,0x57d5d2c41435c018L, + 0x5f94475b333621cdL,0x5b7740d06e210ce6L,0x624d301d0dd3dbd6L, + 0xad599276a4216433L,0x7e1140bd4b3817e0L,0x0000000000000032L }, + { 0xadfe76b487f3e336L,0x88a9a1231a1b9b6fL,0x1bac6e4f14c14b8aL, + 0x8a531f4dbc215bebL,0x874545261c42d401L,0xc65a168cd62ecb42L, + 0x38188b89f24bf291L,0x94f7d5dec788c069L,0x0000000000000194L } }, + /* 29 << 112 */ + { { 0x7d9d6f15fd868b43L,0x3308e6e8e1332d89L,0x53a64fc25c2ea868L, + 0x819f727861bab859L,0xcd683cc16fc55f42L,0x46ec76818e4ac49eL, + 
0x16e180e3b85ccaf8L,0xcec7676e180c0dc5L,0x00000000000000f0L }, + { 0xcaf24b54f10d6c7dL,0x3bb37a70423e6ab5L,0xa40e49d25eb685afL, + 0x512b2a6ca52af2beL,0x495633b5253da10eL,0xa87169e1ac1316b4L, + 0xd9ede4ff3ea71d6aL,0xa7bde55c751fee86L,0x00000000000000c6L } }, + /* 30 << 112 */ + { { 0xb4dbbfc31d447fa3L,0xc0180e814e1f0ae5L,0xc4aa44e04e921352L, + 0xed72abd5dbbbcc8dL,0xbe0924e6f0fc4050L,0xc735729a7e5a1390L, + 0x23b83364443bdb32L,0x72df0a07ec53bdc4L,0x0000000000000048L }, + { 0x2c24dd590a570f18L,0xe436ab7291876f65L,0x5b92bcc4ffc6ae4fL, + 0x3d6471f3c9ebc873L,0x43efbc5d44292f69L,0xd47637134a107c1aL, + 0xa367b573248b7c52L,0xf099ea0671daf202L,0x000000000000013cL } }, + /* 31 << 112 */ + { { 0xbaf9d6a6b3e1adf8L,0x20ccb29705fd0ec2L,0x3241615122d8be8eL, + 0x8c0084416a2db0f8L,0x1ef0ebd4551dd651L,0xe6cc97edaca8e550L, + 0x9c6f70152c1571afL,0x6ffa2b32b9c3f7d4L,0x00000000000001f9L }, + { 0xd200e04cc91c0f39L,0x0e096f28fd99b77cL,0x9594a20807de898dL, + 0x55a442ae69bcb511L,0xce1d9649e015f1b4L,0xdd4a639869edda8eL, + 0x3955b85fcca25768L,0xa003c09dc949f4a7L,0x000000000000000cL } }, + /* 32 << 112 */ + { { 0x0fd02a81c0326cdaL,0xb52279ac0e5cf6e9L,0xd0e8f1cf682ad865L, + 0x01eb99642e847be3L,0x30468f94689baa91L,0xa466fa2bb4584d20L, + 0x99847f1b327c510fL,0x206bd41d26e6f2c0L,0x000000000000010bL }, + { 0x8a04fd93ba3011f2L,0x14c757b9af6c7e86L,0x4987100779ecbe33L, + 0xedf33e226185a74fL,0x35ad7383d4a03368L,0xeb5a269ece6efd32L, + 0xc83c5fb3208fbf53L,0x35030bb2702bb900L,0x00000000000001deL } }, + /* 33 << 112 */ + { { 0x286bf9c2facb4117L,0xddbf33d2133bb3bdL,0xe0c21ce9897c4fe6L, + 0x3f3a249c219126ebL,0xe925c2cfe9c7e211L,0x7999d7c0f418a42dL, + 0x24d5577c585a7c91L,0x580ba3affad6e5c8L,0x000000000000005dL }, + { 0x596e7dc97200c090L,0x8460b9a4531d17ecL,0x19f9aed95e4e1067L, + 0x522379e68a7a6444L,0xa6618a0e7cdc51a7L,0xb285055396e7f790L, + 0x605b7e1ff732fdd1L,0x0fe9b751fa8b87e4L,0x0000000000000028L } }, + /* 34 << 112 */ + { { 0x0686b81946cb78fbL,0xac50e15b367be8acL,0x6b74a620211895c8L, + 
0xc35a52e83817a3f9L,0x9d3a57cd4fc49432L,0x771878ad4df33060L, + 0xea4ae8015c216dbcL,0xecc02683da89b496L,0x0000000000000193L }, + { 0xea1efb5a768b6f09L,0xd97562e7e3ab79adL,0x61ad861c094a606aL, + 0x614dfe95b42bae9dL,0x0372b7a5558bf490L,0xfec9f31dd1f2089bL, + 0xc31fd1236d5d51d4L,0x26dec1eb0f1ef668L,0x000000000000004bL } }, + /* 35 << 112 */ + { { 0x26ce089dbac679acL,0xa9d9673a64c08615L,0xfe92cec3f9d9d747L, + 0x4616a4dcad6da7b2L,0xaaddf97cb6f6b8fbL,0xa1cabda1c32a0803L, + 0xe8591a79c55e17e5L,0x3bb889adc7dc707aL,0x0000000000000191L }, + { 0x22158400511bd374L,0xd10d5a727f1f4d0bL,0x5b40bd90e732bf5cL, + 0x88d82eb665a81746L,0x4f64f578fbcebfe8L,0x867c0094eee40ddcL, + 0x6934932a6c500cc0L,0xc117a16b667da9c5L,0x00000000000000a4L } }, + /* 36 << 112 */ + { { 0x5c7994b4abedee4eL,0x3e77983ba4fbf6d7L,0x9d7f330f30003463L, + 0xb2ec7f739a611df2L,0x595627f9a405b4d8L,0x187ea7ae0ea408bfL, + 0x2b087af141b1dec4L,0xd8d66597807e4012L,0x00000000000000f6L }, + { 0xfb1b1154ea7b83ffL,0xcb3c2b19d3b88c90L,0x8c15c0b5ff85a044L, + 0x65154a83e62fc44dL,0x1fbf5b7f26ed762bL,0x81d36b15c7b364c0L, + 0xdb0091f1bf2553b1L,0xb6dddcaf2845f359L,0x000000000000016cL } }, + /* 37 << 112 */ + { { 0xaa5114266f231058L,0xc6482f93928adf7cL,0x523a25c347a07da7L, + 0x4ab79da70c795a5bL,0x9a5026f119711fe7L,0xcf2212a729784bc7L, + 0xe59e7fa59822fbdeL,0x5b4e997e94e459efL,0x00000000000000d8L }, + { 0x085b185681605f9eL,0x8e3365c4f006bca8L,0x71d7b84b54838742L, + 0x2b18649aea5a336eL,0x3268764778f7c79bL,0x2b02db33998d6a9aL, + 0x830a0a96ce221a5dL,0x6c02d7d754314b97L,0x000000000000007aL } }, + /* 38 << 112 */ + { { 0x618ac20093031925L,0x160d9b20afb97ccfL,0xbd4b01c66aed35f4L, + 0x202c1b39edeb2215L,0xd52fb1cb468764e6L,0xd6da21af72b76e0bL, + 0x98740e64527eb510L,0x443caf748fdbc02cL,0x00000000000001b0L }, + { 0xd1d700ea574366bcL,0xb40254dd9b0e1210L,0x3172876c607a8809L, + 0x624a23de5ac14d73L,0xdb1e5debed4d8a80L,0x5d717d8f2976f6e8L, + 0xf4e1d47f42f699c6L,0x433896d520c0ece7L,0x000000000000015bL } }, + /* 39 << 112 */ + { { 
0xc648e6401f24b1f1L,0x783449d73e5f7c72L,0xa57ac4eff3742d85L, + 0x61ffe0c9835552b1L,0x62834ef697fdac2dL,0x02703b63f115fbd7L, + 0x44f47a41aa559e1dL,0x1e7e37c5d15332beL,0x0000000000000134L }, + { 0x427f35ffd9dd4be1L,0xddcf13578da4e6a6L,0xe76af9ebdda54617L, + 0xe3e193dbf90ee15eL,0x39a4c7219604bac2L,0x14789a4ac4760af0L, + 0xb38e14c8cebf3c19L,0x40f7c343d9f78662L,0x00000000000001dbL } }, + /* 40 << 112 */ + { { 0x7d46f1e8b4cf8254L,0x3c44cc7f8651132cL,0x3875ea48ee02fdc3L, + 0xd5d464377fd9ae7cL,0x25ebde075fbebf38L,0xecdce3a5ffa2efbbL, + 0x6d34a645aacaf39fL,0xca5b72131c664594L,0x000000000000012bL }, + { 0xe9ad950f97a7b67dL,0x37188f256fb141fdL,0xced17e9ca1d977f0L, + 0x8137e722c6c3dbbaL,0xc50f36d663a37fe7L,0x68e1ce2f8fea88a9L, + 0xbee6e56df5023489L,0xeaa15a26a2efd265L,0x000000000000013aL } }, + /* 41 << 112 */ + { { 0x504c9c3d75e5a8b4L,0xe977ce76a03d92b7L,0xc3f27a726d9519acL, + 0x5832b7597ec79b04L,0xb95c1a2f6e80227fL,0x0c329956fb9974faL, + 0x3359e49ef8bfa9b0L,0x2d20a5b0e2a1b8aaL,0x0000000000000034L }, + { 0x4fcc30afd686911eL,0x308f27ac164c63ecL,0xf28538a1ee74882cL, + 0x1b5788b17d109e31L,0x1f74b17e6527057bL,0xa00fba0ef43780f0L, + 0x43f846884577d23eL,0xb6a7ae44decaaa4eL,0x00000000000000eeL } }, + /* 42 << 112 */ + { { 0x032f87b82c06f35aL,0x64d3ff064e7bb1e5L,0x08996732093862e9L, + 0x3170addcccf193beL,0x641c81835da05299L,0xe296c9ff4ef3361dL, + 0x3995b3a1e8783402L,0x0800b438e47c8177L,0x000000000000012cL }, + { 0x6dd8391f62e4d595L,0xdb4d7411ab9a174dL,0x927d7c28f1eb7186L, + 0x26657338bd593ae9L,0x3bc2051cf5628e63L,0xa97e4b8b685408d4L, + 0x28251aebd2f3e22eL,0x4a08d5bc83d10ac4L,0x0000000000000043L } }, + /* 43 << 112 */ + { { 0x2a8ce2836288507bL,0xa635d48fdd390d24L,0xeee7975ed1b87f8fL, + 0x162563d6d731058cL,0x3d84bd12fbab97f7L,0x5c918d9c5fb43dffL, + 0x3c4bd11bc0a07876L,0x8d4ddedeb4d4f961L,0x00000000000001faL }, + { 0x38e6acf7ee510691L,0xa15ceec44c94614bL,0x50a5b266995668e0L, + 0x0d4a02f08e0821feL,0xb6f05bdce7b13d84L,0x8f749724f4633064L, + 
0xe9c87eb2218611c2L,0x266a06987e558ce8L,0x0000000000000193L } }, + /* 44 << 112 */ + { { 0x1a590ef8fd2add3aL,0x9a67ccb86eafaac3L,0x32b6e3eaa95c46a5L, + 0xdedf7158cc6fd626L,0xb43928588add9774L,0xea5bd604df9136c1L, + 0x8675163af55a7019L,0xb3f4557cc3a96febL,0x0000000000000111L }, + { 0x2835680edb0e4f25L,0xe969cf144cab4022L,0xae88c607bfa42739L, + 0x27cb7e15c80b87daL,0x4bb26559fc7432f5L,0xd083dbeefb7371b5L, + 0xa47fb4e384e05decL,0x2872d4720a3ab26bL,0x00000000000001c3L } }, + /* 45 << 112 */ + { { 0xeefa89d6ccfb12f2L,0x2096c071dafb741aL,0xd3d1918e4619fd06L, + 0xfddcceb9da05c012L,0x3385b550dc794688L,0x038c5c77c3911912L, + 0x770139d611721c23L,0x2266e021c30f428eL,0x0000000000000015L }, + { 0xfa215837ef6edd0dL,0x1c26f889bb764661L,0x7b94a37bba914166L, + 0xf571f7e9042f65bbL,0x38de83e849b57e28L,0x30b1332bdf81998cL, + 0xc8abf5abaabd0834L,0x4a0c78cfc8381e84L,0x00000000000000afL } }, + /* 46 << 112 */ + { { 0x3d5a2cc2b9b6cb9dL,0x451655a6344f2f58L,0xb1b47d157fa3f835L, + 0x968a6841f42c60d9L,0xb84b7308f8f7c5adL,0x452a354d89555eb6L, + 0xef55dd0ffdf2cb27L,0x74eb80451f99bc70L,0x000000000000010fL }, + { 0xee70cf6eae8f7d18L,0x0c7c6a33cc7856a6L,0xa0b262b553cc50b3L, + 0x885cbc7856cd61c9L,0xe804e59f27357b72L,0xa25e4e2ff0de1262L, + 0x6f15c8fbe66bc201L,0x553d0f8e2a012e0eL,0x00000000000000aaL } }, + /* 47 << 112 */ + { { 0x984e30a1d77350faL,0x2d8d4baba501ad13L,0xf6a76e16e10ee73aL, + 0x512b31c76955b642L,0x25a977e01e88bd3bL,0xf5a5f60798821007L, + 0xebb4e8ed81819b38L,0x359f9c62004e07f6L,0x00000000000000d2L }, + { 0x1c0218a4499f21c5L,0xce12310336fc822dL,0xa9a55a60d384c462L, + 0xf891a118812e45c9L,0xadc5eb313ecde0e1L,0x189f50c1e4e0da26L, + 0x34b21a3323276e3cL,0x490c316c9467a0d8L,0x0000000000000192L } }, + /* 48 << 112 */ + { { 0xa59d74fd4ae20507L,0x46d0adbaa86a468cL,0xc004c36b0df04371L, + 0xa0f3cad9c6138633L,0xfe2133c1eeb5863dL,0x0ef5dc4884041aa3L, + 0xbc3f0e491707f418L,0x3677e9ebdd7cdf90L,0x0000000000000178L }, + { 0x454656623a62115fL,0x4985e0a08192fa59L,0xabd64eec2ae07a9fL, + 
0xe0e94a6d9182d301L,0x5230272bb543c76bL,0xb7a9f6ce5b5b137bL, + 0x45903dec3e7b2872L,0xade6111744296989L,0x000000000000001cL } }, + /* 49 << 112 */ + { { 0x1641b779225fe8bbL,0xb2c04a9e0995ebcaL,0x0270ad40d7b634d2L, + 0x54301c3a0235166aL,0x17a2655a84428da6L,0xe4849f07bf319f15L, + 0x54346197a3975f9bL,0x1400bb7353454131L,0x00000000000001adL }, + { 0x1d8cc6c47b133fdaL,0xac430140c99c62beL,0xe438a1d959380ab9L, + 0x984c0ed8d0ce3602L,0x56f90ee1bcab7a0bL,0x55067ca825fcdba1L, + 0x7fde8ced07db34c1L,0x70a106fa9448fbdaL,0x0000000000000110L } }, + /* 50 << 112 */ + { { 0xf5c0e5baf99caf44L,0x4b0a642c89af2ab6L,0x082ea9e1c6f1f294L, + 0xc1da27771a9eb5e8L,0xce038334379f1b75L,0x662775ce806524f1L, + 0xd4200a961de789acL,0x94e49840f985d4c7L,0x0000000000000188L }, + { 0x9765c5a05332e704L,0x35e54513f357c532L,0x871a4dcee65f1729L, + 0x100d2bbf9b2d7c46L,0x58590df34c23bfcbL,0x8ffec9bf59d15a0dL, + 0x33c0e85e6a2a3312L,0x6fb39c77d2846bcfL,0x000000000000002fL } }, + /* 51 << 112 */ + { { 0x2f6183097b4858b0L,0x4d4d2b80f34c0437L,0x4bef8204699cc258L, + 0x13a4f63696736be6L,0x6ca2df088c828b7bL,0x0dbf95001564f616L, + 0x03806a3426723221L,0xadd4715f3594f49bL,0x00000000000000feL }, + { 0x2255c005d91a7a23L,0xd31821aabd8738f4L,0xf417c40758c41cdcL, + 0x1bf2e581ce1b6aeeL,0x13ba4d9331a003ccL,0x02658dbdb9d35167L, + 0xcce7251fcb565afcL,0xd17dc6a8d300b06dL,0x000000000000011aL } }, + /* 52 << 112 */ + { { 0x023b057fad5df81cL,0x0906bab2d1df9f2fL,0xd8d8a173709fadfbL, + 0xa8ba2a86a6c45ef3L,0xeb1d4e331be5b53cL,0xdef9c19e56c39843L, + 0xbd1763c0c1d3eacbL,0x0e7eca5142490ba4L,0x0000000000000088L }, + { 0x8d446e88a5f070b8L,0xe8136967fe9e8b21L,0x0a26424b816d519bL, + 0x0dee87a30bdd1c2bL,0x2c6c6287b75b6cf0L,0x50558e41c28a75f2L, + 0x40727fe07d2b4ab6L,0x34832548349c668eL,0x00000000000001fbL } }, + /* 53 << 112 */ + { { 0x135a24aeba593c17L,0x024315aeeeadb96bL,0x549040a3872ee831L, + 0x05781e1ca527850aL,0x97a7a4a46ff2f4fcL,0x667bef40328e2318L, + 0x133a153c13ae6aacL,0x25e691f7dd80ce7dL,0x0000000000000041L }, + { 
0xbf2b901433cf2b24L,0x74df7e9c1ad04c41L,0x5cb289df132de33cL, + 0xbcd1bec484f41349L,0xc0f49a0c36f55149L,0x578571e03398aefaL, + 0x65823d11b11cd39cL,0x44f1a009ce63b025L,0x0000000000000042L } }, + /* 54 << 112 */ + { { 0x8a2b478e8343cec7L,0x2820d2fbda374a71L,0x9d473bb194af8efbL, + 0xcc54c95f16b56d2dL,0xa0bf40a0430f13f0L,0x6a56a1dbecc6cb08L, + 0x6180fbfdb7bc57d0L,0xd6b165b76bdab048L,0x0000000000000100L }, + { 0xf5b147b7f8e4648fL,0x97d37c258437bf24L,0x9465879086556798L, + 0xf1321e101f3fcab3L,0x37d4d1ea7351c0deL,0x59b6bf365fc92770L, + 0xa950de8086276f0aL,0x367635f61786be75L,0x00000000000000b6L } }, + /* 55 << 112 */ + { { 0x31f3a5c38e113349L,0xa374734db1c20ec8L,0xa6f0e423d1eafe52L, + 0x1f32e96c0b901678L,0x895ce01dbb18837aL,0xd91347a55d9930e0L, + 0x7a43364e9a86a004L,0x590ec9096b157d23L,0x00000000000001cbL }, + { 0x7e3b46799392608eL,0xe29453e8a9c8b459L,0xf460b030993c90ffL, + 0x91ec7bbc33b15097L,0x743fbdfe1a68fecfL,0x224e914959860761L, + 0xda87ca63eb1dabdfL,0x6f509876cf17d2b4L,0x0000000000000058L } }, + /* 56 << 112 */ + { { 0x951aef0c34412809L,0xb20f279499403457L,0x5a30efe2df842006L, + 0xccd7c89150a68d91L,0xb750afe1f819f912L,0xc0017d9a4fa1c8d5L, + 0x19a5f2cdbcde79b8L,0x7b6686e03b7f4682L,0x00000000000001a4L }, + { 0x74060a6acdce424fL,0xd35ffef26ee7e0a4L,0x8e238effacd0a03dL, + 0x95d28a08f5cb5bb7L,0x654858407771be75L,0x3b753ffc4873ff72L, + 0x1bcb2bc8c60d11aaL,0x005e5c5d3dc06c4dL,0x000000000000008cL } }, + /* 57 << 112 */ + { { 0xcd13e3389eb436baL,0xb0ac873ea2da8315L,0x4666e06810f3dbf3L, + 0x0e5832027644be64L,0xd682457a397db83fL,0xd55546a11b832130L, + 0xd6dcb98d8ecffff5L,0x7e95eb40c3368e15L,0x0000000000000125L }, + { 0x8239cf6862306bb7L,0x3ab70de64ec9caf2L,0x4387316c86af4457L, + 0xc60d6e709327cf2fL,0x0f936e7caea1796eL,0x184ef761f8dcec2bL, + 0x447d5a965eb06c81L,0xc969bcce3764ed27L,0x0000000000000145L } }, + /* 58 << 112 */ + { { 0x6a9ea90ba4680770L,0xf9e6ec6093314278L,0x69360f538727b4c9L, + 0x5f20e2ccaf754475L,0xfe3f4c4aef4d31f2L,0xe25edaa3fd4dc244L, + 
0xb255796172dc983aL,0x7ab62907a1c19791L,0x000000000000002dL }, + { 0x6af3e2c939a3be8dL,0x7c100f6bba83593fL,0x933408151f986d85L, + 0x6a3e36274c8ce9beL,0x263f3820d0fe0d3fL,0x100201e1f9a54852L, + 0x34ab0e71c2175ba5L,0x105312c8a23a1598L,0x0000000000000196L } }, + /* 59 << 112 */ + { { 0x1a844aabd7391a5aL,0xaaab287d8b4b3501L,0x1435f8a12bc5061fL, + 0x4588693ec9044802L,0x9e86c1820a553513L,0x7bd0602fc8fd44d9L, + 0x894b901621d26ce4L,0xc0f06181bf73bfb1L,0x000000000000000bL }, + { 0xe6ec15971782220eL,0x4c2e6efc285fb2b4L,0xb688822d2edaadb2L, + 0x129be3f8338c9913L,0x0836465246d13d20L,0xbc93fd48530f4ffbL, + 0x8b6d9caf472dad14L,0x7a977a52a37172c2L,0x0000000000000037L } }, + /* 60 << 112 */ + { { 0x1b26fa58fe5246ecL,0x45b69d4ea6d48abbL,0x4bd1bf364b47c822L, + 0xe543f2c9f015149cL,0x1f82bab432c44a8eL,0x6f27aee23824ae58L, + 0xba8cabd695e3e7ecL,0x9bbb31bc66b8eb78L,0x00000000000001fbL }, + { 0xc88a99b9e2708288L,0x7e228e8097db1cd7L,0x84c064e5c935e31eL, + 0x0d69185a2f2f010dL,0xfa9c64e661a440ffL,0x9d655e24a4bf903aL, + 0x776e8661bacc5b4eL,0x39a98e6e18231a7eL,0x00000000000000f3L } }, + /* 61 << 112 */ + { { 0x3d7416191aa43b05L,0xa196772534da61f6L,0xc6a9881c9fe86cb4L, + 0x312a9d68c3b88c86L,0xbb34c3cefbce7f28L,0xab59cf050eefb825L, + 0x174945c17c8ca5a1L,0xee1a3e9f78d5f3e7L,0x00000000000000b2L }, + { 0x23545e86246672c5L,0xea0ffabbee486373L,0x5d0ad0371dc589cfL, + 0x7c9e301d57718f3cL,0xc9772db800fbd5beL,0x9821a095463668b8L, + 0xc76345bdc3c57757L,0x4bd844a73c2d1c8eL,0x0000000000000060L } }, + /* 62 << 112 */ + { { 0x8539e35c7e332459L,0x00fc0e9f98112339L,0x66bb5320ba989b61L, + 0x4c6a3f25209282c3L,0x14ad058dc440c82dL,0x6d4b647649afd02cL, + 0xe2a127a77b3f32dfL,0xf3a4d7633c921ff2L,0x0000000000000036L }, + { 0x88de18f672d6240fL,0x980bc77f373f12c2L,0x6558ca734246eba3L, + 0x903c033b4ec51cefL,0xd83342185dd505bbL,0x51a9e645e8bb1731L, + 0xb32588b6e307ec2dL,0xc9a481b31c46ca72L,0x000000000000015cL } }, + /* 63 << 112 */ + { { 0x2b5bdf56beea4241L,0xc1e3e0225b740107L,0xfc8fbae009238bd4L, + 
0x991d834280953392L,0x3418731d37d2efb3L,0xea7940f99b0e2279L, + 0xe45b37101a57c70bL,0x69c37528e4e38879L,0x000000000000009dL }, + { 0x6f566050b423111aL,0x8719ea62fb467f2fL,0x805bec724b45fb47L, + 0xfd2169aca28673f3L,0x90d8d9757d896d31L,0xf553651583762591L, + 0xc7fc36f483122a6dL,0xc145ab75e6c19a5cL,0x00000000000000f8L } }, + /* 64 << 112 */ + { { 0x809dc9ec3ecc9997L,0x005bcab58cf9605cL,0xfc78790eaa1b4ed6L, + 0x6d80a56d687f88a6L,0xb7640965ab9c0398L,0x61fbd11cfeda1c18L, + 0xdf0fb213831177f2L,0x39e15bfe11a2d00aL,0x0000000000000049L }, + { 0x05d1b9cec407557aL,0x0fa7f405ffe62203L,0x0267252d7efa3c26L, + 0x445c8adda59468e2L,0x205a0236cd1dda49L,0x3a2196f39f2bcbc8L, + 0x9c1158d8cd2760d0L,0x1ad9a2f4e806abf2L,0x0000000000000044L } }, + /* 0 << 119 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 119 */ + { { 0x97f22a4110c355dcL,0x550f2515bf8056baL,0x253c72a37ef07316L, + 0x0f1f8cd585b059a3L,0x8d432e48a7e4d4a3L,0xcdf3981d7b2c78cfL, + 0x15d98f56dfd71053L,0xc95ecb64949930d5L,0x0000000000000195L }, + { 0xd5e23bcd4b7d613eL,0x8603f848f66ecebeL,0xe2cfaef127a24540L, + 0xd35bba9b2e9e7eaeL,0xb7f17d307f8c1bd4L,0x901506c4e0ab51a6L, + 0x3e0d62a390755f2bL,0x8db637dd3c553544L,0x00000000000000c1L } }, + /* 2 << 119 */ + { { 0x9ebd0a79e09d583bL,0x8890ceb33f312038L,0x254ff069fe1c7d97L, + 0xcd82a50a3825dcb4L,0x065bf2f94c37cde0L,0x59b7c4e2a7b458e7L, + 0xace6716e7d105c29L,0x84d999349f26170eL,0x0000000000000107L }, + { 0xdca7480bfaada7a3L,0x203303d30d4fb693L,0x0ec1c4d2b9ef7822L, + 0x2b97216d594552eaL,0xb93e533eb59c5490L,0x50d31ad60d6c3787L, + 0x0f1014d65cb84476L,0x0523796c02b816adL,0x00000000000001faL } }, + /* 3 << 119 */ + { { 0xaf1e4ca2a7db5dafL,0x7cd7bc5fcf367163L,0x1bc98e50f65d85a6L, + 0xb86fe8a7aeed093fL,0xfebaef0941e74246L,0x7c4ae3825d97753eL, + 0x93352c2db192c8c4L,0x0edbd295f0753edfL,0x000000000000009cL }, + { 0x12c415e884e12bc3L,0x6f90b11c8582ffc1L,0x67cb4af0d8c1070bL, + 
0x97c8ccb18a79326dL,0x69ed5c55e5125d94L,0x496103ae1b1baf56L, + 0x32066cfdbf98cfc1L,0x77c898116d9f4999L,0x00000000000001d4L } }, + /* 4 << 119 */ + { { 0x186535555b9bbe35L,0xe67d27e0e3d2342cL,0x06c2f1f39775eeefL, + 0x1d98dae9e778d2ddL,0xd04ae2693d8c8e86L,0xc49c1b77d8c7ff3cL, + 0x33c474c9dff1687eL,0x4b0397567350001aL,0x00000000000001d5L }, + { 0x3a01b07dfe70659cL,0xe8bb7a25ec01b5d1L,0x42f9b07d713a5bf0L, + 0x9c1f15cf370d1490L,0xd2804385895c898bL,0x72905d530194561fL, + 0xf467f9e0828ce8b5L,0x54d2132d53e3859fL,0x0000000000000183L } }, + /* 5 << 119 */ + { { 0xf143a867c3316e20L,0xf2bd4f50bc497e24L,0x2aaad2ef37ab1547L, + 0x9ce62d3f3f7713f9L,0x3246f9c020486bf8L,0x3db017e20da96c8cL, + 0x56b674701a782cc7L,0xc4c70e3b136bf566L,0x0000000000000058L }, + { 0xd8c1bb99ac35dc35L,0x7487501327c7a0dfL,0x45a025f618aa1c21L, + 0xdd7ce0f04280e892L,0x8fc15397ddb495aeL,0x51b198def1891c05L, + 0x81cd5f32268f273eL,0xec2aa11586b31e95L,0x00000000000000fcL } }, + /* 6 << 119 */ + { { 0x7e4fc745603f4f62L,0x300e74342f0eb97dL,0x0e64a63411be4195L, + 0x6c160b9d52b038e7L,0x4d130d75a62d7fb8L,0x83b6412b23b6bf71L, + 0xe92b5b0719d0a2d2L,0xf547f666fe79aa1bL,0x00000000000001dbL }, + { 0x609495e1a1364f6bL,0xc196a1c69de725e5L,0x24e0c338051a1989L, + 0x5b24ecc17132b77cL,0xc38889537d5f2440L,0x847186d0ddffd474L, + 0x0ea28f9c643c84f9L,0xac9296ca4cdfcc12L,0x0000000000000151L } }, + /* 7 << 119 */ + { { 0x8010e2ec682e5ed7L,0x231b6fc4ccd555e2L,0x08b20819b4cc2bb7L, + 0xba49a7a8b38f4af8L,0x6ee2df34ff5875adL,0xe10d9cba25883081L, + 0x7b41a2bf599dc323L,0x01679b17a6aaa35bL,0x0000000000000060L }, + { 0x3396ca47c09ee443L,0x4eb85f5371662a3dL,0xd9dcc6710ef4b6bbL, + 0xaead8bf43c3199edL,0xd5ee8048216c81a9L,0x5e60fffd6f00e815L, + 0x115afc95967359a3L,0x316b88ba9c6fb569L,0x00000000000001cfL } }, + /* 8 << 119 */ + { { 0x62e30f146b33442dL,0xde4d2e1042123a0aL,0xa1e5e2dda59b4649L, + 0x2b6aa7d5dfa5d292L,0x83f0033e00c25b90L,0x2e6143261d598a8fL, + 0xa5aa5a26df8943faL,0x7459318ad4fb7104L,0x00000000000001a7L }, + { 
0x53ed625bd67084d6L,0x47a2d5950d232f5dL,0x593a7030f8863270L, + 0x3e87b5ab993051e2L,0x02e51486ca5387c6L,0x7612f0c7a818de0fL, + 0x32d26729b42f317fL,0x48a9b12fb9a3c2f4L,0x00000000000001ddL } }, + /* 9 << 119 */ + { { 0xb331e5f4f0c18c18L,0x34ebae25ed4f1f6eL,0x1d43ad4aa25f2fffL, + 0xc75517b81c368e17L,0xeb289c54992ad974L,0x8ad87538f5e364bbL, + 0x914d356e089ad0f3L,0x65a5ea69d54982caL,0x00000000000001c4L }, + { 0x51212cfc269ce3e2L,0x43a6142228f9247dL,0xfaed8540c4abf936L, + 0xeffda37241f6c6ccL,0x766e3a2bd9497366L,0xbfb0e6af67baae9eL, + 0x4b943b2b4cf8e2eaL,0x14ff3bc90579b694L,0x00000000000001c0L } }, + /* 10 << 119 */ + { { 0x60b11099cc0ae72bL,0x51cf7973d3588531L,0x62aea2ee1f018f3eL, + 0xceaec7a028e42557L,0x695f7cce52df0599L,0xf22825452a1b3f00L, + 0x8c2df32e6394de22L,0x18ce083e2db1770fL,0x0000000000000088L }, + { 0x7989f8bf4ba02f8fL,0x22fc813fb69bd72bL,0xf4eefa600a383c73L, + 0xd7aa3e7e6dbf9f7dL,0x98113db2d2d50a4dL,0x47e078b14d81782dL, + 0xef4cea6327df6aceL,0xf720012f577eefc9L,0x00000000000001c9L } }, + /* 11 << 119 */ + { { 0x8a7250ac0b860356L,0x68cc18db827712fdL,0xf51e42975c278210L, + 0xd739081ce152d377L,0x3be304ef809d58a8L,0x2e2866fe1d8e32f8L, + 0x079e155045a93bd3L,0x0b6e67a48277ca0bL,0x00000000000001d6L }, + { 0x554f41797dac30e9L,0xd57280b1379d715aL,0xd12134dcd0185268L, + 0x9fb040dd55b10542L,0x89f90cbe9419c076L,0x058fa0d6397cc301L, + 0x7b1ebc8b10ab543eL,0xeeadfe47674313faL,0x00000000000000e1L } }, + /* 12 << 119 */ + { { 0x45ff6cffe1a83401L,0x7b9b4eed54a79e2aL,0x979b1475933bb48aL, + 0x135e2594634d7ec6L,0xc08ba7b4d5dccbbeL,0x78fce165c3826372L, + 0xb1f172e431102cdeL,0xb6fd8f343eba4396L,0x000000000000000bL }, + { 0xf7d351a9b4f622a3L,0x80215fc752af1195L,0x967f5f190e0e3c73L, + 0x4e05f75f9f80960bL,0x76c30e858eeb6e11L,0x9f270d1556a7acb6L, + 0x9236e11764fff770L,0x80c87737e474ea10L,0x00000000000001cfL } }, + /* 13 << 119 */ + { { 0x991489171d928ce8L,0x727f54c83b241dc3L,0x837f9b5e5b7b0e99L, + 0x60b9885a65c90179L,0x025c5092a0723b78L,0x6f465cadb35a299dL, + 
0xbf3e23b849ac4709L,0xd3d8a7547a8c1a2fL,0x00000000000000c0L }, + { 0x855e31df15d9ac7dL,0x57c4d92146e48a7dL,0xed2f5b45d787acaaL, + 0xb004403b65895d32L,0xb671cad3324326eeL,0x419aace1f7db0750L, + 0x5079c99935051f2aL,0x4785a42e08e80cc5L,0x000000000000001bL } }, + /* 14 << 119 */ + { { 0xd3c4e96b03d54c5eL,0x962ef31f65450ba4L,0x4a75aeb64dd512c0L, + 0x4c4d23aa233d2675L,0x42cfc07c0055293fL,0xbbff60c5242a0317L, + 0x3f27d5c95c4f2c32L,0xca352c57746505b8L,0x0000000000000080L }, + { 0x5751eeca92a1ef30L,0xb83a993e1f9eee84L,0x5eec2ed72e0ac56fL, + 0x95ad825bcc0d1e16L,0xd3aa7ba68af2a112L,0x3035c3f8c7ef1fcaL, + 0x5b5bf3e55216a429L,0xdf187ddd0fcb5720L,0x0000000000000025L } }, + /* 15 << 119 */ + { { 0x78efe954502ba658L,0x1acdb02ca79d89e5L,0x6c83c8808cccff30L, + 0xf6b6cc94db66a880L,0x9f374ff9ce82c740L,0xf353329b9f06fbbeL, + 0xfb73109a9e34d30bL,0x4ff19c4c2ae4fb4eL,0x00000000000000d5L }, + { 0xf3329d5a4e4bec08L,0x14090d979e97c3adL,0xc413b1e477844381L, + 0xa1649708be56dbe2L,0xba1a525241d4f044L,0x2c7da53909d84593L, + 0x5c1494f9961312e6L,0xece942c3d4cd25b5L,0x00000000000000c3L } }, + /* 16 << 119 */ + { { 0xa549238eb310bea8L,0xb542445a99bb8ab7L,0xa930bd6a8c208b86L, + 0x1b97c831168af442L,0x18d4fc46a291bffeL,0x1d202d44a6b461aeL, + 0x433dd2d7c57c66fbL,0xf21b5d1d2a79f133L,0x000000000000001bL }, + { 0x99343aa32ee37543L,0x904b8dcd7eff1bd4L,0x4e1b01fa9711ab88L, + 0xf8111ac33af62c90L,0x59ccfea158910f9aL,0xf4d713a358e5f398L, + 0x6cbbd15fb9c12355L,0x6181d1c0c3f59372L,0x0000000000000170L } }, + /* 17 << 119 */ + { { 0xfd1909b68d45b896L,0x40d662ac394c6c7cL,0xf5cea1dcf74c3e17L, + 0x52641b5abf892800L,0xba56fa53cfae386cL,0x34d5d6e508a7d7cbL, + 0x93a70eb6eb77e443L,0xe1f4539c3df2b1acL,0x00000000000001f7L }, + { 0x654e70fda8174410L,0x5b339e60fbefce00L,0xcbd494e6957674d3L, + 0x59a2fa2ba3719a56L,0x84b7ef4361be71b8L,0xd993ba7996bbd063L, + 0x4e360f9d87a626feL,0x47fb3ca54c60de10L,0x00000000000001c1L } }, + /* 18 << 119 */ + { { 0x64f8ec4c94dbcef0L,0xc97a5fe1d006d598L,0x7cc416b15fee99baL, + 
0x5e76561a698113acL,0xc81c6079ffa11af2L,0xc04b5048cdaee971L, + 0xa79ec707c46bb0b7L,0x20aedd0f45f1cfd3L,0x0000000000000189L }, + { 0x678e01fdf1aa0926L,0xf6794950809be2ebL,0x9420aa4b9f89e63eL, + 0x80440a43f8be307bL,0x5f745612dabbb2e6L,0xc68ea3c278a61df1L, + 0x37c8c2f6d67e5c46L,0x32cac5262b38ed94L,0x0000000000000008L } }, + /* 19 << 119 */ + { { 0x8e864ff05d5cfcd6L,0x43e2041c6f9fc03cL,0x2db99b7e23583833L, + 0x5dccb3a8339ff572L,0x06586d385e30a99aL,0x1c5dfb435976d778L, + 0x23751881d86629e2L,0xa5d2aa9db94c022aL,0x0000000000000164L }, + { 0xb460335a0d36e04dL,0xffae905ee8e64c22L,0xe3a3a0890ef4ac2fL, + 0xf2395b267d86626eL,0x2601b8cb3adcc664L,0x5fbf5face8832bd5L, + 0x6ae36465bff22aedL,0x8df993b2b0541b7dL,0x00000000000001f8L } }, + /* 20 << 119 */ + { { 0xc2ab59e873ada2ccL,0xbae59c15cbec8583L,0x91b446abcf3b6345L, + 0xff7a1af20becb4e3L,0xa2ba8e42707dec53L,0x3ad25c5191ee1072L, + 0x50624ace261414e7L,0x52c9513adfad31d0L,0x0000000000000085L }, + { 0xe08f4efe933887cdL,0x88ef090ae39784afL,0xdf8d631e0d1e77c0L, + 0xa5d3a5292f764f0bL,0xd782f2bf2a993c96L,0x66bd58533d305512L, + 0xdaa06e5fcaafa3fcL,0xe5ee3ae28e21902bL,0x0000000000000189L } }, + /* 21 << 119 */ + { { 0x4a29ed0cfac4faffL,0x845117baae022e6bL,0xdabb52d4f8d70af3L, + 0x24026deac5e6fbb3L,0x44b230ca918eea16L,0x0ed0c105bfaa5514L, + 0x57c3322682aef559L,0x67bcef99cd389b75L,0x00000000000000b1L }, + { 0x7e8088902867777eL,0x8de1b2c3af7d1830L,0x2755e5556dd95dfeL, + 0x64641564b86f4994L,0xb629925edfc94844L,0x5297aa1c77dec939L, + 0x1f706f40c445f067L,0x9384cd0b480c8c54L,0x00000000000000e7L } }, + /* 22 << 119 */ + { { 0xb191bfc95379208aL,0x2a1f2ba91ad2a396L,0x8a482274cff511f3L, + 0x20fb75155b03819fL,0xedc652327fdde9d5L,0xa73b9708f88e8979L, + 0xe0bfe37f5e93f0a2L,0xbe3b0ef20f1f13c2L,0x0000000000000008L }, + { 0x52cd7f260e0288fdL,0x6dc435d7872f25e9L,0xfa45bea70164564dL, + 0xdd4667eb9d109fcdL,0x1e23bd77b282d30cL,0xb3bb1a7478610e7cL, + 0xf48ee3dfccedda5fL,0x2483a44a16942c44L,0x0000000000000048L } }, + /* 23 << 119 */ + { { 
0x05a88878734d704cL,0x40c181a63077bf30L,0x8a9a744692a8af26L, + 0xda1c1e825393db91L,0x5ed215541599d840L,0xfadc24032c645869L, + 0xf2bf75501e1b5dd7L,0x2f512f9789250c44L,0x0000000000000003L }, + { 0x0b1020bc1e05e15aL,0x8998b2fe07c12cd3L,0x1f9de8b8a849a0abL, + 0xed5d9ad7b34dda41L,0x8f576dfd3941866bL,0x6b2ab23bdf701b54L, + 0x1e8c11f936179a14L,0xb9f4893243e693ffL,0x00000000000000a4L } }, + /* 24 << 119 */ + { { 0xeb1706ffe55d9dc3L,0x128f942b47cf0bc8L,0x6a5d94a8cc1ceeedL, + 0x8736f0290ca71858L,0xc100090dd1fd8b55L,0x5970b8829caa1f54L, + 0xafe5e74c9266c865L,0xff310a7a2889325fL,0x0000000000000005L }, + { 0x4b0a8d9bae5ed924L,0x81e086f41a8d0a30L,0x6c209d40190f4957L, + 0xbf69cda925fd95f5L,0x5a7a31a215c6321dL,0xc4f9d4e866200541L, + 0xa675123925161f67L,0x3134ad6fd5ff9687L,0x00000000000001e1L } }, + /* 25 << 119 */ + { { 0xb5f514a7e0bb67bfL,0x9e40b6e4e48a475aL,0xf889e93751dfc4c0L, + 0xd6a56b5fa94b1777L,0xa53c024ce66b7400L,0x54aa63659c499f6eL, + 0xa318725c40eea1d1L,0x959b22025fa02502L,0x0000000000000166L }, + { 0xf4ac61080c71fefdL,0xc0ccefc5a79fdca9L,0x4c540ee1fc90c9f8L, + 0xc426e0d1c1d56f4bL,0x5dc15d4c5b7bde88L,0xc5e8b846ddd7a8bfL, + 0xd8f63c9b8700dbd8L,0x96966bee03f867b9L,0x00000000000001a3L } }, + /* 26 << 119 */ + { { 0x60ce32b9138490feL,0x3195cd4ad2685b04L,0x0e36b35e239110a6L, + 0x63129c87d4740967L,0x994570fadc6d3f99L,0x8edae6940031cab0L, + 0x504b1b8f7af36802L,0x5cfd2bdac466fec6L,0x00000000000000c8L }, + { 0xd5dd304e703de0c0L,0xe7e0f651df8244d0L,0xc5bf943c240569ceL, + 0xa100eda82824e162L,0xb7b7b3b63dda41f9L,0x48f90dd91e5455a9L, + 0x8c821e2a7b1de463L,0xe653f0674ede44e9L,0x00000000000001ebL } }, + /* 27 << 119 */ + { { 0x459c2acbe7a0e738L,0x3e7b0714c6f21da2L,0x5475f4ffcb875deeL, + 0xb951bacce164b589L,0x457e8912de96aba5L,0xb774ee72fe8219a9L, + 0x8978e772f097ebadL,0x12860ee828f4d4e3L,0x0000000000000061L }, + { 0x360293f7aaea6fdaL,0x31008270991d3924L,0xb8b3a28e0bd95d8eL, + 0x2f38092f695f3004L,0x581cce8636edf878L,0x96dd5a9a14d73e23L, + 
0xe318676fff47dbc9L,0xb767451baba7fae8L,0x000000000000004cL } }, + /* 28 << 119 */ + { { 0xbc008f6f12853216L,0x4bf12ce42b48c455L,0x54ff9bd2fbafa90aL, + 0x5fe9e7e641840f1fL,0x042dcb9c65e15455L,0xcfd8e3cb4c57c3f3L, + 0x4c57920afcc0e297L,0xb4cdc9adedd993d0L,0x000000000000009cL }, + { 0x6b6ee23c382e05c0L,0x17f4b2c3b8821acfL,0x48517aa5b5db9ea8L, + 0xa86c1f52c23154e4L,0x5c13f1540f4d1d9dL,0x83e69e90e13257a3L, + 0x4cfd01f2e6b1bd31L,0x04610d106f53c1d4L,0x00000000000000d2L } }, + /* 29 << 119 */ + { { 0x96f21b5c587ece4eL,0x2ec1c0d35e494d28L,0xb2f2b82c71276480L, + 0x1556d678fc677d2cL,0xd2d1d79ca8cd767fL,0xa317f1c6a04e42aaL, + 0xbb445d70ddedc4ffL,0xc2a0dddccfff4b00L,0x0000000000000048L }, + { 0x743026ad46ec7a40L,0x909df3034e4b2c97L,0x1942a04f54f5aea2L, + 0x2316f22aff478c5fL,0x1aa79b3decddd9c1L,0x2a9f761432e997bfL, + 0x58b8aaba9b14edfdL,0xf2e3dd78e507dea0L,0x000000000000004dL } }, + /* 30 << 119 */ + { { 0xd498bb74e6cf516eL,0x5142e4d31d808796L,0xa5e120b141cf0189L, + 0x50b0208b41823038L,0x290dda83eca548f7L,0x3617a82941bd5a4cL, + 0x3ee40e94717cabccL,0x7801f6aa97f09b10L,0x0000000000000175L }, + { 0x00bc1f19e1e5636cL,0x3ee1b2a1790abe39L,0x423dcd56b6935099L, + 0x951fb17eb8189d05L,0x2086426183d235d9L,0x1d060a7799abd480L, + 0x393b00f9871477e4L,0x3d751bf09a968951L,0x00000000000001e3L } }, + /* 31 << 119 */ + { { 0x6b238a6a447a1655L,0x87e723a1b29ad51dL,0x2e1618a3cb5d4927L, + 0x61ce36defb5ea331L,0xe66a95a69dbf6e02L,0x7e2d71c88ff6b838L, + 0x19ff9378b2144d22L,0x573686d3dbc2f8afL,0x00000000000000ceL }, + { 0x027a78fb3b8116a5L,0x431129b0105e775dL,0x0b111a77a199ce40L, + 0xe282a92c3d72ffeaL,0xe4e9e0d2f41b8ebdL,0xc6775590ed468e58L, + 0x585f9c702df78db2L,0x6c1d2b10c6120772L,0x00000000000001bbL } }, + /* 32 << 119 */ + { { 0x0bc2fc66a6a08edbL,0x4f83dbe481d46942L,0xdd379bc14e44b819L, + 0x6f409f1ab5785cc1L,0x3fac6dda40232da3L,0x83b4bfdc11b9bc84L, + 0x4563149ac91e3874L,0x210062c6f562966fL,0x0000000000000126L }, + { 0x6a00191e9548d9c4L,0x800f66bcd9716285L,0x3a8c05a315dd9859L, + 
0xaf6dcdf5ac571627L,0xeae07417006a2a90L,0x11b368fbfcb7c955L, + 0x5e47618fc4abe2b7L,0x328f44c85ab9dd68L,0x000000000000015dL } }, + /* 33 << 119 */ + { { 0x2a1f44bf35f04663L,0x35863dbcf2cb12ccL,0x65966eb50645618dL, + 0x121be877084d9977L,0xb293bc95a71b0855L,0x15cbf71b3e8b2bbaL, + 0x18432ee58df913c0L,0x47fd2827e04dfad2L,0x00000000000000e0L }, + { 0x9ff5d0451f756bc0L,0x149333a7cb99ff05L,0x514778ffc7520037L, + 0xf9e3810a69a033b3L,0xc107c7f739edc855L,0x613b65b44f1cf022L, + 0x8ad96f8dcc6df762L,0x5d46de0029fc7d62L,0x0000000000000000L } }, + /* 34 << 119 */ + { { 0x1d850ba68665bf9cL,0x0a5bfa5ecd57b87fL,0x06ecc39391eb1ac0L, + 0xc92b6bfb47a8713aL,0x4dfcc9f1b5a6c2dbL,0xeec1b0b4361c81ceL, + 0x44635bcce1d3f510L,0x73ed43e468bdd964L,0x000000000000009eL }, + { 0x5ef7da45e203e241L,0x415015340b472483L,0xae2fd0edf8c38f62L, + 0x497f78e07f8125eeL,0x3b25ec8d2f86cfe9L,0xdf5be34f63202086L, + 0xfaac26690de5c0e6L,0x515f0bca5c14a0ecL,0x000000000000002cL } }, + /* 35 << 119 */ + { { 0x388204a5856f6cd7L,0x904812166cf197baL,0x464561f15fb4bf7dL, + 0xd134b856f100c475L,0x6ff4dcded7592257L,0xd76fb6af4d3963e5L, + 0x5158524f9e80e9eeL,0xe56520ec2e0e44c9L,0x0000000000000002L }, + { 0x8178bb1d5481b113L,0xa4bcda1d0bcdb35dL,0xb5f582f01dfd501eL, + 0x0a69c092b148ca76L,0x2d5d8f611ffdab41L,0x1559fa8d0416db07L, + 0x3e0dfea55da20aaaL,0x65feeb54cdae4b30L,0x00000000000001b2L } }, + /* 36 << 119 */ + { { 0xb99c8cbbb9f77625L,0xbb9f09212461cc35L,0x8b93dbd9bdd1edb5L, + 0x5a53e36e7de8aac6L,0x2aa01adc4d122c59L,0x9beb895d36e58874L, + 0x49cebc216f3b18ebL,0xd6ceb0334676c58aL,0x0000000000000075L }, + { 0xa914bb8e5fa2193eL,0xc4e659990f747a2cL,0xa6d517b9957743dcL, + 0xcdd3ede4d4c39e00L,0xbcec7332aa4c0329L,0x3ab06883425fe620L, + 0xd1cc5d15668dfa78L,0x4d96ddae52b7579aL,0x000000000000019bL } }, + /* 37 << 119 */ + { { 0x4be1055088151d4cL,0x9d380be1cd317b85L,0x135a9b2500b5933aL, + 0xb2a5bbf5bebf245cL,0x18fafe7a7fd975c7L,0xd68068c9b48104efL, + 0x1aa0df9c8079c090L,0x8efcc46fcdbc1da3L,0x00000000000001d2L }, + { 
0xf773a926f6609360L,0x43b75bbd140a56e9L,0xa4c713e6d3a0b820L, + 0x0f821f3f5de9d334L,0x1c6c4f45b947eadcL,0xa974aad7fbb18aeaL, + 0x414b97de33ca4d45L,0x809bfa14f7bbcc4eL,0x0000000000000076L } }, + /* 38 << 119 */ + { { 0xca350ea9e37b698dL,0x9eb0052ef86416b9L,0x784ed0be5ed684a6L, + 0x2b41249bd685ba76L,0xcbb7f9bdf8885679L,0x18973477ded73fb4L, + 0xb89489b5a787e3a6L,0x7bc63168aa40bdecL,0x0000000000000079L }, + { 0xfec9cd3b6d62a5c6L,0x91f481cbbf766a62L,0xb55ba1f2286b4265L, + 0xf3e28afc6b10ea1bL,0x1945a36d4e4ade55L,0x9823132fcb191569L, + 0x0c4ce33732178d49L,0x5f0a08e255189f04L,0x00000000000000a3L } }, + /* 39 << 119 */ + { { 0xb7849d36fe2477a8L,0x311c25a323f26d2cL,0xd6601942c5f1b989L, + 0xc45c73d160698449L,0x8f52a35889025c50L,0x289f61fd4503f0efL, + 0xce5ee29ccfbdc0b5L,0x56a6fd6fd0947666L,0x00000000000001fcL }, + { 0x53c2998e1173eb9aL,0xd7e525fbca74814cL,0xe6a9bd42e7ae0a8bL, + 0x26587842f9aa587bL,0x464bbbf3f79d2faaL,0x89a0bd3355f3fb1aL, + 0x8fe1ac20cbeaece8L,0x2387e29f36016942L,0x00000000000001bfL } }, + /* 40 << 119 */ + { { 0x6d20e58e82000ffbL,0x1ea966716e7d118eL,0xb0cb325041c88eb5L, + 0x973b977faacd407cL,0xf20cd8f6f0ca7a17L,0x2f0ef22e9fef2038L, + 0x23d15a01175a188cL,0xdbdddfa829188156L,0x00000000000000a3L }, + { 0xfb1a78b3993a35e7L,0xf7badc2f131829bdL,0xf9e29ac9a58a4099L, + 0x302502e0c7ced6a2L,0x0d09dac9f1cb6d2eL,0xb31805d7d96fafbeL, + 0x8124802b7410b804L,0x8720b403d824baa3L,0x00000000000001ecL } }, + /* 41 << 119 */ + { { 0xa7290d9c48b1f38cL,0x5b8c285d132d4075L,0xb97ab2320f6612c9L, + 0x278778d5f8cc7252L,0x1bb9cfb9f88bf532L,0xbf27cee0fe2d82b3L, + 0x568fe3492c626677L,0xa29cfa7b663a62bdL,0x000000000000010eL }, + { 0xd8eaa38b28160c39L,0x5aa3566b05af5ea1L,0xd5de166d4c045d02L, + 0x3d0029b93de6e8d1L,0x6ef61a9745c158b3L,0x2a76310bfa47b87aL, + 0x2d4cb8f174440b9fL,0x2f1d69bf22788f42L,0x0000000000000061L } }, + /* 42 << 119 */ + { { 0x2aa57c505668fefdL,0x1d383b64bbaeb298L,0xe4be715ad8a2f406L, + 0xc0d9df9754005292L,0x423dc4c773eb56abL,0xa5651c8e01586953L, + 
0xa4afc432bf57c12fL,0x0fae1a0b3db56d05L,0x00000000000001d6L }, + { 0xb6a8f2764827d50cL,0x6096d31c6921e003L,0x4100d72e88d61542L, + 0x872c450a56e575e6L,0xf25cee3e5a82b93aL,0x797af76ca2a32ba6L, + 0xa1e2af54ff02fe90L,0x6ba03aab44163fcaL,0x0000000000000052L } }, + /* 43 << 119 */ + { { 0x45e97d51cbeca15cL,0x92ecdf862b747064L,0x2dd8552961bc072aL, + 0x1ff9552ac1c0783dL,0xe424c5fe4c9235f7L,0x702ed8f71a1bccbbL, + 0x44ce8000061a4767L,0x09ad63882e4d5303L,0x000000000000018eL }, + { 0x93a69860c1a12d88L,0x20fe34ce34096d41L,0x0395762d55ed8738L, + 0xb52c3127b21aa0d2L,0xb3ee6efe8c5c0ab9L,0x7ef6aa092ab17c19L, + 0xc7dfe85539c623b4L,0x4b4e21ec470a5812L,0x00000000000000c7L } }, + /* 44 << 119 */ + { { 0x41f1110cc6c5882fL,0x269cfa8003a383a6L,0x063066bc3fb339bdL, + 0x893cda4818001024L,0xdc69f450a35accd7L,0x7bc9d8c37bfcdca8L, + 0xb3034c8661dc6f02L,0x65e4d3ac27f28f29L,0x0000000000000026L }, + { 0xd485c332700bcf1aL,0x97bd7f097c2ae8a6L,0xd07bbb26c2fe444bL, + 0xc637e7632b702432L,0xb4e830218750ecc1L,0x1bd3bfac22178bdaL, + 0x2deb354ce56f8538L,0x9ffac3411745164eL,0x00000000000001e6L } }, + /* 45 << 119 */ + { { 0x83369d68f61454ceL,0x3f00ac930ef465c0L,0xf372ca70c9ac3434L, + 0x7a4df749a652a4c2L,0xb5cbf0aa6d77058aL,0x016c55b4f828b256L, + 0x2d5b8556574471d6L,0x3802a2adf605e691L,0x00000000000001efL }, + { 0xe250d04a621747a1L,0xe1815010f73b751dL,0x2911a81dab76c836L, + 0x47e76c4cd4ec7b58L,0x13bcbfc531868dabL,0x71fd1826ad6217bdL, + 0x58d008b0332e4a37L,0x6b046494cd88d120L,0x00000000000000bfL } }, + /* 46 << 119 */ + { { 0xb0dddff0f61ce5bfL,0x162bfccb7644e88bL,0x62f50e15ab7a4b15L, + 0xa34c92f13b3289aaL,0xa78127d8ebab98c4L,0xe805aadc0908f884L, + 0x237a8b9aab0ff641L,0xe0017b5f1b7ff0baL,0x00000000000001f3L }, + { 0x871e2a2d3a0be93fL,0xab5b0dab375f5672L,0x4db1185439454995L, + 0x5586e23420d5e373L,0x45ba928391960be2L,0xca170fa228c6ac75L, + 0x73af1b708481edb7L,0xe7f738bd66cd194dL,0x00000000000001d0L } }, + /* 47 << 119 */ + { { 0x5e706dac23b4a6d5L,0x1ce385acc2e54919L,0x6aa1ae21e6a1b808L, + 
0x8b6fa602d276b630L,0xa045ee27b1666aecL,0x0d94dfa8f4de0f17L, + 0xb8fdb55612f63f7aL,0x16083fc661173439L,0x000000000000001bL }, + { 0x1b8ddf1802095fabL,0xf10e700fe66b44c1L,0x0b6d14a584bc01ddL, + 0x34f7a7e595aace58L,0x068003fbb0cef64bL,0x071161805837ebefL, + 0x09e3dbb9530aee02L,0xf16a7550c2d924daL,0x00000000000001dfL } }, + /* 48 << 119 */ + { { 0x7504bfd02e9b52d8L,0xc14c2e06f96980f1L,0x9f8a9d759fd5b1e3L, + 0xf182865b45156aedL,0x50b0cc368ce9cc26L,0x795f14da99833447L, + 0xbfb65a7c8bb4ee45L,0xf69152996ad0b825L,0x000000000000015cL }, + { 0x7515a268f20f5a41L,0xa9f0c26ab40016f9L,0xa1e9255c68244585L, + 0xa7f9b9b63593b04aL,0xb3b584b379b006cbL,0xf8f232002fa6001dL, + 0xded262cf2d85354bL,0x32684f9602a212c4L,0x00000000000000ceL } }, + /* 49 << 119 */ + { { 0x28f6eb1f7209b0aaL,0xd3ad776d5ceeb136L,0xf5776c122c51355fL, + 0x68ceac473c98f9caL,0xffe36607a189cdf6L,0xa6aa2cb0c41b5335L, + 0xb6f97b13f4228d0fL,0xc08f5f9ad8499d39L,0x0000000000000170L }, + { 0x64fb5744ba209520L,0x285a6f3a3e69bcfcL,0x94ed78757004b6d1L, + 0x58cae99fdf95df4cL,0x33c25bed249942f1L,0x65d8b14cba4429fcL, + 0x49855d740b951c72L,0x19c4896828bf84baL,0x000000000000017bL } }, + /* 50 << 119 */ + { { 0x4c9d8cacff2d8b65L,0xc45bd92d179f3040L,0xae72e5018e5cdc37L, + 0xaa449c76f3403c8cL,0x7cc0b59ad969f8fcL,0x82d42d85953bc5f9L, + 0x42d22f167329ed61L,0x1346f831d79da154L,0x000000000000013bL }, + { 0x453eefc095d4c1b0L,0xf368d0a9c0151480L,0xe5e70e121c88eed0L, + 0xd2d4e183ede8dcafL,0xb3c1aeaf2b7c0e96L,0x267cfdc3d7119ef6L, + 0x726315544d66f240L,0x35a5053f8b3ba01bL,0x00000000000000ebL } }, + /* 51 << 119 */ + { { 0x18e992882d99b995L,0x91eaf7bd18da99ddL,0x6cd867b276baf30cL, + 0x81ddb37e79f3951eL,0x1a31a11da660cd44L,0xfd1d7abdc64bb6caL, + 0xd06da3853ad047a1L,0x0551dc5d18500d26L,0x00000000000000d4L }, + { 0xd655dbc4a56c2d9aL,0xefd030860031587cL,0x72e9f59994e89201L, + 0xe40fe6c62ab9e1cfL,0xe096cda83f4facbcL,0x8f1256653d8ae874L, + 0x757c85715e1ae9d2L,0x194254584eb4b540L,0x00000000000000f7L } }, + /* 52 << 119 */ + { { 
0x2b59d0e19914987eL,0x0dd9211ffdcd7c4eL,0x8e64f4307374cecbL, + 0x2871801fdb74abe1L,0xec2a36fe7eccd46cL,0xa352a1fa019682e2L, + 0x4d33e93e89fdb09dL,0x0766c47e0aa4a0efL,0x000000000000018fL }, + { 0x0b8e93435eb6bdccL,0xcc0ff53bc54f61f0L,0x4c56f488f18e3583L, + 0xe68caf9247091732L,0x3ae769850463bfb7L,0x5e1c264ec3731f02L, + 0x02c43734b13b2407L,0x39c2841a6ae98aa9L,0x0000000000000053L } }, + /* 53 << 119 */ + { { 0x2426d599fceb982bL,0xafc5ece1fee285c2L,0x350815619b7e6025L, + 0xc7320a8d2bf6fcb1L,0x45755ed78c9bf467L,0xbc3282102df44bf3L, + 0xb259e7efd74d2ff6L,0x674facb030ed2509L,0x00000000000001d0L }, + { 0x861acf33a2f0cc96L,0x62db05068367db2eL,0x639033ec2b1fb906L, + 0x91f2cf72b5b5f399L,0x76236bf2b0eeeb34L,0xbcf075451b047c87L, + 0x071f150ba29bc4c3L,0xef42687e05dde2d9L,0x00000000000001f5L } }, + /* 54 << 119 */ + { { 0x7a8eb2ab0ee6708dL,0xba7e08b7f90168e5L,0x763aac0d3f58f2dcL, + 0xb0cdc84a7ba2a4dcL,0x02a8bd35f1e3b519L,0x5f39ac1b8e3b9f2dL, + 0xa042d7e89d98f86aL,0x8096461ca9e64b14L,0x00000000000000afL }, + { 0x3d09672d4e3e44e8L,0x2506dd9dda7c3de8L,0xf85d30baf4829b9eL, + 0x082923844dfd7291L,0x915707e19e64b3dbL,0x19683c2c819a0b64L, + 0x23cc1a36324f5d0fL,0x3d11e9fb33ce2655L,0x000000000000012eL } }, + /* 55 << 119 */ + { { 0xf68386fa888aa958L,0x613390b5d9d0f67aL,0xeaeacf1eed2656b5L, + 0x74b550a872877de3L,0x57fa4d941ab845b2L,0x1225595224eb57b1L, + 0xa4f4c0fe98a50b36L,0x52f5c07e1ae96581L,0x00000000000001f4L }, + { 0x30898f2d1028fdddL,0xfb3f5650a0344437L,0xd5033e856a58e784L, + 0x9e51e2e6ee46b6efL,0xd5e841f005a8c5f5L,0x15887595fa9bc00eL, + 0x8adf4c9304c01ae9L,0x4a9cd9bb9d9db32aL,0x000000000000016bL } }, + /* 56 << 119 */ + { { 0xaa6cdc01e7416b5fL,0x92bfe795ec1feae2L,0xeeb400bddc173db5L, + 0x8609dbe0bff7846dL,0xecf621229d201903L,0x4c15e42f5a3ec1d7L, + 0xcd2b85015119af15L,0x981a0c08add9cb95L,0x000000000000004fL }, + { 0xa6d2f3a0186db260L,0xb259fad5b0c24c87L,0xc68d1b53c7f64d0dL, + 0x38c1c8c2543c50b7L,0xb3f7a5c61e042c56L,0x2083f846d4498e97L, + 
0x41c5fb320b7d08c5L,0xc5196c8d9c2caa96L,0x000000000000007aL } }, + /* 57 << 119 */ + { { 0xee862a82e2056fbaL,0x1f4e8288d286c0abL,0x55158dbfed0751cfL, + 0x3e72a1b69c51cc83L,0x07e9544a464cbf5bL,0x06b7f08d84ca5228L, + 0xfd4e306720cedbbaL,0x441f23d2fd663b38L,0x000000000000014bL }, + { 0xb56e86d029cf364bL,0x2e4cbbe886ee100fL,0xa2ac7cde1af4c403L, + 0x20b602aeead7dfd7L,0x32db7a58a912eeebL,0x7e46fc37cd65cfcaL, + 0xa61ce9ec64e513e6L,0xc46e45ad5e355c59L,0x0000000000000082L } }, + /* 58 << 119 */ + { { 0xd9446d12d08d5183L,0x1930a976ff54d766L,0x4b5f889fe0f983c0L, + 0x3f9cb2d9f4cf0b95L,0xf25e0f78a3e156f3L,0x85698419471f2ed8L, + 0xcd2e901f340547cdL,0xb735b0afe5b67d75L,0x0000000000000090L }, + { 0xbd5dc2a524d04605L,0x81b22ff6cdaeea52L,0x35e5177fc2dcbe54L, + 0xa021681e5d5c2a50L,0xb6316fbb7772bd63L,0x7513efa7fac05cb2L, + 0x2ebe68f969e4bb0eL,0x4ace321e7d9692bcL,0x00000000000000bdL } }, + /* 59 << 119 */ + { { 0xfd326e55d25b6175L,0xaebfd41920d968c4L,0x470743454b5e33d5L, + 0x0c5e0f18553ad718L,0x435094bf6a41609dL,0xe4583ca75b278266L, + 0x62009983871212d6L,0x1404bbcffc7a545fL,0x000000000000002fL }, + { 0xe34736dd5f7295d9L,0xf5e4b0cb1eb15ba6L,0x1815b6fdfdabc947L, + 0xbac35e1fea16f54bL,0xd9adc92196fefacdL,0x8bd671a7a338e668L, + 0x024352aa43905638L,0x7f4f43cc43d3b2ecL,0x0000000000000084L } }, + /* 60 << 119 */ + { { 0x74409db3feed975fL,0xe70514b682733880L,0xfe1b718ab5a41e01L, + 0xa2059a6544b53e78L,0x25c2079fb1fe6720L,0xea6df9fad9cd24f6L, + 0xe5f1f5b4fca222f5L,0xe8f6dcb4dcef0479L,0x0000000000000039L }, + { 0x2cfc5286b2a16b82L,0x25ecb74775d40713L,0x4b263a4755dda1f1L, + 0x0676b9c117aa9c19L,0x6f2e9310476acc39L,0xb27ef44fd5c4e15bL, + 0xb5d3e4f79d72b9d2L,0x8a3aeb37ca49521eL,0x0000000000000038L } }, + /* 61 << 119 */ + { { 0x1a80ebd82d914976L,0xd5d1c8cc996eff0aL,0x06ac9a8535cba3eeL, + 0x381d54f62e809546L,0xacf4ce4c769411f3L,0xa64b28314a37638cL, + 0x13d99aaab7cc63d7L,0x462b14c7a591857eL,0x0000000000000192L }, + { 0xf3807c3ad22ea880L,0x76f9339636cd3b1bL,0x77d0bc89d2ab27d0L, + 
0xd7e7f64e87dcabebL,0xef3f8eaa3ec8afddL,0x698141961205cc30L, + 0xf89c8a8ee16e9331L,0x95b0f6fd2d30c290L,0x00000000000000dbL } }, + /* 62 << 119 */ + { { 0x3471f7f706f7bfa9L,0x912385e739e980bbL,0x829fcd40e8774d7dL, + 0x637e17490e295af0L,0xe2aacc7d9fab3a4eL,0x1e074bab0dc9e073L, + 0x53fe3fd6bf348272L,0x0779d4332877f11bL,0x0000000000000076L }, + { 0x44b3a7711763e639L,0x50a471be9d95f8b3L,0xb50122f106d99fbaL, + 0x6fc8784613054c47L,0x33befbe2d0e71575L,0x1cac97e3789e115fL, + 0x3f61f57e455d8c54L,0xec75111ee7dac210L,0x00000000000000c7L } }, + /* 63 << 119 */ + { { 0xae4363ed7c759c6eL,0xb663d67dfa6e2ba0L,0x745abea27791af0aL, + 0x708b4c271cfea43fL,0x90ce598808d390fdL,0x3142798061acbc72L, + 0x413c40bf20dfc34bL,0xc7577fba6be74f89L,0x0000000000000054L }, + { 0x4f315b816225f675L,0x08b7537d99b0789eL,0x0723ee516f760ca5L, + 0xe6fd90d08bcdfc98L,0x40eb0f9177226310L,0x6412fda8f8b2e5bcL, + 0xf1421d2a7e40a5b9L,0x846449c3f273b934L,0x0000000000000165L } }, + /* 64 << 119 */ + { { 0x7ac007ad8451ad30L,0xadb09d590905b6a7L,0x96b382333ed8d9dbL, + 0x7ba1ab90144aaaaaL,0x2d31fb344abae176L,0xc0471119d1d9cb6cL, + 0xe56b681221c9fe02L,0xfd040d70efbd1643L,0x000000000000009eL }, + { 0x91c517c02975af75L,0x7a77c8f2834de3adL,0x7dfd1527c6e95530L, + 0x982a2eef0ea03560L,0xc340fe70c7e0205dL,0x302ca446735bc119L, + 0xcc1072285c271f62L,0x9ba4e55bc486fbdaL,0x000000000000006fL } }, + /* 0 << 126 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 126 */ + { { 0x6a00d57da5e6776fL,0x8a138da8dbd9bd8fL,0xa059ed07d5fddaa1L, + 0x4f6122d91a42e212L,0xe72e39c509d704dfL,0x025696a8f15340ddL, + 0xe75fa17db760bafdL,0x1b7950b1017f537bL,0x0000000000000109L }, + { 0x1e1af3bd3f72ec2fL,0x0b95f25a8df112a2L,0x6f3e98aec4b96ea1L, + 0x5b890eecf3ddf2c6L,0xb4e902c3d1b40c40L,0x331cef2678e129b2L, + 0xdf376cfca4f572deL,0x9299466aad414479L,0x0000000000000103L } }, + /* 2 << 126 */ + { { 0x36252c7bd5a6eac2L,0xe218a649cae5dc4cL,0x690f3b49b249829eL, + 
0x4858d6576e5a7464L,0x6e4b2ca633ae77e0L,0x28cfd8ba871586adL, + 0xaadaabc9d63d4d38L,0x2ea000e98acdd480L,0x0000000000000043L }, + { 0xbb0525eeaa905e22L,0xd6ad676ea045363fL,0xa0abe10eadbb3a6dL, + 0xbf7d435a49793facL,0xd96b7452c4dd6260L,0x4e64c9930aa813f9L, + 0xac364502001a15ecL,0xc33efef71af6493aL,0x00000000000000eeL } }, + /* 3 << 126 */ + { { 0x4ac52cc0248119eeL,0x69c12fe2732cf0c4L,0x4ed67c2a560e7ac4L, + 0x82f29f7e79346ba1L,0x47607b94f488c23bL,0xb39bff863dbb4817L, + 0x91de351665116670L,0xafd0ce8fa77373f0L,0x00000000000000d3L }, + { 0x9d96404bc2a1911eL,0xa43a089a21382683L,0xf16276e883e13d93L, + 0x581897d09fe988eeL,0x3ad0dd8b998d0f46L,0x078f1a9bb6143261L, + 0x303d457410d18924L,0x0bc45e0c82a9a0a6L,0x0000000000000076L } }, + /* 4 << 126 */ + { { 0x9aecae2d6a3cf15fL,0xd9d0b49a9c959376L,0x4aad945457cc7e25L, + 0x683618323876fe58L,0xccc6f2d82bd9dc24L,0x3970525c605bafbeL, + 0xf17524eda2c5499dL,0xe30b69d5b0c7bf4fL,0x000000000000005fL }, + { 0xa71532adb5d073b0L,0x37159fb3b4fb97b3L,0x15b655b27a1fff6dL, + 0x55ac89ee9e08a89cL,0x13cfab6bbc027776L,0xa57a7f289dece9a9L, + 0x65a3d815c03a1d45L,0xe5cd61ea95515dbaL,0x0000000000000100L } }, + /* 5 << 126 */ + { { 0xb5c48a06cfc6b235L,0xaa7c3f7009f1e479L,0xb6bb50cf899f94e4L, + 0x3e0823b34b850b6cL,0x4777c2c2795acd34L,0xde6999f169c9c574L, + 0x5d3e7f18cae69639L,0xa0a30c05ee76b726L,0x0000000000000095L }, + { 0x0d1df4b3760bd806L,0xe76381175f56b063L,0x876307d8fe5fc610L, + 0x0db04d83536a446cL,0xf020f6b4437b971cL,0xd5eae3a0ee10ad1cL, + 0xd8041973920ed5ecL,0x42825e955216c588L,0x0000000000000147L } }, + /* 6 << 126 */ + { { 0xb0da1338de123c1fL,0x0e0c698e649a8652L,0x812b95f93c9b1fe1L, + 0x15e7b770d3b3714aL,0xd75d98da400398eaL,0x86663448a6666434L, + 0x8a820ce79a26a89aL,0xbcd785914d7f2fd1L,0x0000000000000058L }, + { 0x470af7d334f3d449L,0xa54f2eca47a36f7fL,0x1a1afce9ab8573e2L, + 0x180bb5940e1d26f9L,0x0adc2f21c2f87c19L,0x0292b1813d39dcddL, + 0xd116d62af52b928aL,0x803ab7608dc43b17L,0x00000000000000f2L } }, + /* 7 << 126 */ + { { 
0x9fabf32c1b953f8eL,0x6a1919f297c27760L,0xffc5895b235fa996L, + 0xbc6a06c9a860faa0L,0x08a3f0b8f5cf032cL,0x1b50f6d7e54d862aL, + 0xbef9525a2ee95ce6L,0xd29d4cbec0cf300eL,0x0000000000000078L }, + { 0x3298f5bef6f748fbL,0xa1477e05624cb33fL,0x790a733bfba77178L, + 0x5c71c7fb1a852470L,0x412df1e51e5e062fL,0x1deae6b98494f310L, + 0xe28bc9e2a851449cL,0x0a8819713101ceafL,0x0000000000000076L } }, + /* 8 << 126 */ + { { 0x64107659d44e9b1cL,0xdfe2b3ed599694f3L,0x348b163455939fefL, + 0xeb5fb1860b2f8b7aL,0x19d2becf5a1469b5L,0x24c67ff96bea8f5aL, + 0x1a4037a85924fde9L,0xdd0e9acefe0231aeL,0x00000000000000f0L }, + { 0x5b3f06a237cee8e6L,0x3ccc49c67e4a80a6L,0xe8729cc405b38a1cL, + 0xa51323391f4373f1L,0x1dac8246b3a773f0L,0x9824b4980193d5b1L, + 0xb63e677ac4db540aL,0x9c74537f5f10a7c1L,0x0000000000000094L } }, + /* 9 << 126 */ + { { 0x04c48d47fb40834aL,0x597722f2ce2a8ae7L,0x60d17d9eeae4d77dL, + 0x621c37135b996e51L,0x57bdcd302bc1e0daL,0x8dcb34eedfdc6cc5L, + 0x5252936144ce5a05L,0xbf52929bce1a2ef0L,0x00000000000000dbL }, + { 0x1b8e10a725993998L,0xfb6d649983675966L,0x03318a2d858e41c6L, + 0x6b6b365e33cd6c75L,0x3aa091143500ee2eL,0xf2ac959d6840c8b3L, + 0xf677e86103445169L,0x2a0d3377b52f2064L,0x00000000000000ffL } }, + /* 10 << 126 */ + { { 0xbf7a23f2f851b8fcL,0xc1bd1b74db32c685L,0xd0ab7f93138501b8L, + 0xe7c5770454cdc320L,0x6b64807ada0a6e60L,0x84f83a22d75c0a8bL, + 0xa555aa9537b4524eL,0x56787a37b6459681L,0x00000000000001a2L }, + { 0x4479f49de83cd71fL,0x4f53ebedc838cf55L,0xcba234498ff72ff6L, + 0x203ff691435a3557L,0x51f0d2949bb325e0L,0xe3da5139806ceb26L, + 0xcc4b204ebe6bbdd2L,0xf05ef8e12e3bef4aL,0x00000000000001ddL } }, + /* 11 << 126 */ + { { 0x8726192031da68e9L,0x8d660db73619c127L,0xf3d5a3e2f282597bL, + 0x9f1535cd54448812L,0xaba3e02b18f499d5L,0x70d4159c457d8737L, + 0x4a86b7edcd6f52acL,0xb48981d296176e37L,0x00000000000001dfL }, + { 0xe05b90319a61218aL,0x1933ad6600381905L,0x32c647a4988b8b38L, + 0x4dda10b72c5224bfL,0x2083d17f002c8769L,0x76184ea513c0705fL, + 
0xa4ab64210b653f0fL,0xaa24960e3a9ff008L,0x0000000000000010L } }, + /* 12 << 126 */ + { { 0x21cbb2c74ce9abd3L,0x7f157a4845073c39L,0xbc887f720de2380eL, + 0x295bad1c6f96a397L,0x9bb39be48ff6daffL,0x65b52767d80498e3L, + 0x1b07529402b936a0L,0x04c89f2fb90ec0fcL,0x00000000000000b4L }, + { 0x47f62ce6f3bf4b3dL,0xf79921250e3e0bd4L,0xe1555bf8a14a6183L, + 0x8169e02567f3936cL,0x62dae8ae846ce4b0L,0xcee40b04fe0b0558L, + 0x520c3e208fe270e1L,0xa1f2420ec44de629L,0x0000000000000150L } }, + /* 13 << 126 */ + { { 0x87a4f58f95002c4fL,0xc0f5a15375cb2b64L,0x462b10584fa3bd39L, + 0xa53461051d56f6ebL,0xb2b70b8860c20135L,0xdcbcdccb9df24a37L, + 0x71d030c375cefc7bL,0x389a788c776a7674L,0x00000000000000faL }, + { 0x26ad8f5398b8d9f1L,0x33640e375d442332L,0x4831351d6a8d3878L, + 0x8f84df35bb4f8ecdL,0xf9ee728f0921bbf5L,0xa4768d023391c771L, + 0x07b56316b867956aL,0x9e1ae1cb2338f7bfL,0x000000000000018aL } }, + /* 14 << 126 */ + { { 0x3b0ae395e4f69814L,0x5ff5c84e6a747e34L,0x35e69b0a29ef0396L, + 0xd1b54c35835654b0L,0xe247e93979cfc63cL,0x7efb57321f7bdd7aL, + 0x07af836785156ce9L,0x3437319c68ad0803L,0x000000000000014bL }, + { 0x3162f3727ac81dc5L,0x9bb5caf4f08565feL,0x467224e5d19abfdbL, + 0x92050b4fbbfb8528L,0x2a78b72b6812bf36L,0x33a409687b2efcf0L, + 0x4ebd9e465991d0feL,0x68ffa516e8555f3cL,0x0000000000000100L } }, + /* 15 << 126 */ + { { 0x415876531dc87e3bL,0x40fd387a843c306eL,0x6000915d01f6cef6L, + 0x86df2dc147a8966cL,0x63a5132075e86f42L,0x6ee59bb6b9751faaL, + 0xbee42edc6eb0fe81L,0x7d4ceebf9df2a3daL,0x0000000000000188L }, + { 0x59eec8519e8c84ebL,0xbe9bc225ac158f45L,0xb50d7f7ff403ec17L, + 0xcae0e31f86689313L,0xa85359400a613f4fL,0xd2fc94710a594025L, + 0x900273e8256edcefL,0x1b5776c2b67ee0d4L,0x00000000000000d5L } }, + /* 16 << 126 */ + { { 0xc773faacbc875baeL,0x5ad3cc9408dc8719L,0x23a9cf666fd3dbdeL, + 0xf9c47b69a4c8d8feL,0xaefdcd39fc6d9e13L,0x3647f3a92ef2fff0L, + 0x7fbf9d741f53ee3cL,0x9199963d85e5a502L,0x00000000000000efL }, + { 0x191173743ad5b178L,0xa5892671f086fdf5L,0xa828f5beaeabb8b1L, + 
0xa9c1a33e62711d4bL,0xf8a2e9b101e1de48L,0xe5eb2de9568801daL, + 0x3e0b7a845f2f8052L,0x074d3c36ddb76ffaL,0x0000000000000160L } }, + /* 17 << 126 */ + { { 0xe3bba4c44e1d104aL,0x23d230985486ea3dL,0x23f6a18676d19da0L, + 0xc286b66dbbcda9b8L,0x9e42651ea2ccd664L,0x597a8783ea75aa04L, + 0x688cb16b74bcab22L,0xd983661aec74a513L,0x000000000000017bL }, + { 0x7065739f19c41974L,0xc026315ab663d886L,0x0417e4ca5bc05ce0L, + 0x9e43b822c05ef3e7L,0xd2e8450ef16ab5d6L,0x278211c0ab315709L, + 0x9bb0e36146c481b2L,0x222788b5f3acac77L,0x0000000000000164L } }, + /* 18 << 126 */ + { { 0x9a36088c8517b761L,0x1d49f690e77f2f41L,0xaef7ca451b4af218L, + 0x9629c9c7b17e4fabL,0x052e9acce55663cbL,0xac455c5874b22e1aL, + 0xcf000252de25ce2aL,0xddb6e6c4449e7684L,0x000000000000008eL }, + { 0xaf28bf6377f6ffe8L,0x3b6eb30b6fd5f4b2L,0x77b4244aef362208L, + 0x5db842e4ca41a1f6L,0xd7a8774263a815d1L,0xe9a9c316ac2312b0L, + 0xcbd383be87fa009cL,0xbe2a91d2de5be94bL,0x0000000000000041L } }, + /* 19 << 126 */ + { { 0x1c18d8a676f3e8f0L,0x4521da83a2651f7bL,0xc8d2991e18f7ac31L, + 0x96c25646c128653aL,0x852b44c9acc53a44L,0x9fad53c52030825fL, + 0x207273a08d2d18e1L,0xd9dee4ec3094fcfaL,0x000000000000011eL }, + { 0x3720de1cc5166970L,0xbcc50de4fe148454L,0x059615420b197ac1L, + 0x940a3e6cabb72c37L,0x19cec56f63a3ba33L,0x94b04c0ec28fc6deL, + 0x337e7309aade99efL,0xa9b60e49406b3fedL,0x0000000000000148L } }, + /* 20 << 126 */ + { { 0x3ca5ae614d0a48d1L,0x39648cec7f021444L,0x3c92b2521e90c6ddL, + 0x776994bc94a49837L,0x53e54c207ddbf5b2L,0xfaf593c7d4576141L, + 0x17e000c226cc7ab2L,0x9484de380bf5318aL,0x00000000000001c9L }, + { 0x7aa1d4d397546078L,0x06b60ee9f1e5fd32L,0xce10bdf08447917dL, + 0x40fd8d610e3de8abL,0xa3c917af34e29cc4L,0xa7bb1b542c27de76L, + 0x6992979931e6c453L,0xc668fbe42f185b22L,0x0000000000000142L } }, + /* 21 << 126 */ + { { 0xe0dfb4e582a7839cL,0x724fc7b532068c9fL,0xf8510404a4a51707L, + 0xf65fb0e634e33830L,0xa1ae29bf48e5b4d0L,0xc70a3beaa3d3410aL, + 0x131c23e466a5d1beL,0x5eb1c285d107cd51L,0x000000000000019cL }, + { 
0x7b61971c7c414730L,0xc722293b3a7e4352L,0x0d3203c6f5ae7457L, + 0xaa7fafaaf773fd74L,0xf88c67cc68830e7dL,0x5a71236c09730382L, + 0xa1661f7d1701c04aL,0x4e8979467ed93ad0L,0x0000000000000099L } }, + /* 22 << 126 */ + { { 0x10ea083c5ec936b2L,0x6011069c58b11715L,0x3e58532003a1796fL, + 0x538e1d0310eabb5aL,0x4b9640632ab5d141L,0x7d34fa8206aba7cbL, + 0x81692c655f1e613cL,0x2f3ed8894411f352L,0x00000000000000a1L }, + { 0x3cea6506f6fe4425L,0xb246f4318a8d8686L,0x69666392fdb1c07dL, + 0xf8e2a71872b25c8eL,0xc2f6c4bc10e81a1dL,0x5c6746ed3d2788f2L, + 0x92d9bfb31d236efdL,0xc65dc241f922e874L,0x00000000000001b3L } }, + /* 23 << 126 */ + { { 0xa01148ecf4a21f7eL,0xfd85ea9c5d0894d3L,0xc1349c182e3e497fL, + 0x6ff182da3115b3ffL,0xc67455c26be8d31fL,0x559369800aa15b5aL, + 0x35904c6ebdd17173L,0xae743b7d52b5b531L,0x00000000000000c3L }, + { 0x4785276788ebfa67L,0x7a8daccd6235572bL,0xf8c7cfd6e9fb4fe0L, + 0xece07c0c6f9f658aL,0x121e045b2550b943L,0xdcf493e709e11ba8L, + 0xf120183ac50b4aa1L,0xb68ec6f24668c0f4L,0x00000000000000fcL } }, + /* 24 << 126 */ + { { 0x953b8ca7af0603f0L,0x4b5eced77e21e713L,0x427652e12864dca9L, + 0x547f5fb516cbfbb1L,0x6ad85c7ef75f17f1L,0x469af7948222107eL, + 0x20d4c8e79bef1085L,0x867d4d0b152552e6L,0x0000000000000142L }, + { 0xbb5ffb0cf3f26574L,0x047c1bc015cd38c0L,0x76c5dbf176f8e575L, + 0xf7eb7b0e20d33c17L,0x1e9cc21e6d879e9eL,0xeb2edc8340ca3223L, + 0xb0aa90a8290d165fL,0x7c17d3b223c1ef71L,0x0000000000000055L } }, + /* 25 << 126 */ + { { 0x91d91ecf643d458cL,0x962345a55e8691a1L,0x883ac05d1af94a9dL, + 0x747a7302f476f098L,0x44c326ffd0667890L,0x344aaeb5f76f1602L, + 0x60e6437c716561c5L,0x412f5a0546540e4cL,0x0000000000000104L }, + { 0xb0553215617eda16L,0x8d95ffd17e7cdce8L,0xe1162d136fa892cbL, + 0xd69a1ce67041a11eL,0xb8559697a144ed95L,0x6ec56d46024f8ca9L, + 0xefc9cdeb514bf316L,0x69d2c9b290a22342L,0x00000000000001abL } }, + /* 26 << 126 */ + { { 0xc9f4133c7443182bL,0x8a6b2562f5d6eab0L,0x8ab8fc0b96e13ea9L, + 0x1070f3aa810771e3L,0xe8745a7c01bb3865L,0x97bf12d49586f6d4L, + 
0xa82edb725d473130L,0xa75508ecbe3c9bdbL,0x00000000000000e9L }, + { 0xdb554bec480c4283L,0x647fca1d341e42eeL,0x7dabe114766ae5bbL, + 0x7b5db510e7581fb7L,0x4f1647c0bb3a71a7L,0x5389934264d27664L, + 0xebda4815606cf2edL,0x7a8fe4ecccc611a5L,0x00000000000001bdL } }, + /* 27 << 126 */ + { { 0x7e2894589cb55c95L,0xa834c287ecf5f9e0L,0x4a74f1cd2bc1efeeL, + 0x6a1f60941ba0db9dL,0x3cd9e239da42e0c4L,0xeac13ef43e2dbd01L, + 0x99353f4571c37766L,0x5c633343c0c9f425L,0x00000000000001c6L }, + { 0xc7d01c367439c08aL,0xec5613e9d32aca4fL,0x884fb18182f492fdL, + 0xa551f913b6c01487L,0x15b7ed76d5a46ea5L,0xe961a4ea688f0277L, + 0xc7addc6a16149e7aL,0xb3ee69e80d41a979L,0x000000000000001fL } }, + /* 28 << 126 */ + { { 0x80fd3dd3c1e4a87cL,0xf2230946e63e847bL,0x9d51e3a3477178b7L, + 0x06b089a460b2e4deL,0xf98e4cdc53ccd5f0L,0xfaba504ae9e18aecL, + 0x22b799be34dba1caL,0x75409d50a4c9ca6eL,0x00000000000000f9L }, + { 0xec619962617902e1L,0xed26be734c58c299L,0x083e94e46d1c30e8L, + 0x2c8f71158be269bdL,0xf7c13ea9e0fed762L,0xb6c9bea0b517e55bL, + 0x7d656933aeb4bec0L,0xf386fed9ca01bf37L,0x000000000000007dL } }, + /* 29 << 126 */ + { { 0x9263b871dc1dc230L,0x0208cb151bb6cdf9L,0xe767b789fbb07097L, + 0x180f1502fe499e7bL,0xeb8fc35d782cd35eL,0x0e5aede16d18dc13L, + 0x596979e9075b5f1cL,0x93548273ddab0649L,0x0000000000000108L }, + { 0x6484acaddf9a9c9fL,0x698f20c8bfee6c7dL,0xb3f89ba614c8b2d5L, + 0xa1b11b16c07bd4dfL,0x5a5eef740acd17d8L,0x5f2d1074f24e1b0bL, + 0x161a5489766b5674L,0xb208a6abda3b10e2L,0x0000000000000145L } }, + /* 30 << 126 */ + { { 0xd57b41e5ce2874a6L,0xfbf0d623b5bd9b26L,0xab64b932fd3750a7L, + 0x12be25d6586fba44L,0xf28a20cd1402ddf6L,0x058c0a6028b9102bL, + 0x27678cf4d68b4aa4L,0x927445e5de23211dL,0x00000000000000f9L }, + { 0x46a5c0939c6e96d4L,0xcda4538f010f2c55L,0x5053aef6b0b776d3L, + 0xb0f5c0f895e614b4L,0x0d42f943f2856b07L,0xfe51414f426b1275L, + 0x49b65061a5de95fcL,0xf4a6fb5cadbdf4beL,0x0000000000000040L } }, + /* 31 << 126 */ + { { 0xcbeee2169021308fL,0x2959093bd6ece14dL,0x68d757404093d151L, + 
0x77a6a05785259344L,0x63b1a5fb6d893564L,0xd7bcdd88e3e35d1eL, + 0x76f862a445c13992L,0xe466691221730cecL,0x0000000000000056L }, + { 0x23e1ecfbce1e1e57L,0x3dd3e08822c19c3aL,0xa7d0301032146b0bL, + 0x35193697fe806487L,0xa9f13fa54854ad0eL,0xe4ba06e756149ff5L, + 0xfa5cbd737e6032dbL,0x3f663df3adce1658L,0x0000000000000197L } }, + /* 32 << 126 */ + { { 0x1036eb9b66fd07caL,0x6ca52cc16b7fb490L,0x512e973ed3e0c270L, + 0x889980bf73d92d11L,0x38b4cfe4a4005eeaL,0xb6f992cc8ceb4313L, + 0xd0ac2f8d6daf7c23L,0x1ccfbf17e32a93cbL,0x00000000000000c2L }, + { 0x7bd9d6f12f508ccaL,0xe82d7171595a72afL,0x25d0297697512873L, + 0xefc1de8b8cf39fbcL,0x25e6b77f9a1237f4L,0x9f3b73e7d4d98b5dL, + 0xe1fda62beccb07feL,0xdb813b03625350cfL,0x0000000000000014L } }, + /* 33 << 126 */ + { { 0x7907b2c97fcca66cL,0x6516825362d05422L,0x94e0752213f45a4fL, + 0x1ac91e11c4c62129L,0xef3c27c76a4cc2acL,0x5677109687867eccL, + 0x091994ca1289a9e2L,0xa3d70e2a75d15d56L,0x0000000000000004L }, + { 0x5072005570aedaf7L,0xd252fab22cf95f71L,0xcb0c8766830ec191L, + 0x3fe6bf257e18f064L,0x5c5ce223ae21206aL,0xba44c780c5b7f1b0L, + 0x339a7894181afeebL,0x68d02a575ff231ffL,0x0000000000000134L } }, + /* 34 << 126 */ + { { 0xe8ab1445c5eaf5e9L,0x95d96c674d22ede6L,0x03c3ab6cb7a0fba7L, + 0xeb3b5b6796487583L,0x171befc1c3e7d0adL,0x8b43bc1ffef58ae3L, + 0x11800901171b454fL,0x6a42fc801f5358b5L,0x000000000000002fL }, + { 0xbcd8373de74453acL,0xefaf2d8e8b03d1efL,0x7e17c8902b434397L, + 0xff3a65b4b0f49b6fL,0x288d883d723ebfaaL,0xe152b3f524e32fdcL, + 0x2f044966e2dff973L,0x16f0bf33f2c0ae62L,0x00000000000001b3L } }, + /* 35 << 126 */ + { { 0xdc6b5651efbc2867L,0x077952546b03925cL,0x2860cb96535d4160L, + 0x273f6ef8fec0f37bL,0xb7b144148bb7ccdcL,0x012561602d4018d9L, + 0x84671a39bfddf7afL,0xd52d837cbaae273dL,0x00000000000001aaL }, + { 0x4b1d19cb649da549L,0x4c354d7d9cecc0b3L,0x64e4665700376779L, + 0x247cc8922e59378cL,0xb4be23495c6e18b1L,0x5793c5c864dfd529L, + 0xc486d6656c38c470L,0x0494793f93ade6aeL,0x000000000000015bL } }, + /* 36 << 126 */ + { { 
0x2b675aad33c1333aL,0x5b635d5de3f70bc6L,0xdc1b161fcc3993dfL, + 0x3a966f3c07e1c9cbL,0xbd684a329fcdec80L,0xd7b51c19f12c7088L, + 0xadfdefb4f549b4dcL,0xe542877b58db90bfL,0x00000000000000c0L }, + { 0xd7a8f7cbf2b37e98L,0x0898feacb303eaa2L,0xc40968038ab3ec65L, + 0x178d3f6fd7d2f42fL,0xceb00cd5c31552ebL,0x85d9d2b5d878396dL, + 0xc6b51a1f2173b3b9L,0x3de5f48ac997717fL,0x000000000000008dL } }, + /* 37 << 126 */ + { { 0xdce090edcd84da75L,0xb1165394678048f7L,0x58e1c4df92f6f8caL, + 0x8347365693eda7aaL,0x88359b5039500625L,0x455fcb1a6424b594L, + 0x11ffcbdad7a86a6bL,0xabcf989ad68b45afL,0x0000000000000121L }, + { 0xbba5977cf6d851a8L,0xd4a4f5319a39399bL,0x933ce57bf1b1f725L, + 0xd9e63d1970d12e1eL,0xe8a4a94f4c3e73b5L,0xb54fbc144103bbdbL, + 0x8b96cc8c90f25055L,0xb660990e7ed06482L,0x0000000000000154L } }, + /* 38 << 126 */ + { { 0x89568720a7253d96L,0x1f68812e0b93c9e3L,0x14077833b832d791L, + 0x9374abd0a1b1c6f2L,0xbc4a0f3f2945ec29L,0xcf31921a8bc301a2L, + 0xd2aa436db992ae7aL,0x9a3d6309f1dcf7b2L,0x00000000000000b4L }, + { 0xd6cddd57e108567dL,0x97dd4615bb904179L,0x9724b58f7f8e9eacL, + 0x20258807decb2b7dL,0xe3ac333cdf5e10b6L,0xf0ee0a7bde6510deL, + 0x502e4677f77ffa98L,0x05bc4724ed6c6731L,0x00000000000000a0L } }, + /* 39 << 126 */ + { { 0x14ea632747944f6cL,0x4b542fa18e12e641L,0x9e73d65b716c29b4L, + 0xce4fcd50943d8274L,0xc2d8f4bcd50b1954L,0xf56c1429bcaa5d11L, + 0x4d3009e5bbec9109L,0x74cca9dfa25b9016L,0x00000000000001eaL }, + { 0x0d3ba441245ef27fL,0xcdfaf08b5f09ac0aL,0x20f8b98d19f3dcfeL, + 0x0de8607dec975f66L,0x9137d362b488e1c8L,0x4bd2dd285453d7a0L, + 0xd98457be533bf6e9L,0x492026fc9c747fd1L,0x000000000000019eL } }, + /* 40 << 126 */ + { { 0xfbefb8c9644f809dL,0x5013f5ad50b747faL,0xab3c8de1a054a0c4L, + 0x687c5279c608ef1dL,0xfd4c40096bdf1f8aL,0x49caa9ab72a4a4a9L, + 0xbbf3951812790b7cL,0xe0535999b6373459L,0x000000000000007aL }, + { 0xa2dcab70fe2fb036L,0xc2aa9ad018b31f0aL,0xb5a76e592be108efL, + 0x68ff6f12ba3ede8bL,0x6da7ac98e860842eL,0xdba409b2969f77f7L, + 
0x490dd67bf92b2ef0L,0xfa837cde26207a6eL,0x0000000000000143L } }, + /* 41 << 126 */ + { { 0x19f334f8515e0800L,0xe86806502435b94cL,0xbcb190dd8313a87dL, + 0x834b84a8ec5a36baL,0x15203a655639ac59L,0xa8752a705230a929L, + 0x37ae9b10e149343aL,0x3cdbed17ccf9e664L,0x00000000000001a6L }, + { 0x7a8926e6dc707695L,0xbdc0bd41fa849e17L,0x46621657668bf25dL, + 0xcdac0561b33a596cL,0x359a86131244fbc0L,0xb6d5d31e02b73688L, + 0xebcef3d390394d81L,0xec76090ef03972baL,0x0000000000000140L } }, + /* 42 << 126 */ + { { 0x951148835e5ef22bL,0x9125af3b086c572cL,0xb24ef04ff594e704L, + 0x414a4c09671591deL,0xc586772f55b0d554L,0x41daf9a1e3d723a3L, + 0x7a8f5eb2def1abf4L,0x60825c418dc54c39L,0x0000000000000185L }, + { 0x40b62591f51569e0L,0x0696b545d752fec4L,0x60b8363c88119cd2L, + 0x234f3e85ae327b8eL,0xd31f4f0acda9ccafL,0x3d581d7377e5ebc5L, + 0x4e9b38118e20d693L,0xc3db07bad0c29c3cL,0x00000000000001c4L } }, + /* 43 << 126 */ + { { 0xbe476abd5b92e5ddL,0x7338532b16d1237dL,0x022f670f6870cfdaL, + 0xeb8dc7c79b90dabeL,0x393200eecda571a8L,0x0c6aca6791ac6936L, + 0x14346e26488cb230L,0x273065e39536c901L,0x0000000000000030L }, + { 0x13aa183bc593d9b8L,0x48ae6dc7343198d8L,0x7b62448571798e8dL, + 0xea83244ea71750d3L,0xf55e8ab5f6a39a13L,0x1728d30129d92e75L, + 0xd50d981f10614337L,0x6b0decaf77d6383fL,0x00000000000001f8L } }, + /* 44 << 126 */ + { { 0xea856cb6b878c367L,0xa9c546214a4fe60cL,0x1019fc69abf32138L, + 0x6c4b6ccd06730992L,0x1a311a6d0e41375bL,0xf0248235114992a3L, + 0xf41b1b39764b38c8L,0xce60da46dc105e9eL,0x0000000000000114L }, + { 0x6aa1e85bb3994b60L,0x71512ea2c86972bbL,0x0741f2c4ee618490L, + 0xca37bb0da39e9c5fL,0xde4d4610ac5d2a89L,0x4f6c0db09ab5d88cL, + 0x81bc4e8876ac9892L,0x69c37cc885406e8bL,0x00000000000000d0L } }, + /* 45 << 126 */ + { { 0xdee629187f42e8afL,0x5cffbdddd3afaadeL,0x4a1b49fdcb2e4014L, + 0x50618d1c0d3d2d10L,0x33f3664eaae624b4L,0x0cc69b93cc062199L, + 0xb24a2b9bc82d9ff4L,0x3aa76f87ba908201L,0x00000000000001deL }, + { 0x2be85cdcf5e32e54L,0xbf49717e92be486aL,0xc399c36657a1a1deL, + 
0x6fca0189fbd93af2L,0xae241e2234367725L,0x09d626afae886ef0L, + 0x98529fc5b2523cdfL,0xb9cf93bb7627b317L,0x000000000000016dL } }, + /* 46 << 126 */ + { { 0xec182c95314f1ef3L,0x639755d63bb94f80L,0x68f1135f4226cd5eL, + 0xea726aae144b80b7L,0xf28425f789e8c673L,0x6b1aa275b32750e6L, + 0xad084021d82069ffL,0x8a46c57d9ec866b1L,0x000000000000001fL }, + { 0x67b95be0941c5c8aL,0x276c490eded9fac8L,0x8420e9525734f9f6L, + 0x02f4cdf9bda6a56aL,0x7a37266e3ae782dcL,0x3980321a3e284d56L, + 0x36a300897c1d0382L,0xc0e8d093bc4d5b39L,0x000000000000015aL } }, + /* 47 << 126 */ + { { 0x067b652bab71a0a4L,0x556b7ba427ec168fL,0x5148f2daa39161e7L, + 0x37949e842818aeaeL,0x06837989d63cdbc0L,0xecae47c32c2781d1L, + 0x3a35acca4f826df1L,0xbe89aad66bd0dfeeL,0x0000000000000069L }, + { 0xa31d586b613eaca4L,0x01ae304c231ef718L,0x85eddb49ac30afbeL, + 0x535eb7b938cfa36bL,0xb71c1334f948b8d0L,0xf0bb2d35722426beL, + 0x022926d36e23d8a6L,0x9229e9f90a053ffcL,0x000000000000014aL } }, + /* 48 << 126 */ + { { 0x2d8061ae51e200c8L,0x2ed4baeacba53504L,0x7a99581d1a816471L, + 0x918011cd8e03b1bdL,0xebf02ce3403bb07bL,0xe34fa50b55b600a9L, + 0xc9b864cdbfd6e3caL,0xd321624427effe1aL,0x00000000000001ddL }, + { 0xcfade4e193849908L,0xc0f031bf48a0e903L,0x7d2b8b8ebce49719L, + 0x42d55ff0b518922fL,0x1f87d2e3d6fedaacL,0xd2e6ac95fb5ffdecL, + 0xfcf0b4b31a3a896aL,0x5a3b6cf15070e85bL,0x00000000000001d9L } }, + /* 49 << 126 */ + { { 0x6fec5fae1ac7901dL,0x4e7d2ad3ab3555bcL,0x6cef79e4c1f20c1eL, + 0xad3307c2b04f2014L,0x51bf0ad4f00438c5L,0x77be5b640c555e7cL, + 0x656812453f950e12L,0x233d773f91066329L,0x0000000000000136L }, + { 0x1416db4b63b0a3cfL,0x0c8a94bac1652c6cL,0xc516152a81614204L, + 0x48b25ff63cfe5f60L,0x1d525e0bfc3f6c21L,0x9e60149106bb637aL, + 0xb042eaaac477a455L,0x538885a61fc4039bL,0x0000000000000026L } }, + /* 50 << 126 */ + { { 0xd8969d84fc075646L,0xfb95b2b9cf469b45L,0xd0558743461890b9L, + 0xc4a7c4d62d5be147L,0x7bdddc2abf485a2dL,0xe51570f923629993L, + 0xe4e0e7eca9fa2458L,0xe6106cdc8e0678aeL,0x0000000000000190L }, + { 
0x2c4c56ebce32e8eeL,0xd94ad3f5a5593d72L,0xe854d1ac0ff0a233L, + 0xa406b8375145ee0bL,0x0e3584ca488da729L,0xb6ff4f5380b8f316L, + 0xb363e9cc402b0513L,0xccc22a8755811946L,0x000000000000019aL } }, + /* 51 << 126 */ + { { 0x47d2138b3580621bL,0xbd4c9c9a1474dbb0L,0xf1481018ae0e3eb4L, + 0x6a093b167078ee4eL,0x4224d1f910edc815L,0xbb73d935a375ffefL, + 0x218c7befdaa15567L,0xda7f1dca1a74f587L,0x0000000000000027L }, + { 0x811b1e8e92233eafL,0x5000dc5d3554ea4aL,0xecf34661944bd497L, + 0x0450716d15e5b3a0L,0x95e2529d671d928fL,0xd19305ae3946dfe5L, + 0x0f2065a76211cca3L,0x0e1476a6c806fd74L,0x0000000000000181L } }, + /* 52 << 126 */ + { { 0x84b08fd1b927faf4L,0x236eae2654fc3722L,0x1a7cb9bff4836cc0L, + 0x251833480db8508dL,0x36a19e7c9d1b8764L,0x8eb89180de352f40L, + 0xccf372836e1a3ea0L,0x3e550c0a0a72c4d4L,0x0000000000000136L }, + { 0x8e83af37f7b2ea82L,0x03c78b22395bdbdcL,0xc229423fef042d8eL, + 0xf9eb603d4ed515f9L,0x2f56337627176ddcL,0x8390b596112af839L, + 0x617bed410774a6abL,0x52903a20d5f44b62L,0x0000000000000014L } }, + /* 53 << 126 */ + { { 0x4e4fe470af6efb14L,0x6e9bbcf0be657506L,0x68100c8e2c3159ddL, + 0xc0b59ffa5bbdf11fL,0x08681fbde6d49ff6L,0x7169ccb7e31b4c65L, + 0x8703c995c929042fL,0xa12f13323361e4ddL,0x0000000000000196L }, + { 0x00ebad68a0783eadL,0x02d1925992856a97L,0xfdb7acac51bb0952L, + 0x5d46997c526c7fadL,0x9835ebd5c7a75eb1L,0x2ac64c34c3655307L, + 0x13101862da8f9faeL,0xb0f6ab8ed9f230d0L,0x0000000000000012L } }, + /* 54 << 126 */ + { { 0x6cc723d437603fc6L,0x18055821d352721aL,0x942c0d47fe656ee0L, + 0xd20be2694b170f87L,0x02f7ba5088d08be0L,0x53e64fdf1346bef5L, + 0x2d23c4d4ea146dcfL,0xe68a09a37987bc92L,0x00000000000001bdL }, + { 0x7620c264359c5796L,0x5846dd812b06b415L,0xf5b1ce8c2b242c54L, + 0x3a42814aa2888a57L,0x4b2d89cca26460b1L,0xe3423f2974533daeL, + 0x3da2e8a63e9f0bf9L,0x53b6f11b1454ac77L,0x0000000000000021L } }, + /* 55 << 126 */ + { { 0x06ee24893ca6735bL,0x70fee9e83374e617L,0xe8b3fa54d4a16bedL, + 0xf364a301f3c1e5d0L,0xf16d5ef13c3e0753L,0x248d4e50d21da609L, + 
0x242edf3ef60f6054L,0x3fdfc80c0c24e00aL,0x000000000000008bL }, + { 0x0a18a90bc3170235L,0x5c1bf5dec086e9b1L,0xcaa2d5f62cd7657aL, + 0x51a3c67591ee757eL,0x16c99b21a186249cL,0x2c5561941b8cadffL, + 0x9c58712f16ef2913L,0x6ccee5a5004b31fcL,0x0000000000000078L } }, + /* 56 << 126 */ + { { 0xa5dba4e1352ecab9L,0x6b02e6822c778abaL,0x2540e2cbcd90027eL, + 0xe9273e1028ef0df3L,0xf06ac93932993c75L,0xff0dd2dd17e95ccbL, + 0x67647a2edba0d5a0L,0x88aac34883a00096L,0x000000000000017fL }, + { 0x5b0503a0a1edd5c1L,0xa3b6b42ad77e1a61L,0xfbbc558fb0ac4790L, + 0x6e091b612c98fea0L,0xe9dd74b179d85e44L,0x01c6eb8996cb7b14L, + 0x3f220aba0a79bd88L,0xb30988e26365ab82L,0x000000000000002cL } }, + /* 57 << 126 */ + { { 0x54a308fd559f11f2L,0x31e2721b647cace7L,0xf118348be8222f9fL, + 0x97c4a761d5c359beL,0xded55d0b43515850L,0xeaf9aac7c43c5a7dL, + 0xdbb25a12d68e503cL,0x282258dc50e474a2L,0x00000000000000c8L }, + { 0x7464fbebff35e9a7L,0x522a0c6c38c685f9L,0xa70a2bd404a05041L, + 0x0637bf682fc8355dL,0x33130aa254cf363aL,0x3ad9a6df0d4657f1L, + 0x9e49f74a60758bb0L,0x2024ecfce25eb216L,0x0000000000000085L } }, + /* 58 << 126 */ + { { 0x33f1a6e161e91c74L,0x4ff19110e6c0cb10L,0x5c42e957f600ab08L, + 0xdde03dd42f9bb837L,0x3641e9391a234d0bL,0x53f8620f0b9b78b0L, + 0x1fe0b61aa1409944L,0xe3dc9c0333cfbe8aL,0x0000000000000053L }, + { 0xe668c6ee20c3c089L,0x427daa6df27385e3L,0xcac71fe61f72a4abL, + 0x6e5eb2cfe1329f8eL,0xe46c870f4e37087eL,0x5831ca51ad032d30L, + 0x5991353dee77c07dL,0x3362598e0bd85cd0L,0x0000000000000138L } }, + /* 59 << 126 */ + { { 0x9eb88eadeb34a93cL,0x6d29e37a4701b69dL,0x426b10f150770987L, + 0x73548e71d1f0f072L,0x80f016da0def01d1L,0xb1ccf96a3ea4825dL, + 0x39265cb1e0b3c83eL,0xc978e8b42b3039c2L,0x000000000000007eL }, + { 0x730a6fbc3e04c5aaL,0x67b705f33d44b9f2L,0xefa40dc7e961f235L, + 0x5d20afc2c64cbe5bL,0x7e3e1033b0a4ab7fL,0x26ca57a6425506cbL, + 0x0205449170798bbfL,0x2397723fbe05c4e1L,0x0000000000000144L } }, + /* 60 << 126 */ + { { 0xaa272aaad19d50a1L,0x3571d10bb1d206b7L,0x13d9fd10c3b75ad4L, + 
0xd8dfb50b546daf84L,0xd2b77b3a3a1f736bL,0x33725766683f310cL, + 0x5c27d38b9fc3b081L,0xd74611bad1642ff7L,0x0000000000000026L }, + { 0xdb5eee71f30e1c09L,0x819fdb72ca6da656L,0x312f0d734cde6546L, + 0xa45ffdbb7bf7c656L,0x3fe1359af71f5257L,0x6ee0ea0ac34e9a29L, + 0x969e2682b1e77c79L,0x1e8323864a901cb8L,0x00000000000000daL } }, + /* 61 << 126 */ + { { 0x22ed87b1b25beddfL,0x3683f8776daac239L,0xda6806bcb77d6ed9L, + 0x046324c87a1c73b5L,0x5302a1f0280619fdL,0xff691f965d3ab015L, + 0xfd8e1f0576a75d3cL,0xb170a9c48883921dL,0x0000000000000199L }, + { 0xe2e4582834cf1693L,0x4e53f2ccb6cbeb25L,0xd7c26c5a13f317d0L, + 0x51871564f2f46ae0L,0xd17031e8abf83111L,0xe07adfc84579ad64L, + 0xa9461bc123467da6L,0xcb2976479eccc563L,0x0000000000000035L } }, + /* 62 << 126 */ + { { 0xb146ed5aa9f72a7cL,0x484b8997a7f0e604L,0xeb7b7cba9531f3daL, + 0x272a057cae6515d2L,0xb5afd269686fed9dL,0x3495b87c6d05afcbL, + 0xd60b71ce218f80f7L,0x2d850946e0ded104L,0x0000000000000002L }, + { 0xadb631e23692300cL,0x30aa16333329dfccL,0x8546c0fa71ffd9aaL, + 0xc5e4b3590962f556L,0xf09a2444bd391207L,0xdbec490c71cab26bL, + 0x4055668615b145deL,0x5f18aceeceab1ac1L,0x0000000000000170L } }, + /* 63 << 126 */ + { { 0x62cc9557f077d63dL,0xf30f4a8be8f5f5f9L,0xf98c9bdcde80ec73L, + 0x167d81b80c4c8e3cL,0xbb7ff344653cd736L,0x60725f05591730e0L, + 0xbe4ef60d1803adcfL,0x04ed04c0c5127350L,0x00000000000000f1L }, + { 0x5d2cbf0b776f33beL,0xd1a495b3cd90ab6cL,0xd47c850fecb6e7d7L, + 0x75dfa50f266f13f8L,0xfe272a12e317dca2L,0x14ce7728bdff1777L, + 0x3dc3926220fba381L,0x148ac59d6c2259d0L,0x000000000000012cL } }, + /* 64 << 126 */ + { { 0x904f2d4bdf9314e0L,0xdaae850de7a00aacL,0x79231083582efb03L, + 0x80f1c283ec7fe6d2L,0x2d5b3996199d74a8L,0x5f120b9b395007e7L, + 0x30d237734773f03eL,0xf4c192733b78b686L,0x0000000000000121L }, + { 0xf103ff6dfa8b51f0L,0xae7afb5140e2bdf0L,0x1130380e83254171L, + 0xe83501b8cda10d95L,0x1057771e4f3a8c01L,0x8f52196aac807069L, + 0x3609b0aaa5623821L,0x8c25790694a0a7f1L,0x00000000000001dbL } }, + /* 0 << 133 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 133 */ + { { 0xe2445c75cb0284f6L,0x7266ed21f0d3b65cL,0xa6f7fb639c0cd13aL, + 0x68d77e3537fee05aL,0x1d21ea95dd708c8eL,0xc75f44e793ba8d14L, + 0x8bb1876d90e34ca9L,0x7a3621e4b70e8cb2L,0x00000000000000b8L }, + { 0x0fb7c958e1d0f420L,0x34b8bcf2fadb2239L,0x1800038b6a67f72cL, + 0xa8e3236ac19f70a9L,0xa73dd94cb908c753L,0xd7569d95a6fa426fL, + 0x5ab784f60a295ae2L,0xdc2bd296583f0f13L,0x0000000000000039L } }, + /* 2 << 133 */ + { { 0x4624399658603a1eL,0xf6bf54496310788dL,0x178120864c799cc9L, + 0x20a0d4e63f9c7982L,0x13340a26930d81caL,0xb248675461520fa5L, + 0x0e9480cce3e2cbb0L,0x11f633c1c61dd54aL,0x000000000000008eL }, + { 0x34510e9e07ef7a73L,0x242ea6950e00a9dbL,0x1a146ea20ead5b7aL, + 0x52c0a70bf54a3534L,0x4b85dbbeb8113e6aL,0x62df74261446fc2bL, + 0x3a79c26ee9fe597aL,0x6327cecda7afdc59L,0x00000000000000c7L } }, + /* 3 << 133 */ + { { 0x8bf9e0791c403388L,0x219a3154046f9ecaL,0x3a8ee73bcec86e4aL, + 0x0a31124e281f2e27L,0x6a6fb93716bc3255L,0x0706364a8ae63821L, + 0xcce86c65323d4b3aL,0x0c238449fd6cfbd4L,0x0000000000000174L }, + { 0x031202de7faa8a49L,0x97b770b2579d75fbL,0x5cf4413ea5e0d986L, + 0xe93b56fdcc936d96L,0xe4ec80d2b1c5576aL,0xbf8972a2da093bf5L, + 0x0c5a98bdbcbefd15L,0xa19b3e630e6f9d2eL,0x0000000000000131L } }, + /* 4 << 133 */ + { { 0x0f73c85a8201c48bL,0xef502f8a4c9caee3L,0x8df7699641d84048L, + 0x93f8a34bba0f3b82L,0x0fba696cd6a81ed8L,0x0be6c83196622944L, + 0x0de9a8f2085495f9L,0x12d843e6580b2551L,0x000000000000010eL }, + { 0x507820fbe51de678L,0xe404cd9317831a13L,0x4818c9d1721196c2L, + 0xefa01a64cdda697fL,0xdccd32f0bab8b58bL,0x51ac6e1fb4a19bdaL, + 0xd402693f4e25340aL,0xd6567590afbbec44L,0x000000000000018aL } }, + /* 5 << 133 */ + { { 0x07cf170343ea64efL,0xd1dcaf5fe29ad51dL,0x6bc714cbfe586ba6L, + 0x95d455c4a564449aL,0x786b09c6a3f8c7b9L,0x6b2825349a9f9acaL, + 0xccceb672df41c312L,0x88da8f633a5f10daL,0x00000000000001c5L }, + { 0xf41c956600a865dfL,0x8eeaa9fb92362410L,0x232c80aa699ea6c5L, 
+ 0x86ad242d2e4ce53aL,0xf54d983fc51e3b6bL,0x5fef2fb86e2dcdcfL, + 0x08b9a493e3f0b5dfL,0x32594fcd676f65f9L,0x00000000000001cfL } }, + /* 6 << 133 */ + { { 0x79541ad2f0258929L,0xb69d65afa7ffda3cL,0x7a83f701fb3fccfeL, + 0xb8c83005284111f3L,0x7b48d4acff594c0eL,0x4364f321b301dc8bL, + 0xccdb0184404f2b6cL,0x52239a4b8f4e85efL,0x0000000000000144L }, + { 0x1dd594a2f650828dL,0x98b50e840aad2d93L,0x491fb295fd90d0f7L, + 0x511abc0b7fc0144cL,0xeb240aa88d0085cbL,0x926f083c4f4e0a76L, + 0xf5e8865df32a6e48L,0x8fae6ec023e3413aL,0x00000000000001a5L } }, + /* 7 << 133 */ + { { 0xe038336eae697aa9L,0x1545b82e6b122bc8L,0x2acfc7a2465cab13L, + 0x396d60549e0670bdL,0x7099e416d4ef9b8aL,0xa745f4a6f144b1b7L, + 0xe859dac381092b9fL,0xf4bf2786ca8dba24L,0x000000000000016cL }, + { 0xd43fa47a1d8e919dL,0x7848e02298994d0cL,0x88559fc88b5c1e94L, + 0xd810594a0ee65987L,0xc6b9837b9476a135L,0x08b04d2778667d75L, + 0x3d35806e815d0639L,0xd703da9b9cea4585L,0x00000000000000feL } }, + /* 8 << 133 */ + { { 0x714c9d4d6e4a010aL,0x9e8621eb98bbdd05L,0xf3f33305efc60f48L, + 0x4e392a4c0706c352L,0x5d0f7b3a94dcfc6cL,0x28c343062434d058L, + 0xdba377bb41885900L,0x0ed4082dad73df88L,0x000000000000012aL }, + { 0xc0899758fcdadfebL,0x1638c31454319374L,0x1831028ec8ae469cL, + 0x41093ffe3a0e36a7L,0x75c6667b2860701aL,0x38c843a264de0a06L, + 0xc6e2c8a0949389d1L,0x754f22b73a1cc818L,0x00000000000001feL } }, + /* 9 << 133 */ + { { 0x90d5cf72e8548dffL,0x0b0082421130f27fL,0x2b742a08fd094a2dL, + 0x8002cee1fe8642b5L,0x94ba68b089e850d4L,0x9bfe83a1084d0ab9L, + 0x6aa91c002d2b5f7bL,0x0886530f383c1125L,0x00000000000001b0L }, + { 0x039fd9101b334478L,0x762fdf6b772b30dbL,0xf0b07b33950b4e70L, + 0x704eab49aab6690eL,0xfe91d13cbd747525L,0xb40f6f7ea5ab8cd3L, + 0x9f66e805fe523b78L,0x232466335f117f27L,0x00000000000001d3L } }, + /* 10 << 133 */ + { { 0x3660997e1d29651bL,0x017c92f21c43ee22L,0x399bd07278817d09L, + 0xe7886c7efcc2e4e7L,0xd86cddf76c75869dL,0xbf7491ac8d350f4dL, + 0x794d402ad99eda1fL,0x93a31aa3ba86bb24L,0x0000000000000155L }, + { 
0x3be864461ba82751L,0x2fbb5d14599c74c0L,0xb91bfae79528f6e4L, + 0x9718d1dc27ecc011L,0x2e6ae4695685c69dL,0x755ef4fcf2f01e8fL, + 0xd7d5ab2292d8a1aaL,0x0b96bd64fa7d56f9L,0x00000000000001f5L } }, + /* 11 << 133 */ + { { 0x803708af0a32a860L,0x611b39670a9ffb60L,0x1d73591bd1c64167L, + 0xabefe36c919b5b6dL,0xa6e40fe88e76d295L,0x59af88d392cc3d6cL, + 0xed75288dc978e6c9L,0x1d4f6dc597c882eaL,0x00000000000000adL }, + { 0xa71e31457368f4a8L,0xc5b4991a47a78cbcL,0x718cfc2581954234L, + 0xd154abe8846e6039L,0xfc6cac6c2e7fa9b9L,0xf8c914314a4c53c3L, + 0xcc073a4bfaa6ddcbL,0x3980d5f90aec7bddL,0x000000000000009eL } }, + /* 12 << 133 */ + { { 0x53f7cc1dcd9c3b89L,0xd01fc88e37c09eedL,0xdaad94c7fd4ba95dL, + 0xa7bab576f6331227L,0x4919757ba6066bfeL,0xa8d525106197a2b5L, + 0x1a7c2cdd80fe8f9dL,0x47a0587c292e069aL,0x000000000000016dL }, + { 0x0374fc0618a5170bL,0x80e54bec68f03073L,0x8de8d710a373c6a3L, + 0xc25d052115e254d7L,0x97fb5a235d0da356L,0xae1d930aa3099c32L, + 0x4a362d78b750367dL,0xeaa2fd7c6c7e426dL,0x0000000000000160L } }, + /* 13 << 133 */ + { { 0x7d18c1c82affd347L,0xd1380fcd2983736dL,0x595f7723c63223ddL, + 0x631cfbe5b484b6fbL,0xbd2861390a6ca8ccL,0xee72e38d4f471ca1L, + 0x032046a1971ca142L,0x479212432a1dd8c0L,0x00000000000000c2L }, + { 0x35e6b9e5d7ae4e8fL,0x5a00b0f90a3feca7L,0x1c342e080a0a3d2fL, + 0x1e8879ddee0b5c36L,0xa420ee79ec8626cfL,0x79a1917573f33218L, + 0x1258124a86bdec5cL,0x691be2ab06058dd7L,0x0000000000000042L } }, + /* 14 << 133 */ + { { 0xcfa90e928d85981bL,0x44fbbe308470ae90L,0xb1c923e3b4a1c507L, + 0xb2d07ce9ff4afe17L,0x6efdd4cc1fefef7fL,0x9dc388240fc6d59eL, + 0x90a95ff8cd639f85L,0x97907edbc6d5cc6bL,0x0000000000000012L }, + { 0x4321d7bac15ff87aL,0x266401efa1c681ddL,0x71b9f08c84b64526L, + 0xa59beafcfd5df281L,0x75c4a3cfbb791707L,0x48cac59c01ce567dL, + 0x14cd0e56a00a8628L,0x2e9e9b3f0cacd47dL,0x00000000000001d7L } }, + /* 15 << 133 */ + { { 0x234103cfdeacba24L,0xfe7118f964b60ddaL,0xb600dd482a7d4f4bL, + 0x60177286754ef7dcL,0x49a59cffe9ab0cb2L,0x309aeff80cbfcc2dL, + 
0x0a995830b4cb626cL,0xe5c24958357da294L,0x0000000000000108L }, + { 0xdc25d5d416fbf2d2L,0x76f7dd357630d805L,0x0fa599f1883ffd97L, + 0x81490dda0c07ab63L,0xb0afa4b335d3a0b3L,0x5339cd6c98872553L, + 0x31702ec41f849f90L,0x111738088b6e6311L,0x0000000000000180L } }, + /* 16 << 133 */ + { { 0x58821c974a0c0913L,0xb7707006307795f2L,0x44f5f5bcea20c664L, + 0xc23f5ac39c5ec494L,0xe83e4fc380affd1aL,0x7e92884d50102ac6L, + 0x632405b8a3c31198L,0x1995f831453cc03fL,0x000000000000009fL }, + { 0x8019071721e50847L,0x794b63d991f3efd5L,0xe9d69357a95c778cL, + 0xbd298eb0d04a14c3L,0x40a105154c5cb3bcL,0x296fd944f9c2fa5cL, + 0xfee5fe79efb9a49bL,0x12e8d7b35ba9db09L,0x00000000000001d7L } }, + /* 17 << 133 */ + { { 0x7978a92b15e89a09L,0xc21eaf079c8cbea8L,0x977fccc67d83c497L, + 0x037b22f5a4996ef4L,0x79b075dfb1fa5efdL,0xff169c5d895b8646L, + 0xb91366e1139eaab3L,0xfeac1026a55cf585L,0x00000000000001b6L }, + { 0x0e827a8e83e55daaL,0xa7a27e77cd68ce98L,0x2cc9672e7d586572L, + 0xd055fa46a3ff0e35L,0xafcc63023268d057L,0xccf7b4f3294e84d7L, + 0xe972308991a61758L,0xda4feb6b72a08bbcL,0x0000000000000098L } }, + /* 18 << 133 */ + { { 0xa3c2e3b89a33dcbeL,0x69a1af65846e1f3fL,0x325e624b23d94d03L, + 0xc9f9a9fe7b606aa5L,0xe660b7b341d6f8e6L,0xd9b994a8e7a4b883L, + 0xd97bd77226a3c490L,0xed529b01110a6d8eL,0x00000000000001a4L }, + { 0xc956d69d12852a59L,0xe0271878b91f3a5bL,0x70ca2552c30df3d2L, + 0x1b322ad23ed9a3f0L,0x2bd7fa803ed7b14dL,0x2b0e40c9937ca92cL, + 0xf0a680d7c88bf5f3L,0x2c1717bae18f006dL,0x00000000000000e0L } }, + /* 19 << 133 */ + { { 0x4f62538b805ba0bfL,0x521cb0d1ab71e577L,0xbbc91db992f0b4c1L, + 0xd97b96acea45846aL,0xce2a73bef99195b7L,0x4808f872335360b4L, + 0xeb784cc1e09fa8c9L,0xc9d0de8524667ffcL,0x0000000000000098L }, + { 0x8aa32a023abb4464L,0x8539cb99aa2eeb37L,0xf97a730fe6572cb9L, + 0x49a2ec198d17e745L,0xcbb7f722581a6561L,0x5271b9e7fd748594L, + 0x7df05c8555efe907L,0x12b457a3648b30ffL,0x00000000000000dbL } }, + /* 20 << 133 */ + { { 0xc2eb058989126facL,0x796ce4619a94396fL,0x40eca49d99c7ecccL, + 
0x20ed57c7c492a4a3L,0x4089296506fa952eL,0x4690915c906f1b9fL, + 0x099b286b548ce05eL,0x293617b76fe582a4L,0x0000000000000094L }, + { 0xa34b04313831cd9dL,0x51eeddccb647a424L,0xcb7754a1e82a9c3dL, + 0x59bd6cabb14c49bcL,0x8f6d2d2e8ac8f7a3L,0x5f2fbf4346ce83b7L, + 0x93ece81c0d68eb87L,0x01271a1304d06b9dL,0x0000000000000067L } }, + /* 21 << 133 */ + { { 0xe26b19f2d74f6e10L,0xf3d3187d2ce14a93L,0x389726ad0da15517L, + 0x370c53b748f5a2d4L,0x052065fa2adf6e52L,0x35193065bae051ddL, + 0x82351b7900b33478L,0x03e7d6d80cdf4884L,0x000000000000019eL }, + { 0x383538be2e446a7eL,0x36190c9adb8a52deL,0x51d74979946c8943L, + 0x01bf13a579d3b4c4L,0x7f27c8610f2f49e2L,0x6ce84ff46a734dc1L, + 0xec6f79ddbb5ff5a0L,0xdb6e4fe3a96d9a23L,0x00000000000001bcL } }, + /* 22 << 133 */ + { { 0xbeb1d116cfb1ec29L,0x3f1502f0c5f0152eL,0x501456fe75a4ee8aL, + 0xde95a49a23265eafL,0x9e9e8319c0b85f89L,0x04d93a60647d2726L, + 0x63a54a59cb65cc94L,0x56766e88f173d58aL,0x00000000000000deL }, + { 0x9bccc01ebdb6a51cL,0x3d893bac5ac0d14dL,0x4a89af9516e16ea6L, + 0xcd7eba6798db611fL,0x90e2d187e84acf42L,0xfdb5ecbce1053f4dL, + 0xac124000e8c4a365L,0xf724f56ecb5910b3L,0x000000000000017bL } }, + /* 23 << 133 */ + { { 0x0978471e01ac4394L,0x057a926de4d2cc08L,0xece99f2f002fbdc9L, + 0x07bb884abe832818L,0x7f396535fb1c9404L,0xfdb86a9f6ba39a7cL, + 0xf4b372d9665c78e8L,0xc1b321e9dd520572L,0x0000000000000006L }, + { 0x51f1b8b8841a3ed3L,0xf49b313516b4a01bL,0xe01130df5af2717fL, + 0xb62166e4eea414c3L,0x7991e1ac050b5040L,0x8d632c2207cfddfaL, + 0xa9033c3efa37f8b0L,0x542b8994790c1190L,0x00000000000001fbL } }, + /* 24 << 133 */ + { { 0x1711039279d6593dL,0x7d88866ed6d1e388L,0x27e0aa5fc9a20e92L, + 0xc82ef455b781bc4cL,0x36888ad0a1e9a984L,0xa51d1804962eb333L, + 0x3964bf4a6589216aL,0x88904ffa7529c9fdL,0x000000000000016bL }, + { 0x9e0aca53e59081d4L,0x8b9066c009c4315cL,0x51d18e8b44dafb2eL, + 0xa86cd85c3c757835L,0x7534cba2ed8b25e4L,0xe1db4aebe59c5ebbL, + 0x77af93de72024396L,0x0779f6af77f5a9beL,0x0000000000000090L } }, + /* 25 << 133 */ + { { 
0xfa55f353e1eee7e8L,0xe3728f0f832a86f4L,0x08ddaad44cb9e21aL, + 0x9b49d70e9cf0f3d5L,0x8ffab00c2fd4bb7bL,0x7b24a72f1435c1efL, + 0x390f209023687920L,0x1ae25cb77c75e684L,0x00000000000001ceL }, + { 0x881170cb4ea45115L,0x7cb03b3b2fdcf8cfL,0x0a7628ad560c4124L, + 0x44811560823820e9L,0x61ed767b6994e17cL,0x94da851fb95b76c6L, + 0x78bdec4b3c1ed403L,0x463d6cc3f1d85a84L,0x00000000000001bcL } }, + /* 26 << 133 */ + { { 0x4b1cf5b9c8514599L,0x609b60221834d2b1L,0xbe5b9326da5dde47L, + 0x64409ae86c87d75fL,0xa3567801e8d1e7fbL,0xa9b932338bf2915aL, + 0x91fd8fe62941cc91L,0xed0e7455cddffdf4L,0x000000000000010dL }, + { 0x913d2a5deef7108cL,0xaed5d7062be9d4d0L,0xfa01ca9bfa991f21L, + 0xf3518ce8e99b856dL,0x981faad4e99672cdL,0x539dd546e7f474e0L, + 0x9bda2db4157e9f13L,0xb8a88661ee003dc8L,0x0000000000000081L } }, + /* 27 << 133 */ + { { 0x8521387559df2f72L,0xb509af3789687b4aL,0x077892b261f8a34aL, + 0x5603f3095d0fee80L,0x6d42d34edbc6cf37L,0x0cf8c9c4a3920fb1L, + 0x655e26bfbdaf412dL,0x7272f887e2208eb2L,0x0000000000000141L }, + { 0x62a68859878f4a5bL,0x0146336435dc49edL,0xc3a2caaec6d3081fL, + 0xc6b69c2ce990949fL,0xbf1385e215a826edL,0x0419547fb367f8c0L, + 0x389ab431323d3470L,0xb0453b45669f8c98L,0x0000000000000180L } }, + /* 28 << 133 */ + { { 0xd815b0c258bc45e5L,0x91a14033dde59521L,0x2af00d61185031ecL, + 0xb1113f8df49ae2adL,0x623869a703db5dd0L,0xa27f52a38b084e1aL, + 0xffae28447ec2a78fL,0x316a4a837d788dcfL,0x00000000000001edL }, + { 0xda151fa30ae71753L,0xd2c6156661148b48L,0x5e1d6aa1ced8d6e9L, + 0x4c784fcdacc9df51L,0xb5715fadfeb5fd68L,0xe8aa4f453c4bd41aL, + 0xc295cf2c83a6506bL,0x6ebcfbe68959cd18L,0x00000000000000aaL } }, + /* 29 << 133 */ + { { 0x1e628033cbc78ba4L,0xb1b10d9e6af66a7cL,0x22732085003e1d98L, + 0xf76bf139b4ef67c4L,0xfdc2dc37b969d917L,0x8e9b0e633e84da86L, + 0xe1860be5f0916be7L,0x0e800a237c15f060L,0x00000000000001deL }, + { 0x213138e0bed62300L,0x2919defb59a9fc70L,0x076e46c4a40aac25L, + 0xba2e019231a4812eL,0x8fc6de496adfcd6aL,0x38d17f427d95423cL, + 
0x79adb39063487d5cL,0x58a207a931a424a8L,0x0000000000000025L } }, + /* 30 << 133 */ + { { 0x35c5164a45d58f3bL,0xe8b634c956a438f9L,0x9534282771b03d58L, + 0xa8c2ac9f6b9023c6L,0x4d90973f13636501L,0x9d371c592eb26e43L, + 0x24e2ea7d97f169a3L,0x6307489ee3af53edL,0x00000000000000a4L }, + { 0xa142cc05e27954edL,0xfa9cfb5de2d9d9dcL,0x7d78edee9c8624a8L, + 0xf0c2bd235976c173L,0x9727567c08649437L,0x516662a467e6759dL, + 0x5bc285768c8f1790L,0xa62faec4956463faL,0x0000000000000068L } }, + /* 31 << 133 */ + { { 0x6a9df5fbe3f77972L,0x52703588c85f0f3fL,0x29c45e81e6d57b9dL, + 0x3ff4393a0be4937bL,0x5c85f2ccdeb7dec9L,0xf885d42872f17b38L, + 0xc839bd481d95a39eL,0xfd14d7c7dd7372cbL,0x000000000000010fL }, + { 0xe9a106f0dfc4e535L,0x8d1efece352ca519L,0x4649995c18b5a799L, + 0x17e6bc99cda75652L,0x9b7edb4dc27545bbL,0xb196ef1436d9adb6L, + 0x2a3244496ef504feL,0x5a8e3e9fb9b92a69L,0x0000000000000095L } }, + /* 32 << 133 */ + { { 0x0d200089e0b8b7c6L,0xfa7c2a74e0c3a66cL,0x47465db046e3e5cfL, + 0x2bd1dd818537bb62L,0x748d70127165a234L,0x4d3737455c718337L, + 0x40c0f48e189ce8cdL,0xd018ce08a2f751cfL,0x000000000000014bL }, + { 0x5513201a7ac22c09L,0xf559e050a1cd3533L,0x6e1eba1ebd4031d0L, + 0xbfd8a1cc532b1d53L,0x3fdee4cf5a15b193L,0x226693f7789bb143L, + 0xa0d4dc89dee75e9dL,0x438d3544f09a0c6cL,0x0000000000000189L } }, + /* 33 << 133 */ + { { 0xae388e5fbc7cfa67L,0x519392e5c4a83747L,0x4ccc4f517e71db2dL, + 0xd0613eab43b51f80L,0x512eb54540a16cffL,0xfb154e87e61e6026L, + 0x676633c8c236be41L,0xad0ef2ceda383a00L,0x000000000000010aL }, + { 0x551ec1eb5b304592L,0xf85423b68c0864d5L,0x4e1550fb4b330062L, + 0x09fe089b38cf2f38L,0x5b9116c2970c0163L,0x9098bb3fcdf09e59L, + 0x7e65e01668bd9e33L,0x5c5d66f89b52bf2bL,0x000000000000000aL } }, + /* 34 << 133 */ + { { 0x72d78fb610dd1d61L,0x8be49d9a7bd8cd95L,0x3026eb2662e523ecL, + 0x7bfdd7dbd55b37baL,0x7dc305b0effcc9e3L,0x00a2eb23fa3415e8L, + 0x44ce408be37bc2bcL,0x0988ece81f78d0f0L,0x0000000000000098L }, + { 0x0e7dd8fff0f57120L,0xb519a4f6b2d8b7daL,0xc28b9d98aaa4606eL, + 
0x63655931b7ebe8daL,0x7d5e49937aa83d31L,0x8fc247695d7113b2L, + 0xaa723099af3672e6L,0x57a2ede46113f6bcL,0x000000000000017fL } }, + /* 35 << 133 */ + { { 0xbab36a6c9c8171b5L,0x2b92b467e3b4a5e7L,0x44181477ef7b6955L, + 0x14887f78ea6af659L,0x9d6c37d31334f773L,0x46ad0d60b49909b1L, + 0x5e6f4e00e13b1c8bL,0x1342dbff52e575dcL,0x0000000000000028L }, + { 0x845e56bef3dd7aa6L,0x35d1cd6645b0451eL,0xef47dfa5d812844cL, + 0x58c3217d8a377dfaL,0xf1c825800e7fcba0L,0xd8b870a470af2953L, + 0x8b04f1bb44705ea2L,0x985e4d4d60d8733fL,0x000000000000016dL } }, + /* 36 << 133 */ + { { 0x6339d37cd0d9229fL,0x6005c395e584a476L,0x6d0069ae28566e91L, + 0x429756900b0315faL,0x79dd1ffdec520a49L,0xc838751c0a8b3c1eL, + 0x0d28edf14f3751baL,0x500d09678bbc87b4L,0x0000000000000118L }, + { 0x7c7e207e14e4c072L,0xe63e49dfd3416c7cL,0x477aaa052eab5b31L, + 0xb73c00ada919f8d1L,0x3f892c301613ac11L,0x04d69886a3169be8L, + 0x8a2cf2a7f534f014L,0x3181e5a8a0b80c93L,0x000000000000005aL } }, + /* 37 << 133 */ + { { 0x0ab7aabfdc9ad56cL,0xd23c15c5eae45f77L,0x8890ba1666310ed7L, + 0x78aa3af2f6769617L,0xddc04ace79481281L,0x93d7c93609693faeL, + 0x89f579dca7f87b7bL,0x9fd68aa4c0811d4dL,0x0000000000000090L }, + { 0x90acac1c3d570677L,0x2db8af73915160c5L,0xb68f09301a9bd834L, + 0x92d8f38a3bc34baaL,0x6cc48f4d2c2a218eL,0x29cb31dccf297452L, + 0xa425d2e8b3a8e929L,0x778988ad9d279aa0L,0x00000000000000f2L } }, + /* 38 << 133 */ + { { 0xd64682853c9f5c09L,0xe9c6093417a4877cL,0x30e2768b789a9e6bL, + 0x92c8cc80a7f44fb3L,0x0da7ce7e21477158L,0xd6e1e02b94e9e544L, + 0x9610eedecb4dc8d2L,0x47f8263b903a1fa3L,0x00000000000001d5L }, + { 0xecd9b15649d6c575L,0xb862a41f882a88f1L,0x49423e64ede1dca0L, + 0x4c5fda7de7b3c050L,0x949691d28a4614e3L,0x66bb747b0b9f0904L, + 0xd4d91254ab12272bL,0x255b9e8717d749cfL,0x00000000000000b3L } }, + /* 39 << 133 */ + { { 0xa875e013bb5d1c4aL,0xa3f2502ac3e007c7L,0x6de7cc3766e12aa2L, + 0x879a3b104a63a2bcL,0x6b31dd72854e1969L,0xd90cc9a146ea617aL, + 0x379e82c25c4a71f7L,0x8f516047281e3dd5L,0x000000000000009bL }, + { 
0xa0fcb1903b84b7abL,0x0a52b50e0fc3d3c0L,0x32f65ccf32e1e6d1L, + 0x0ab8ab6b013731eeL,0xea4c3be597b82568L,0xaa6c59b1f617b5d5L, + 0x84b5dc5864f766bdL,0x1408d8b8df0ccd58L,0x000000000000009dL } }, + /* 40 << 133 */ + { { 0xf9573f8d640e6a08L,0xae4874b31138e3e6L,0x9de063dcda7c7652L, + 0x3f4f25905c5e679aL,0xbf26d5286355457cL,0x071b6eb12fbdc5abL, + 0x66f75278b8344ed2L,0xfcae83ac52898292L,0x000000000000014cL }, + { 0x13b69d24c652cbb5L,0x072f96e6120253a9L,0xe8f88c7564985f28L, + 0xc7eafd4f089a1e10L,0x6d4d0fbc9562b680L,0x6e4e5af7b91b73bfL, + 0x253f58ce07278b89L,0x1df25657e8a56798L,0x00000000000001fcL } }, + /* 41 << 133 */ + { { 0x7366c523a6d0298eL,0xfc9896739908952fL,0x4e7b7e4b7cad6846L, + 0x7d61390da76096ffL,0x121c9c4bbdc2d1afL,0x9b4a5607a0731325L, + 0x037059e473265b99L,0x8674868ee48a42e1L,0x0000000000000097L }, + { 0x78109eb260bf7a21L,0x84264885d3af48c1L,0x07659bf119b54790L, + 0x8ea14ceb95d6aaf0L,0xcae15147cf069d4aL,0xc76144d6c7c72fb0L, + 0x2b3a00a10d04f324L,0x2b1ccca3b23706d2L,0x00000000000001d3L } }, + /* 42 << 133 */ + { { 0xb2737edb4397a1d4L,0xa3cc4752adf24307L,0xe7076bb998c5ca38L, + 0xda5f14a29cbf2670L,0xf17c4d57900f3687L,0xa5da2a2bc47f8b80L, + 0xfe06debef4dc7298L,0xd171fac45a85e086L,0x0000000000000059L }, + { 0xcf8e159ada084c91L,0xe537e29b9bb027abL,0xda5d260c2b9929b2L, + 0x79587899c6d406c9L,0x4901def2b1285e88L,0x75514c448458dd8fL, + 0x942b087b90071771L,0xb3bc605d576fe985L,0x0000000000000185L } }, + /* 43 << 133 */ + { { 0x20ff84d0abe1e0d1L,0x72bba8f48b5ddd1aL,0x17ea9fa311158d5aL, + 0xdde176b4d05a1a67L,0x26a74063002ecc58L,0xf2a0564dbcefb61bL, + 0x5846fefa3d6fdda7L,0x362dbb738d841101L,0x00000000000001daL }, + { 0xe7957a081e70f44aL,0xa9dc89fa1e152397L,0x871ecf03a8f6bed7L, + 0x01428e993a1d5e32L,0x6cfbafd4d04d7217L,0x31acf44166b784e1L, + 0x68da3e28dfd839c2L,0x0eefd3f7a67bf28aL,0x000000000000005bL } }, + /* 44 << 133 */ + { { 0x16ce6ba9219403a6L,0x704c37ed72f83290L,0x1af9f9d700efb6b4L, + 0x456cd8d972b63e74L,0x98256804fe6873b0L,0xcc5ec6b62243b040L, + 
0x0d6b88a1bf16f3a2L,0x19632b44d89006b4L,0x0000000000000188L }, + { 0x8b9574fa6b48efbaL,0xaa2a766f9517e3beL,0x134f8f42c7789b73L, + 0xde6e8af2e435cc54L,0xfdb8a89202acd7e2L,0xe7fc99b4db22b516L, + 0xe33eb6b38a467a40L,0x7c98831102aaf81cL,0x0000000000000117L } }, + /* 45 << 133 */ + { { 0xc69fa235f9f382b1L,0x138dde0b97936162L,0x0193fb316ab8c45dL, + 0x52b3734d5b963d2fL,0x68bcf54c42cb98caL,0x946127ae86c80ea7L, + 0x2e85d3b8ea0c5814L,0x89b5ae1d0c05cfb1L,0x00000000000001efL }, + { 0x30ec78ba83ead66eL,0x89ee8df807bc31fdL,0x3ee5b5dfe40653a5L, + 0xbda53ce7c0d84d01L,0x8719017a332fd5e1L,0xe2c26ec1af60a467L, + 0xfe92a92c5324fdd3L,0x4fbb7d03fc90f8e2L,0x00000000000001d3L } }, + /* 46 << 133 */ + { { 0x41517372268ee4c2L,0xf2ef66f1567195b7L,0xdee93cfd536b7c76L, + 0xf4c81df537122f67L,0x8e18116fe95c1d48L,0xe55b76c713761ce1L, + 0x067b6632af514d6eL,0xff26860207d9b612L,0x0000000000000050L }, + { 0x153007507f8082e5L,0xba3bf499a614746aL,0x9bfb85fdc02fae9aL, + 0x326b1d5cef48c254L,0x62d6db68098f53e9L,0xcd5a5120d68a1895L, + 0x9cda1c53b571d2d1L,0x6745a05b13894f7bL,0x00000000000000a9L } }, + /* 47 << 133 */ + { { 0x395e92dd10b4a524L,0x2f607e83a39bfaf5L,0x56f5226da4ecac96L, + 0x43a914ee1330398cL,0xb856077396dc6548L,0x7d6df876b7a3e898L, + 0x119324ecf20fa238L,0x094f660ac05709d8L,0x000000000000009fL }, + { 0x0adf96dc3dad163eL,0xd9a70fac8472aa53L,0x87276f1499571aeaL, + 0xdb8f9182eae49b95L,0x0e612c83e0b31c76L,0x04a89299b29554cdL, + 0x2cb3c97166eb58f3L,0x9b80b548127517b4L,0x000000000000002eL } }, + /* 48 << 133 */ + { { 0x6df6776bc353a269L,0x0d232f2ddace9809L,0x7649e04403d5d78bL, + 0x59710087fc283d82L,0x86d65eeee2ee05abL,0x3d1fc3f227851d69L, + 0x3b86ed771781977bL,0x4d4a61be1ac0290eL,0x0000000000000161L }, + { 0x2deac320c5400967L,0x026949097381541bL,0xd373ba709cd429a7L, + 0x547f0e86928b72c5L,0xb1c2f84c223ab61aL,0x06c039941115f60aL, + 0x6e5b0cbe82136cfdL,0x08e4a467489d7de6L,0x00000000000000c6L } }, + /* 49 << 133 */ + { { 0x032b213a98212293L,0x2c3771f84175ba18L,0x34d7387f11656e9eL, + 
0xcf12328156425813L,0x2b4ef4b4722fee93L,0x7db8afb2799c6333L, + 0x0ab9c8aa511cf76cL,0x14dcb970c785bcccL,0x000000000000018cL }, + { 0xa5db10cad3cc86c6L,0x6c6a06a4abf9410aL,0x9057b0ca268823c4L, + 0x1a3de830fa5744e8L,0x695f7952038b260aL,0xe0394707ae89d231L, + 0x24a3e94c86b0b57aL,0x1acf30aee57206e7L,0x0000000000000161L } }, + /* 50 << 133 */ + { { 0x60eee52395f7e189L,0xe5499be0c5942327L,0xd385ce00d2ee8132L, + 0x4cfdb18cfb6609d3L,0x2b4bb533b29a9768L,0x21ee5b2937a92df5L, + 0xd24d2625d93a8b26L,0x432dc9a2102ae479L,0x00000000000000f1L }, + { 0x9df9906be725bdbcL,0xe95ee011daabbb73L,0xe681c90af4d6dfbeL, + 0x062b8fb003bd2ae3L,0x0e25ec30f61d8f86L,0x78c7612e5bb8dbf5L, + 0x0c8b6a8545b562fdL,0xd972652ef4549f6fL,0x0000000000000123L } }, + /* 51 << 133 */ + { { 0x17b6f280f8148b30L,0x2114d1ecfc765a46L,0xc69d56bdd97da81dL, + 0xe2ef34422e97a94aL,0x05212af05bfe5bbcL,0x98c1299f03cfe2a6L, + 0xbeeb7efdee28e6caL,0xdeb67b9c09994359L,0x00000000000000bbL }, + { 0xd4578886fad7480dL,0x61e757a46cb49108L,0xe2811c9d28480964L, + 0x14ffd6bb2a68d261L,0x6afd6a0aac401fe7L,0xed21c3f69db834a7L, + 0x877e265b2e156034L,0xfda68f45935cb0feL,0x0000000000000190L } }, + /* 52 << 133 */ + { { 0x0b0a8610ac5e165dL,0x21152a8c85cb157bL,0x6db9a7ce551cabe6L, + 0x7e36ec91dfda6187L,0xf8727bdde8ff337eL,0x6009e6f01152b37cL, + 0xffbdd5570cbded62L,0x4f1c3db00152b926L,0x000000000000002eL }, + { 0x6b32d8b606c8a9bcL,0x734eb64a79b60428L,0xf680ec0030394592L, + 0xff99fb910bfa6092L,0x43b696b7cbac9513L,0x3029ffe7fecf53a4L, + 0xe36787ca6985a6f8L,0xa1c08a99132dbdbfL,0x0000000000000188L } }, + /* 53 << 133 */ + { { 0x81e74494c49d4659L,0x981c641e95f5147bL,0x84d1d3de4d8b3bd5L, + 0x1aa8242301a6e411L,0xa98db43ce663d148L,0x1f4e05fed67ed0c0L, + 0xb53dbab2662faa40L,0x8d1f14ac9c524ee4L,0x000000000000003aL }, + { 0x0d7f01a90f85e070L,0xecc933cbe5063726L,0x683848d9dc641f20L, + 0xd714d8d086dc3268L,0xee00e70ae4fb106bL,0x4c0af171fdd9367fL, + 0x9085d9012d77d729L,0xa4b755e3da1b2659L,0x000000000000014cL } }, + /* 54 << 133 */ + { { 
0xba16f14aab5c411eL,0x73bf410805bb77dcL,0x6e5e936e0cbdb790L, + 0x095df1e82806dd17L,0x93d31cb976115ed3L,0xad42b79f4dfb1145L, + 0x27fb4d2ee478b2adL,0xa0de22b349819e04L,0x00000000000000e1L }, + { 0xbae8522996dab504L,0xe3f3c7d5dc069f77L,0x844307a2ed7f4f3cL, + 0x3a8486cde2db6d25L,0xd085347ded20d8e4L,0x6af9e096f2d8d426L, + 0xbefd13e374c38168L,0xbd6ee56577f7349cL,0x00000000000000a5L } }, + /* 55 << 133 */ + { { 0x4119d514d028e912L,0xbc53213293adafd7L,0x0176ca03e31467d7L, + 0x109f2dd7dbe712aeL,0xc991ff43dc87e626L,0x986c9664bbe227c2L, + 0x55b27b44ed625994L,0xab0d9892fab4c1dcL,0x0000000000000184L }, + { 0x73fa82dee846b7deL,0x76b39d20978e6d94L,0xbe90c6022955b242L, + 0x258fcc6d60653be4L,0x2884e00013a12e7aL,0x6e23077d6c5283a7L, + 0xc52f652b50650616L,0x76c21d604a4564ffL,0x0000000000000115L } }, + /* 56 << 133 */ + { { 0x06fec2df700cd193L,0xf077afee002863a6L,0xb60894fd6cf0c0c2L, + 0xc7d01cd0f8b7c551L,0x7adb74e4c33c337aL,0xb4b5767ea8867af3L, + 0xbdb10aa62f3d57a7L,0x80d5ac5eb1ff9d7dL,0x0000000000000116L }, + { 0x0e0df41c91867318L,0xe3cbb1ae1912bed5L,0x006dcee9c15465bdL, + 0xf4768566352746c5L,0x567e2bcf767e554dL,0xa4eddfaccfc14923L, + 0x5c661c2e5e096c3aL,0x5b13488d8123fb58L,0x0000000000000045L } }, + /* 57 << 133 */ + { { 0x7efb5d55427705e9L,0x7441763e3aefa5a4L,0xcfc1631a7dd7503eL, + 0x1931431763a1ebd5L,0x28701646cb713fcaL,0x14e962bd8b152c31L, + 0x257692ff76400fa4L,0x0f6600204b46efabL,0x0000000000000046L }, + { 0x3fea71317ad6603dL,0x972475c8e4e9e4c9L,0x4bd0c751cecafbf3L, + 0xbddcfd670c1312bfL,0x0a572cd16dcda451L,0x6c9ccfe8e4c9282fL, + 0xad69bc9f267b4addL,0x504419d562517ae3L,0x000000000000009bL } }, + /* 58 << 133 */ + { { 0x874b11b11d0861d5L,0xdfb04016f49936cbL,0x2882feda5a6b94bdL, + 0xfa80197cbaeb3a74L,0xc53a57fd3f8223adL,0xc4dff53dfc41cd27L, + 0xff8ccf62cb813e8fL,0x243607d1e9325b2cL,0x0000000000000128L }, + { 0xdeb8c7f10169da0dL,0x13f6f33fd5072565L,0x5a8a4c2f5128d693L, + 0xefe2355be809d274L,0xf9fbb0786c9a8373L,0xe2b94a728f2e8165L, + 
0x0bb7b087052f5589L,0xb3a7cab974dc02e5L,0x000000000000017dL } }, + /* 59 << 133 */ + { { 0xbc9b22ed1648af13L,0x4cc818be46cd5d3aL,0xc5656c38380e7df9L, + 0x7c961b149f628ec1L,0x96ddf65e6035c381L,0x9fb033981ee39576L, + 0xf237b837459ee04cL,0xcf997019c1178539L,0x0000000000000067L }, + { 0x35b3503f7d3970f7L,0x24df0bf854532bd5L,0x09e847bde9c532dfL, + 0x0c5f797166449ee6L,0xfb9cf1629563558aL,0xe7e49989a521d4caL, + 0x6a8c87b04a7e71d1L,0x0f9600f8c64c4fe0L,0x0000000000000121L } }, + /* 60 << 133 */ + { { 0x941f1a68f391b195L,0xf563c05b48755ceeL,0x02fa0eb4a4b11b13L, + 0x89f09fe4110e0044L,0xda3601eccdf17866L,0xb63e2b31821c239bL, + 0x72afb2946a4cfc70L,0x4cc16417876bb699L,0x00000000000001bbL }, + { 0xf02270e155c4416fL,0x1a69d7a0a8fb98a1L,0x4f5219550debae64L, + 0x0d93fc627a75e48cL,0x7555793a2e11801bL,0x6e275e97618c2327L, + 0x57957f18f1af9ebcL,0xe16aa048085f0047L,0x00000000000001d3L } }, + /* 61 << 133 */ + { { 0x959cb1705b5721dfL,0xbef678407757dfadL,0x296da84fd14066b5L, + 0xf63609e59024efb6L,0xf643a52c8efdaf37L,0xc512f72a5fdff43bL, + 0x35dea1a55c5a2b0aL,0xbbe9f38e3bdccba5L,0x00000000000001efL }, + { 0x585065f80662190fL,0x1b566da784cbad73L,0x20937378fd439316L, + 0x00e8c5423908a556L,0x7f10e264f315c479L,0xdccc1dfc4840a392L, + 0x6097b7f6a789f4f8L,0x7d593eef0fbc7e15L,0x0000000000000187L } }, + /* 62 << 133 */ + { { 0x9c59a11981b51f74L,0x7a7aacec4c9c20b2L,0x8ab9de7d0959f510L, + 0x3bc2215deff0cd8cL,0xd609a192ba5ab07cL,0x2dc4323dfe7c9044L, + 0xd20fc5a275025d5bL,0xc38808f868ad1441L,0x0000000000000054L }, + { 0x41574f55f8f1594cL,0x0e9628dc130cb3f2L,0x30f8407c5375a79aL, + 0x28bee5a986522cdcL,0x75a4472e6be35431L,0xcb6da55b06f2326cL, + 0xf31d9ef60acc996cL,0x75b5edf92e86b7ecL,0x0000000000000140L } }, + /* 63 << 133 */ + { { 0x3e6fae603dad7855L,0x5030d3e21224bc29L,0x23be6fa4604102deL, + 0xe2fd452d186e1249L,0x04d431a74a13d329L,0xef754a14c4ee6e42L, + 0xd4a33388d01ee315L,0xce211eb300a21f02L,0x0000000000000120L }, + { 0x066c034572a9514dL,0x18de295c8b1c5dd6L,0xeade73adf43fa0aeL, + 
0x5e1c485dbc9f2723L,0xc998a5fa0a88330fL,0xe42f25e4f90d631eL, + 0xab6b3a19b31f7dc7L,0x6c02cfddca2d7e01L,0x0000000000000061L } }, + /* 64 << 133 */ + { { 0xae917a54a084d91aL,0x4a1095a88a3ae45fL,0xb7a358faa30ee909L, + 0x807aca835aee3272L,0x58d144a9681d54bbL,0x36569b8d3352c5e8L, + 0x75b7e2f1e5d9ba90L,0x348c70da08068bb3L,0x00000000000001d8L }, + { 0xc2bca2f5309376d1L,0x87ca2cb0457d23d0L,0x7dccf53dacf94fc7L, + 0x0646dea871e898bdL,0x8a053df3c8bd817eL,0xa9e9c6825a002253L, + 0xb23c781375a16c9fL,0x2500d5a13f81c2beL,0x0000000000000043L } }, + /* 0 << 140 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 140 */ + { { 0x7facf165d19906d0L,0xd8e059afad92d455L,0x7356696f6ebec576L, + 0x67044e551d6d311dL,0xe2bc81dc3a2e50cfL,0x860036dec00ea37fL, + 0x71dfbe61ff32d69dL,0x298b766bf502baffL,0x000000000000019fL }, + { 0x5e5e123901bb03f8L,0xd36c35835304ba94L,0x5025f960b49f6a95L, + 0x72dd554f75e5b6e3L,0x85bfb398869c9d17L,0xd71cf71f9cace4a7L, + 0x0f6fd3ad7291dd9dL,0x5da8a689d1b5758fL,0x000000000000018bL } }, + /* 2 << 140 */ + { { 0xabdf1b02db567888L,0xf47d5cd15a14bd4aL,0xa3483f68b56ec127L, + 0xf09422294e65795aL,0x225b339b2de6ae78L,0xfbfe7ef011792afdL, + 0xf27b31a04548c2a8L,0xb4821ea3dfbd3ba7L,0x0000000000000159L }, + { 0xba06333ca1c887b9L,0xfc6eef566b243e38L,0x345900c3596ec79cL, + 0x243b17ab43e33d8dL,0xffa7e30da9370da9L,0x3dc587a50f09347dL, + 0x0fd1b2463343a72dL,0xe4acefb48372fcf3L,0x00000000000001d5L } }, + /* 3 << 140 */ + { { 0x99d91691af8c8ccdL,0x2be2648633764c92L,0xb6e681492477aa78L, + 0x90430fc6c52d4964L,0x4ced96c54e1647a5L,0x5b52a279c1489674L, + 0xab7f4127c26ec42bL,0xcd5528fcf1097b9bL,0x000000000000019aL }, + { 0x7785add63a47e5c2L,0x9c82fae9635cb4acL,0xadcb0de0ced83245L, + 0xa42230c8ccb91b16L,0x6af43e953a3e11a7L,0x36288135a3145d6bL, + 0xb74a3e63e2ded224L,0x15df0d2948aab5a7L,0x00000000000001bcL } }, + /* 4 << 140 */ + { { 0x25455c2ae3e97a29L,0xd497ab4fd3028514L,0xeadb13ea914ae0c9L, + 
0xf9f6a27533e0bd85L,0x1103bd6053851649L,0x445b182dd66c948dL, + 0xf74f853bedbecea9L,0x643db0530d27351cL,0x0000000000000082L }, + { 0xe5ccd1170fd56cdaL,0x421562c371495df1L,0x865b3f9ab2d1a036L, + 0xe0407f28d3e4f471L,0xa64f74fccb7e4c42L,0xe87e464d1d1e7bbeL, + 0xcdb9e777abad4a1aL,0x6e1021d7e76deed0L,0x0000000000000161L } }, + /* 5 << 140 */ + { { 0x089fbb85b7f279fcL,0xb3562a0b547a1efaL,0x40a3fa3c98080868L, + 0x90ce47dd94597f7dL,0xb3bb20850c6f4b4bL,0x8c24eda75953ab03L, + 0xb8b110f04eee13ecL,0x3ace7b901bdb3cbfL,0x00000000000001d7L }, + { 0x33b6b36f335c7820L,0x43efeebc6b2b5992L,0x3b56649d36ded655L, + 0x15419020a9dd98a3L,0x08d081b563f405aeL,0x5b1f59d02c588c8aL, + 0x745c687eff822e70L,0x7ab7b9f169ef398cL,0x000000000000017fL } }, + /* 6 << 140 */ + { { 0x568e60bc840e5c81L,0x12b35cd301788f64L,0x70422fa0123e9018L, + 0x6ceabfced9cdbe2eL,0x67032a107626e2dfL,0x0888ca79d250ef1cL, + 0xca1fa245d5ba46afL,0x590f29972f6be510L,0x00000000000001d1L }, + { 0xbd6dd880f2a02370L,0x3ae5f4ac20d42c5eL,0xd2b300f46c08e487L, + 0x662be2ef896bdb34L,0xe7632d9b07011f7aL,0x9481b843e7e83b8eL, + 0xeb7e2084a30c0fb9L,0x53e59adfa71241b2L,0x00000000000000cbL } }, + /* 7 << 140 */ + { { 0x49d93794c2911aecL,0x586554f1a810c993L,0x867b77a3f2322411L, + 0x6083b099c2e4cbc5L,0x0445ed8e7cbd094dL,0x417ba195e0feaa26L, + 0x432b645fc178ed78L,0x429282dfc9948690L,0x000000000000010fL }, + { 0x3045657baddc286fL,0x10f36372863fa4c0L,0xab804fd7dca96fe2L, + 0x6b0a12a618c34d71L,0xbae3dc772a7c0ea4L,0x241e63a0abc48f80L, + 0x34efeaa3ef1843f8L,0x321d171a8498816eL,0x00000000000001c1L } }, + /* 8 << 140 */ + { { 0x26e479f9b9b07fb9L,0xf8e49cdfc03b5adeL,0xa0d66a21d525e891L, + 0x8a062d759a62e3d2L,0x4b025cb738704c65L,0x2b7a6b60d440836aL, + 0x3e55fe53a3ca3ac1L,0x59577407f1031f19L,0x0000000000000128L }, + { 0xa5bb1eb02b4edfebL,0xaee5a2174dcfd68cL,0x582146cfa19f4f45L, + 0x4481152269f7c258L,0x72d7ce9a7f9333c2L,0xe6c88fe2b1dae530L, + 0x1367ce5c88b50de2L,0x10367dac7150206bL,0x00000000000001a1L } }, + /* 9 << 140 */ + { { 
0xd210619a37f4b554L,0xa2cc4be40b01fa7eL,0xbf3805805488768eL, + 0xcb190c6f8a100ae5L,0x6f82bce1749bf03eL,0x0914bcf0c75efd73L, + 0xbdae235886e6b29eL,0x6e286b36ec5deb30L,0x00000000000000e7L }, + { 0xf4eb4fefff080663L,0x0702453df93ba24eL,0x1ee5c4e556edfb3cL, + 0xdc5bef4b7afd8870L,0x74bee7cdc025f4abL,0xaf63727b5597771bL, + 0x2a51fee0dc0cb4faL,0xd9eb893d309e2a54L,0x000000000000017aL } }, + /* 10 << 140 */ + { { 0x991ebfca6cd7b62bL,0xb8571f56009d1d74L,0x14d27d7c441cb94aL, + 0x4021306519e77736L,0x9aaabb26cb20ade1L,0xa283eb52533f6f74L, + 0x84ae1f5856dd48e0L,0x275ab1ad91f11a89L,0x0000000000000118L }, + { 0x61f3c5b415960882L,0x036d46e444f0a7a1L,0xf6c5a1f94bb28f39L, + 0x610f8313f1cd4bc6L,0x571ba872fcde5b45L,0x23c35a185f066ef3L, + 0xf878375da40c5fd0L,0x9fc32ccc1158dc83L,0x0000000000000167L } }, + /* 11 << 140 */ + { { 0x86d444cdd1d81ec5L,0xe5f57d64c29b3744L,0xe606ada6f74f9589L, + 0x5fa4dfad28f6b922L,0xf90993ec5b5d30e9L,0x307d234bb0541c49L, + 0xc010a0daf75b0402L,0x2f36ec8b2d0d3135L,0x0000000000000168L }, + { 0x1ebdfad4b51917f2L,0x7f7f6c94411faa6eL,0xaef4c9a7d74ec215L, + 0x3f7ff15d4041ef68L,0x21d6dddc30b6b53bL,0x1ecac9bc6f918cebL, + 0x05ce1210fcb67a22L,0x7aa44454c5c0e9e3L,0x0000000000000139L } }, + /* 12 << 140 */ + { { 0xac513c23102cc86bL,0x34aee96bbfd0baafL,0x47047407eaab0d63L, + 0x3a1eff07ccd57509L,0x53f237cca81c8d03L,0x7742e3f1cc16c904L, + 0x5325d4796e2e981dL,0xd959b9f8289e6427L,0x000000000000004cL }, + { 0x3114735e26f5e6cdL,0xd17e0d5337c5de44L,0x5adf2f3f002a7993L, + 0x7c1f1f6e176f2f70L,0x01d7f210003758e2L,0x8a2a198693beb852L, + 0xbded219b40b61e5bL,0x2450d9f2a7427ce0L,0x00000000000000c4L } }, + /* 13 << 140 */ + { { 0xfef846878597ca44L,0xa1379684d227c76eL,0x661764327517d749L, + 0x1420872d295d8438L,0xf296988cd91221f2L,0xb6115a026241e88eL, + 0x589dce4ea0969d25L,0x4318724ee35919faL,0x00000000000001d7L }, + { 0xf3ea462a41fa8073L,0x8f577c2156900887L,0xb4318c9929e43377L, + 0x821a520ca7686967L,0xb62b1149354c5868L,0xfc76e87f6a9dfe8eL, + 
0x8aeb05c6bdc6443dL,0xabcbb41c51a3fd6aL,0x000000000000000fL } }, + /* 14 << 140 */ + { { 0x06bc23ab58dd91bfL,0xbfb489e37fcdce7cL,0x50a99333f0d69619L, + 0x6f1a78a3d65751b0L,0x10ac9bf8e1dbd9d2L,0xe3d584f0133b5409L, + 0xa52cba20db27d29bL,0xbee71f220ded25f9L,0x00000000000000beL }, + { 0xcdbfa2cb8d49e1d6L,0xea7e04c2752b7a25L,0xc6cd12c9915cdb88L, + 0xf5d56504b292eaddL,0x2d0ac7ff26dc8f0bL,0xc6e0722d551bf7fbL, + 0x17f9acd4c0a3d7f5L,0x0cbdeb4f3498ee12L,0x00000000000001cfL } }, + /* 15 << 140 */ + { { 0x8d2b73570149c225L,0x6f048760b1294d56L,0x4feebefe53097a6eL, + 0xdeb1abbeca846c24L,0xc920fe521ba7f7c5L,0xa851d87174295fbeL, + 0x8a977818717e5fe3L,0x4e35d70c5320f185L,0x000000000000000fL }, + { 0xcc43de63bfb2d00fL,0xfc526e5170414bb4L,0x45574b72696ef1cdL, + 0xf29b91cc4f96a64cL,0x17b2e3503d419bbfL,0x13f2873916dc8aacL, + 0x3a8afbb755b08a44L,0xf1cecdf34e23f8ddL,0x000000000000000dL } }, + /* 16 << 140 */ + { { 0x69cad3ccc4d6ab08L,0x3adb57773b8990b8L,0xd0cad8ce8d958801L, + 0xcb572e666d728f9eL,0xe3d9e7c4cd5131fcL,0xafceb6b06145dc06L, + 0x12ecd392e213043aL,0xbd5992943a64c87cL,0x000000000000000bL }, + { 0x8623bdbbf6ea9cf1L,0x3aad9495547aa650L,0xd3d853fcbeb27159L, + 0x3d25a64830b40833L,0x12d188e8dec51bd1L,0x836330d2b348c3faL, + 0x9df50cfe73c2ea59L,0xb5dffb2061ded0b8L,0x000000000000018aL } }, + /* 17 << 140 */ + { { 0x4b138db0a2237262L,0x6dd1135e1ab9ad06L,0xc31f50c4c7cfff02L, + 0xc89ab22d6ab45111L,0x5b8461c35a62394aL,0x0f8a58be8236491bL, + 0x3660e9f30b0011e4L,0xc83dfd989395955cL,0x00000000000000d7L }, + { 0x8c4f8327e2177d42L,0x893a9166a8a2cb6cL,0x27d47141b6f7405aL, + 0x2285f6249e2be11cL,0xb55b89d717722bbfL,0xecb10b0d28d3d548L, + 0x9682096f6c97c7cdL,0x5d84617f8588d1cbL,0x0000000000000008L } }, + /* 18 << 140 */ + { { 0x75cc12737f775b7aL,0x0e0a040e4f3b788eL,0x2eb00b21038bc8ccL, + 0x750b88536ea1ad7eL,0x0d93be1bd56b81dcL,0xdd262f462a3c3eecL, + 0x421995083289b172L,0x0fd1c19221cf4378L,0x00000000000001a8L }, + { 0xc038f20f787c37a0L,0xd8f48746f363e515L,0x5d91de9f54a85207L, + 
0xf72c0e8f953d5932L,0x4e37af2f3f25bdebL,0x03d9084d0ee85cdeL, + 0x64e551feda041351L,0x55fdee4e6067100cL,0x0000000000000096L } }, + /* 19 << 140 */ + { { 0xb4254ee4e048f571L,0x2529deff0aebb093L,0xf1f1ecbca72853eaL, + 0xf100ab831a56b014L,0xf09cb22e9d01985fL,0x65e22e41cd855cb9L, + 0xfc3e23afafc54970L,0x8a0f8e16847cde07L,0x0000000000000105L }, + { 0x6c27488e9e09346bL,0x38666895d7783338L,0xd28155fd191e59acL, + 0x5454681fe04ff0aaL,0x27bbb0f76362c683L,0x18a787f430340b44L, + 0x22879ce10a64c12eL,0x0eeb0304b5ac5768L,0x0000000000000164L } }, + /* 20 << 140 */ + { { 0x1a9a04ffc48a91ccL,0xe11f57dd5723e9eeL,0xeff193d3f573d896L, + 0x4c2ce4824e62f80eL,0x9f1e43a7010a0e78L,0xe9c1be55f4364c5eL, + 0x548071daccd4aa64L,0x9eecd41439d22af2L,0x0000000000000064L }, + { 0xccea349ea1aa4684L,0xd2413abfc247f97bL,0xecfa10c1aa385b03L, + 0x9b0d25ddf77ac70dL,0xcffb885d12dfd388L,0x89e1174d6f33dceaL, + 0x37e1423cdf3cee18L,0xe4ba71be378c8bd0L,0x0000000000000110L } }, + /* 21 << 140 */ + { { 0xabb17070333bc84fL,0x41090912d03c10ebL,0x8adda5ae18e32b11L, + 0x7bbc3c10f86dc4a8L,0x9092ae12d649c287L,0xdb06dd82bc3a65cdL, + 0x18ca4d753e07b6b8L,0xe291bdc47a9f4b35L,0x000000000000001aL }, + { 0xcca919f56830e001L,0x748a24a9e3117f7cL,0x9ed8b483a3222af9L, + 0x745dc8ae55996204L,0xf7c9afa042dc624aL,0x900e3f41449efd5dL, + 0xed319f077084a837L,0x3e4911c944052eecL,0x00000000000001c2L } }, + /* 22 << 140 */ + { { 0x83bd9ff7daa696bfL,0x7fa14326059a611cL,0xb526f26264d7924fL, + 0xf1ef5c6fc1cfb749L,0x6a2d7d3f6892bb09L,0x34b326a1d69bfa95L, + 0xb702cd98f8f49ec0L,0xa32611a39b8d2fe5L,0x0000000000000028L }, + { 0xbd0e80afc982fe86L,0x3c59319f0292e026L,0x1e77d11ec72215d4L, + 0x39d2b2c6fa0d77edL,0x22ab784f71ca01efL,0x87ece833a7e8268dL, + 0xb5d67f6bd1682c8aL,0xb18f577d677b9508L,0x0000000000000183L } }, + /* 23 << 140 */ + { { 0xa02284c4e2be6438L,0x234448f60d1b27d2L,0xdb7d087adaaed164L, + 0xca39a1f9bb959ba3L,0xf87e2f49a4b8b5f8L,0x559e905f4d5f1b60L, + 0x479466a072370c3bL,0x63d28e16b0bca628L,0x00000000000000c3L }, + { 
0x147e8781e8d32ad2L,0x40e530111472ade4L,0x40f05d9a97cc63c9L, + 0x485dff28f13f5454L,0xe360a4c25824a4c6L,0xac09f1c4ebb6e58eL, + 0xd80077bd1e778bb7L,0xb549c19d6ce4263cL,0x00000000000000dcL } }, + /* 24 << 140 */ + { { 0xae6b428f554b88b9L,0x26bf6b6c8313c74eL,0xae7e5874ce24b917L, + 0x26e268738b3162c3L,0x2ba4f3edcaad88f4L,0x6e9e5051375a3f9cL, + 0xa754cc49d70210f3L,0xff8cb757e4553c0bL,0x0000000000000119L }, + { 0x93f6a0a4ae1c208eL,0xb56b6a4ad247d4e2L,0x4cff7d0391009637L, + 0xc6b56f6be0efc126L,0x81f5d59c42de039cL,0xfc4c1d26f36854c2L, + 0xa66ab1792d280f60L,0x6c5468c3eece88ceL,0x000000000000000dL } }, + /* 25 << 140 */ + { { 0x1df2fe5226b04f55L,0x43febfabf8717ce0L,0x6ca4a4c906a32d96L, + 0xc625ce6c9a25aad0L,0xddcbd5755bf093c6L,0x4740594fcb1fbaf2L, + 0xa1e50234d12cede3L,0x27b1937203f2fcf1L,0x000000000000010eL }, + { 0x5b9999d1c137a9c5L,0x1580a031522d2decL,0x21b2cc0a35220780L, + 0xc66015fbb9286a1aL,0x5ecd634a90d3aef9L,0xd9c91a6d88d39d33L, + 0xbc02dc02864919a0L,0x110830f991b49396L,0x000000000000019eL } }, + /* 26 << 140 */ + { { 0x897d947ac5d1e1e8L,0xbfcff6ae0587d4d8L,0xb5f3ffa5279d1ebaL, + 0xa4c3eb1cb725ec65L,0xfe037a76cdbd4600L,0x424c015216a86cd2L, + 0x23645a597b3ea150L,0x9e913a47c9db1cddL,0x00000000000000baL }, + { 0x714c4dcedc4fca02L,0xa1c699ea5ff04ea5L,0x6144a96f0ffeae0fL, + 0xcd2e50d7203b8a69L,0x0012fc770d2c230aL,0xdbc4f25bfa8b6928L, + 0xea8941ce060f3141L,0x86836d7b946f207dL,0x00000000000000d2L } }, + /* 27 << 140 */ + { { 0x95a4c2bcfbc9136fL,0x9ffd305bbee15a48L,0xe445467ef86e497bL, + 0x7bbb4922bc2519c6L,0x9dda7889bc9b2de2L,0x9ec70bb743b88ea5L, + 0x5b58943ca1bb75a8L,0x3426ff7c5c5cfe2dL,0x0000000000000042L }, + { 0xbb152162963204a4L,0x19ab08f23ab85bdcL,0x266ab5ad034cca2fL, + 0x7a27928d8eb8753eL,0x405302a272272090L,0xea7479374a010ee1L, + 0xfd2764875eb40da3L,0xf8d4f8029b6cf000L,0x00000000000000c2L } }, + /* 28 << 140 */ + { { 0x0587d31801873864L,0x6c4940d1a8758f49L,0xd5928f463f1f13cbL, + 0xb0917630481afcbdL,0xafbbe96c2804e91dL,0x1016565133ef9d5aL, + 
0x7284a58ea2c3796eL,0xf013d38a5ff369a3L,0x00000000000000b8L }, + { 0xea1ad141d71ec602L,0x5faa6539449f5326L,0x30080af4455cde9aL, + 0xc4bcfdcd6affaba7L,0x52e0ef42801f9569L,0x5abc27b2c80b0b5bL, + 0x3c4cd4339e99ccf2L,0x63e689dd6703e3c9L,0x000000000000010eL } }, + /* 29 << 140 */ + { { 0x55b2acfd76824739L,0x7f2d73ede9167239L,0xd7e63103419b9e86L, + 0x4c358f755f58d997L,0x9b8771ba635a21ffL,0x5f3394307708bd7bL, + 0xa0de93faf730a252L,0x19524f0de8a755e8L,0x000000000000014cL }, + { 0x426e17b7446f65a0L,0x93c214feb669d1a9L,0x93fd2087a657e7bfL, + 0x16820da66b9354c7L,0xea8818c3c6a24661L,0x8b673d76ca97c304L, + 0x028065fc59d65adbL,0x6e75b61ccb0656f7L,0x0000000000000098L } }, + /* 30 << 140 */ + { { 0x94bdabc696b632d8L,0x5685cd96c04f512bL,0x09135f1dba7c2f6aL, + 0xdf4bccbf16711651L,0x9339f902ba842c06L,0x84649cec4ea6ade7L, + 0xf98ce1ace76c4520L,0x25d796d082b2e5edL,0x0000000000000159L }, + { 0x190184670ef41247L,0xa8aba9cbb06ab440L,0x6a2c8f0d424575efL, + 0x29542661a7641251L,0x4aef28806063cd71L,0xb6521c4661007b1cL, + 0x44b83804d1f05f1cL,0x96545d8984a8d313L,0x00000000000001c8L } }, + /* 31 << 140 */ + { { 0x56c2c87fcabeb4c8L,0xc0acf4c98c3e6148L,0xbe7fe6ccc20e488eL, + 0x423761668330b587L,0x5af5ced080883de3L,0x4b6aaaf85add9229L, + 0x7c7ad512ab13ab0dL,0x14bc975727ddc17bL,0x0000000000000036L }, + { 0x92d037848d17337bL,0xd5d878b97bd6dba0L,0x9d229cd79e89007bL, + 0x3c014e2492b07e2cL,0x533280fee56c30b4L,0x41fd5f6ecb3790f6L, + 0xc5e9b610c89facbbL,0x89429ae2d48d455fL,0x00000000000000cdL } }, + /* 32 << 140 */ + { { 0xe843d0350c62942aL,0xeb792fc5d8920cd4L,0x3fa4154e90be6fd2L, + 0x002631cbce2557abL,0xb37b952a226742b6L,0xa7658c0afeafdc24L, + 0x62a7de67b5a09597L,0x25074f6a9bb3cfdeL,0x000000000000015eL }, + { 0xe6db0f49d03cf65dL,0x1187c8b40347dff1L,0xdea411166e4ce605L, + 0xac0ca35322313708L,0x395ddee3b6a5cadeL,0xf31abcb1c6d80c86L, + 0x013371b1a07f0747L,0xf54231898f55b804L,0x000000000000014dL } }, + /* 33 << 140 */ + { { 0x31cb7a50f9a638eeL,0xb23e940381388b46L,0x1c0b9ebc7bf805a3L, + 
0xcacce3dbd40f4071L,0x8360c4c6652cc8e7L,0xeb2829c4327a1acdL, + 0x4fca72678eedc8bbL,0x49a6630aca904d48L,0x00000000000001edL }, + { 0xdf7aff0abe745b6fL,0x15b66c0630f736dcL,0x4efc54545d6c3ba4L, + 0xa1becaaa4ec61d29L,0xda6f8551b17a9158L,0x889c4a939eed3eb8L, + 0x6f5dc762cc33be94L,0xd0f2a30010588a5bL,0x0000000000000144L } }, + /* 34 << 140 */ + { { 0x93b6f1877e5836afL,0x0e7009435c3af4b9L,0xdb3b4d704b4f0f02L, + 0x0c196039d71d4375L,0xede744582ed230c5L,0xd0630162add9da94L, + 0xae533629cd2c0ec5L,0xa93a8b231108523eL,0x00000000000001b3L }, + { 0x6891494b70a36874L,0x5c15139dd85b565bL,0xc8104f07b620217bL, + 0xda49717001c964eaL,0xcf38a9e7631c7daaL,0x631856c19e124493L, + 0x057b8cd2ab9d4cd3L,0xb3d089bab13791b7L,0x0000000000000013L } }, + /* 35 << 140 */ + { { 0x27d87b3b4032e076L,0xe37fc28f6aa218a2L,0x55a525b1ad652b86L, + 0x82ecdf89db0e1642L,0x7951a419d974a454L,0x4367ac17f8195a7fL, + 0xad2b7d0d86b0cad5L,0x36113772d6140139L,0x00000000000001a8L }, + { 0x98f04f1c8ec6a745L,0x1dfaa104644bb3a5L,0xb9c03e2bd331c407L, + 0x15188e1662af07a8L,0x0b2f2cb67f329a15L,0x0b7ca5fc0a91b5ebL, + 0x28b5e4bb5c410c39L,0xcc64e7eb0de792b2L,0x0000000000000188L } }, + /* 36 << 140 */ + { { 0x5f4b9fa400a0bdb1L,0xcc2cb2b2a274ac45L,0x7c05dc7c6e646620L, + 0xfca73a120cc2626dL,0x6bf5386457718621L,0x1843b57ae81976a7L, + 0xc350b19bf323d6d3L,0x0c514dd5c4a360eaL,0x00000000000000c7L }, + { 0x24ed924263c17c00L,0x4a49aa5368306bc4L,0x11b210cc1744cb19L, + 0xa6d24a9cc7284b23L,0x7a438e436b558be0L,0x798304c7bda0ff5cL, + 0x5c6782b6c1c19b1aL,0xab2864276f4d0e2bL,0x0000000000000106L } }, + /* 37 << 140 */ + { { 0xfedd7c404f2f5c7aL,0x77c579e61a9c79efL,0x88cfe011463e782eL, + 0xd51523c57ff1d238L,0xbce03def946a4c51L,0xe56855fe4486a582L, + 0xb52fc072d6d9ead8L,0xb406446c52fe62bcL,0x0000000000000085L }, + { 0xa4db7d7811dd4ac5L,0xd877aae6647e8dbaL,0x2c7ace9980bc3941L, + 0x29f8ebc99faa3242L,0x6153632a10afc24eL,0x57cbe075dc5c3cc6L, + 0xcae0ed2e9b8a0c22L,0x029cbee5ef2c6786L,0x00000000000001e7L } }, + /* 38 << 140 */ + { { 
0x413c7ee2987d0c74L,0x2ed4f5a085300d40L,0x158c43d7ac13c624L, + 0xd71848db09c8b19dL,0x75505e2b0565dac7L,0x168e414a20fc368cL, + 0xcfb4283e0ea9f88eL,0xf39543bf758fe171L,0x0000000000000164L }, + { 0x35e54289d98d8173L,0x2ca86f6fb9518343L,0x61dcc693519ce3cfL, + 0x91aded0d3f9e26eeL,0xbf70d414b824d11fL,0x8a2d8a7f0f4cb14dL, + 0x735009e46c6261f9L,0x7ec2b1232471c948L,0x000000000000008dL } }, + /* 39 << 140 */ + { { 0x870f9cb4a32999b0L,0x74437cc97189a3daL,0x9dc9814abe7c0a6aL, + 0x49d7ea1cd8a8b051L,0x0501d49b68b28d3fL,0xd3e65e3df496c58dL, + 0xfcade2203b8d66a2L,0xb5a6c1c9526c8bd0L,0x00000000000000f7L }, + { 0x91cd67b271fb5f4eL,0xa0af14d96d6e2e1cL,0x840a19d4d38b37efL, + 0x4b66e20a93a9ea62L,0x81fb5fa30b83bf02L,0xa92fea241051e64aL, + 0x6788483422c14f6dL,0xe10bafeeca340776L,0x00000000000000ceL } }, + /* 40 << 140 */ + { { 0x789f41da83a65a18L,0x5bb4894fb8a305c9L,0xa6f3bacf2f738054L, + 0x997b0fdfdaf74eaaL,0xd5684faae810d5cdL,0xa935b391392fa19fL, + 0x8663e1c6850b10bbL,0xc0bb73437c749d23L,0x00000000000001fbL }, + { 0xb090011f1ae17d4fL,0x67152af36d6954d9L,0xc2e0ab2979e7822aL, + 0x6dd7cec997784f62L,0x96c7fc2957109376L,0x2815b8113f967c39L, + 0x22cb5983d7fabd0fL,0x583ee8355fa5a752L,0x0000000000000146L } }, + /* 41 << 140 */ + { { 0x8b56eb2c5019302bL,0x06e9d1aea8d84e40L,0x691e1c877df7094aL, + 0x47b910adb1f1e654L,0x83e506b760b44fd1L,0x1918e257b9ba00c8L, + 0x0ec4be508d41465bL,0xfd121959da18d1a6L,0x0000000000000140L }, + { 0xa34a1e684d047374L,0x6201d460b4316331L,0x331a7882078f21dfL, + 0xfaf64df9148c48aeL,0x2cce458d87e58591L,0xae3393755d170040L, + 0xfd9ba9ff4a6d9a42L,0x58493d52573859d5L,0x0000000000000056L } }, + /* 42 << 140 */ + { { 0x6187b95820a9dac5L,0xfd9315c1a8f9e18bL,0x50e385c080fbbe78L, + 0x42e8fa0b6d70e105L,0xcebf0b28c535e577L,0x2095d5027cb7352dL, + 0xa6673d873972a336L,0xce13ca020126a92cL,0x00000000000000a9L }, + { 0x6f3dff0689b255fcL,0x71a60c67df1a0a04L,0x9b04cf5301c122cbL, + 0xc3d298b91342573fL,0x12aa6b82848b07eeL,0x8b10b9069b619109L, + 
0x56660e8ccc89c390L,0x32bd385c8f29dd7bL,0x00000000000000e7L } }, + /* 43 << 140 */ + { { 0x4e3876b249617dcfL,0x0eaded881393d785L,0x7c6967e570cb9ce3L, + 0xc6873abae584ff6cL,0x6d18a71a967162b9L,0xf29d7f31a203b785L, + 0x627ccb0851ea4b5bL,0xa4218e75ad6f8c89L,0x000000000000014bL }, + { 0x552819fdac3f5ce4L,0x4a00293c539999a9L,0x5eb79de4c782bfc7L, + 0x21fe279d671a30eeL,0x146b912c58c123e6L,0x74b095804c97f32bL, + 0xf182d37ab3acbf61L,0x8c9886570d00d2b4L,0x000000000000006dL } }, + /* 44 << 140 */ + { { 0x57f80100fe209f1dL,0x71c60c6fd43ccebcL,0x033d3c2854e446d5L, + 0x2003e502d3f1007bL,0xd87ffd20d79548bbL,0xefd5056f86ece398L, + 0x80cef761e69482deL,0xf62b923038d1673eL,0x000000000000003bL }, + { 0xfb44b80cfdb5cab4L,0x4f520a4aa1af6209L,0x0ff5bda847e5b2ffL, + 0x85d1dc6e8b0ac82cL,0x8a19ca4f1d2eb0e8L,0x9e99fdca7c488126L, + 0xb754370b1edfdd03L,0xfd181a02ee494576L,0x0000000000000106L } }, + /* 45 << 140 */ + { { 0x1d3d318ff362cd2eL,0xdd4725846711d19aL,0x56207e9ff1858a12L, + 0x3bad522a5c10b471L,0xc925ffa871aedf63L,0xd8d53ffedae62373L, + 0x248da617a3819bebL,0x71fe647262ae36adL,0x000000000000012bL }, + { 0x2acc43d9375712a5L,0x8d942d284a4962e4L,0x53c296197cc6b7beL, + 0xdec24b5919f536c1L,0x15b9e285b2e78107L,0x86d0f79383e95280L, + 0x225219f813179418L,0xd416e229387c2867L,0x00000000000000adL } }, + /* 46 << 140 */ + { { 0xb58259824bea9e27L,0x9e27af0751f4fd00L,0x5cec315a1e77fff8L, + 0xcce4be49021d7518L,0x9330fb3373d0692cL,0x1730d9e896be6a41L, + 0x9019c06c80444818L,0x84ee038eefcdd584L,0x0000000000000041L }, + { 0x65869ea65157da30L,0x478e70f6facf8c15L,0x5cfd22fcca9def36L, + 0x5ed90506a3126a73L,0xca6b0ab1a1e2cd48L,0xaab6ac8215cb7122L, + 0x8f88e213d86ddc96L,0x0412bf12c8c13f1eL,0x00000000000000b1L } }, + /* 47 << 140 */ + { { 0xc4a01839e6931841L,0x5953809f30be402dL,0x8b56ce11c3fa4c19L, + 0xd2677642043a7e0fL,0xd4b9df95fe0f5cc6L,0xc6f4a5a22d6b2c98L, + 0x49db7c27426282a3L,0x658d11be03260727L,0x00000000000001b1L }, + { 0x9236615fda96f5f3L,0x76e3fa6f6e6ae4a5L,0x1f1d55010c7de659L, + 
0x80e17dc33f994e69L,0x74c24adc6684ff4fL,0x614652d6c4a60902L, + 0x6ef20ec54a2e2ff7L,0x6db06cdfcfd9b199L,0x00000000000000e0L } }, + /* 48 << 140 */ + { { 0x6b8f830418a82ccdL,0xc3c560011b132b2cL,0xafb9250d8ded65a4L, + 0x921b4635945b6410L,0x100306bd9f168610L,0xdae04ed366edab88L, + 0x4edab84ed86b2664L,0xdedaa28cc815c6b2L,0x0000000000000142L }, + { 0xdb8265f17b7161c2L,0xf022683f8f11558cL,0xb8859f1fcf5fe6efL, + 0x7419abd8ad90be7eL,0x8289c33f24aa372cL,0x3871cb407c9d3be5L, + 0x55402906b3034315L,0xae557332a7e70821L,0x0000000000000014L } }, + /* 49 << 140 */ + { { 0xd39f520b8e0f7e9eL,0x153866ff8d0fd062L,0xdb15decf87a3f88aL, + 0x6ff41e5c72d55d0eL,0x44ba35b94ded067eL,0x46f07e1e584a3fbbL, + 0xc2998a4a3f909c94L,0x645d2f7dc1f8d292L,0x000000000000019eL }, + { 0x992c5807341c12c9L,0xf2d2f1e0b2c77b3fL,0x9dbc83aabc99a864L, + 0x0687cbd145300329L,0xf8b22c0a5d7b5dfeL,0xe110d7e57cb97195L, + 0xa680d2d96fc7e42dL,0x39f4584cae5d5039L,0x0000000000000023L } }, + /* 50 << 140 */ + { { 0x7836badb584fb47eL,0xec96b42dcc48845dL,0xeec01e0f435017f2L, + 0xd7f7857766f61203L,0xe82608d41730ab48L,0x9eb3b2c3a4b35551L, + 0xf14b296e21e399e8L,0xa07c6566f4d2e554L,0x0000000000000005L }, + { 0xf50bd939b79d53beL,0x376e88bd324ca0b6L,0x43a24688b91f677cL, + 0x821c11d97fc385c7L,0x0b766adbfa3459fbL,0xce0225e33c45fecdL, + 0x508a49f7130a419fL,0xde6f53c5b9515e1fL,0x0000000000000143L } }, + /* 51 << 140 */ + { { 0x175cd2cec14ffcbfL,0x4b1b3cbbd269602eL,0xea11435a257cb2b8L, + 0x172ffe4827c1f139L,0x2f4e6229bfffa4fcL,0x4be074c9c2be5e41L, + 0xf91f21bb8f2cb3cdL,0xf116fcf14d7108b2L,0x000000000000001dL }, + { 0xf93def59effad0fcL,0x9957c785e54de87fL,0x2b3d8e7396903c15L, + 0xc373efb0f091ee95L,0x881699e9f4564854L,0x4a64517b7de613edL, + 0xb260565d73c10c4eL,0xdeb813b518d4d4fcL,0x00000000000001b8L } }, + /* 52 << 140 */ + { { 0xd64a62e38f0599a4L,0x17c8c046e4bc61a0L,0x76b9b05d1812e087L, + 0x253e9187c0f673d1L,0x8e02cdd01f8cde04L,0x576609d68714debbL, + 0x61bb207657420758L,0xfeefae3a3e68bfb4L,0x0000000000000111L }, + { 
0x2ec9513515522d32L,0xe35811c580c19a4cL,0xe7dd7c057e37cb87L, + 0xb1cf36ba16a8ca9cL,0x57163e9bb05ac678L,0x9bbe8a774ab4d064L, + 0xadb308e09451dd9dL,0xe7b834ec17aa1d11L,0x0000000000000093L } }, + /* 53 << 140 */ + { { 0x847550a14f3203c9L,0x441cd9d9bd63820cL,0xf827fec2d231b04cL, + 0x96c5e5dacae5e016L,0x06dc22775d223032L,0x87e8052724c4313dL, + 0x15cc1b277bb78ea0L,0xd92f2574201d9edcL,0x0000000000000159L }, + { 0x4d7082592d170a33L,0x01e2b8e72fbd98baL,0x180daba85f9e906fL, + 0x77a1bf3a8def35bbL,0x648fbd384dd7bc01L,0x7d90c0863455a0d1L, + 0xaa24fa724004cc29L,0xbf3868c54a5fd928L,0x000000000000008fL } }, + /* 54 << 140 */ + { { 0x8cbab5a70c570045L,0xa475735431d1bb51L,0x9eee4ceb444399c0L, + 0xbb0778665a4d10a8L,0x04f4700eed6a38a6L,0x58c5c6c85bcbbc76L, + 0xf7c0f5c3976b1c84L,0x9ac5fe72074af6b7L,0x00000000000001caL }, + { 0x551d4acecdfff064L,0x7aaa92f1a4c00fa3L,0x1e55fe218c3f16a6L, + 0x3de2f116f2e73439L,0x457d00585c583e87L,0xe3335a69ca962853L, + 0xa21c0d2c118d709fL,0x4257e6a6d21e990aL,0x00000000000000cbL } }, + /* 55 << 140 */ + { { 0x6e6c89f5d04d03faL,0x63aaa58dce5e7290L,0xf76838bda910619eL, + 0x80c24771bcd11f15L,0xddc294169425e02aL,0xcc486bce0263e8cfL, + 0xad11349862db25deL,0xb4bf878a6dc9f601L,0x00000000000001fbL }, + { 0x895ee8c70b91aeb4L,0x8e7cb7e0cf0f7609L,0xe89b60279e1427a2L, + 0x7b9035cf7b8dd96aL,0x096b128e4535357bL,0xe333a5e139e3f807L, + 0x4d2ce2d08819dd53L,0xafe18d5568251143L,0x00000000000001e3L } }, + /* 56 << 140 */ + { { 0x8d6eba17e9d0c03dL,0xdf3d3c1cc3c1f760L,0x9776992607401f2bL, + 0xa0a5992204cb691aL,0x1fe9bb01b6ffb7a2L,0xceeee53e207c9509L, + 0xc9224c7f82352c8eL,0xbb29f68f772d9de6L,0x00000000000001d7L }, + { 0x3708ea24802ea716L,0x7735dcd80cb41faaL,0xb0e4dc31e2e98dedL, + 0xfcbf0807a5776dfdL,0x30b121261602ac0eL,0xb1080dc0cb155450L, + 0x9872c0b7686b0117L,0x53718ec24fc407d0L,0x00000000000001f2L } }, + /* 57 << 140 */ + { { 0xaac1bebddd3131b6L,0xb6ae2dbd355fc1a3L,0x179c6fe93e91bd61L, + 0xdb13596c2f5fe441L,0xaefaa096a6171014L,0x6af216af84ce8690L, + 
0x90fc31872771b4c1L,0xef31625f6c17ab8eL,0x000000000000005aL }, + { 0xd7de907dbe106f8eL,0xea860c78ef29153eL,0x19de038a9fff6a9aL, + 0x1a4b9b0f816645f7L,0x6eac219be91f38cfL,0xec8bb484cc084436L, + 0xf1df126ec024e970L,0x106e9d1df10a9f32L,0x00000000000001c7L } }, + /* 58 << 140 */ + { { 0x887f5f7dadaeff53L,0xedcbca6c786fef67L,0xd7bb8943f09a8d39L, + 0xd479d649909702dfL,0x4ae419db67ca7124L,0x261d989c485a86afL, + 0x98967d4a9be0ae87L,0x3f6aaf7ec22f8626L,0x00000000000001a2L }, + { 0x84cce3aa6127b3a0L,0xd6c6f9e454bfe185L,0x7ea36b64c8207accL, + 0x9ec42d09651c627bL,0x3b933445b30cd27eL,0x24671c86057472eeL, + 0x59ecd8cc6eb69b43L,0x87b80a767eb210dfL,0x00000000000001b4L } }, + /* 59 << 140 */ + { { 0xa923220091a7620eL,0xedd722be85bc2a66L,0x3876833331e7a7fbL, + 0xa190e74125837410L,0x0ba399038d0339aeL,0x5e5bede878292e1bL, + 0x65c8e564fc1727e6L,0xca1a08b61c7626a6L,0x0000000000000165L }, + { 0x465c87a5d36c351dL,0xdfbb991bdde74f25L,0x587247c439474176L, + 0x24452876ad1dcd0eL,0x020dd2685d105c53L,0xc6578e676c09755dL, + 0x38e992b57fd6720dL,0x722e35424d7cab65L,0x0000000000000028L } }, + /* 60 << 140 */ + { { 0xafc72082bce9ceeaL,0x3d6782d22e39b303L,0xcf80e0e42d0707bdL, + 0x2ce86804499c7a85L,0x2886fd1d0078e18cL,0xb4fd3d50f4b05c32L, + 0x0e15e71ec3872f88L,0x22a9f5d16ed84f63L,0x0000000000000021L }, + { 0xb3373fd31a67be31L,0x2d735f4cc320c1a6L,0xbe78bf0e89cc9314L, + 0x103c15dc9d83b96cL,0xdc4911c6e59d4b8aL,0x82035e43e77f32abL, + 0x5b67b58fcc0fc8d0L,0x68345fd533f6fb5fL,0x00000000000000a6L } }, + /* 61 << 140 */ + { { 0xbf16e67a89cea871L,0xcc9468110e8a4e02L,0x0b98bedf81bd3ab2L, + 0x3e1426e33d54ffffL,0xc1cbbd7556c0dfa1L,0x620757a15eaf6969L, + 0xf4bcd2c407429b44L,0xfb1d11cafa1954fcL,0x0000000000000130L }, + { 0x620e9551ec84e8c2L,0xed5fb669bd2b2df2L,0x5fb8443227f1b8eaL, + 0xfc245aeffde8747eL,0xb5c86e67b3d63366L,0x840d13cbe6f5ace9L, + 0x8f48e4b80d52b642L,0x47218d9612968625L,0x00000000000001baL } }, + /* 62 << 140 */ + { { 0x63c7f49beaf731a7L,0x8de8815f4d3db806L,0xabb9f24f05a18ce5L, + 
0x5907d287c98281baL,0x112f1c1499819cb4L,0xb6c82e83f4b873c6L, + 0x84dc0f43369bf15fL,0x71c53d9005d32850L,0x00000000000001f1L }, + { 0xa83cf8ee69584f8eL,0x5417454910553d5bL,0xbfb36711adafdb64L, + 0x4d28a04142f7244cL,0x03458837900552d0L,0x712d0c1cd6f23c51L, + 0x5f541b75b10fd53eL,0x3e1824c5f43d7b90L,0x0000000000000081L } }, + /* 63 << 140 */ + { { 0x83f636b0455342b5L,0x4eefc94489cd2497L,0x9cd00d6c21d70b2fL, + 0x6ffa051f02a29335L,0x03085ff0e800ddffL,0x9de699100fcc7935L, + 0x33e4e2a246689663L,0xaa6c25f3b467e56cL,0x00000000000000c8L }, + { 0xb448fad49af4127fL,0x1b3603254d024fedL,0xdd1b4ec1724446eeL, + 0xac577edb13df9d91L,0x515818244616a822L,0x289df7cda5480005L, + 0xcd8f303c0b8040a5L,0x88e9b6fa7d0559c0L,0x000000000000012eL } }, + /* 64 << 140 */ + { { 0x42458cfc40817aa5L,0x96f15f13560cc12bL,0xf96ea2034400b1e3L, + 0x266c2a626ebc6b8eL,0x98ebd5d42b374cbcL,0x15c1f4fd97f946a4L, + 0x759238ae33a07357L,0x6de75cfe2e3338c4L,0x00000000000000acL }, + { 0xf73dfd08a1d6bad6L,0x32228f5f72566188L,0xea587fed0497dd53L, + 0xad4c4a562c1673c2L,0x8c8dcec4ade968ffL,0x77bf520edef7feaeL, + 0x856bb7987e6a19cbL,0xc289274db7fb508cL,0x0000000000000050L } }, + /* 0 << 147 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 147 */ + { { 0x2491a894de6bafe8L,0x8aa13ac5ee9eb4c2L,0x1623b9df31b2d1feL, + 0x1fb69ca81a5b21efL,0x722f1be3290d231cL,0x8d2a56eeb93891f0L, + 0xb534845b48a2dc2aL,0xe4fadafc3e2a9160L,0x00000000000000caL }, + { 0x7f9d50b8810034b4L,0xfebda319e8703f93L,0xecae100844caa3d9L, + 0x21767275bb5e0dc6L,0x529817cdac64fb7eL,0x984cb5bd31b97b4fL, + 0x33000d288ba3a593L,0x950b9c149003a472L,0x0000000000000039L } }, + /* 2 << 147 */ + { { 0x5e94c2a2a1da3c44L,0x90d950916c05a3e2L,0xb29b791e74871eb1L, + 0xe82a41fa50c2d855L,0xd2d24df91e689f42L,0xe9e935450cf6ed2fL, + 0x5fc9d992c6ed3e7eL,0xb4eb7df03606713dL,0x000000000000004dL }, + { 0x248fd9d72099171aL,0x908d539e1d16d66cL,0xab47b9eb171bfd3bL, + 
0x70e5de8cecdd8e37L,0xedd44ae051eed557L,0x4983c6faad95aeb2L, + 0xdedbfea1cdc8214fL,0xc6ae2030131549b3L,0x00000000000000c9L } }, + /* 3 << 147 */ + { { 0x7f642d9767b9ec83L,0x262cf832a01a1318L,0x279669d0288517c4L, + 0x39e8ea3451456d59L,0x5a8b5b78e9cdcf0cL,0x37b6e21f183a3198L, + 0x0cee02daf754f97bL,0xb6fc483e9c76fa86L,0x0000000000000137L }, + { 0x783a819125386715L,0x4fa789033e397588L,0x25dc90b31825e7bbL, + 0x648caf54125497ccL,0x14940daabd22072eL,0x0a6fb1af51e5a037L, + 0xc637e1ef41570602L,0xb63d078dc1685b35L,0x0000000000000030L } }, + /* 4 << 147 */ + { { 0xff25170724db06fcL,0x4421108c849ea0b1L,0x3073c4caca8162a1L, + 0x6d768642e42af7adL,0x031eba68a6bc7790L,0x9a8154c3d8d64e3fL, + 0x7060550e297aafe9L,0x8681a2c6641391fbL,0x00000000000000fcL }, + { 0xe9e76b16e0675e14L,0x3c4e3c42740f9bb7L,0xd3e75ff52869a3aeL, + 0x99a891aba47c7580L,0x42b47bcf0b321989L,0x2d1212a00a6dd3baL, + 0x25b1c5c8b43e9086L,0x8bc954e8e8d1a73fL,0x00000000000001acL } }, + /* 5 << 147 */ + { { 0xcb43d398c9b67ea0L,0xff2afbac39f12f2bL,0x516c8971a99ef099L, + 0x283cbd94271d23e0L,0xfc2a6cb5cb3ec463L,0x2f08949e25d3036cL, + 0x496b399a5d21c843L,0x493ab65a61dc0631L,0x00000000000000afL }, + { 0x1f2e2d3d092d0f6cL,0x4d09ae2975901dd5L,0xb4324dff826cc023L, + 0x98744acff8f24acdL,0xb4b90fd6c4da3267L,0x3c5a2dffa221c054L, + 0x70c249032f1d7804L,0x3d8676a9ae973b22L,0x0000000000000080L } }, + /* 6 << 147 */ + { { 0x5c9da1422f134a93L,0x898c25ba65dd482aL,0xa8ce92fef6ef3e64L, + 0xe462faec5da54ecbL,0xdff68f3fb81e3ec3L,0x5278eb6c4b8ef144L, + 0xf98a36d50dc84f3cL,0x8edd6edba94ef000L,0x0000000000000191L }, + { 0x738606c4ce8f5403L,0x102f1b3a1a20b349L,0xe7f1bf3328f73251L, + 0x0136f3eb1dccbbdcL,0x5b2b77633df6d9a6L,0x02b03dc6c9ee09fbL, + 0xff600116ea421742L,0x432728e840d3c3ebL,0x0000000000000034L } }, + /* 7 << 147 */ + { { 0xd97ddfa513137accL,0xb48fd161f90ae86eL,0xbe3b1135664772c7L, + 0x36a3f3bb882ebcbbL,0xa0bd95d4c260b262L,0x15aa4d0514b47d2aL, + 0x93607e4a08838ef1L,0xdbee0effc3e3a24aL,0x00000000000001d5L }, + { 
0x552772377ea76b01L,0xfdb2d29e02b16d83L,0x8503e9f50a9f47bfL, + 0x6b462a78702205edL,0x996d0687e7449e0cL,0x005ffde4a89f336bL, + 0xd784ec5fe7b964f6L,0x4360cf20c319e445L,0x0000000000000194L } }, + /* 8 << 147 */ + { { 0x4bc6f28624374decL,0x9ded182eddbf9446L,0xf58ddb470c3c51e1L, + 0xa6bd194efcff08c2L,0xdc47a3a63e931aaeL,0x7f1020a8daf165e6L, + 0xd439739f8faabc58L,0x6efc35ab5951222aL,0x0000000000000187L }, + { 0xf15ee7129498152eL,0x9e85afc9acbf1f82L,0x7b5853ee397280baL, + 0x0283243a6b8d13eeL,0x64abbdc796cde865L,0x0284a75f3dff94eaL, + 0xc476e3a69c7b3d53L,0x9dafe302d1884045L,0x0000000000000133L } }, + /* 9 << 147 */ + { { 0x274d321fa6ea2d61L,0x189731b0d4b12aecL,0x3e3ad753923d81b9L, + 0xe89c40b7befba1a5L,0x6fcd642a015eb22dL,0xde04f997d12be7f4L, + 0xf2cfa38caf2ccc17L,0xc9aa307ed96b8001L,0x00000000000000a9L }, + { 0xd390598b0b1d5123L,0xe784d0dc329e33efL,0x5b5c64a26d030653L, + 0x3bc1c276b5a20b12L,0x2cca6aa8755810f1L,0x0c01df0aabe5186fL, + 0xe997bd088aa9c400L,0xbb8d216198c3fb2aL,0x0000000000000117L } }, + /* 10 << 147 */ + { { 0x1d7b9b778d4fec63L,0x47eb33fb690c571aL,0x3d2bb2de05a01ce5L, + 0x16cde99b78c47fffL,0x0359242c699656b3L,0xde0c7d81bdd416b9L, + 0x60863068b6144f4eL,0xdfa66d197583c05aL,0x0000000000000142L }, + { 0x892f8b4e8ccad1d1L,0x085cf7f6cceaac0fL,0xa07608425df33b07L, + 0xd4737bc3a8747ec7L,0x7f8147a128f4caddL,0x49d9db4c8480de3cL, + 0x5baa01d5872dea19L,0x2a8726b1e84044c3L,0x0000000000000126L } }, + /* 11 << 147 */ + { { 0x7f3138f9e293680aL,0x6dca11bbfd6a8b9bL,0x88932ac6b4b7ee2fL, + 0x5138acdae2b95958L,0x91dc3e9a639d074fL,0xd7d4903064296944L, + 0x818ab55f21383bd9L,0x78ae7acb1032d97fL,0x0000000000000069L }, + { 0xb23f14d075173454L,0x51df08b37ebc4f81L,0x808454cf65aa52b9L, + 0x782b09610e6c952bL,0x40658769b8e92c5cL,0x4fd3ace2349f1d8fL, + 0xa9cce79ba0699cc6L,0x587f8a83e565c582L,0x0000000000000043L } }, + /* 12 << 147 */ + { { 0x19a54eb6407386d7L,0xdf16baf601c2186eL,0xfad573e89bd9a894L, + 0x343bbb0bccbeac47L,0x44febe5e4f7f5443L,0x87c81269c3ad9175L, + 
0xdd79411447319eb7L,0x3302374938f26dceL,0x00000000000001ebL }, + { 0xf94f891c1f1401dfL,0x2ae0e96a1450357fL,0x179a522785202255L, + 0x10518e7f397d8d98L,0xfa56e665828575b0L,0x7622b705faebb982L, + 0x63f9067ae3b3b8acL,0xb30f7bec6fdf48ddL,0x00000000000000dcL } }, + /* 13 << 147 */ + { { 0xa1e6af0f35b8d081L,0x46af5adba14b50ceL,0xc6cc9452288c50f4L, + 0x1bcacbd6f8910af4L,0x9ac8ebdad83a4b2dL,0x021e43ee5514aa3dL, + 0x4d5d0544b14bfd4aL,0x5bef6ab754bc3519L,0x00000000000001e1L }, + { 0xd6ee66883af6fd56L,0xfc55226508bc92a7L,0xedab91fa2e7ddfb3L, + 0xe3f1fd7731472d2fL,0x2c751ccf35ea46d0L,0x570cf16d4cdfe038L, + 0x6ace17d6c87e7495L,0x02a9a0f3942997b0L,0x0000000000000198L } }, + /* 14 << 147 */ + { { 0xe5b551ea2dbff4f0L,0x438195568ed22f87L,0x1a42769cd42f4897L, + 0xe5a6f518a1a6ee3aL,0x9b53a9e63ab29b90L,0x6189a415c2fbf1e6L, + 0x6ee4570e2599d956L,0xebc865b6dd1f4bcbL,0x00000000000000a4L }, + { 0x66abacd8843e4791L,0x3bb5c48e7a8cdc84L,0x3f4e2d197b5a2f8cL, + 0xbb945d44caffda4fL,0xd09a8f34b7bfb4d5L,0x1308f3d83501d709L, + 0x0bfb71e4352b9435L,0x92c77d2a403e594cL,0x000000000000019bL } }, + /* 15 << 147 */ + { { 0xcf38db175fb9017bL,0x66e774ccd1149878L,0x262164d38581662dL, + 0x52ca418bfbc653e2L,0x9d3b9b86c74d0c1aL,0xdd58b60b41dd9fe0L, + 0x63455b0ce55cf6f6L,0xef7fa7be5e37bd43L,0x00000000000000dcL }, + { 0x6982c041cf5b3c78L,0xb48f8d01eb7fb22cL,0x2f88398d5c4c129eL, + 0xfe6c03131bfa5216L,0x2c94d9d55eaa1448L,0x19b49c084fd026c8L, + 0xf3ff621ee08f4788L,0x4b3e563397f2b547L,0x000000000000012aL } }, + /* 16 << 147 */ + { { 0x9e4ab8c58b327354L,0x89377c3c73f5d13aL,0x1f93cc126e55dee9L, + 0x7b4255bce490c030L,0x5451bdf5047ea02cL,0xc01d952cb461d14eL, + 0x172d0f20b9e21078L,0x694587a5a1295f3fL,0x000000000000002fL }, + { 0xd54076270219fa4dL,0xbc0b846fa3bb7f30L,0x5c17104e2ba06bddL, + 0x2490f12bdf3ce36fL,0x0fe2e1ca08468c10L,0x8126c060a9e2c73fL, + 0x7685e59c5cce9dd7L,0x5f5f64da9e8ca8b2L,0x00000000000000bfL } }, + /* 17 << 147 */ + { { 0xd8f8dcb47165a15bL,0x3566655d27456d16L,0x3f8d6e4dd64d1044L, + 
0x63cc9c8d4d23368eL,0x3b81eaa384ed399aL,0x7c81ee61be377229L, + 0xdc091f9720e863a4L,0xa2d486a7342e6ec7L,0x0000000000000194L }, + { 0xcc7f5d9294fa2791L,0xdc18552a8cab138fL,0x0418657c2e9238c0L, + 0x38b45acaaec9eaebL,0xbd68814fa14aa270L,0x5b521b2ae14bc29cL, + 0x22f15327c5ecd539L,0x285ff2f2e109b02eL,0x00000000000001d1L } }, + /* 18 << 147 */ + { { 0x90276bd80eed122fL,0x5feb9a3a51af464fL,0x6723d65f591afda1L, + 0x4612b5b20f6233c3L,0x8427e3b7aa39d6e6L,0xa6311da25050a2a8L, + 0xfcf3c90a384a189dL,0x956c6fa5c0e2cb3aL,0x000000000000010aL }, + { 0x665bf31000b63facL,0xcb8cfee137c71ea8L,0x8b3cdfeac1bc2f1aL, + 0x712f91a9956eb88fL,0x3e7ab8c3b979486bL,0x376b1f96e540d2b4L, + 0x01ef726f8cb06e29L,0x240b1df4cc582bd2L,0x00000000000000d2L } }, + /* 19 << 147 */ + { { 0x489148c9c274d307L,0xc660855334a59a0aL,0xf1b52d7feb4b8eedL, + 0x98e050a7ffb5310fL,0x5c6c2a50069fa531L,0x390546312329789cL, + 0xd525a9f162ad0e01L,0xfe4f68300d72772eL,0x0000000000000145L }, + { 0x22065d2ccfec9cf1L,0x55c4e59c64422d9cL,0xf233fd545bdf4751L, + 0xcd8536a0221ad6b3L,0x277d802686061244L,0xee697ba465e521cdL, + 0x0efce228316d5176L,0x26bf3a5dde23683cL,0x0000000000000189L } }, + /* 20 << 147 */ + { { 0xd145eb32b6f1416cL,0x77bfe297670bd3e2L,0x4fe924dfce10f4d2L, + 0x9651650629f3c127L,0xcc669c51715b9e23L,0xafa90094b60a780bL, + 0x35114c8ae1aee1b5L,0x3f793f136599dd0aL,0x0000000000000176L }, + { 0xf098fa02bab41133L,0xc018fb4737635fb9L,0x0be935b6eb948d5cL, + 0x453838bc27db09c0L,0xcf717312b29a531cL,0xb001eb51446ff102L, + 0xeb0a15c7275b0b94L,0x32e88f1499a3fa93L,0x0000000000000049L } }, + /* 21 << 147 */ + { { 0x8cf0c5cab2cd40e7L,0x02005c870804d05fL,0xda149871a34588ceL, + 0x867ee45824cedfe3L,0x87be46ef27b2c1f9L,0x7774a05c22a6426bL, + 0x37322562a785a3d7L,0x1027a76b6ecf1cdaL,0x0000000000000164L }, + { 0xe5e8a7d6131efb98L,0x93da652a3c4c1d49L,0xe7c912d72658edacL, + 0x8185f36a36903539L,0x37b3cf19dd6f668dL,0x70ad7a7cb48ddd81L, + 0x8b290a4f8b04de45L,0xb7382782f7a955b9L,0x0000000000000031L } }, + /* 22 << 147 */ + { { 
0x5add1d5f23342e9dL,0xdfc4c696230f2ddeL,0xd925c3a6049c89baL, + 0x9e2e23d5074b981cL,0xa11db26cf8766d4eL,0xc0a69d7fc37ce4cfL, + 0xf6de27c4b102e1d0L,0x9921ec7010409988L,0x00000000000000c1L }, + { 0xa66b32657b6f0de2L,0x2bd9a65778871bfbL,0x29f760a353bb697dL, + 0xbd91b082ad080bdeL,0x8644a3328e96ea2fL,0xb3e71cdb10d83cf8L, + 0xd8611580758300c0L,0x7ebb9fd48146381dL,0x000000000000009fL } }, + /* 23 << 147 */ + { { 0x64b6828796343187L,0xf8f6db918829c5b9L,0x6474822e697768c7L, + 0xd5cf4ddd770add15L,0x483b8ab4e7439172L,0x793a759d7624731aL, + 0x9d4b4dcd439b35adL,0x46654a6afecca0f7L,0x0000000000000084L }, + { 0x9f2b0ec49d347a03L,0xc4ea4300d7610174L,0xfe4be93c7f5b78d3L, + 0x00984cc68d6e3886L,0x42a4f895c7e026a2L,0x258e4272d69990e8L, + 0xe50f642e7dbd5c71L,0x6f81b7ea0d04a6c6L,0x0000000000000073L } }, + /* 24 << 147 */ + { { 0x8c9f5cc170b1bcceL,0xe2d68ecef726d3d6L,0x330e1c4c707f25abL, + 0x4397647f18dfd382L,0x8f83b1ee249a6ebbL,0x7d291bb65738ebedL, + 0x11d6af26e8db7b2dL,0x47b048ec173d8d33L,0x00000000000000e0L }, + { 0x16819a7b1843c807L,0x9bddc58230ca2736L,0x2bccd7f2e1525168L, + 0x0f9dcb4d5900125bL,0x90e3017bb5adef07L,0x09c2381a7bda2ffdL, + 0xa203c883d47cc848L,0xd173669eeec3c0e4L,0x00000000000001a3L } }, + /* 25 << 147 */ + { { 0xb3652eaae3f65b22L,0xcaf7a68167717ba5L,0xa80950e2129aa188L, + 0xff5be361375861bcL,0x6770cc746abb2853L,0x00252a511fd9dd49L, + 0x9fe309b2ac485383L,0x59660b05d6c63d4cL,0x00000000000000fbL }, + { 0x0434d8c896b55b01L,0x053204c5de25fa3cL,0x4291f70d0e8d9361L, + 0x29a8f0a9a61ef3faL,0x0192e14431b82f8eL,0x9df111c73cfa7d38L, + 0x344917426663b84fL,0x5da29dea9c154f7cL,0x0000000000000022L } }, + /* 26 << 147 */ + { { 0xf1321a601f9d09a9L,0xf4d31ea30c863f7aL,0x8a43a25aad2ce3adL, + 0xe338837799bc8bd4L,0x062857c2a8e816fcL,0x56378ac296c41801L, + 0xdc0f04db0e66af1dL,0x8848081a56ffcedbL,0x000000000000011aL }, + { 0xea2a3e6060b59a1eL,0x85720082f6ffb469L,0xe10dc94cd3e820feL, + 0xbbead00b43243370L,0x9c4f4aaf715983bcL,0xb53c487337aab294L, + 
0x527c167f49239950L,0xfc0884fe9c1d2b15L,0x00000000000000f1L } }, + /* 27 << 147 */ + { { 0x066eb52486148ffbL,0x05fa5ca6b12813aeL,0x3f6cf7521042cb23L, + 0x620497148cc148c2L,0x7063a531fc06d539L,0xdd392b8c9422ec6eL, + 0x0dabd4a1dc8c778bL,0x86485e1b9e0f9471L,0x000000000000013aL }, + { 0x6c358e988f9b6601L,0x11820d055601185aL,0xed9ba8a45e0a28b4L, + 0x3f28600944c12ec9L,0xa4b35a622ef871ecL,0xe8a89ea53312df04L, + 0x22cdb00ad5975031L,0x34bb65de3d7c84eaL,0x0000000000000142L } }, + /* 28 << 147 */ + { { 0x247f9f991415b5a5L,0x8d40f1e22ed5e708L,0xf1133cb0efc81f52L, + 0x278a9e9c2b0ed8b6L,0xd0e4058b608882b8L,0x0a74bb730c8107e2L, + 0x13e8d54e5260f955L,0x7a147d79d3ab9424L,0x00000000000001c8L }, + { 0xdc0b55a16f9ebf4aL,0xe7a018e054ae1fa0L,0x542720cc8cebfcefL, + 0xad8fb8dd27d8ba5fL,0x6e9d00da68c5590bL,0x4f1fa055014f5fc6L, + 0x972a148006b1efd5L,0xbe9062432116830eL,0x000000000000019cL } }, + /* 29 << 147 */ + { { 0xc6004c28eeec070dL,0xc744cdc1249ddbdeL,0x33a8dd7ed2fdcac0L, + 0x1bb3b8478a15f9e3L,0xbe5270ba74951deaL,0x974cefc5c62182beL, + 0x91a2ed05e7bb8ba2L,0x54ec25d63ec05196L,0x000000000000003fL }, + { 0x61e73c5078cfe5f3L,0x26d3abaa9a6a4149L,0x4813ce619128d73eL, + 0xb60761a175e2bad1L,0xa8f869cadca6d591L,0xc46ef1b082aaaf57L, + 0xe207a47f87ca8574L,0xe156c908eb027d9bL,0x0000000000000131L } }, + /* 30 << 147 */ + { { 0x12a5b6b9d51ea4edL,0x6ee257dc65396af7L,0xd921c77aecfe3419L, + 0xdb9d587e463e4d69L,0x0224cc9f03fe3388L,0x062c03973ec5049fL, + 0x705fdb3fb1e1d2beL,0xb9c77024ecd64766L,0x0000000000000140L }, + { 0x9591f031cc7fc0faL,0x2eb3bc1cc0b49c59L,0xbd54081797ce9a6bL, + 0xd5285b3641b37630L,0x85137b727a1313a2L,0xce410db03bb3c320L, + 0x2d3628d4193ace4cL,0xa39b6d8775b26d92L,0x00000000000001acL } }, + /* 31 << 147 */ + { { 0x0caca7f6389bff14L,0xc34226ae7bbc3a8fL,0xe99816857bf0ff44L, + 0x259e33e7580f4f8dL,0x5e909449d4a80364L,0xf8b2da9223c3d509L, + 0x5b3a3f4bbf30299bL,0x4da73ca88151ad0eL,0x0000000000000010L }, + { 0x5a575220fe7e78fcL,0xef28d13b03eda908L,0xe131b1fcb0f3f2f4L, + 
0x2c8fe211d8151429L,0x1b9897d692f85e68L,0x76e34f11b6437168L, + 0xe13b797b97e53e66L,0x0ec0b9d4463020b9L,0x0000000000000042L } }, + /* 32 << 147 */ + { { 0x16bc33413fdd638dL,0x8c37c58a8c518c30L,0x9dddb8ee79df75d1L, + 0x0bc82320c91a620aL,0x6e924a6c9126a200L,0x744594a9d20c8c89L, + 0x0e9b59ac4cc73caeL,0xce872f0a0b276b4cL,0x000000000000008bL }, + { 0x651f74222c35933cL,0x95238355e025609eL,0xe7891b5b6c14d96eL, + 0x89c0fa370304d219L,0xdf7da33d61b597efL,0x43df03574120905cL, + 0xacf01355718df73dL,0x7ca32d0e631ada96L,0x00000000000000b8L } }, + /* 33 << 147 */ + { { 0x88809494252b6a07L,0x9afb2482a30ec634L,0xe68ffbc927436190L, + 0x63223b43168356f3L,0xed2dddc2640ef7ffL,0x8a3d853beeb5ce07L, + 0x6b555cec4f9347a7L,0xfbff3ff241559afcL,0x0000000000000189L }, + { 0x60b423485261ab5aL,0x06ec126f930b3a8dL,0xb70a741a9eff20dcL, + 0x63ef08dd43f6aeedL,0x4b7e962b391a4cbfL,0x2dea977be636890dL, + 0xc8bcca8edf28a2beL,0x75a069a00db7e727L,0x0000000000000130L } }, + /* 34 << 147 */ + { { 0x64ad4b5d38c6cb20L,0x87b25a276cccb7aaL,0x882e1be1695dca6aL, + 0xb6e216ae98e6653aL,0xab1adfbe4f2a32f6L,0xc7eb17991640b34eL, + 0xdfb5a618e9b287dbL,0xb98377f2e1c483baL,0x000000000000017aL }, + { 0x38c1708b0e576a20L,0xcae3cbb5666618d3L,0xe7681da6a8baaa65L, + 0xca4a87db3661f5faL,0xad07c49713a2923eL,0xfb8e098a1612b88fL, + 0x00648cbd0211082cL,0x77fb7ef7c08add74L,0x00000000000001c4L } }, + /* 35 << 147 */ + { { 0xfd7f19efcd518979L,0x3d439ca9d73c25fcL,0x0834e5770d158067L, + 0xa52457eb7560ff4fL,0x1d72c5c540f8627fL,0x90445790aead9bacL, + 0xa0019ef9ebd5094eL,0x0ec581eff66ceb3bL,0x00000000000001e8L }, + { 0x52b5917cee75ca82L,0xf66fdce0bf5bea5aL,0x825c07b024f56801L, + 0x5fd13cef8991b8a0L,0x3d1d4880e739ff2dL,0x01a6a6218d6aead6L, + 0xb5cae8d5fec72d3aL,0x48c498cf9e68ccccL,0x000000000000011dL } }, + /* 36 << 147 */ + { { 0xeb7eb4e3cf7c7817L,0xc190cb66eaab7734L,0xdc8faf6a81d04f60L, + 0xf11c8a8cf998d778L,0x4e2d4b63db67e150L,0xe5522c9556099bb9L, + 0xcf8dcbf0aebfc2a4L,0x921787cc0c8f6ce7L,0x0000000000000075L }, + { 
0xf6e3e647e163b36fL,0x5e1a1c7428973a57L,0x08c85f9ddf4fba25L, + 0x45137abd0f27483aL,0x993d2b71753cc2adL,0xc6b0112853fa2055L, + 0x74e63b5088ca2fe3L,0x06aa66a1d2a13d00L,0x000000000000019bL } }, + /* 37 << 147 */ + { { 0x682ffa1b7ec4f64aL,0x95d2e079a15dfbe9L,0x172c693cf5184016L, + 0x45b10e4f33eac72eL,0x57f555c9d99bc8daL,0x0ca53b2f0656b3e5L, + 0xd5ad1f26afff9ff6L,0x54197dd60c648286L,0x000000000000016aL }, + { 0x98c5f4989aa5dc36L,0x906d5559936cf066L,0x4daedbf578c08c11L, + 0x60ee55a31fd25c15L,0xdd6d6c5f83d1fea5L,0x66af4443c96f658cL, + 0xd8d4b001326b34adL,0xae9b2b71a2b138dfL,0x00000000000001dcL } }, + /* 38 << 147 */ + { { 0xcf5174df03cbc63dL,0x6c45aa2990858f82L,0x4feeb0e94ca9ff29L, + 0x45c913dd476b0fb3L,0xe019eaebc60aaf76L,0xa616a69f20013ca2L, + 0x57a9477b8c19cea4L,0xc2d0700a1cd82afeL,0x000000000000007bL }, + { 0x1eef2b5fef402297L,0x9f9c57d25aca14a2L,0xb890e7bcd4f3bbf5L, + 0x210714a2ce430268L,0x9088523a57390f8aL,0x165bff6f4e58247dL, + 0x9aa04525f815b565L,0xb775a4615c6d8ac5L,0x000000000000012eL } }, + /* 39 << 147 */ + { { 0xa9bd87c6bd49c789L,0xa1fe2842970f53ebL,0x59160816abe18d83L, + 0xf69f674898c30254L,0x63688c8e6543b683L,0x14d5ee6af773af1cL, + 0x327a220e8c42efc1L,0x666a878c900c62ccL,0x00000000000000d4L }, + { 0xd868e30687e762b1L,0x213f7b043de4e833L,0x022b5fe26c684d45L, + 0xdfbac3e02cd286d1L,0x7d4e6a80aedb794eL,0xbc3f105e1e60b9bcL, + 0x6dfd09ed711f8011L,0x2b4f4fe7a3a0de69L,0x00000000000001cfL } }, + /* 40 << 147 */ + { { 0xda90f7d5baf7c7bdL,0x1a6719969766aa85L,0x3528f03b24f802a1L, + 0x04b09da3c7d51cceL,0x7a2fb5077b8e58c9L,0x4cd283ca46c7716cL, + 0x3c682896f1027492L,0xe34524bd8d6b9f97L,0x0000000000000186L }, + { 0x4696ab6ff2e6ce3eL,0xff2a7b70f6e77b14L,0x19cdf652897ff820L, + 0x09f952c2cdf6c8f8L,0x9b3b7e14e34d3dcfL,0xecf5054ee1a3bd0cL, + 0x19d2e4c83403783aL,0x549bfbc73a373796L,0x00000000000001a4L } }, + /* 41 << 147 */ + { { 0xdeaf616310141b69L,0xc41e53f255557467L,0xce2b3655c4d8b612L, + 0x5ac8d24202f58186L,0x213affee697dcf46L,0x254e7ec39c955508L, + 
0xc43cb58c85f554a9L,0x0d58d3994500579bL,0x00000000000001efL }, + { 0x466b1a5bf7a19f41L,0x113f74e91fe1301bL,0x18bbe771c54304acL, + 0x8340ad81c26c2e27L,0x65367af7544448c8L,0x3021f03f017efbe2L, + 0xbe92d7e6b090ae98L,0x1a0f53e920ecd3cfL,0x0000000000000133L } }, + /* 42 << 147 */ + { { 0x66dc8a9b1def0f5cL,0xf4d1c2b73e9135edL,0x317063850d8e70e0L, + 0xdd355c7a4d254a7bL,0x6189dd55dcf70a93L,0x9d62b529b728ef6aL, + 0xf9986ad1c74066b1L,0x9aea4f1ae540d945L,0x000000000000011eL }, + { 0x130818dcf200a284L,0x56418ab66e71f015L,0x94b3427eab8722d7L, + 0xc5c1c1587aac7863L,0x55fe2df518b94dc9L,0x4813e9915c782da6L, + 0x6c50ae473eb671daL,0x8b25a79012ba0f9cL,0x000000000000004cL } }, + /* 43 << 147 */ + { { 0x409447460587a0e4L,0x5d458c798d9f57fcL,0x684919bcd8657675L, + 0xcf551d43839fa071L,0x9de6fa17af775947L,0xf4bfb5506669009eL, + 0x234870726375aeddL,0x5cb589eb1c7180e2L,0x00000000000001bbL }, + { 0x128d24df97ad0e22L,0x160d55a40111adbcL,0x7e57af656d0f5f99L, + 0x4d6c763f09910699L,0xfedb36a7ab9c174aL,0x35fd8a827f579c65L, + 0xdf7e65d169af343bL,0xca6829bb448a45a4L,0x0000000000000015L } }, + /* 44 << 147 */ + { { 0xee98bc02f445f09eL,0x8a9361ff6b4728abL,0x8ce33c616bc007c9L, + 0x191819be51c7e170L,0xa1c971c0a0f7eb3dL,0xc00ad1e760c9f532L, + 0x790750ba5972bc14L,0x665d98cb776c1093L,0x000000000000014bL }, + { 0xcf81d62fd598c5daL,0xfe84e05c7aa8be48L,0x0943e0156c7e2a3cL, + 0x39bbb769352063aaL,0x3ec54f1b638ad619L,0xe0a92fe3e248bbc8L, + 0xedeb7234718dc78aL,0x22840b19ce6775b8L,0x00000000000001f1L } }, + /* 45 << 147 */ + { { 0xb6f96a59d33ac825L,0x5b7d26dbe328d33eL,0x4b914baafb3a9c2bL, + 0x626782a126376196L,0xc4ce8e4e9c5c39ccL,0x936c7957afc6729fL, + 0x7d33ddb57a880449L,0xf1c162de61f21de7L,0x000000000000003fL }, + { 0xca755d673c0abcc4L,0xee71ff879568310dL,0x57bb9cd89996bdbfL, + 0xcda5682e63602af4L,0x45f7d757e526c8a5L,0x3f70d0a67e9730d0L, + 0x47d5cc70584be9ddL,0x57453cca4a5091fdL,0x000000000000001dL } }, + /* 46 << 147 */ + { { 0x9e6e310cf74041ecL,0x00525f452f102196L,0xd509a1fafb089536L, + 
0x25eafb85b6ecab1dL,0xc3db17473e670edfL,0x73cd5b1a02a74deeL, + 0xda5f180a708059e5L,0x4756564105f4a4a7L,0x00000000000000d9L }, + { 0x4617c8dba2e177c2L,0x4c1cd54d48928799L,0x9db79cc174cd39f7L, + 0x6ad011de815393e2L,0xfda0a9d375ab45d3L,0x941924fb4944f8aaL, + 0x9e5612f2425803feL,0xb85b6231e73ad659L,0x00000000000001beL } }, + /* 47 << 147 */ + { { 0xf5f9fc42c0ef1a21L,0xe9f903271bb54b84L,0x194164521fd4c3d0L, + 0x07230be791a093adL,0xc8611dc627a57239L,0xeb64b31ef0a2ed05L, + 0xc99864d317c09562L,0x13bd93ec682aa5c6L,0x0000000000000072L }, + { 0x22a721e66314648aL,0x04bbe0541809ed8cL,0x709a97cb3431bf60L, + 0xd23000ca75e2ab96L,0xcf94327f7317e6e0L,0x5c07a24361efe4dbL, + 0x1e0f88bd75585a53L,0xe9d5784e71067b16L,0x000000000000008bL } }, + /* 48 << 147 */ + { { 0x140d97d668d43990L,0xc2153671f05254d7L,0x15c7cb4048835a51L, + 0xed01ae6e9b1e5c22L,0xc33296c45a63713bL,0xb44ae73171a53417L, + 0x70b2359e4e771c6bL,0x582eacf2fe7d32b4L,0x00000000000001e3L }, + { 0xb6a805b0b9a67492L,0x97617ae34369cc31L,0xdc08edb44d0a7af9L, + 0x1c89e626c0c63f8eL,0x75e1f1ed615ac711L,0xe03d1b7d8e3eb258L, + 0xcc2e06e4761fadb7L,0x2dea6e630cfc1724L,0x00000000000000d9L } }, + /* 49 << 147 */ + { { 0x30eeed7612619b7aL,0xd8daa1efe23568aeL,0xaf0c5d2700f7929aL, + 0xd89f78a90dca03efL,0x8eb2878fe39fdcdfL,0x8d8e0ddda71f1a3dL, + 0x4042473e60afe2bfL,0xe0501f41c19d3ffbL,0x00000000000001baL }, + { 0x1693d5a1030e8b4fL,0xa0d858cf93320df4L,0x45b0c87cfcabd7c4L, + 0xb6e171378f8fa025L,0x892617bc01ff2d11L,0x89030f422ed3a401L, + 0xda0ddd2ac10ee939L,0x24b042d5fd5f3ed7L,0x000000000000009dL } }, + /* 50 << 147 */ + { { 0x28ebed439ae8948eL,0xbe7a777ec909cbadL,0x4f5b240434fdd33eL, + 0xb0ce95a86f24e7feL,0x9549d6a80a6117aaL,0xea4f9ae39ab68d8cL, + 0x117b35bf9258ab2fL,0x262f08aed8e0e9b7L,0x0000000000000092L }, + { 0x808fe92c29be09dfL,0x57f69b77ea43d2c3L,0xfaf2e024f50c9e91L, + 0x00a2dbc95fce3573L,0x72d3ae0513d465d1L,0x4fc54830ed56e68cL, + 0x662a86491a37f74cL,0x6977a68476c58416L,0x0000000000000063L } }, + /* 51 << 147 */ + { { 
0x56c1d2934f06b0daL,0x396c88f326ab5d67L,0xef6b98362d0ee910L, + 0xbd5794fb82fea351L,0xb4aa0ebb8e8a0674L,0x1f99bd2c992a8e30L, + 0x6c77c4ce13f5b05dL,0xcc2d35166d96d584L,0x00000000000000eeL }, + { 0x29896c6aa05844fdL,0x615a57b51b2ee851L,0x0780b7ba64120a3eL, + 0x5b4a41d756738a39L,0xc38019755ea981b3L,0x925ef03121aafd98L, + 0x62a94e460c31922fL,0x13cf0f0c36933ad5L,0x0000000000000083L } }, + /* 52 << 147 */ + { { 0x301c46545f009683L,0x5dd9f7cde23f6a45L,0xe751c7d9d96753e9L, + 0xb26fda93e50ed86dL,0xb48e4d2ea3014b3eL,0x6d82b06551d16e40L, + 0xa8b8f4a19952b35fL,0xa3b70bff95f097b1L,0x0000000000000003L }, + { 0x98b9e4d7ea5ce5a2L,0x86cc4bb2d9548b1cL,0xee2c3a715f65d9a9L, + 0x22b7cc27212980abL,0x8820302cf5dfdbeaL,0x7f0290529fd77d3bL, + 0xfaef33b3599b0e13L,0xd6b215998a929bfaL,0x00000000000001bcL } }, + /* 53 << 147 */ + { { 0x3822d6dde05bcff9L,0xa1df90859a966665L,0x37d8d08affd0c260L, + 0x044eb203a393185cL,0x2bb1e2a097b4fa61L,0xd824c5f2f069339aL, + 0xee973a505e47a499L,0x57c7cb70c7739a14L,0x00000000000001a3L }, + { 0x50708368841c8a1bL,0x149b29f0e1c18a5bL,0x6d3057aff86259ebL, + 0x42b1013f9c561498L,0x9c917c000a3c96c5L,0x1f64ce7260e1c1e3L, + 0xdca64e5d0b26eacaL,0x70809a61ffe62202L,0x0000000000000060L } }, + /* 54 << 147 */ + { { 0xf7cb364bebc4106bL,0x322868aa1f5b16d7L,0x35d2bc278a47978eL, + 0xdd71c1c5888a45d6L,0xbced525701370078L,0xdb80de35d2c742c3L, + 0x9de9da88492943b8L,0x056084de6d5369b2L,0x00000000000001ceL }, + { 0x481ba518bd731791L,0x5b7297867252de15L,0xd9e0d759af151de3L, + 0x32df8fcf9485cf7cL,0x959a47d70da582faL,0xada095c0ae5e4106L, + 0x1f870a322c80f5beL,0x3996f25c7b53b872L,0x00000000000000fbL } }, + /* 55 << 147 */ + { { 0x7b6c5a0d299215b2L,0xe63c9ea3ff30cd5cL,0xe20dcdced944d3d8L, + 0x58fc33a5037bfe23L,0x084db73f8964c02dL,0x295cbb668036f2d5L, + 0x5fb5b1575f11677cL,0x08c6f7868c0b8c9aL,0x000000000000005cL }, + { 0xa71be7daf9fc7b49L,0x90b4fb112eb884afL,0x5a433d1bc20f3de7L, + 0xa9f96c15d7ec8c53L,0x19d73a9202b16cf4L,0x6db1ec20f0c59b32L, + 
0xea08ad41e9148372L,0xa9425964a8fd959aL,0x000000000000012eL } }, + /* 56 << 147 */ + { { 0x327d0a715d58ae51L,0x5781201b6bbb0e9dL,0x2e738768cb5ba871L, + 0x7a7c4287a1ce9418L,0x75e146164efab0e1L,0x14839cf658a84558L, + 0xc9c0e010b0156aeeL,0x337859475f043330L,0x0000000000000177L }, + { 0x9f821ccf075f05d1L,0x18ea20c4c28c986fL,0x6b3173993906dd1dL, + 0xf8c200ac1ac84aa0L,0xb6876e9b2170699eL,0x967ebc48894c5351L, + 0x545de7957c5503a5L,0xaa3e77f946b8d0c9L,0x0000000000000038L } }, + /* 57 << 147 */ + { { 0x09995831678db97cL,0x21a68fbeb3a294d1L,0xe7e00883a6e564a6L, + 0x6c2bc4473c6351fbL,0xb26c5e696be68123L,0x5f0bec160f558615L, + 0x7861fd1d3d4e11daL,0x401c74bb74294954L,0x00000000000001abL }, + { 0x99ccf2f0240019eaL,0xcd7b969a7d4cd159L,0x5c8c5407f98ad9e1L, + 0xed9f828849e17f16L,0x15e6f01c07ec8be3L,0xb9385f7f9215c605L, + 0x9d0dcdec545b52d8L,0xcf19523c4ee0d717L,0x0000000000000081L } }, + /* 58 << 147 */ + { { 0x6075fa38019b76d6L,0x69026a5309c93da7L,0xd74ca37abc94b34eL, + 0xb9326f7dfdb4db42L,0xee55a0529d571ca1L,0x28cc82349b045d9fL, + 0xc32980ff32b90fbaL,0xae7b8a9e23bcaaa1L,0x00000000000001d9L }, + { 0x7999b36bb1e28505L,0x3f1a81991a8b12dbL,0xfc384a1e842adb45L, + 0x45928ce192acfae9L,0xf9c422639575c3b1L,0xa1d497c00c64efb3L, + 0xbb171deb8f675d61L,0x1112bb0ae420e774L,0x000000000000012cL } }, + /* 59 << 147 */ + { { 0xd3a1a740a18e8dafL,0xc7ea1eb81f85d156L,0x9cb8aa11e8cb68e8L, + 0x5867e62f3cd3e2e8L,0xb9a337f1d8525a5eL,0x19e80310986615bfL, + 0x35b1126ccd2046ebL,0xe0adfd0ca338224eL,0x0000000000000114L }, + { 0xb5041e87e0d36af3L,0xf6c1d74ac830aa98L,0xa1251bcd3d9a58b5L, + 0x084bde5b887e46c6L,0x17c1cfdec9066fdfL,0xdbd86d9be1a07155L, + 0x04e6b547ad8b184fL,0xe1387caf9ce95793L,0x0000000000000082L } }, + /* 60 << 147 */ + { { 0x4443a6c3322a7d6eL,0xbaaa07e080c34583L,0x030a347041f390bdL, + 0x1570596ce98c416cL,0xa7ed4c7e3ed2ccdaL,0x936b5951440b9124L, + 0x6587c9d546d24df9L,0x77ddf54c8b22db32L,0x0000000000000025L }, + { 0x63c9e7b7a4762e43L,0x5a15b97f843b6aa6L,0x370982cb89c1481cL, + 
0x2d4bfe90bc3ce946L,0x5467f0821fa11973L,0x727ec857f1ed8d70L, + 0x6d0ddd0bbcd130f3L,0x952129f8b6cbeddaL,0x0000000000000142L } }, + /* 61 << 147 */ + { { 0x8c63e55ac218c2b4L,0x93017667ca59394bL,0xc0f8e0d1602295deL, + 0x5de02425002fb9b1L,0x493fd199e8b8fc46L,0x854e759781e1ea54L, + 0x2f57f5a2dae7a5f5L,0x8454844cd6f46c80L,0x00000000000001b5L }, + { 0x246d3a54b0b01c98L,0x807bf3892860d179L,0x3455551b5474e8d3L, + 0x8e007f866fc4f8b1L,0xc409e7d377e29dbaL,0x84510eb48359b208L, + 0xf29d0ad368d94a20L,0x7afbd7935010b843L,0x0000000000000114L } }, + /* 62 << 147 */ + { { 0x48b9115978951f4bL,0xaef87217265fad80L,0xb5d5547ad94d42dcL, + 0x723c460f1a5ac9d5L,0xbced8eb304aa83bfL,0x276275ab9f569648L, + 0xb7e2654d7db652c1L,0x16acb62bdd9088a8L,0x00000000000001d8L }, + { 0x8318bbe3a0134fcfL,0x73dce05c07cab5f3L,0x0d3421b9ead384b5L, + 0xeb5c77f492b4c9c7L,0x15186834e01bd002L,0x27583717e576e035L, + 0x4d3ecbc88c8d4d43L,0xe63044592b81fde8L,0x0000000000000059L } }, + /* 63 << 147 */ + { { 0x7ec6163a6f63d2c3L,0xdff5558287421ff8L,0x7dd99c23b1f59a95L, + 0x7604aeaaa41319c8L,0x81c31a0cf7b60b77L,0x80a5bea5597dc7c0L, + 0x9abc806b7c1f1d0fL,0x60e9f42bb2c9a37fL,0x0000000000000090L }, + { 0x3526b63f850d46b9L,0x4532e167a10fc8e7L,0x48e5c8f5a35019f0L, + 0x7aee2a2dd22272ffL,0xa08dee5c726d0685L,0xd0058b333979987eL, + 0xd155241263607bf5L,0x2201b75aca9a11d4L,0x000000000000018cL } }, + /* 64 << 147 */ + { { 0x5249e953d6ae5716L,0xbbf778ca1027f3cbL,0x0a535c1f01020499L, + 0x80f4cf1b7b847ceaL,0x3cb8c87b66d8dbf8L,0xa8d7b35bdd3562a9L, + 0x0305b1595ba3f0abL,0x9d5ee3ba7775dbedL,0x00000000000001b7L }, + { 0xbe2096494e5050cdL,0x176602a16790efceL,0x6aa6ba5f0b21f5c7L, + 0x6a1d5df0f8c4116fL,0x1a188a83746abbc3L,0x2b2d3317fd2c047eL, + 0x18067e79b9e5d5b8L,0xed9bbbcfc4be8357L,0x000000000000018bL } }, + /* 0 << 154 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 154 */ + { { 0xf6a2bf97f0ab06d5L,0x9f8da4f011f2c6a5L,0x776a1eb0cb8b966eL, + 
0xe74b7349a1903dd7L,0x6978c653dae05c8fL,0x77d364c9e1f5b52fL, + 0xf3e29bccbc7779eeL,0x4cd77a80641ba885L,0x0000000000000136L }, + { 0xc79b648f9cb2168eL,0x1b52d0b7d06dbbe6L,0x7b7302bfa65c96d5L, + 0x1eea500edc838093L,0x83aa1612fb69971bL,0x6398cff0528e16ffL, + 0xa64963a56d95deebL,0x36f359053d7da7aaL,0x00000000000000a7L } }, + /* 2 << 154 */ + { { 0x708003f83c48adedL,0x936a8a382497ce05L,0x66f8e450afece34dL, + 0x6cc3a161513e1c0eL,0xe2fb655e044610faL,0x8af6a3b362e764c3L, + 0x839a61f18ca809a0L,0xd43f2d1ec330dddfL,0x000000000000007fL }, + { 0xf4fb698b7acee1ffL,0x5d19501bcb302bf8L,0x393c9f0876ac0468L, + 0x16f8b677fd22441cL,0x85662743c504d916L,0xdf3d33119c8cb9c8L, + 0x1feb20ee42f9cbe5L,0xecd3eece5cad0542L,0x00000000000001ebL } }, + /* 3 << 154 */ + { { 0xa9c120098d6d74c4L,0x862c2e5ac0e79387L,0x30e24459ee38eff9L, + 0xd3637e69764f4b51L,0x6440afad56e273a5L,0xc95ca130e45adb77L, + 0x89cbd694c96ea2baL,0x9b9c1edbf2099810L,0x00000000000001d9L }, + { 0x48770298e474a5acL,0x5612463b7b903222L,0x4d92c48f12de8a11L, + 0x360e5c32ca366bd5L,0x8f16dd03e7e9928eL,0x41cbda9e88bf12f7L, + 0xe7d794691af3b3edL,0x34c52e7caa4e82a8L,0x00000000000000a2L } }, + /* 4 << 154 */ + { { 0x8756b00a9f004528L,0x20374136e9c2b2e2L,0x9e6c6a22cca54915L, + 0xc5dc6015f6629736L,0x5ef3efa8c4510c8bL,0x1fe7836cdcc2ce9aL, + 0xac80124a0b55a453L,0xe430cd86dbb7a64fL,0x0000000000000104L }, + { 0x939b5896a385741dL,0xa80c7d7e3a6c90e9L,0x8df754d6e4c02621L, + 0xccc73ff172958d91L,0xa80cd923e911d8d9L,0xee0dca498c0171e8L, + 0x850d8439fecadde6L,0x37550fc24f714086L,0x00000000000000c8L } }, + /* 5 << 154 */ + { { 0x04b6a0e91ecb5d79L,0x76dd6ff66661a50aL,0xf781aee776b0f62bL, + 0x1c5aba38e8569d33L,0xcedc599451bab8d9L,0xb9753d145556e4f7L, + 0xe0434b07504b2dbaL,0x20ebda14290170beL,0x000000000000005dL }, + { 0x2c1ede1ef77da08eL,0xcf0c0f3da5a7547cL,0xfb8f6167f81eb5baL, + 0x36854edada4a5de9L,0x006dcb13de55ea82L,0xc92040022e6d0330L, + 0x444e1300ee733ae1L,0xc75a73608f3de483L,0x00000000000000c8L } }, + /* 6 << 154 */ + { { 
0xe09a3cf0af969a6bL,0xb92d2f90bd6ac057L,0x1b2f0a7c84d4595eL, + 0xc74ccfc4694eca28L,0x5f9cd8d4330323f5L,0xfcfcae74ef391ed4L, + 0x0081ef66bba18e42L,0xc025895aa955d164L,0x0000000000000156L }, + { 0x9ea9ae8ddcd1ce6dL,0xce0eaa5f3c6ec110L,0xf8336a3517ae8ce0L, + 0x34205d8698f175a7L,0x42a29536af7c195fL,0x21b0cb3cf85cdbfcL, + 0xe6da7fa8840fe9bdL,0x766646688a894e33L,0x000000000000017cL } }, + /* 7 << 154 */ + { { 0x9ef8a9907b69dfd8L,0x3c01c44a3ad9d90cL,0xb576538c5e18521eL, + 0x234ca9937a342024L,0x08f6b0d84b8f02d4L,0xb2c97f96b70eb5bdL, + 0xe8747b6f235ce355L,0x04d9f7126870110bL,0x0000000000000075L }, + { 0x1fe78e291de0c835L,0x7069aa9eac0c1b5dL,0x5f04cb3d2b647a13L, + 0xf3523829affc1b77L,0xebec6717bb0df520L,0x6abc248682c68f93L, + 0x2ac72f4e21541ab0L,0x82b5d659bfc23f23L,0x00000000000000daL } }, + /* 8 << 154 */ + { { 0x8323b86fec088bb0L,0x18eeb2d3e169bf94L,0x9eabc22555450a6bL, + 0xfd49e8fee35c62e5L,0x1a8255f86a5d661fL,0xb7ee978fff87b2b7L, + 0x6d59a24739975957L,0x8a12c7f513e61ba9L,0x00000000000001dfL }, + { 0x61a3f3a2b30ea34dL,0x21fa89924be16d8dL,0xba41fc20d8613751L, + 0x10b11b88cce7e39dL,0x82a797d7f79ede48L,0x9559db4a5fdaa76aL, + 0x445442ce577e7ef6L,0x358c9be06751a909L,0x000000000000001cL } }, + /* 9 << 154 */ + { { 0xc5febe234897adb4L,0xdd7e212192ee8af1L,0xdf70f39b6892b4c9L, + 0xf1ea9349d4a09185L,0xd5121d35fb3ba54aL,0xe4d39b40e927a37bL, + 0x39f03f40d3517713L,0x414e0e866f12f5e3L,0x00000000000001eaL }, + { 0x4c24f87b627300f2L,0x8ef8b8301a0c9008L,0x959744aa7b140d22L, + 0x7a406464bb9589caL,0xdb329b0f1bfde0ceL,0x0401dd975b24cc0eL, + 0x8aebc77378371f29L,0x02fc0c33f1febffcL,0x0000000000000028L } }, + /* 10 << 154 */ + { { 0x56e2294c1317d843L,0x66a415d0f34bd426L,0xd65e0abd82328d7eL, + 0x0026bcece495cbefL,0x45b42d450356eb16L,0x08f16cebd999f3c6L, + 0xa841b2bee82cc5e0L,0x9273864fb4719eefL,0x000000000000005dL }, + { 0x35ca9c719f125b1bL,0x2ae57e9490d045baL,0x2ca6a02babbd691dL, + 0x1dfa73463e2e1507L,0xff9ff444b7fc8813L,0xc6cb3ba2fec77e7dL, + 
0x3f9d74e5b9d39df1L,0xfc653ef49be360acL,0x00000000000001baL } }, + /* 11 << 154 */ + { { 0x2d1043706f4b5907L,0x279eeb552a965a41L,0x06863a068bfd9926L, + 0xb1a1567f839d5e83L,0x8f417c47d8c0490bL,0x548295eb268992d7L, + 0xf3a1978a1ef3cdefL,0x402b4001669c3be9L,0x0000000000000097L }, + { 0x9a9f1ab9606891d7L,0xcbe4ac71cf1bdb11L,0xbcd93ed09ca6dbb7L, + 0xba64c0644767f5b8L,0xd64c49b1842b720dL,0x4ed8dc7f29bf4c35L, + 0xa97b185f57bb1f04L,0xfbfd4a978e62cd56L,0x000000000000017fL } }, + /* 12 << 154 */ + { { 0xa22790ae7d5e235dL,0x6ec954be4a9a5780L,0x26aeeddce38ffa50L, + 0xbeb75310eb3feee0L,0x44cbb50840a31067L,0x4a0ff701380e0baaL, + 0xb23528c820cfe922L,0xcff1ac948fdf6c51L,0x0000000000000158L }, + { 0x80acb80001d7f58bL,0x2181a99daf2d7b4dL,0x30ff9020146506d9L, + 0xf091d3057967a5dcL,0xb37c10521cd6c51aL,0x401010b56b0db694L, + 0x1b1bc5e960fc5f73L,0x8ab0cf8eab3aac42L,0x000000000000019bL } }, + /* 13 << 154 */ + { { 0x58703a2ad975b4b6L,0x6167e7ae4f202eabL,0x26cb064f7c8b1d44L, + 0x1bd0c0b1552b7474L,0x1f1fa56f85b02ad6L,0x42f451f99363ab7aL, + 0x9f9888f1b7f2df41L,0xe0982ddc935b0f16L,0x00000000000000b0L }, + { 0x5b3133ffbbf0e24eL,0x22cadc2c623da1daL,0xc106e88a6ba85f88L, + 0xf6e00838a71bd558L,0x6917980380b79631L,0xcc177913a4fd8cdcL, + 0x004e0ca999889eeaL,0x530683b77ea0ea1dL,0x00000000000001fcL } }, + /* 14 << 154 */ + { { 0x409928e4278ffc44L,0xe39bdc274353b086L,0x1323b2d040382fefL, + 0x69de9b7d28c99bacL,0x1c68f0e2bfc5d486L,0xfe49bce5094d3cb2L, + 0xd37daaf81110a3c0L,0xc9938bea2f6c58b9L,0x000000000000012cL }, + { 0x39b6da445a2f1875L,0x848c2bc18302b740L,0x10b917d8cb988452L, + 0xac9b47fabbe451fbL,0xc1d6792eeba243b2L,0x2eab0db88610f760L, + 0x3810ac18e475e77fL,0x1e60d81024c55728L,0x00000000000001aeL } }, + /* 15 << 154 */ + { { 0xff7ded2add770286L,0xd8d3c66e845a63e9L,0xf05e27d53c535b94L, + 0x9aeb2f6723f42216L,0xc399087f1031b277L,0xa67bb1dd192acf43L, + 0xc3957a6162fb8f4eL,0xd3ac962f8f632266L,0x000000000000006bL }, + { 0x078cf75e873bbcb2L,0xde95d2a9aa5d4d4aL,0x94e8064d7a48befdL, + 
0x168bbfc6763f2312L,0xf48b61bc4603519fL,0x997d31e07c65d872L, + 0xc404c7b818cffe80L,0x540b8bdd75fcbc1fL,0x00000000000001c4L } }, + /* 16 << 154 */ + { { 0x686698e89cafe97eL,0x3f2a465db6891510L,0xa99bcd91c9aad498L, + 0x0c7ae217a47c770bL,0x95067a8b6a5e6f06L,0x0b55da4025a11429L, + 0x5372b805997cabffL,0x1e8bf414963d233fL,0x000000000000002bL }, + { 0x1343ebe6ab613409L,0x7aab5a01ac96cd26L,0x243758af112c0b65L, + 0xd276e441b236cad6L,0x88a7d43f557111daL,0xc6dace8ba0f46068L, + 0x866c31efcf4a5cebL,0x6dd991b89554f30cL,0x0000000000000066L } }, + /* 17 << 154 */ + { { 0x4d8b375c14bc49d9L,0x93b4385f18a01862L,0xc3b0ad5ee4090d89L, + 0x2038dbc2d3248ee8L,0x84c0d4bc2c487600L,0xc5ec52743ea53b08L, + 0x92d8a64f7d099e76L,0x26a6f8d1e6d580edL,0x00000000000000ecL }, + { 0x105080aa3d7beff0L,0x854ce36ec85ce751L,0x94e0df2bbbe2d753L, + 0xdd3a67f7a54ec65dL,0x21d6f4c15968bba7L,0x5951642df707706fL, + 0xc5bd9cd33158e447L,0x566eae345c290325L,0x0000000000000042L } }, + /* 18 << 154 */ + { { 0x0a1212fdf6bf1120L,0x1bc14a398cc5a658L,0x351552bdc1b52efcL, + 0x0d19749245d55560L,0x425372c97793c57fL,0xf0c48415f5838d74L, + 0x4153e1917c64e071L,0xc43828e2373272eaL,0x00000000000001e7L }, + { 0xba8932ba91dc2c19L,0xa5756a08a5163390L,0xf64a62cb65738648L, + 0x66c5c1a874a8bf79L,0x355f420f98ce1d80L,0x2de834b1f30008bbL, + 0x580f0664950681b0L,0x8630c1290033866aL,0x00000000000001deL } }, + /* 19 << 154 */ + { { 0xe77f6cc4e46e0754L,0x77af9150a883763fL,0xc5aad5130393ef51L, + 0x62ccd275526f9855L,0x83df1ac8abadaba5L,0xf6caa723741443caL, + 0xf43bbef6876c8f4bL,0x8c6e39220425ccd7L,0x00000000000001a6L }, + { 0xc5cf64d1ac4879ccL,0x1b761a83ae4857efL,0x480792c183901056L, + 0xef90c8b6047b0328L,0xb86c8a422c523b1cL,0x6aa932a8a31dd46eL, + 0x64259ea3d3dea6ceL,0x092ba1c68d41d4d8L,0x00000000000000faL } }, + /* 20 << 154 */ + { { 0xb7c90bf85b923896L,0x42f96bafd3f08076L,0xddbd3b0b0bd90a84L, + 0x8adbbb43e00a7e7bL,0xd3205108b2059c91L,0xccb379b76223fd65L, + 0x2c1eb7e2a6f8571eL,0x13f3bc56d80ebb13L,0x0000000000000092L }, + { 
0x5e23bc738d30df11L,0xaab73a1f4aabdcd3L,0x4cd660823e764e18L, + 0x942328161d0c98c4L,0xd8b64c225b5048f0L,0xaf741f8ad2a17f6fL, + 0xc1540070cdb03f14L,0xa16e5e67a2476ee5L,0x00000000000000a7L } }, + /* 21 << 154 */ + { { 0x94c155b1feb3c7cfL,0x703b56a4cc26b74bL,0x7291810d1f436b9fL, + 0xe3ef9b82d28ad0adL,0xc6941fe32504f06bL,0x370e5a2eadb26060L, + 0x4328a40ec8e317ddL,0xc67fd42b0bfa2d08L,0x000000000000012dL }, + { 0xb93e8547eb854e1aL,0xdadcb35a2b4d3b66L,0x1de5ea2d8ff90fd7L, + 0xcb5d75a2d722dc7fL,0x5c4192d10e3ec171L,0x2ba11f14ce7ed871L, + 0xc43e303f9840ca81L,0x34cd4fabb1da84e6L,0x00000000000001bdL } }, + /* 22 << 154 */ + { { 0x90e3c53ef55acde5L,0x910bc008184aa9b0L,0xa62f771bb71d8054L, + 0x4f6e1a5480ff577fL,0xdaee302fe079e754L,0xeba6f1187bcd7837L, + 0xec13761caa06a669L,0xd39dd3f3f5ea95fdL,0x00000000000001c9L }, + { 0x049ae3a64b42938aL,0x8263e429b24a6867L,0xfb618fee9e9b8374L, + 0x77cd9ba6a9571903L,0x6c5a592dcd0c2186L,0x7539392cf94608faL, + 0xdb4b672140df175aL,0x7b74240b2438f55eL,0x000000000000009dL } }, + /* 23 << 154 */ + { { 0x7daf2c363805dd53L,0xedea2d49323f28d9L,0x6f8dce4163a8e4e3L, + 0xe5d47bb559423e7dL,0xdd4329e18b816db6L,0xcb6f85f22cffb9a4L, + 0x5b0ddadbae45f979L,0x6a78983747aea678L,0x0000000000000082L }, + { 0x275090b481bcd3e9L,0xfade474e249fdc18L,0x2ab90ae1bdfb4666L, + 0xbfa10e3514787825L,0x8e5ed81ce23c2f2bL,0xacf48e50825ca78fL, + 0x46b832f9ef7e9e7aL,0xf3d4d7e9278268fcL,0x0000000000000162L } }, + /* 24 << 154 */ + { { 0xece31ecffe1d1b82L,0x368c5ac029036cfdL,0x217420afa13ff567L, + 0x1b29e5f077776b5bL,0xdd1a5dda98f42d23L,0xba05233bb6c34257L, + 0xaea7d3bd2d56e9b5L,0x3d5777e784e7205cL,0x00000000000001c4L }, + { 0xf4dd56b5e7c6eb47L,0xc479b9d64a1b9d04L,0xfa1d38a48117a210L, + 0xdc603246dfb5a934L,0x9e8156abda0675ecL,0x6937ccce0ef78eabL, + 0x72dd487ad2e5cc67L,0xb735f780d372a668L,0x0000000000000049L } }, + /* 25 << 154 */ + { { 0xff5df7dd9f3f2988L,0xa20bc35e1be8119dL,0x244ccc83e9801d10L, + 0x5ab8e9f3c620ba2eL,0x7a1cb9e05237f9aaL,0x0e50887f61d0d851L, + 
0x95f1d7727e0353c5L,0xfe7dedca9253110bL,0x00000000000001ebL }, + { 0xa2337f586a64b9c2L,0xb9e4cb8e44e70c68L,0x0812cb6c5ecd48d8L, + 0xd9cf0c0e6d048efdL,0x5fc207222d2d3b1dL,0x3efd117c6b03a35fL, + 0xa42ea8a8cef040c3L,0x80c4e52b3199fa1bL,0x00000000000001c7L } }, + /* 26 << 154 */ + { { 0xfd5782606e9f3949L,0x94e36ca087ee9707L,0x919b401eb1a117e4L, + 0x2418b5ce109738b5L,0x0185bb8483a54440L,0x1d22a848d6945f4eL, + 0x17c275c3624fd9caL,0x81862ee627b15448L,0x0000000000000108L }, + { 0x57cc2c959184dc02L,0xb31ae7d544011d47L,0xe076ac2d417b7eb1L, + 0xa5b2ac1baa4495ebL,0x14ca78cad984cb5fL,0x59005112daa7bbc6L, + 0xf45dc10071d06591L,0x681ef23be278b5c0L,0x00000000000001e3L } }, + /* 27 << 154 */ + { { 0x8f8240265a0a487dL,0xdbd1d7c23e8d3e90L,0x1a69874bd3c62608L, + 0x435c7884c6cb4268L,0x85b31d00655218b6L,0xcacf7eb498a67fc2L, + 0xd20215ddf7a56bd5L,0xc6950c8be60ce8e3L,0x00000000000001e7L }, + { 0x1cba47452e984f46L,0xca242d3a2a298b6bL,0xa1d3653a98c71337L, + 0x5bc149263ad4bafbL,0x09f200a204dfe1ddL,0x78e00b4714fa42e9L, + 0x1f80dbadfc2c69b2L,0xfd84c0ea9cd01382L,0x0000000000000098L } }, + /* 28 << 154 */ + { { 0xc3b2adb729fffdeeL,0x154f4d7951fb8606L,0x56e497fe9d7f4b0cL, + 0xe5e69faa0df7fd94L,0x1891fe5d114fa3caL,0x4162f0e9045c3286L, + 0x98de02c4780253d6L,0x703080027947b60fL,0x000000000000019cL }, + { 0xe77f7b12a9510bd2L,0xc663fbe72d32a857L,0x6538a301b9274d4fL, + 0xb2c191d0bb01302dL,0x567474f26366bbf5L,0x8446925cae1e52e9L, + 0x3ee00ea9209b55e8L,0x3d76982d2419e4bcL,0x0000000000000091L } }, + /* 29 << 154 */ + { { 0x6ce35fdc36bf03a9L,0x2cca3e4b19fd800dL,0x1c42c2ebeb7c3707L, + 0x345e0391fc298d33L,0x6f552d1aefea0fd9L,0xc22eadb8e73d461fL, + 0xe49110b96d4435f7L,0xf7a2fb8a81759c25L,0x00000000000001f6L }, + { 0xc09df3a632d5a1d8L,0x423fb07a53b9696dL,0x9a5942112802e680L, + 0x59e615a3e5d1b962L,0xf418291d6976a9d9L,0x93addf8326ee6369L, + 0x376c918a7446e0d4L,0x73ff26b4530f3c52L,0x0000000000000179L } }, + /* 30 << 154 */ + { { 0x271550ac40465c23L,0x7a499e8dee0dd714L,0x6feba0dc2394b340L, + 
0x666de55a6e293ce6L,0x3dcefd6d17e817f4L,0x337e774f93eedd97L, + 0xc9c103f37a86610eL,0x97ae91cfc0eb2c8bL,0x00000000000000d3L }, + { 0xa7dfe71a65981a9aL,0x3d63534234407037L,0x05739745cfe533bcL, + 0xf5188158a1bfb967L,0x2d76a9c44e4702c5L,0x7e11dbe04bc7cf4dL, + 0xd6a6cac11e0faed1L,0x04ceafa1c20c5723L,0x000000000000006fL } }, + /* 31 << 154 */ + { { 0xad5aac564ea2ba2bL,0xad0a5b610b4933f3L,0x5aa51da928d19d01L, + 0x15a74ffe94ed89f4L,0xd23335608a870eb2L,0xc94daf2ed5e5a24eL, + 0xd98f616d567659edL,0x08f6a62c6c8eddd7L,0x00000000000001b1L }, + { 0xab4fe43bf3295b7bL,0x5562384426f75385L,0xe9d70df0097f11c9L, + 0x7cf18143e23c34a2L,0x2a7b24fa136938d0L,0x74154841e678eea9L, + 0x0ec99b1920202480L,0x4d1aea7252e4b7d0L,0x000000000000005aL } }, + /* 32 << 154 */ + { { 0x08136cbf025189d4L,0xa1c389cbc1b25a05L,0xbecdade76e9ed11dL, + 0x6557006ba7f3f38aL,0x96d6c877bc852dbdL,0x3a0afe4bc53efd4fL, + 0xc7f58410febe4737L,0x86deba3b84e35e2fL,0x000000000000015bL }, + { 0x96256db24dfa7da8L,0x379735573dccf202L,0x2e0b263e8fcc1274L, + 0x14870fe89ee7b82eL,0xcb8f9ba5425ca276L,0x683c748e7d876e79L, + 0x48e990d5de2f2cfeL,0x2ecc8f1f48547013L,0x000000000000018fL } }, + /* 33 << 154 */ + { { 0x5b7d4e5300df51d2L,0xd82ba908518c2d9bL,0x95121a5d8447eb0bL, + 0xc25d07a662755277L,0x51cbe7f5c19c32ceL,0xd038800e4c726273L, + 0x1e1b0e9125fbcfcfL,0x5cb2408eb5e8a4eeL,0x0000000000000122L }, + { 0x940da58ede254894L,0x6fe04e02f5d5b5bbL,0xbe069bad692f537aL, + 0x6d24c50013873687L,0xd1cd8bd1089ad9bdL,0x07929deff8eff657L, + 0x8d4d97bab426304eL,0xefe85addf45dc5d2L,0x0000000000000014L } }, + /* 34 << 154 */ + { { 0xa5fc916ec715b5a2L,0x7e6dcd80c069c5c9L,0x94dc1dd10085e7b3L, + 0x81041f123e71623cL,0x29f931fd28807fafL,0xaa5f757aaa8003e0L, + 0x0f9865bb318fe406L,0x2f18ba42cbeb37eaL,0x00000000000001bfL }, + { 0x8e9acc7259cb1f04L,0x3b9c7ff8d790d71aL,0x081ec7dcecbc1c2aL, + 0x2ee6eb196e63fe41L,0x95815ea0f4ad6fa0L,0x8bd5c1ad79cf7f1dL, + 0x3d73c9c85ae89337L,0x2eaa40409b1286a1L,0x00000000000001f3L } }, + /* 35 << 154 */ + { { 
0xec9fa757fbefa7e8L,0xb41dfd88382bb7b7L,0xf9cd0f6314c0b89fL, + 0x94a8c09b56c7b584L,0x251fde74a9d131acL,0xfe3e706d8a47f3e6L, + 0xa2cc25d799bfb317L,0xb9cb1e4114c7fe92L,0x000000000000001dL }, + { 0x9daf45e2768f397bL,0x1903422a83975ab8L,0x0e42b292a61b4715L, + 0x947c78bb1a1323d8L,0xbe36ec019d1699c0L,0x9ac1807792e2ce89L, + 0x8dccf873b118c12dL,0x879c7a2187638db3L,0x00000000000000bcL } }, + /* 36 << 154 */ + { { 0x15dba0d81dbeec0aL,0x6a1671e23145554fL,0x6f59000813588c31L, + 0x404b6424783bb97dL,0x30502b92cc7922feL,0xa1bd25ce1517a238L, + 0xeb81e9dd5fbc51a5L,0xea919df3bdc3b1f2L,0x000000000000009dL }, + { 0x70802002a4aa5953L,0xb908e35d23fbdf34L,0xb47ddc1ad971a142L, + 0xe4f88a184c6ebd7dL,0xb2f7177f3065ed39L,0xd7b8cc98162b61aaL, + 0x8472dbd5774c6ad4L,0x8d1dfea98f87d2d0L,0x000000000000018aL } }, + /* 37 << 154 */ + { { 0x5caf9816316399caL,0x29ecd26efcaa7fe5L,0x07f4e09b03d18979L, + 0x00e88b2581b43697L,0xb04d6d03bd0408bfL,0xd3e4a0d3cf0b6793L, + 0xd98262dbce0e8490L,0x3ed76b1601bed96eL,0x000000000000015aL }, + { 0xcbfff64a7d070eadL,0xfd8db6ed1776da0eL,0xca03a6a2b1279cecL, + 0x16f6ec788b989efdL,0x17f581a70dabb5e0L,0x975c57adf8b5a413L, + 0x34bd7827ed1e3568L,0x7460f3374292fb3fL,0x0000000000000193L } }, + /* 38 << 154 */ + { { 0x754f1e7fd6633590L,0xb281de767862f091L,0x9135b836c3494e22L, + 0x4cb7a1bd84da9104L,0x20bf9b88ed7088a0L,0xf7e2c4378ced80b5L, + 0xfafdaef6038f3ae6L,0xd948d168e786659fL,0x00000000000000ddL }, + { 0xc84110b66115d5e3L,0xf7cb7190a7d744e1L,0x2542a0d478a699f4L, + 0xf0ae20cd574dd390L,0x4ce38ec5f120b181L,0x3763208a49ac50eaL, + 0x24a93ddd6df7ab1eL,0xd9ae90ec2e3c1f8aL,0x000000000000018fL } }, + /* 39 << 154 */ + { { 0xc6069b8567f9df8fL,0xf9f56d614436dfacL,0xa3926590bf7359e6L, + 0xd22bc2568d84397aL,0xdbf0455ce61706d0L,0xd01ed734b7bd8dafL, + 0x6dd4ffa085c07accL,0x474802574f3c459fL,0x0000000000000198L }, + { 0x5cc9670110bf26f0L,0xe4166acc4f70a85aL,0x73219eb9cb8f0809L, + 0xd9798096679a93cdL,0x6737a07257df9ffcL,0x1c897e3a1e65b481L, + 
0x87fb6a8b73063acbL,0x28140113b7aded18L,0x000000000000017fL } }, + /* 40 << 154 */ + { { 0xadcd31054115d224L,0x1fdacdebb7fed1f1L,0x878fcb459e5a2448L, + 0xc3910eabf7e8596aL,0x5fca4f0e37f267f6L,0xd10d0aa06e9fb79fL, + 0x589f94624b12a182L,0x2864c5acfb879db8L,0x00000000000001faL }, + { 0x6c024c2c07e34963L,0x49581cb82de2a43dL,0x4cdcc98e29c06b7fL, + 0x338a9c59b976f565L,0x38540bc223c8ff24L,0x53f3c9b98bc61d4aL, + 0x3eaa8166e3913cd4L,0x4620fe57455e4018L,0x00000000000000b9L } }, + /* 41 << 154 */ + { { 0x3d88a754db29e8a3L,0x7bb8f52c14527b19L,0x2d3bb7d2dbd788d1L, + 0x49dc14bd036cfe26L,0x99471968e3c4e712L,0x0ac8222f664d6f40L, + 0xd76f5194480b3215L,0xc9359ce9711701abL,0x00000000000001e9L }, + { 0x159c8f1b440f4fe5L,0x56a3531e0b66c225L,0x132eab73408ea71fL, + 0x7ca9883f8ce9f301L,0xa709285bc3646318L,0x7d27e063cbe3e36dL, + 0x751cfaef4d8cd738L,0x0efabf55a2b6ffd2L,0x000000000000013cL } }, + /* 42 << 154 */ + { { 0x7a9706beff928d4eL,0xbb8726a8faba89c6L,0x39fe9f9bcfa998b9L, + 0xd62320f745c97a8dL,0x9742ef6f536968ebL,0x2cd382d2901e7715L, + 0x1439fc59a576e87dL,0xfa71d3e06310658eL,0x00000000000000c7L }, + { 0x9b0cf9a2d3308f7dL,0x8c7587624856e971L,0x58b4f4da34174e29L, + 0xa8406c3fda8aadc3L,0xc13d8977c487e6bdL,0x45e5cd3baf56ece9L, + 0x243c3a5ffe52108fL,0x52d1472fea829c13L,0x00000000000001dcL } }, + /* 43 << 154 */ + { { 0x8bb77dc36083c21cL,0x77acd9f15ed5b28cL,0x45d3bd9415b1ecbeL, + 0x3801687abc78b16aL,0x65a8b24dc6a5dc14L,0x5c1f39f925ba9696L, + 0x094d5000498bf2fdL,0x5dc5f51d8d58cbc4L,0x00000000000000a4L }, + { 0x260fd308925ee9eeL,0x19449853450ade34L,0x8950cacc6228bb71L, + 0x04853a651585b4e1L,0x10cca9b124de0ad4L,0xde491b7b3294f352L, + 0x56f35ecb05bf7690L,0x1a71fd8811eab2d9L,0x0000000000000123L } }, + /* 44 << 154 */ + { { 0x22d05a079995b39fL,0x95d85f0e97572e8bL,0xcd0262424cb31201L, + 0x15c10195a9661cffL,0x7366e9943d4d42b5L,0x3d89205fd08b2304L, + 0x4f08bfe1c72f3319L,0xf2bb6fef51da0362L,0x000000000000000cL }, + { 0x61689ed6daa8789cL,0x1342f413c80ebe32L,0x10e3c8bc6cf15c5dL, + 
0xb59a6f209e380e00L,0x6faf882b16a0a01aL,0x5b43ff2c96deb592L, + 0x61ffa81c22cda20aL,0x825e5e4545d1497cL,0x0000000000000070L } }, + /* 45 << 154 */ + { { 0xf3a2aa2ef62a271dL,0x608d878cdf46740aL,0x2ebfec653fb8ed79L, + 0x085d812e2eb1d516L,0x739c07e661ebd520L,0x085705a0bd9b19a6L, + 0x6589012abf32e3aeL,0x87354918e8e0d4a7L,0x00000000000000f6L }, + { 0x2ae316a69560d282L,0xff6571977fb5311aL,0x2e317d5bfd7b740eL, + 0x392cd22956bb7d02L,0x9a84fd4ad462ed84L,0x381b59c019b2f83eL, + 0x230a68ca47a6c3e2L,0xc9372bc4660fbed8L,0x00000000000001faL } }, + /* 46 << 154 */ + { { 0x610b5c299b82e4dbL,0xd237f09a0bdec03fL,0x8509f5a57eab51f4L, + 0x418b77d8c604baebL,0x02830b0ff0b4e07fL,0x4af187f4a4d4df6fL, + 0x00483aa9e0bb134bL,0x9b0b6e7722357f23L,0x0000000000000120L }, + { 0xcb09d589a259bfb1L,0xa708770853774d43L,0xbab0b5d04504b5d6L, + 0xc128fefd23ae1fc3L,0x84a99019f0cf68fbL,0x3bbf2bd5b1656aaeL, + 0x36294624eda54a94L,0x0c6df507d4e9b03dL,0x0000000000000068L } }, + /* 47 << 154 */ + { { 0xb3210cb2d8b230d3L,0x1e173d0445f2b4dfL,0x777f7e921e51c3c1L, + 0x1f09c83dca2d24a9L,0x18f6110d7b0ed41bL,0xfc47b77f1741f679L, + 0xed0c01d530bbb573L,0x79d732666b1edf8dL,0x00000000000000ccL }, + { 0xd80eca007d182a43L,0x8d7a50665674063cL,0x6f3a06ba012fb6faL, + 0xe06f683a10c1f5c8L,0x305692702fb5cb0bL,0x009de930236d12d8L, + 0x1b3bc0356438e8fbL,0xb4c91501af74c6d3L,0x000000000000003bL } }, + /* 48 << 154 */ + { { 0x0724e185fe85076bL,0xbda15c151e532717L,0x6005728065e7c149L, + 0x1a0cafe8158e6817L,0xee796ad900baa1ddL,0x8ff14a074d3ca73aL, + 0xf153c867a3604f8bL,0xe07458b5c4ca1be8L,0x00000000000001c5L }, + { 0xb8c4f3ad8f7dc861L,0x98ff4d7f6c38d10dL,0xda77b9018c10e7a5L, + 0x448fbc787d69625bL,0x91ff7124ed5dcb7bL,0xaf0fb28be310d65cL, + 0xacf3cfc0c388141aL,0x2d3d003454bbb99cL,0x0000000000000097L } }, + /* 49 << 154 */ + { { 0x81bffa6ca966e8acL,0x048dfaa314845f1dL,0xb38f181e803a2355L, + 0x0f854fe03a91ad7aL,0xb523f3665ce6a9d7L,0xf40cf03a828f212dL, + 0x128550efe3b4d93eL,0x16cb5eee53d13993L,0x000000000000011eL }, + { 
0xde85cde7c9e55138L,0xedc613495183130aL,0xaa4bac70d709a5b4L, + 0x0e69e8bd50ab30c0L,0xef2c35b69738eaddL,0x486ac544ed00f833L, + 0xbeaf94aad4708e9cL,0x1be9775c9a0bfb65L,0x0000000000000111L } }, + /* 50 << 154 */ + { { 0x111041700f944affL,0xe0fb9dfafadc4f56L,0x86a7245c1411b66cL, + 0x25afc20163eb7a89L,0x3400be4c6404ec18L,0x62d34a7e53c67d6cL, + 0x4feaee310655849bL,0xd0343df08ad7770cL,0x000000000000000cL }, + { 0x1a2a4a20c81c959eL,0x9386848e8c2194e7L,0xce7c378aca4024edL, + 0xd336fb923710a3aaL,0xfea376078da68818L,0x885e98fda10bd29aL, + 0x7fc72000f5d55becL,0x5f1d796a7f14dad7L,0x00000000000001c4L } }, + /* 51 << 154 */ + { { 0x808147c7c32eac97L,0xb959c61d91e84536L,0x9ce837b46b7f230bL, + 0x62b7aebfa19a78cfL,0xfdbf0af641120b6dL,0x30b5525b18a1bdd4L, + 0xfe54bd619fc0f391L,0x0f19f880e152702aL,0x000000000000000aL }, + { 0xfea6fa9a19cd0ff3L,0x4f3c3272b86342a0L,0xcb3db4f982cd5ce9L, + 0x626a3ebd70dd4903L,0x3103519773bd843fL,0xddcb8135885ec254L, + 0x19e39cdd277200ffL,0xace2b791def0d1b4L,0x0000000000000098L } }, + /* 52 << 154 */ + { { 0x9429698b23371e14L,0xd9d9270c3fdb1b92L,0xa4a76289dd4ba2a1L, + 0xd9dd43d69ad4dc60L,0xba76236063dc6801L,0x210be4e3b8a45231L, + 0xaaa24379f764e727L,0x7b1a75a82b1fed01L,0x000000000000006dL }, + { 0x89e40d215f621487L,0x70c38498a2f303d5L,0xa88ce1890faffebdL, + 0x0cc0eae6735ebceeL,0x2b755a3d8518c06bL,0xf3cd31dc62f92c33L, + 0x6f7fe733c7562c48L,0x749f7a1c83b00376L,0x00000000000001e2L } }, + /* 53 << 154 */ + { { 0x2f1ce070f5ce45c0L,0x15740dd943485e36L,0x2d64f265dda2a33fL, + 0x121759ed77b450e6L,0x4948df7e9b3e74e3L,0xcbe885b4fcd25d35L, + 0xafbf90f2e3436d1aL,0x5e26435a3deb2819L,0x000000000000016aL }, + { 0xbcaae7b82a463668L,0x57186a775ac86d9eL,0xc0d15d503d5f4223L, + 0x4a2f6842c4fcf264L,0x79991f8c301187fdL,0xc699b4b77352976fL, + 0x51c47ee14838204eL,0x716e444dc03bb06cL,0x0000000000000151L } }, + /* 54 << 154 */ + { { 0x8d3fcb66a7684cb1L,0xa4bb8ee051970634L,0x29ea9efdb52f9d9eL, + 0x63fef7659018974fL,0xe31f478fb752b634L,0xdcc67b7730b8f98fL, + 
0xafff9899b4b62d24L,0x7d4a60ee4efb0c00L,0x000000000000005dL }, + { 0x17d0f2ffa2bb6822L,0x3d74b52ab3b2e1d7L,0xf7e7150d5ba4a476L, + 0x712e26f18771c73eL,0x22f9054ff322f80dL,0x9863136b3c085bcfL, + 0x317a39fcc7e4dc86L,0x96a83008be7f0929L,0x0000000000000092L } }, + /* 55 << 154 */ + { { 0xe0908d5214cb295cL,0xf74df339f363ef28L,0x56eab1abdba8fa26L, + 0x33f43f008ecabae3L,0x5d1acbf12425e382L,0x480bf576ee67518fL, + 0xd6644dd469d388d3L,0xb3c512501e64d1a1L,0x0000000000000069L }, + { 0xcb3010b42e3bfab8L,0xdb788efcba2e0da3L,0x4bd5ad9cf9d0d7c4L, + 0xd363a5471214e14eL,0x108980023a09dbd2L,0xb0745bedf03a2af2L, + 0x5d56b6aefacfc072L,0x37ac6ad4e2873b41L,0x00000000000000b1L } }, + /* 56 << 154 */ + { { 0xcf04e87527a5499fL,0xea912dfb28f3fc97L,0x4a9e4e41d59efb52L, + 0x9ccb0b83f8f373c7L,0x684d2d75ba1b61ecL,0x15449a0254d58701L, + 0xee02c9456a4e86aeL,0x5887a0882069e1dcL,0x000000000000006aL }, + { 0xcf7bfc8c922649ceL,0xb4fff12e522a5024L,0x262bf90df6d0066eL, + 0x97ae80928e0b0afeL,0x7191c6fe18507cbcL,0x54e973e1b2ae3ee2L, + 0x41b734bc7bea70ecL,0x38975ba3b3f6b89bL,0x0000000000000138L } }, + /* 57 << 154 */ + { { 0x0cc156bd6c72b806L,0xa9d9c17b0dd25daeL,0x3b6c0857b6738119L, + 0xb95b735d3476c04bL,0x737d1ec2dc86969eL,0xb36e0bc8c1cb47bcL, + 0x83f15727ea463cd7L,0xd7484fb02e58dce3L,0x0000000000000060L }, + { 0x85416de81bc14766L,0xae6a0b47fadb3be2L,0x496b92ab303d0900L, + 0x3fa4d158292db573L,0xf501ec245e3fb9faL,0xa317ae11bf021888L, + 0xfef0d5ec2271c572L,0x7e69594e90d2ebafL,0x0000000000000162L } }, + /* 58 << 154 */ + { { 0x19f57f08b5d2523cL,0x197abf7ece1690c1L,0x9fdf174c768a6dafL, + 0xb1a50c4b7f45acc9L,0xcac4790de77ff44cL,0xc4985159642f4c0bL, + 0xdfc5151faf23c14cL,0x61f453cb9b5fb8d8L,0x00000000000000abL }, + { 0x61f3c9ea1432808eL,0x4c3b91d68da6cd90L,0xa97b9dd60b47817dL, + 0xe51ba786ae34dcaaL,0x6e1ab1b12bfe1245L,0xcf8435b9aa609192L, + 0xdf7a582cf97cfe54L,0xf9871f1f80fca392L,0x000000000000014dL } }, + /* 59 << 154 */ + { { 0xbd5fce303e85ba4cL,0x12152109b1d4ff28L,0xa94551982256991dL, + 
0xe54b2276191dc793L,0x7aba8d3c1232ad0dL,0x95b7d7023e493e3fL, + 0x90c17102574f412aL,0xab5d8dabf1245c79L,0x0000000000000119L }, + { 0xd64211426352b215L,0xbea2f2d73ca86b5eL,0x4c3b97bbedf46484L, + 0x1bd94ff68332db35L,0xb8729f9a7dcd9899L,0x461df4172e1c9299L, + 0xbfbcdb18295c8819L,0x1613563c551a6832L,0x000000000000017fL } }, + /* 60 << 154 */ + { { 0x704d466059d5d8e0L,0xa4a222b252d7ca1eL,0xa45b5ce29c26a86eL, + 0xc9668e73dfb9fb62L,0x13efde0ad58183ffL,0xdc752a59850dedb0L, + 0x0314cbc5ebf8c27dL,0x279680f3d1ab5f41L,0x0000000000000118L }, + { 0x3b8a1d583184e940L,0x8cc52f17cd1fa6c1L,0x5118b9093f701026L, + 0xe04d31daac34a4afL,0xf3a66f73282febacL,0x05018340e8d029f8L, + 0x1a8bd0002058c971L,0x61198c7e7e962723L,0x00000000000000d9L } }, + /* 61 << 154 */ + { { 0xa86cc3e807f97c2bL,0x8c9e8034112ae91dL,0x1cfc242420bcc2f3L, + 0xbb04d40865dcf7adL,0x8b7bd0b5e4652017L,0x080b6a0df49cec36L, + 0x94d0ab4360e82729L,0x97ec8ea8a301c00dL,0x0000000000000012L }, + { 0x9eda2fd27ed16beeL,0xbfef5c8acc95961fL,0x877ec19bae691773L, + 0x2aad9eff76522fecL,0x4daeb48170b656f0L,0xc4fb1cd626197e6fL, + 0xd8bab20ec0e741b5L,0x4b5d6d8e16332fd1L,0x0000000000000044L } }, + /* 62 << 154 */ + { { 0x67d03c6a85f8f5b5L,0xdcb9caba90e96ceaL,0xd23feeb7f3811dc4L, + 0x8c54b7237276f1b4L,0xd09156767d4bc3c9L,0xf48a98c87a1d17a8L, + 0xbe959d5b76b265b7L,0x6fe4777d4abdcaa0L,0x00000000000001f5L }, + { 0x2807c04517c8deaeL,0xd1bbf745674d701eL,0xa8e5a7bf272c6e74L, + 0x1901e0947c7c55cdL,0x2e1316c029711934L,0x05f68290f804ee8dL, + 0xd3a3cb0a7511e346L,0xd3a640c3c5565874L,0x000000000000000dL } }, + /* 63 << 154 */ + { { 0x088fccf4ea02385fL,0xdec0885d3c2e714bL,0x02d6b86fad82e553L, + 0x8712c2bf6c241bbcL,0xf59427d81f33a407L,0x172dd5359fa5bde4L, + 0x019b1451380694e5L,0x55b2c33bf9c76789L,0x0000000000000135L }, + { 0xf73d19986397fbd3L,0xfb91732482721b52L,0xd42957f6aee6a644L, + 0x9d7857f260cbb57cL,0x6dc17afa5df4783aL,0x875dc5e0e0382dd0L, + 0x458c445b02d1298bL,0x8a1562fb5183e144L,0x00000000000000adL } }, + /* 64 << 154 */ + { { 
0x11446efe378cab85L,0x4c3c68cfbc48bf6dL,0x5e51466164c19bafL, + 0xc0f00735d5cbb135L,0x5d4844a7a693b1a9L,0x517a48719579fdafL, + 0x5ead064b532c1e6eL,0x60207c2d4135ecb1L,0x0000000000000172L }, + { 0x4b033a37843dc902L,0xb2d3664cc27a9863L,0xf4ebdc3518280e54L, + 0xac7789a6ae02b757L,0x50084ba410dd96f8L,0xcc3459371f5f6349L, + 0x5f65aca8fdee6f84L,0x4aac5f5902d841cfL,0x00000000000001b9L } }, + /* 0 << 161 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 161 */ + { { 0x7c44096f532856caL,0xf054ac944d701f46L,0x1bf992e78de81577L, + 0x871d2df2a90a4d2eL,0x963357a5775cabe3L,0x2e92d83dbae3ac5dL, + 0xc39e645c415be4dbL,0xbcca60e29781b22fL,0x00000000000001c9L }, + { 0x638cf76cfc5374edL,0xffbfbc7a24278420L,0xde577d69e7668bc6L, + 0x88444fdebab7b568L,0x4cb8237b59d220a6L,0x02186c1c25b582ebL, + 0xe4a04cb19b639f32L,0xd12b0e644590e20dL,0x00000000000000abL } }, + /* 2 << 161 */ + { { 0xdd0f09b1bb55f47eL,0xfacbdef0be55c9d6L,0x8b8b1a9cbb62c5eeL, + 0x617ff446eb8f0bbdL,0x24176f925c1efe94L,0xf27ae452962f4019L, + 0x3b812a15220fc04dL,0xb17b1b9bcc92239dL,0x000000000000005cL }, + { 0x9200f2e48a465257L,0xc8ca2a1cfbb69960L,0x5410ef875a8ff2aaL, + 0x87bc97b0e11b6896L,0x1d3a496952263f49L,0x2ed88b0bcca15d27L, + 0x90ab34c1765c9300L,0x719d9bc7d2630404L,0x0000000000000010L } }, + /* 3 << 161 */ + { { 0x6366cc838d9e9c1eL,0x6da8b24c7cf6ea2bL,0xcf34ac0834a0c6ddL, + 0x957954dde3a1bea6L,0x2ea0ef68f4060336L,0xe61893a5a84fdf5dL, + 0x03cacab47bf522c5L,0x823249dd0dbf1790L,0x0000000000000145L }, + { 0x2ea0344dab3e90d0L,0x5c70cc712bcccc9cL,0x5eec27550063e840L, + 0x3983cb1bbe50d3a5L,0x462b3dc27b09ff44L,0xf779391edd6b34bbL, + 0x6c00d9b8bbf36f70L,0x9e0eb0d1f82ee371L,0x0000000000000080L } }, + /* 4 << 161 */ + { { 0x41b4d05bd15e752dL,0xce20b1f846e78fafL,0xb529cc0108d808b5L, + 0xd934f10f2210cf60L,0x9f2e7e1be41eca93L,0xc09da7a94d82d15dL, + 0x45a84216a1ed7860L,0x9f6fa43de6be684eL,0x0000000000000109L }, + { 
0x592bd464ecf53a79L,0x9dac1bb8349754edL,0x79bbe62284339a7dL, + 0x29c3d4aaf9f4fa5aL,0x4788cda816dc4698L,0xf94ea4495c3060efL, + 0xc5f4191dcb3c2b37L,0xeb720d23ff69861aL,0x0000000000000026L } }, + /* 5 << 161 */ + { { 0xd2285ad12caa3808L,0xb871463efbc2278eL,0x361fcadec889c9b8L, + 0xf261c688541e9cc3L,0x2cdf861c7a27b983L,0xc97af02cacc87384L, + 0x7a80f6087638d824L,0x79852d0b79663aabL,0x00000000000001e1L }, + { 0x9a2b626f98893ca3L,0x31a7d784d1e24597L,0xd230aba2129eedbdL, + 0x1d339471d270be5eL,0x826c3dd781950b28L,0xfa1b74c66b0b44f4L, + 0x5d2131e993fdbbceL,0x78176ad853eabdffL,0x0000000000000002L } }, + /* 6 << 161 */ + { { 0x52df1de37f4c74e7L,0xa1554957c7c2a420L,0x2bb5b1a7a850b73bL, + 0xd0b58abf05174618L,0x09c054687bb9dc77L,0x50cca4a64e6b8e44L, + 0x53298c54a61636f7L,0xf0325227718f824cL,0x00000000000000daL }, + { 0x10f1de0bbf549e72L,0x7153f47caccd5cd7L,0x9c171b49425abf63L, + 0x9902c984eac3d535L,0x9853a7df0a098af6L,0x7f88102cae0627b1L, + 0x318ec476f6bd8690L,0xaca786dae42b065eL,0x0000000000000069L } }, + /* 7 << 161 */ + { { 0x75eb8d356bc9662aL,0xfdd82d5f0d8bcb50L,0xd8caaf4b9d8ad82fL, + 0xffb9e309d0642731L,0xb514ec984da42a1eL,0x9a765c8a7d4421ffL, + 0x0ad7d2be7e9ae93dL,0xab4d7910c97fd28bL,0x0000000000000130L }, + { 0x4f08b836fb2b614dL,0x259737dca73051d0L,0xea3c3bc8c30af31aL, + 0x5cee4c934a11e8faL,0x4be3e8e3b1fed7ecL,0x0b313e360052c235L, + 0xd760058f5f082363L,0xc6e58d205fcae886L,0x000000000000000dL } }, + /* 8 << 161 */ + { { 0x7b9409bb42df612fL,0x32f47dd21bc66a3fL,0x213f80e023f375feL, + 0xaa71c57f06aa6c21L,0x5bf7b3a784298947L,0x001c2c965b608fa3L, + 0x417619c6173c7f4fL,0xbcff66acd4a20677L,0x00000000000000b3L }, + { 0x81b9b5640603bc79L,0x8f39155ef875e591L,0xf6a508fddfd32c0dL, + 0x7eace6f96b0dac71L,0xe40163d89e4c1eb3L,0x265226c5ff10fa6dL, + 0x1e1af5cbda76d272L,0x954bc056a10dc788L,0x000000000000012cL } }, + /* 9 << 161 */ + { { 0xbca524e7fadb3e3dL,0x0d2906d5783f176cL,0xd5e30887e94c1a4cL, + 0x3f2d19b69790cfbfL,0xe0965575d49af522L,0x3830ed4d1881a1acL, + 
0x207dbc8554f43059L,0xf0f178b3973ac147L,0x000000000000003bL }, + { 0x7f0e0020693db310L,0x4914754404980bdaL,0x2d9a8176cc4521bfL, + 0x3893e7acc5e2d29fL,0xa99dd4d11afa9ad3L,0x8a6ce7fab2e0af41L, + 0x868c12a1a5188ad3L,0xd1138b17b8debefeL,0x00000000000000feL } }, + /* 10 << 161 */ + { { 0x709016a77aa3bb92L,0xad85ec5a6310f638L,0x37806a891d746ab2L, + 0xb878e2df80c6026bL,0x2d1bd5cb715e5095L,0xfad149bdcf029798L, + 0x252f826209fa7a60L,0x48c7e31ca0e64ce2L,0x0000000000000124L }, + { 0xed6295952077c9ebL,0x86ccb5d1e1191074L,0x456067b16aabffa9L, + 0x55deb4fa5921dfc9L,0x4d9bfe498fec9c3fL,0x09a4cb5ce52a0452L, + 0xa590232e9c48af89L,0x593663f633ac7451L,0x0000000000000032L } }, + /* 11 << 161 */ + { { 0xe7e659560b715201L,0xc3db8e9fdce536b3L,0x0f53382987bf4fd8L, + 0x4630cd5699c4668aL,0x377120aca98b1b34L,0x878344bbd54a8a45L, + 0x788fbd286c45b8aaL,0x473be93182f3fbeeL,0x0000000000000097L }, + { 0x3fc47f9a536fee38L,0x69bea946488b377eL,0x3d75c35835c67175L, + 0x7fd136ea89085cb2L,0xda6e1ab485bfe292L,0x703a854684848f1eL, + 0x37f0e804ac8000daL,0x53b3ed02c83fb8feL,0x00000000000000b6L } }, + /* 12 << 161 */ + { { 0x76f6bd2345f1f8edL,0x01af1d612c1af63bL,0xd83a40982bd9cdc2L, + 0x12ec79ad79d14104L,0x999b06da0b7f13d7L,0x111b8eeeecd188a9L, + 0x920f2064138b5729L,0x290b5f2475b98e34L,0x00000000000000c0L }, + { 0x38a91bb8b98cfd7fL,0x0cc2d4ceb574c3f7L,0x49a05d4b8ff024c4L, + 0xff054d9491b0d00bL,0xbed247dd100eb8aaL,0xafdb1747e9a46615L, + 0xf801a52fc03cffdcL,0x415f01eeb9d027ceL,0x00000000000001aaL } }, + /* 13 << 161 */ + { { 0x15b2ae5549f234c7L,0x58635013b0beac59L,0x94c23cba4739fae8L, + 0x840ab93eacef3a53L,0x3bc47650dcccd34eL,0xaac77901e7df4d3bL, + 0x440cfe01370e4438L,0xa53a1a7b8e5923e1L,0x0000000000000006L }, + { 0xa8e48ebfaf875672L,0xb6d62f9f3221cb04L,0x521ca35fb9d685cfL, + 0xba27652d100ed01aL,0x652fe08c46958398L,0x32b370924443c383L, + 0xac43dc6de676ea7fL,0xad362a924bab6228L,0x000000000000014dL } }, + /* 14 << 161 */ + { { 0x018820c2d2f48f4fL,0xc0dd7d24686b3002L,0x8ff02f40a5f47131L, + 
0xb84bad1b1995b352L,0xc640b6049af32820L,0x87268cb03dd15622L, + 0xfea64350ffd221ccL,0xc7f6e79257deea86L,0x000000000000012aL }, + { 0x482e7151037030deL,0xf781946d9cfd8f64L,0xd4d751a1037bbb78L, + 0x3197bde1031ed4d6L,0xd9bbfeacd6ba8cd4L,0x5688d4fdc0fbe2e3L, + 0x76dcd7e0f260abefL,0x019122d294017174L,0x00000000000001f7L } }, + /* 15 << 161 */ + { { 0xf22306192420c83dL,0xcb52f3d51a6ad672L,0x6f23327787188da8L, + 0xc9978c3e37b3ba7eL,0x043d79269986ba91L,0x71312bf8b69e4f23L, + 0xe868d488669cebefL,0x358d1c613b9ea975L,0x0000000000000000L }, + { 0xa1f3d9c7d2435583L,0xb30834558f46d4e8L,0xf857db3c847df616L, + 0xc69106f36e4969efL,0xff89cb872a3e3af0L,0xa28a9399bae1ad4cL, + 0xae59df21c67b70e7L,0x10e6c606fe60fba4L,0x000000000000003bL } }, + /* 16 << 161 */ + { { 0xdba8e40547f7153bL,0x73071fd967dbfebfL,0x35881eec4441a2ffL, + 0x50623c475b31b917L,0x8f09dbf6c2bad8b3L,0xbf1f09cd90bb890dL, + 0xab770cd77f034fdcL,0x45b959236b7d98b1L,0x0000000000000029L }, + { 0xd2ef1e00f599c343L,0x1289bdb17024d641L,0xed8e7886f4e36687L, + 0x4ad57b343696269cL,0x69bd1b6e70bf3629L,0x680f08622bab0453L, + 0x1865f02a68e4b123L,0x7bc5dbf3e8d8ab75L,0x0000000000000150L } }, + /* 17 << 161 */ + { { 0x6726ccbc3e2aff94L,0x5484b226c90c8dccL,0x2a2bdbc53a2527a3L, + 0x0bc5ef43d04f49d3L,0x3e245050b41ceddeL,0x89eb42f3066f3911L, + 0xd5a654cff6abf8ccL,0x667c82e307a3812dL,0x000000000000009fL }, + { 0x1263649921a22440L,0xe811715df9b6b318L,0x9b6531ad5cd6faceL, + 0xcc9494fa99de1630L,0x515021752dc29591L,0x65b72bd29d42f5bcL, + 0xeb938f6b2e72ce01L,0x9f28e413b40a61c9L,0x00000000000001bcL } }, + /* 18 << 161 */ + { { 0xa7b6d19ac91805e9L,0x615699eb501c2667L,0x48e2afa5a34bc77eL, + 0x3481417aa5c5f041L,0x2919a3596c1e6286L,0x3d1dd1d074db3545L, + 0x342a15f49e76b201L,0x82b5db095e08f638L,0x0000000000000105L }, + { 0xbcc640c0ad8c1f5eL,0x2251bb88dd8a841dL,0x5f210ad6a5948f36L, + 0x28d15c5f82e9e673L,0xbe1d9e7e6a1b484cL,0xd40a0934622ce1afL, + 0x1b7d637fbcb6d740L,0xa76f02210e1a2598L,0x0000000000000174L } }, + /* 19 << 161 */ + { { 
0xb896a3f2d350250dL,0xf294278d7ee9ea71L,0xc9c8283e6249854eL, + 0xc21eab81bc99cd28L,0x73b8e2c38f4be65aL,0x8244fedf69c189ebL, + 0xce75ac12d0cdf781L,0xf323ab63fe1faaacL,0x000000000000004aL }, + { 0x8860fa20e9ce92e6L,0x83479ba5eff82f2aL,0xfe15b93eda4dba32L, + 0x7f6c7ace601dd142L,0xec990ad49d19bd4aL,0xde2eee45c95fed76L, + 0x88e4dcd7f5448f60L,0x8aafa008120b0d53L,0x00000000000000ebL } }, + /* 20 << 161 */ + { { 0x5e52e88599dd702eL,0x8788f039002b4e01L,0x1c6e3d695c4f0701L, + 0xe5739c8b4d9f865eL,0xaa101c5cc3799864L,0xe3cf2696f3fa07fcL, + 0x389a552716f96437L,0xbbb4d9808b801596L,0x0000000000000164L }, + { 0xae6ea79567728022L,0xa02d2498f57436ccL,0xa7d4f844f72b5ee7L, + 0xe1be79d19c24f898L,0xe9d02da89af2917fL,0x589677b798d5af37L, + 0x8515a1be298ddfedL,0x85c9f54d73c666eaL,0x0000000000000099L } }, + /* 21 << 161 */ + { { 0x266b62cd5e9b3f3eL,0x5923ec10fcf4c770L,0xa94e579454e6c393L, + 0x4537ad5461e76eecL,0xb0c3a5730f27b256L,0xebc9b1cc3d1829adL, + 0x43f486fb2a535031L,0xac5e5f3d265b0bf2L,0x0000000000000034L }, + { 0x696366449ddef973L,0xcb381b06bd3c5340L,0xa007691234214149L, + 0x15c92a1bbe0faff1L,0x9093712f0da430c9L,0x925af269f6ce1ed0L, + 0x25aac7039766963aL,0xf00a980f330c5e9cL,0x000000000000015fL } }, + /* 22 << 161 */ + { { 0x9dc85b22b1c47096L,0x122c6be77e0e8e66L,0xdec66b03648fb290L, + 0x27f4376ec40116e7L,0x7b7bafe69dc0cc4cL,0x0cad9ff312d08efbL, + 0x70db245bd6542645L,0xc28346160b545d05L,0x0000000000000026L }, + { 0x298ec3d3ed63af02L,0xbba1f13c8bf8477dL,0x2830e1ecef7a945eL, + 0x49a0e5e009c1b798L,0xe2ef4ed3728f9e0cL,0x235768bb24570e74L, + 0x10007a9283a6aff4L,0xae2217684ff5f9d5L,0x0000000000000014L } }, + /* 23 << 161 */ + { { 0x78be01f01cdd5596L,0x27c282a59b4b44f6L,0x03a41b7ba989e031L, + 0xd477af5342fc6296L,0xd24df6d41fdb1d44L,0x7458e14ae1b0d7beL, + 0x8eefd3d58838a0f9L,0xdf17dc2d7db4b37bL,0x0000000000000113L }, + { 0xbb8f431ca4c7855bL,0x7cc9118df5a15060L,0x61ff999b04efc1e7L, + 0x34f30946331fc317L,0x6f24717219a61908L,0x869beba812619b2eL, + 
0x1c53bd3a4916f1ffL,0x1276f58ea988bc64L,0x0000000000000005L } }, + /* 24 << 161 */ + { { 0x02e206c37471a905L,0xb6ce013934d0c122L,0xcda8a158ca1ec8adL, + 0x17ba844b6eb26374L,0xf8dca5a62ae7179cL,0xdab1da42807ae8f2L, + 0xdcc18ac94083d6a7L,0x1464e68edf65f291L,0x0000000000000038L }, + { 0x6599fd7bd22a48c2L,0xfd36f482477abe8aL,0x403663c930756f73L, + 0x1b38f348805b2e09L,0x213feb71c142d148L,0x9878091a8855d42fL, + 0x317421962821af05L,0x9023473580457e5bL,0x0000000000000053L } }, + /* 25 << 161 */ + { { 0x47ba1f99743fea79L,0x06acb3c3a291a643L,0xaa7348bb9f2bb6e6L, + 0x84fdba6fbc8a12e9L,0x15b33b6b0a667c24L,0xba9f7deb101dc30eL, + 0x459a46034c57add7L,0x5464da8fa6609167L,0x00000000000001a8L }, + { 0x0c6a508a3784eb33L,0x4479937d0f0a90bcL,0x1a531a0d977b0dcdL, + 0xc2b64ddae64bad13L,0xd5be67bfee7f09adL,0x22f09f2ce76c1626L, + 0x9686e65fe16904b6L,0x36b7a900855d97f2L,0x00000000000001d5L } }, + /* 26 << 161 */ + { { 0xd43858c0df99624bL,0x13136325cb492988L,0x1c894e8eee9f005fL, + 0x22ac0619eff5ed87L,0xe5541a5988b981bfL,0x5bdc0aa3fa6c6896L, + 0xee9ae8d5d079468cL,0x62019c6ddee442fbL,0x0000000000000133L }, + { 0x26522a74f2b6d9c7L,0xb5b484afa0d85fc4L,0x5e43e58b8ecffd0bL, + 0xe4208afcae402e16L,0x0f40d075c422d4d8L,0x2a9bb7082750ab4eL, + 0xede6cf4bf468eff9L,0x8513051277fae59aL,0x0000000000000198L } }, + /* 27 << 161 */ + { { 0xcc69a0ccd9e4f8e7L,0xc022938a9bed81f0L,0xfa5d71e306664d82L, + 0xd5be5e220f1e2acdL,0x92c0da616ba1adf6L,0x7f4f4a83200f1487L, + 0xf057fde0dd6b2d46L,0x04a1fea64ee5aa31L,0x00000000000000c1L }, + { 0xc89bcd96ed79484aL,0xa5c975a6ec2453f9L,0x33215bdb09a122e1L, + 0x354116245bf4ac00L,0x555dccea0adbf9dcL,0x305b7c95457c70b7L, + 0x1afe30a21ef03bcdL,0x4fb417396ad91b4aL,0x00000000000000e4L } }, + /* 28 << 161 */ + { { 0x9b725e514dd1d21eL,0x168e6606593e95eaL,0xae7b78d6fc4197eaL, + 0x5292f0812cd1c536L,0x7ec384125dbdb782L,0x2853f48a71352e28L, + 0x904986af7516103eL,0x23d71c892b57f46fL,0x0000000000000091L }, + { 0x5d4a4f006a1e8ca4L,0xbbc96d859935baceL,0x4175dcd7b578a386L, + 
0x208f65eaac20bd79L,0x2859dbdb96f5e3efL,0x59757e035c91ede6L, + 0xcca1eb6dce0673afL,0x886baaa29a7cf180L,0x0000000000000086L } }, + /* 29 << 161 */ + { { 0x23206c18b63cc880L,0xae2a1109db8b7f25L,0x6779ed193b8e5e1eL, + 0x565a9bd7cd18feadL,0x17bd2284ccf3caa7L,0x385aef038c0d52eeL, + 0x8ae77665138d63ffL,0x718bd33248a59df4L,0x0000000000000008L }, + { 0xf9d111dbe3242981L,0x2fa83b3d575d3c49L,0xf45ce3fda49b5fe1L, + 0xf35d796a9d2ec2b8L,0x9b29bd6b4fff07dbL,0x7a773285d6801cbdL, + 0x2050e529a0c11fd0L,0x049b8045486f19dfL,0x0000000000000178L } }, + /* 30 << 161 */ + { { 0xfd8f5732a214ffb8L,0x44b1642c528bc00cL,0xddf614c664ab949aL, + 0xd2c8792db600c8caL,0x742d67de888308f1L,0x7c935d3ecbd60490L, + 0x100cda325395d111L,0x1b6a6cd19a931f92L,0x0000000000000110L }, + { 0x5d45901e48ac9d0bL,0x1b6930212ed16103L,0x938c16a2a85d4fd3L, + 0xd42348aaa382830dL,0x1bdea8d49ba29408L,0xde5f07a05aa9b9f4L, + 0xf2ff6e2c17118d5eL,0x7d9e7fd948437ea6L,0x000000000000001cL } }, + /* 31 << 161 */ + { { 0xf21bb0d569edba14L,0x5b03deeaba63fd7fL,0x6428178b6ccbda3dL, + 0xd90721c9ae40f7cfL,0xffa95ed408c9d593L,0x233675f5916327bbL, + 0xf63a304a3ebd7c4dL,0xfc1e62599af1bdb0L,0x000000000000019cL }, + { 0xe10c054ef0fb085bL,0xa82deca9e42abb84L,0x9657610111a401b5L, + 0x7a8a0895b7452de3L,0x9a132acfc3b65cceL,0x550ebc8e611b6aaaL, + 0xc769bab2fbb950d4L,0x9b0207afb0bbfea8L,0x0000000000000139L } }, + /* 32 << 161 */ + { { 0x4f6ba84d09ed0733L,0xdb3908d14bf2273aL,0x33bcff3f3a074347L, + 0xbafc83c622e849cdL,0xfb13992ceed3b0dbL,0xd24dd41d3c016bedL, + 0x0d15167af6c72694L,0xe06346be162238b6L,0x0000000000000050L }, + { 0x28fea53455d96852L,0x46aee84a3f0cef9eL,0xb8af77e1775a77a5L, + 0x63e3b7fcb66f217dL,0x763ceb6796e84554L,0x39b7ec8ed79b3203L, + 0x62e24b184f17b166L,0x873ea1aa905054dfL,0x00000000000001f4L } }, + /* 33 << 161 */ + { { 0xf9e957e011ecd03fL,0x36e97a5162a70172L,0xddc99f268cee4a6eL, + 0xc9133a8bf59c132dL,0xf0360bc56ea138eaL,0xc951611bbf929662L, + 0xd5e127b017c41f0bL,0x273a6206effc45eaL,0x0000000000000034L }, + { 
0xede37cd5600d7097L,0xeab8d796081144bfL,0x1b45983f88a59279L, + 0x9bac431f8fd88379L,0x1261faf43b1033edL,0x72f4d092e7106cb8L, + 0x9c77283c885fe210L,0xd61437588f312001L,0x0000000000000120L } }, + /* 34 << 161 */ + { { 0xe741401b3e4b27f0L,0x3cba76aa0649726dL,0x56c542047fad17c1L, + 0xc6f0731569335954L,0xc578f101a9fcaec4L,0x5c100c039431d291L, + 0x2bf9d339f2630180L,0xfe18d6e0fb611fd6L,0x00000000000000f4L }, + { 0xf7c8ebd21d5a95e2L,0x50abdb6201c572d9L,0x8122c3d1cfa44f8aL, + 0x6eb1ebb1266a9f78L,0x6fac9cc4ea52170fL,0xb27ace4dd2a508fcL, + 0x6060020ac99e9aa1L,0xe55643997d3f48ddL,0x0000000000000190L } }, + /* 35 << 161 */ + { { 0xa9e4882c7ed59d91L,0x07a64a9b1fce9c0dL,0xcc45cd129065e704L, + 0xeecf8b38f26f1dfcL,0x2f5c4945a1616c3bL,0x900b2ccf9642e488L, + 0x5af1d7f8162fc9e2L,0x2a71595e18424eb8L,0x00000000000001fbL }, + { 0xec338fe3dd9c2e9aL,0x65dafe42b0517883L,0x40a9bae3d93ec0b7L, + 0x674169aaf094a74bL,0xcc4ca4f411a22a04L,0x45a2ee1e25ffd70dL, + 0xf12af516101337a6L,0xaebc10f406fabda0L,0x0000000000000072L } }, + /* 36 << 161 */ + { { 0xc0bbb7f74d30e883L,0x492e66eefcb15f83L,0xf72ef2fe6f48c1f7L, + 0xb947548d314b03feL,0xd65e9cf3c25acceeL,0xff39b4dceeb29d36L, + 0xbec64e77c99c5afcL,0x5b8a197d3a67c06bL,0x000000000000002dL }, + { 0xe1331e50f871d815L,0xe03985a391481e0fL,0x813f257a226cedebL, + 0xcd331cce2a206ba1L,0xd638fdf4827d764dL,0x6a615884c63d7d4bL, + 0xcc4d0e0d6b08663aL,0x66436385310ece41L,0x000000000000005eL } }, + /* 37 << 161 */ + { { 0xadd8cd73f0ecf34dL,0x43f62967ff35e2e5L,0xf5e43e254b1a345bL, + 0x1ae3f1eec54f35a2L,0x4f1c7df9cad2bf13L,0x15d4803bde8fd01bL, + 0x9f4042173924fc8eL,0xadab12608a67eaacL,0x0000000000000030L }, + { 0x194e264056788778L,0x9150f420b9b52528L,0x65e6f21ef741753eL, + 0xdec82a916b87ac55L,0x1cbe14d2287c7942L,0xd7a761913f6f27b9L, + 0x4aab93b16fc85b18L,0x542dd2083b421209L,0x00000000000001e3L } }, + /* 38 << 161 */ + { { 0x2791f6cbd8c481b1L,0x5f6e979b105355abL,0x9717cbdb3c359624L, + 0x524de35b434adfa2L,0x342553b9e4390513L,0xba52714e2c4aa1d3L, + 
0x203610b03352d5aaL,0xb0417dbfde6e9eb2L,0x0000000000000007L }, + { 0xa7a4ab660778eeb6L,0x3e73596701347028L,0xfefbc03f86847bcaL, + 0x6b746803cc5bdcc8L,0xec44a1ae77b147a8L,0x447f4acf99a32887L, + 0x57fc9a18a05edd76L,0x984f16610c725648L,0x00000000000000f6L } }, + /* 39 << 161 */ + { { 0xd19616ae1983ebe8L,0x4503d5f2e1e673f5L,0xef3f5a36e6b42028L, + 0x55b2352ddf585e21L,0xc96b97cb803254a0L,0xbb91658d46a8dc81L, + 0x510b21ca6c1d02c5L,0xc01a0264618a46ccL,0x00000000000000fbL }, + { 0x809346c47c10f0d9L,0x1f4da743e1e88f53L,0x53670064ebd2df12L, + 0x700b420afa09e88fL,0x4a22c1529e631b7aL,0x1d4d965c170a085dL, + 0x0d6d1a47fbbdf9b9L,0xf95f3f2816be4629L,0x000000000000001fL } }, + /* 40 << 161 */ + { { 0xe410d8f75fd30bbdL,0x966e71d6031cff85L,0xfe51c6c9a6f4b6a7L, + 0x6aa83ca8d5681412L,0x28a61d2e57a5d801L,0xb699f6cb52bc0413L, + 0x1729f8d0443bb821L,0x6e22f51204452a00L,0x0000000000000076L }, + { 0x7b51ffac0cc2608eL,0x370f9a54495e770aL,0xe32a66e2f574b07cL, + 0xc3ead5c57c808c30L,0xf04bdebd20d3d51aL,0x82b07f2804ffd31cL, + 0xd2d81ae3b6df8fb3L,0x3978bb58002ea4e2L,0x00000000000001abL } }, + /* 41 << 161 */ + { { 0xe78315f6dfbb52a9L,0x9d3cba08427fe77aL,0xdfe7bf03a89c1b0dL, + 0xab65916f705a6509L,0x674049815c466030L,0xe8f1d410c97d6559L, + 0x38bdb4141cd203caL,0x3ddba428daf215d9L,0x00000000000001e1L }, + { 0xd1174c6998d10f89L,0x049f8fdc7bf92d99L,0x5e93b8e5cb52899dL, + 0x886da2cad057f3d5L,0x7bedd886c75e69d6L,0x48b808d7b869acb7L, + 0x542a5fbb7168759fL,0x18d5a3b332d207f7L,0x000000000000015aL } }, + /* 42 << 161 */ + { { 0x7d28aec6ca1338b9L,0x60e2c0021de0a9e9L,0xec83a036473cdc40L, + 0xa18a48427b3ca8baL,0xbb6ce1a4d57240f9L,0xd8849fb45cd926edL, + 0x0a6bed34a99ff832L,0x4f95f32bbf65f46cL,0x000000000000016aL }, + { 0x1b97562771d2f781L,0x75e4501fc527e102L,0x79130a459ac2b105L, + 0x470192f34acebcf5L,0x72b54c075997b4a2L,0xed718897c8b60b66L, + 0x4093b1f8f51bd41fL,0xb29f58d3a42de0f8L,0x00000000000000d7L } }, + /* 43 << 161 */ + { { 0x331982b89a4db510L,0xf3712ed38465e8c4L,0x3d34b58f16d166c5L, + 
0x4c288793731d073bL,0x70ff503acbb64d41L,0x8c1af3bc188deefbL, + 0x5a73b272b1ec9218L,0x9ef75613bf5d7d3aL,0x00000000000001bfL }, + { 0x539822ff861474bdL,0x180f16e73e4fbb41L,0xcd2510ea4d5b4fafL, + 0xdcc4d89a7fd914a7L,0x2b80a838ae4c4ac9L,0x51b8089df7376ed8L, + 0x0ab1f9608fbaa6a0L,0x6e736c68dd816684L,0x00000000000000d0L } }, + /* 44 << 161 */ + { { 0x5d35a91d101c28c4L,0x0f09bee540733fc9L,0x15b904b81454c498L, + 0xca3a5474136142c5L,0x58c9b304bd03303cL,0xea4075b5a60b59dfL, + 0xca58872e67858464L,0x788b761f48354a3bL,0x000000000000016eL }, + { 0xd17d61f750501d32L,0x791776d613aceb1dL,0x3b0e441e42c97751L, + 0xbc212d890edad7b1L,0xf9fe60a0ce37bf73L,0x65fcc158880d070dL, + 0x00a84549d1d4e035L,0x01c1e68c701162b4L,0x00000000000000ceL } }, + /* 45 << 161 */ + { { 0xd0a8554c494fb156L,0xc71afa6a8c500c38L,0xee41b4fa39a89f0bL, + 0xbbb592d2af6b45d9L,0xfe957349f903b987L,0x43cd6c638db1e4d9L, + 0x8e43ec817ce3023bL,0x416c60c5329a63c2L,0x000000000000019fL }, + { 0x2b21d1cf4d61105eL,0x251b9594b74f4dacL,0x12b8fe3c5e4d45eaL, + 0xba2c53365fc42715L,0x223fdd1c773b2d48L,0x94f125933065fc8dL, + 0x492cc924e2333395L,0xb3616a8475ac3c66L,0x00000000000001e3L } }, + /* 46 << 161 */ + { { 0x5ace22d86c5a60ffL,0xb851dfa9f4a3b8e9L,0xf69113c2fa83cda2L, + 0x32ebf61d64cfd0f2L,0x54c22f18957e47caL,0x6bbd312371db481aL, + 0xee07ace023f0bf72L,0xe21cc99694f56b82L,0x000000000000015bL }, + { 0x5f550ca712edff28L,0xfe0f2ee6bdbf147cL,0x0c5ce5382fbf0a76L, + 0x1ed469f774b92d4cL,0x9b1c90af48edafecL,0xffd719c5d02d67d9L, + 0x1393017c26b20c72L,0x394c92a765bbfc0aL,0x00000000000000e8L } }, + /* 47 << 161 */ + { { 0x8e3b16ffff7e659eL,0x46277baa15e7d54bL,0xa9a805181780a668L, + 0xa3c489288ee7de0dL,0x5a33494c6a492932L,0xf21c24fc8c77046bL, + 0x0f03d8aefe34a7d0L,0x124f0b04cee67a16L,0x0000000000000155L }, + { 0x5a35d64c33c9043bL,0x43c74b4c3bf7ba98L,0x799109fbd8945905L, + 0x144fe1552c7afef7L,0x216b3cd4a58245eeL,0x59db3457dee2d7edL, + 0xff7a2017df7c0f11L,0xc4f76ef73306c02dL,0x0000000000000056L } }, + /* 48 << 161 */ + { { 
0x35a5767a91f7da33L,0xb73136937ba4dfedL,0x3843d6c37f7e4d30L, + 0x6f461c6be2595a8aL,0xa34766d014105be4L,0x62d28311d7c9924eL, + 0x7bfd28110f51f78eL,0x9cd30fb24b760751L,0x00000000000001c4L }, + { 0x739b01bc3391726aL,0x82721aef5fde4fd5L,0xf3ec542dea71f1f2L, + 0x520fdd3458345718L,0xef2a3d6f54bb2d09L,0xf75fc3fcfaeb5530L, + 0x6fb68582f73076b3L,0x27d3835620e57568L,0x00000000000001beL } }, + /* 49 << 161 */ + { { 0x7a28d24dd585507dL,0x1e4e683466285c9dL,0xe4b682babb22705bL, + 0x64d1ef0045ab2054L,0xb5cec856cc08627eL,0x889ad5ebb8b7e942L, + 0x8d714cd95079afafL,0x4d8076a73e83d558L,0x0000000000000190L }, + { 0xe9f3ad2bf576cd63L,0x59000a34b58cfc2fL,0xc264ed814685f799L, + 0x982c7552123238a0L,0xa9ee7e5968384c7bL,0x8664439308b33f41L, + 0x17b12f704e50471dL,0xc0d7e9bc75ebea21L,0x000000000000019bL } }, + /* 50 << 161 */ + { { 0x912222d815c2f2a8L,0x2de888271cd8f2a3L,0xe76034a53dd91b8dL, + 0x845a5d5194b6f95fL,0x2eb60c5b52e29a05L,0xf0d98738ba25e6bdL, + 0xfe6a89c8cdc0327cL,0x167c43e4c20b4ca6L,0x000000000000005bL }, + { 0x39630959e8c54a45L,0x02c1c7fe70765ab8L,0xec3f9a8de3407090L, + 0x9cc4b5fb695237a6L,0xd8f106ce75b984f0L,0x9cbaf61edbaba8d1L, + 0x1d97a9051041afc9L,0xf2e9161e55b31a38L,0x00000000000000ebL } }, + /* 51 << 161 */ + { { 0x5a6bf7f91f8b55a8L,0xc69d8efd172ad4e2L,0x225a073ce679c187L, + 0x715c881907c52ca1L,0xc0e7ea40ea089856L,0xd468af9b39f8886eL, + 0x0d4ba8bc050432baL,0x19f774a064252dceL,0x0000000000000156L }, + { 0xc799518c93721f59L,0x366c163c364ad15dL,0xba588393e560fb0dL, + 0x016a8490de01278dL,0x2b528be2d1a40335L,0x5309de257787392fL, + 0xeb1b66711c0958edL,0x53fc34a953ef3c21L,0x00000000000000a7L } }, + /* 52 << 161 */ + { { 0xb4329354b10450e0L,0x467523c0f2c7c16fL,0x8704d83d9859020eL, + 0x04daea48e8ad89feL,0x63c0e5f15b92f0a8L,0xb46d370e1b70e374L, + 0xdbf6127aa728e693L,0xb2121298b13a28b4L,0x00000000000001a0L }, + { 0xd0421f84c0574c9fL,0x0cf61cd6d362b1a8L,0xd89f8484bb97b57fL, + 0xc9bd3731b39b01f3L,0x1af2db789dc8f339L,0xe4841c8d5be1a90cL, + 
0x4fa214cce1f35bd5L,0xdf654917c1a9f844L,0x0000000000000118L } }, + /* 53 << 161 */ + { { 0xcc657fbdeded2e02L,0x141fc3e7caf6a533L,0x7da03a50c37499b1L, + 0x065eb76c204614f0L,0xcc8bd1807ab23c85L,0xca7080fb5f1cec2aL, + 0x1760042d05f0a0fcL,0xd8fe8fc96c675e0fL,0x00000000000001b4L }, + { 0xeac258083d064fc5L,0xa906864eade9696cL,0x58fcf49e1f711665L, + 0x1ff08bacde11a5bbL,0xa7feb828b482ca23L,0x6a0182962648ed45L, + 0xb86ca29c5066a0a5L,0x6365a8682517aa29L,0x0000000000000077L } }, + /* 54 << 161 */ + { { 0x34733894f44f0ca9L,0xe4cfd1e412871471L,0x744cae9c6937e907L, + 0x4e64dd6163227fa9L,0x47d3efa71c46edc0L,0x0872f46ecc49f614L, + 0xef5581ef6c3aa650L,0x40cbe7deffa56263L,0x00000000000000b9L }, + { 0xaa3435661a572651L,0x3dfd8846f16f48dfL,0x0a61cd2df660bd14L, + 0x2aac6852b7382316L,0x237e7531a213fc6cL,0xe5fd6bd5733eb51dL, + 0x07d667c654816bf0L,0x1ad4cb04e1079eedL,0x000000000000001eL } }, + /* 55 << 161 */ + { { 0x6798f4d06e56988dL,0x41378b308e88eaa0L,0x2dd138d2fd992c8bL, + 0x5e7400ae71030b13L,0x7b00ea1f13c9ad72L,0x171f656d79101b3eL, + 0x1614fd762b2d5ebdL,0x75c70e686e0f5bb5L,0x00000000000000cdL }, + { 0x9d980a7936d23463L,0x834c407489e9fdb9L,0xc9b2dfe11423a5cdL, + 0xab0462bbfec68ee5L,0xf1b299fd96ad5ee7L,0x1a1052ce79847284L, + 0x11d08428d9a4aba9L,0x3f76770dd07f8532L,0x0000000000000058L } }, + /* 56 << 161 */ + { { 0xf85c1b6a18db2823L,0xda36fa1c2203eea7L,0x81f96771cdac541bL, + 0x52e33f39605c943dL,0xadf4c06637c23121L,0xe9e5aa4b8ad2c86cL, + 0x3d4a61630354ad23L,0x44b4d8ebae152c75L,0x00000000000000fdL }, + { 0xd5d3d35dab2d2652L,0x55b6f8ed382b2fb7L,0x3a1eee79a7e69b83L, + 0x98f430aab5c6da08L,0xf4af6cd956cce57bL,0xd67e787b7af2ea27L, + 0xc276f0a7d8841e4fL,0x2e23f60fc0478c59L,0x0000000000000026L } }, + /* 57 << 161 */ + { { 0xf0eb4ab877863073L,0x94181818a546f8bfL,0xcd887c3ca8731841L, + 0xe2bcafcc75595c73L,0x344aa895985d3b76L,0x1bf93659471fdde9L, + 0x966caabac3b6a887L,0x3ef855c5d55370f8L,0x0000000000000066L }, + { 0x19d2a2302184ccb0L,0x8fc183ee69acc9dcL,0x7a4e06407c15ff8aL, + 
0x558ad1c20c26289bL,0x6c4115510529310cL,0x280ac8190fca48a9L, + 0x0415f9bacf58bdbfL,0xc3886683f2c0dd4aL,0x0000000000000062L } }, + /* 58 << 161 */ + { { 0x3e88401aeebc8d94L,0x7e5f8ec0075f3f3cL,0xc9fa5c48d3004237L, + 0xcb17bcbb3d2f2e5eL,0xb105514bea735fa2L,0xba377336fba76370L, + 0xf221092e050d6dc4L,0x77a261f905f5f809L,0x000000000000013cL }, + { 0x606d9cfad207d34cL,0x596d48e3d86cebd4L,0x9446a6eea25feb06L, + 0xf8aa591e0e3061baL,0xfafdaf2e4d4e8f87L,0xdc642f5dcde1b623L, + 0x0780a3b18c4123c6L,0x0b0e00885b2343d8L,0x000000000000009fL } }, + /* 59 << 161 */ + { { 0xd6968c6456a686ebL,0x76bf6f10f6be50c1L,0xb07ea82138afb6a8L, + 0x0fc9edaf1d7736e9L,0x20d6045f9b3e0246L,0xd17c5795991c0a45L, + 0xd7db43f4df3ed4e4L,0xc89cf90f2ac74830L,0x00000000000001a2L }, + { 0x30417f69b28de367L,0xcb660863643918c8L,0xbdb5eb048fb7d82fL, + 0x401f0699f4368ab2L,0x9ef1961f15413dabL,0x30b0324f41c40991L, + 0xc2b48a84f8668b54L,0xd93b0282b6d52027L,0x000000000000019aL } }, + /* 60 << 161 */ + { { 0x69660fea949317feL,0x373d869babd661a7L,0x9af9b8f1d3e12c73L, + 0x924254a4b60c5995L,0xe35cd8673d19f242L,0x52b966e567041fb6L, + 0xa23e82421d39defcL,0xdd92b1a8eb62a410L,0x0000000000000125L }, + { 0x3c1d6f5250ed0bacL,0x3266bab423c65483L,0x1773d1942fa296fdL, + 0x1716ffa0cc5c82b1L,0xed0b4af7185788b3L,0x9ad866b5990b8e96L, + 0x3263e59f69dad8d4L,0x4fa90e3f3c4701bbL,0x0000000000000142L } }, + /* 61 << 161 */ + { { 0xedc26bc2b7001966L,0x07965896b8d8521dL,0x485564a35427755aL, + 0x522dd1d1f3f96155L,0xf182a1d731bf10a8L,0x3967f375fabecb7aL, + 0xe657de0bdcd6c80cL,0xa8db2b3a8de73efbL,0x0000000000000074L }, + { 0x6cec45bbbb1b75a3L,0x1e28ed9134f5ef01L,0xe4a2fdbab493e0f9L, + 0xf3b058cbc67f30f4L,0xc63ce1f97c21e9e4L,0x6df803bc9587af66L, + 0x83b68042b2d6058dL,0xbd466da98be4c8adL,0x0000000000000050L } }, + /* 62 << 161 */ + { { 0x8ebd9194aecfa334L,0xf12fdf8b12498ec5L,0x10aeeea323bc15c8L, + 0x67e32fb5746aa8f0L,0x6c2a3ce334ea8c2cL,0xed5b45bd7d1e6a40L, + 0x4ec06953c478d94cL,0x06653d528fa54d3eL,0x0000000000000002L }, + { 
0xefe66e37c9fc19c3L,0x3df15ce78f92ef51L,0xecae3d50d107059fL, + 0x4e588f54d6f8d314L,0x3fb75086789921daL,0xd475779892273a4eL, + 0x18a75dfb63e04c7bL,0x90f39dd1fa412403L,0x000000000000018bL } }, + /* 63 << 161 */ + { { 0x618e0931d9ee46b4L,0x3c65fdc65ac70195L,0x46a7ca1f25b503a5L, + 0x3d43ee7529d9d453L,0xeae5decfcb21a646L,0x32cdc75f95a04c13L, + 0x7b39b1bcfc631a28L,0x903bbdaed2ab7499L,0x0000000000000085L }, + { 0xa0cfb2a4f158b009L,0x59cf9fdda8d72e02L,0x2f83798e4e24207dL, + 0xd8fb75cf4272d53dL,0x55e7a933a3dff9a0L,0xb72160f8815ef182L, + 0x5a14b7e3fe0d0337L,0xad937b534b590bc2L,0x0000000000000041L } }, + /* 64 << 161 */ + { { 0x0db2b099cad6b074L,0x4749379c2a6efc26L,0x31862f75ab64d63cL, + 0xa465075d1623e85bL,0x5f03bbea00e7e1b8L,0x24ef8956a2c133cfL, + 0x6f9fbad452533ca3L,0xdfb15df3abf81b19L,0x000000000000015fL }, + { 0x0616fd242796855aL,0x4a9a066c9cbc946eL,0xaa0fd8ee347283f1L, + 0x63bde9adf15aa16fL,0xbdd5677a9277e9bbL,0x5ec032e2d045538cL, + 0x5f4eba3aba8c76fcL,0xa17d2872088e500cL,0x00000000000000a6L } }, + /* 0 << 168 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 168 */ + { { 0xd8faa3bcd4cd8948L,0xddc437839d2888d8L,0x1e951bd5581abba5L, + 0xa52dfab22947d11fL,0x8fbb0ed8728a40eaL,0xd4f4d06f31d769ccL, + 0xd93b1d93fac908d2L,0x8852b438f5a3b0cdL,0x00000000000000bbL }, + { 0x84ea0e03c8c88de9L,0x621a808863cdb21fL,0x332c292ecd6ec216L, + 0xfd01ee09c4cd0dcaL,0x5354ec9efdfce95cL,0x05c307b2a6b8db30L, + 0xf7b254eab44784aeL,0xd38481257e61408aL,0x0000000000000128L } }, + /* 2 << 168 */ + { { 0xf9b815fe2f1993f7L,0xe1f5199a3ecce0e2L,0x8898d789ab10a910L, + 0x49863dac4e863533L,0xb5f118aa5e465bafL,0x10d031280281e10bL, + 0xc98361355042343bL,0xc2140930bc68242dL,0x0000000000000073L }, + { 0xf1417466e84f3896L,0xd4167f1323db4590L,0xe5347265c7699f09L, + 0xe4458198be6204bcL,0x9375296868d8b7a8L,0x49757c532a065d8bL, + 0xdc0856fbb6927e83L,0xb1679b0c56449cf6L,0x00000000000001b2L } }, + /* 3 << 168 */ + { { 
0x1a80e022f6b5e497L,0x210b6f4b8d73860aL,0x16f2e7bd7b7f92c4L, + 0xded9e969d068de98L,0x68105e12546ace60L,0x56a0c5b0efb6f66fL, + 0xf0d5fdb985d590d0L,0xf15e274fb69d5821L,0x00000000000000bbL }, + { 0xd8d0621dde543268L,0x9e0ab44c04b71011L,0x25c135cbdb9fe3ccL, + 0xf0a9044b024b4ce3L,0xecfb54fa49d2589aL,0xf57495388bca2c50L, + 0xd76bf119f8f8e5f6L,0xbd946e2c292a452eL,0x00000000000000b2L } }, + /* 4 << 168 */ + { { 0x1d43440f28eb6ffdL,0xa97797d41faa5c82L,0x14a960425c895eefL, + 0xd35e5d8cd4fa3e4cL,0x9e68f293277b5084L,0xbf5b8a162bc6b1a0L, + 0xa1d47a38cf49a617L,0x703e609e62fd2244L,0x0000000000000048L }, + { 0x5909fbf6c6af3c4fL,0x1428b8357c1975abL,0x30f8a30810400297L, + 0xd753559f5b5750bbL,0xbc5235ebf99fabb3L,0x588aaf0bcaa35305L, + 0x13085abc579dc86eL,0x793f9efafea9393bL,0x000000000000000fL } }, + /* 5 << 168 */ + { { 0xd1e60e5f8256720cL,0x171027f57c49fa90L,0xf64e8839f6549f3cL, + 0xffa1490469b799b0L,0xadd4e8fb34553fdcL,0x7cf91923741a95baL, + 0xc9fa2fe993a1ba5aL,0x5d9e34020980acc1L,0x0000000000000020L }, + { 0x2a5cb99c98d2c9ffL,0x448896bf6c3294e9L,0xf76232cb81e067a4L, + 0xbda3583a3757184dL,0xfa41813b2e0c61aaL,0x6ba2aeb6c106499dL, + 0x93782286a19e4b30L,0x13e2306f86e8c59aL,0x00000000000000a4L } }, + /* 6 << 168 */ + { { 0x6f8275e50e37edbfL,0x64ad3b76db3e6343L,0x2eef15036a5161ebL, + 0x2c60875ecf10d729L,0xd477f32a9424ce59L,0xfbd5ab3fddd91be6L, + 0xbe1f16dfac18456bL,0x88e755378dac1a4cL,0x00000000000000d1L }, + { 0x4f8e076e3c10f2f8L,0x04f48ba57a6bc509L,0x8b825c99420b54d1L, + 0x35fb71ddd7412044L,0x1d954ed2771b6d9eL,0x24ead049473e8a03L, + 0x513ff05d28ba193aL,0x42d94d2c7a8abf0cL,0x0000000000000143L } }, + /* 7 << 168 */ + { { 0x5169a5ba3792d0e1L,0xa94f38ae866a52b1L,0x52123a0860f5270dL, + 0x3753c8f7a5b51694L,0xc28f1bd14c0dc22bL,0x39279193e55184f5L, + 0xb9fe5bda528c4498L,0x1a04e978bce7de75L,0x00000000000001c9L }, + { 0x95b584d1ed890690L,0xd02b7fc965d82f4aL,0xa630a8dc3f0b031aL, + 0x79759d955be69e84L,0x8cdb211f45539242L,0x523fbd101032dad8L, + 
0x40c47f4d5b003e94L,0x02de551920d6fd97L,0x0000000000000144L } }, + /* 8 << 168 */ + { { 0xf63486b5ce9c07a2L,0x109221f7b9b5e762L,0x0450b6c79210d1caL, + 0x8a9b11b1cc780322L,0xf33c4701a30bfdaaL,0x1a7d75f3c33bfca1L, + 0x4ee930e51c13af9dL,0x3ce4223bce402aa2L,0x00000000000001ecL }, + { 0x2ffbfae311fdd6eeL,0xca5f04d003c1edebL,0xbd80684a5d3a2e52L, + 0x98d1a18d54193ab1L,0xc549cecef366fc87L,0xb194e11aa13ead38L, + 0x6acaf7b21882660aL,0x686c0c7bd2cad2adL,0x00000000000001ffL } }, + /* 9 << 168 */ + { { 0x1a90defed7b4fdedL,0x195bee8f6336c055L,0xe843d42796a94272L, + 0x86cae8e64d7dd6e9L,0x5403b7b7119151e0L,0x3314afc85037da6bL, + 0x6d7f8f8212ca9455L,0x3937bc09b94463aaL,0x0000000000000081L }, + { 0x306e5a03cb8e9b37L,0x126473043c8b52f2L,0xc869679ef93cae12L, + 0x20f33375ab4f3d13L,0x90d9c5cf0501336cL,0x35c85984849df5ecL, + 0x42f846286244c6c3L,0x901d2a938b835c13L,0x0000000000000113L } }, + /* 10 << 168 */ + { { 0xb584b65394932b53L,0x68aad973af36cdd6L,0x2ac829a60bb4cadaL, + 0x403367f33f0e5d18L,0x412b28ad7dda08e1L,0xe7841355e24b3863L, + 0x5171c588093520b9L,0x9db8db4c5ec5762bL,0x00000000000001d1L }, + { 0x8d3b541ec6238d25L,0x586350aec23dc89aL,0x8455da87fa4d8c25L, + 0x7adfdca316492352L,0x8fdb5c08c81f1590L,0xd5e01e0324773e80L, + 0x4a3b7f13dcc95928L,0xc08babf4f80b2facL,0x00000000000001e9L } }, + /* 11 << 168 */ + { { 0x37d8ad51452f6968L,0x9abf76030be46391L,0xc64ba0aa4f51f996L, + 0xb71cf4b72bef5fa2L,0xdadcbf144aea3eccL,0x86619a4a5ca7abeaL, + 0x17bbbd06af54e743L,0x70c9d3b76d5362ebL,0x0000000000000057L }, + { 0xf2486bbdfbf53393L,0xbf16873cb87c38acL,0xca1694b587557348L, + 0x95ad3cb860dd9d75L,0x61177e70b097e1eeL,0xb481729fc4d1faaeL, + 0xaa07352130f14ce4L,0x8c3e81e6e534f9e6L,0x0000000000000104L } }, + /* 12 << 168 */ + { { 0x22d3054c1ab5373dL,0xbd4f796363adadc0L,0xfb4298ff2650904eL, + 0x2801e275ebdbc2c0L,0x1122ea0608190926L,0x41ec28d8b2e79279L, + 0xce049e5276667fcfL,0xc84223fa732778a4L,0x0000000000000081L }, + { 0x56be4933666ff503L,0x0b396d37bcee9583L,0x1b469e0e153f80beL, + 
0x3d42cd0932305dbbL,0x8c0d230527134efdL,0xb90a2abb08752f5eL, + 0xce873be8b5b796ddL,0x6a240d331b25403fL,0x00000000000001d4L } }, + /* 13 << 168 */ + { { 0x86aad63f60740f5cL,0x32b7cc316ee3ee57L,0x14340252a16df7d9L, + 0xc70632bd30867552L,0xc34ab941e536c3ebL,0xb8cea58f1b8c9771L, + 0x4f12848942876fc8L,0x2b6009fca2b9447cL,0x00000000000001edL }, + { 0xc93452d882b5e6ebL,0x29958d78ac7dee6fL,0x22dd8fd9bf6fa7e3L, + 0xc59c69516a75f72cL,0x1c6101e36123b596L,0xf5907c48e3f2d479L, + 0xe5d0b6a671bd58b7L,0x840ff28c56d9185fL,0x00000000000000a7L } }, + /* 14 << 168 */ + { { 0xd89e1fe8330465deL,0x73a9c39780ef0efbL,0xb8200ef38ed739f2L, + 0xe0914b2f6ec1025dL,0x7f277714866a3ec9L,0x1c9b4cd05716d865L, + 0x176d3095167f3a3eL,0x85a16f0afcccac3cL,0x0000000000000177L }, + { 0x767e84814f1e3019L,0x88b991b861f15fdcL,0x26cbcb752a17701dL, + 0x79f801348eed411eL,0xdd053c40411302fdL,0x35d3c2f030858f66L, + 0x0b53da7a9107a692L,0x9452396f37f9cacbL,0x00000000000000c3L } }, + /* 15 << 168 */ + { { 0x961404136bc539e8L,0xe6edfee1e4524c6bL,0x09749744d07cccf5L, + 0x1748b47ec03bdba2L,0x2690d0192bc9c91eL,0xd3f34165bade77f3L, + 0x1903c781805ee388L,0x56ede14320ee5637L,0x000000000000015fL }, + { 0x4300285a7b5b9f4aL,0xdd3a022f1b6ae86eL,0xc54a8abe376288f0L, + 0xc1d8ec4f26e3394dL,0xa3979d7b91ce3ad0L,0x0aaf9ce7d1798cc8L, + 0xe972a6d14649a9b3L,0xb0d0e5b7f260defcL,0x000000000000002bL } }, + /* 16 << 168 */ + { { 0x327eec483e206f1dL,0x20bae09cfd9e0124L,0x90e15a5012fe91f1L, + 0x0dd6de71359590d6L,0xb47ff7877346265fL,0xf68c7538d72b31fcL, + 0x1210aa5e853e2029L,0x22955a4df09c0c81L,0x0000000000000040L }, + { 0xe194dba07335f4a6L,0x9d6532fb4bde2f58L,0xfac563c59016930aL, + 0x626eaeefaf9e2095L,0x7af29b71b7d9fde1L,0x809b7fe436360bbfL, + 0x57b5e52de3cd1c8eL,0x52b9040937b01251L,0x0000000000000032L } }, + /* 17 << 168 */ + { { 0x2de80b5c1b9fe008L,0xd7a592bdd3848f67L,0x12505f695f8777c4L, + 0x596956be711b00a8L,0x04ead4ce34d9bb68L,0x87d1821cd0ed1006L, + 0x0f78cb9699acc9f9L,0xb77b5d17054b4885L,0x0000000000000081L }, + { 
0x61d1517fd4aa7593L,0xc1f884346683d4cbL,0x15409fa1c660ba6aL, + 0x5756c0f91e5e0b78L,0xa110b7ec1649f63aL,0x5a1b015f245a2867L, + 0x450aaadd6a15fffcL,0x2f44146d8f20e164L,0x00000000000000edL } }, + /* 18 << 168 */ + { { 0x34d354711d7c9b90L,0x02efc83e7cd3e5c4L,0x3bdce4d599838e28L, + 0x903b361fc09f1f79L,0x13c9e9250540eab6L,0x69f28111375ec845L, + 0xd35cefa96f3fb692L,0x7dd9245da6e1cd71L,0x000000000000007bL }, + { 0xcf0e51b7d6693e85L,0x34584a15e14a978eL,0x38d36120591126beL, + 0xf26f3d52d1a5a853L,0x904a64964e97c262L,0x7f3089ff6ac89132L, + 0x1f8934fd38e0e5fdL,0x52e3fdf2cd20b982L,0x0000000000000134L } }, + /* 19 << 168 */ + { { 0x1c18f81921822327L,0x19d3895763107dafL,0xab3c2c5bbf02410bL, + 0x056973d48084f3faL,0xf6c0f0ad9c6c0e1eL,0x5813f6354715d332L, + 0xb1435e02d54f75b4L,0x483968e79f4ef80eL,0x0000000000000077L }, + { 0xeb0da4e25a942323L,0x2236437f2120ac60L,0x16ee032f66313c05L, + 0x6bff696400a38fedL,0xf5b8ee71cf6d4cd1L,0x939c85b5fdc6617dL, + 0x1505c244a7fcfe0fL,0xa496610ab7881695L,0x00000000000001cfL } }, + /* 20 << 168 */ + { { 0xf915b3f2f47e55c4L,0xb163cda470610cf0L,0x822197f50a302cbbL, + 0x8d787800ad959227L,0x10c30f94571bc9ffL,0x2a28b5ab2caa7d3cL, + 0x35cdf34647775889L,0xc81fec4b397cc8f3L,0x00000000000001fdL }, + { 0x83a251027f5a86a7L,0xfc270fdc49e2b112L,0x351f20f947f16503L, + 0x4fc2abe97820ddd9L,0x68391136093b9c29L,0x0bb983f1c6d0c759L, + 0x15a671860ea6b033L,0x56602ab7edd62f4fL,0x0000000000000125L } }, + /* 21 << 168 */ + { { 0x947fb594aba4398dL,0x19a131cb57bb07e8L,0x8f3be50326eac16eL, + 0x451e83c0f2546781L,0xb4139dd4e44ca83eL,0x38a9d3905dc48a1cL, + 0xfadb6d0641cfb776L,0x6ff29cc5eef89414L,0x0000000000000021L }, + { 0xcbde6f766a7e915cL,0x1a7e27d2797752a6L,0xb09dd97ab6cdad5eL, + 0xbe5d66881bbb61b1L,0xc4b76c701fa85cafL,0x88282fbd92ee96baL, + 0x1681c7dde1b4e7a4L,0x3c57dd60ee3982f6L,0x0000000000000128L } }, + /* 22 << 168 */ + { { 0x54c8fc4e9f2b71f9L,0xb6c0fc49988e8120L,0xd2058da0c8927203L, + 0x16766dd5bd3f622aL,0x7ad98310f854d625L,0x4995d6bb55c5058dL, + 
0xeecdf90c74bcfc75L,0x2d62d4faf4154102L,0x000000000000005fL }, + { 0xbcfd7e5cc78ec9abL,0xba7846a3e9c8c551L,0x80ce88d165cbee60L, + 0xefda52b95632fdd2L,0xfeae287b8ddbf5e4L,0x8fe72dfa152ac780L, + 0x63f4561e614d87e5L,0x1a8d5a1e1190f43cL,0x0000000000000124L } }, + /* 23 << 168 */ + { { 0x14e9504a41cf60e4L,0x4d580f91ba2fc313L,0xc4d5ec9342122efaL, + 0xa5a21ef9a3107935L,0x190ecc1c466437b7L,0x696df2275126df44L, + 0xc9d6872cd8ae1dc6L,0x7e2fbe46f515214dL,0x0000000000000047L }, + { 0xe226b86e0bb95f94L,0x77f82979db22d5b5L,0x059b2efca0b81578L, + 0x8994c684251b4724L,0x248ab92581c5c432L,0xd7a601bd7218d58cL, + 0x87e9d6e652fa557fL,0x94240f677c6717d2L,0x0000000000000172L } }, + /* 24 << 168 */ + { { 0x04b6642cc79728ecL,0x82006db7d4052ddeL,0xd7c03908da99c38fL, + 0x7ba70fb9a6637549L,0x1dee482cb94a286dL,0xa628ac2a01d36d7eL, + 0x4de12e5c003da5e7L,0x4f68a006e306d20eL,0x000000000000013cL }, + { 0x90cb32d5635d5908L,0xa802b2585c89c608L,0xbb22484549a3be5eL, + 0x53f1fc3655480c5dL,0xa3eb1e7b0c070e5aL,0xd0e34a5af1abbf5dL, + 0x145ea8cb4b88db7bL,0xa9ff13bc839bead8L,0x0000000000000147L } }, + /* 25 << 168 */ + { { 0x30cf3767c86ddfa0L,0x6b3fd143eee9269bL,0xcebdd16d52204c51L, + 0x274b0dc204cd14b8L,0xb6e928505a86d610L,0x6b37ae54e6800db9L, + 0xd93efc3df84a0f79L,0x879d7dd56af06c52L,0x0000000000000101L }, + { 0x8167dc1f83b6b8e3L,0x13e08ee718402d20L,0xf0186bf60b26ce15L, + 0xd5aa811535c6875dL,0x0c76d0c2c5ca2103L,0x5fc8a70593646cb0L, + 0x8afb8877a4ae3f67L,0xe2326f750e699723L,0x0000000000000143L } }, + /* 26 << 168 */ + { { 0xcf7d26bb15ba71a8L,0x000c8e8c0bfbfa5cL,0x718a54266f646f01L, + 0x7c0fa210dd018dfbL,0xb64adc0f384f7dcdL,0xc0c66542477625edL, + 0x871e593e2c75b53fL,0x96c1451fb9f00e20L,0x0000000000000060L }, + { 0x8e0c5c99b1d5302fL,0xb2c5f34efd4421c4L,0x947c3fa92a1a9d1cL, + 0x6721f393881b28b2L,0x1b5513edddd050a2L,0xfb2d77941408593bL, + 0x3425f229e707ecefL,0x88c2673666df4ce3L,0x00000000000000aaL } }, + /* 27 << 168 */ + { { 0x5b7ecdb52c5f43eaL,0x53082afe02559d3bL,0x070e354264f68b42L, + 
0x58d6c1aa7c879d43L,0xc41be80750405684L,0x955a275cbce7908eL, + 0xc70c146dbab9f5c9L,0xd31d7c27901ac701L,0x0000000000000010L }, + { 0xdbc82b2bf7614365L,0x2875d55c32cd5ab5L,0xb395d08feaa9be8aL, + 0x97838c558ce9b4d6L,0xee22d8ce4fc6794dL,0xe401877623372c0bL, + 0x6dd2c34bfe0d49b5L,0xbcbcd109f128646fL,0x0000000000000105L } }, + /* 28 << 168 */ + { { 0xd0bdf16917216f79L,0x2a9d74a468bbe483L,0x6965b608e5c7b3b8L, + 0x83e8d0b290d8ddf1L,0xb27aa05509ba77b1L,0xbf4a09be67c32e3aL, + 0x5982ac195bf2c8eeL,0x764907324f70b752L,0x000000000000000aL }, + { 0x1088b1f878a204d4L,0x40a0293f4d4e8a06L,0x312854a3af8d6f9fL, + 0xb1df39186282a51eL,0x9de32af13769fc44L,0x045404379a6d0abbL, + 0xdadc6c9d13185e57L,0x1f2b8d38e979f37bL,0x0000000000000012L } }, + /* 29 << 168 */ + { { 0x5f8a8c0d3ddd0191L,0x1f266a867fae699cL,0xefbd18d2bbbb048eL, + 0x648e5f90d1ab43b6L,0xf81591adcf10f706L,0x362ddc168ed8f28fL, + 0x2108b2c3f6b398e1L,0x1269ce097783a2c8L,0x000000000000008bL }, + { 0xa6a691c37f465dfeL,0x8bfddfc027667ee8L,0x85db93c4e9bbb777L, + 0x77b3e07dc3aaecb0L,0x0bba38f41d6635b2L,0xb608d7f20156a865L, + 0x628c6f6e0f7e38f2L,0xddc70efd519f6429L,0x0000000000000069L } }, + /* 30 << 168 */ + { { 0x705bc9adde556ab7L,0xfd31b77a08c5f548L,0xe4d6128f47ccec97L, + 0xda6413aba2467e6cL,0xf551c43677e4ddf1L,0xd60ca0c72bc04c9aL, + 0x7ad228c8ff849e17L,0x59934f4e930f91a1L,0x000000000000011fL }, + { 0xa76a8707868154b8L,0x15eca497a7dde8cfL,0xc7c5191877ab98a0L, + 0x3ea558ae7fb66e87L,0xdd8e344c48afd9bcL,0x2f65b11c119fb92fL, + 0x9dff92f896a00df2L,0xe2fde0a4be17daf9L,0x0000000000000057L } }, + /* 31 << 168 */ + { { 0x65ad3804ea9ec2a2L,0x44155525f7c6d675L,0x7c1934b57d7cdf15L, + 0xa9ac66b468cc4856L,0x98b006b4ab305ce3L,0xc0bbc4242c87391fL, + 0x1c8056caa6fc5d89L,0x91c2a6af74ba9ca6L,0x000000000000007dL }, + { 0xa0b6f93e661a095dL,0x91a7b4d05deac6cdL,0x151ef226bb2331fcL, + 0x37ebf47e93095466L,0xab8aa36598606ec4L,0x27d7aaadfc03f039L, + 0x0c431597fdd46012L,0x0556f5285af06a03L,0x0000000000000118L } }, + /* 32 << 168 */ + { { 
0x7eff6eb56037ff54L,0x78cfc774963e7cb1L,0x48d139d16b3956a1L, + 0x3265461a619f159dL,0x80dcb967a3b1db5eL,0xdb164bf4f293c925L, + 0xb0a372de83b65b62L,0xda262fd344760c21L,0x0000000000000058L }, + { 0xa8f9432da6dfc562L,0x0c7b74ecdc54c213L,0x56b4b751df24c7e3L, + 0x5b32314bb42c77d2L,0xcf2312a419053195L,0x60bf22144f2c75ebL, + 0x0207884171877095L,0x8acd999da44f75faL,0x0000000000000152L } }, + /* 33 << 168 */ + { { 0x45a2719a8907ce54L,0x013d1b3aea166fa5L,0xed88621b21199f5cL, + 0x7700e61eede4254bL,0x5939852d884c08a4L,0x19734a9b2d1d89aaL, + 0xa246df9f0dfa3941L,0x9879316c979c73daL,0x0000000000000151L }, + { 0x8dbcc1180b113864L,0x7ee1a88ce82525b4L,0x0c43f86653bea16eL, + 0x37ff6f028824f34eL,0xfa8838b9b027581fL,0xed25a70c8647d511L, + 0xd63ae1953618e600L,0xcf0b98339b9a93d4L,0x000000000000003bL } }, + /* 34 << 168 */ + { { 0xd506324903eb9185L,0xf65164dd7c1fcccaL,0xb69558864fb70360L, + 0x0d697e8afac2988cL,0x9c4c06a537c97636L,0x635c88f58055600fL, + 0x4ea4c09a74703af8L,0x81ec33b9e7a75723L,0x0000000000000089L }, + { 0xda9a2dc047213f93L,0x6ed717efabaac078L,0x3076325bed22d7f4L, + 0x951dc05863bd3f8bL,0x77f1cf965b1c8d00L,0x9dea7ebab5941b35L, + 0x293095f4d4e61491L,0x0e7ead86eb9ae037L,0x00000000000000d2L } }, + /* 35 << 168 */ + { { 0x39f9f819f6c209f4L,0x089db5253c2a5cb7L,0x20c5ff24accedeb3L, + 0x82f84b46d2b5c3fdL,0x1a1ed43fb90a33ffL,0x436766d3e973e5afL, + 0x2294111437456b11L,0xe860807c70ae9ea9L,0x0000000000000059L }, + { 0x9009e20ab72f6cccL,0x741fefc4fe6e5cafL,0xf1a848dd63098fc9L, + 0xac752a505ede32ccL,0x9efccb9732ba5b6dL,0xb2856b15b94e8d20L, + 0x99e35bc499607f7dL,0x113f31762f82564aL,0x00000000000001d6L } }, + /* 36 << 168 */ + { { 0xea8b99529f5e6fbaL,0x667318ea35ac6c04L,0x7485f1c55ed348acL, + 0xee18ff33019501b6L,0x90107a5f3e904e7dL,0x2ec33497aed29e07L, + 0xafd289974c20de45L,0xc48cd0bacca26170L,0x0000000000000018L }, + { 0x9ba74136a948ebdbL,0x78ed508ac81acda0L,0x9facc76fef5fedcbL, + 0x36faeef8a3b6ddffL,0x5624949573b54036L,0x089e6fa3c9408662L, + 
0x1604ae228f9f0744L,0xc1297a7657db098eL,0x0000000000000065L } }, + /* 37 << 168 */ + { { 0x3992cb6807815aaeL,0x7195db4f1b803175L,0x97cd250b424fb1cdL, + 0x016049f38ff45927L,0xb64141ada1ad30f3L,0x8f16a5857dd25547L, + 0xaec66e6aea7f6a7fL,0x4dd29ee57846a86dL,0x00000000000000bfL }, + { 0x6e964890f5112f11L,0xb011a6269bb780f1L,0x6e448385bd208e60L, + 0x5210e0dd83eb0a37L,0x996090715d94a88dL,0xc8bfcac7273e631fL, + 0xfc414a18bb2f9b24L,0x975182160c054385L,0x0000000000000196L } }, + /* 38 << 168 */ + { { 0x9b304aea874e82c3L,0xffbfb0822b7d090aL,0xd9196a7d06ec403aL, + 0x9209f5cde36151faL,0x8935c8aae25e0c13L,0xdb1a4c7716593d65L, + 0xd1e963377d694686L,0x4356bae0e08dcb7eL,0x000000000000001bL }, + { 0xea20f2c0c14af785L,0x48d27833a4295f88L,0xf05d85ca6c7d3114L, + 0xe58db881730ea2d3L,0xdfbe2a686b20c2a2L,0x31768d41efcae9f4L, + 0x9cb4a4e393d8386bL,0x63ce4ef1ad75a399L,0x00000000000001f9L } }, + /* 39 << 168 */ + { { 0x5614856e14f693f9L,0xee48d0157ccc71a7L,0x943ba66129590c3dL, + 0x154b5cc174db3bfaL,0x3d1ee077d73d37dfL,0x05d5e0cea58a17d2L, + 0x6abb262dfdb1cf05L,0x858b3bec8b39082fL,0x000000000000013aL }, + { 0x394f12df6ac53fe7L,0x9d14a39e3a811443L,0x051a0ce3addbf1b9L, + 0x7e4fc131013bd7c8L,0x5795cfb876750d1bL,0x14f8ee5d3ab7d068L, + 0xcb83f31d2f526886L,0xe6f51eea0b3f327eL,0x0000000000000032L } }, + /* 40 << 168 */ + { { 0xe17cf4f985e05448L,0x5e29ec55936b85d7L,0x5f4969344230c449L, + 0x6fc1d8a08206a6f0L,0x399712364d5d2de2L,0x42689259be3a7848L, + 0x77279b58c493be56L,0x43cbcf5c7d422c0cL,0x0000000000000004L }, + { 0x6b1df3f0438b9048L,0x293cee3bd53799a6L,0x44b4dee8945bae0cL, + 0xdf3236a21fc75aedL,0xead6de57fed25024L,0x68541a4ce87d013fL, + 0x72be3aeb4a734d6eL,0x5e84735bc7371e3cL,0x00000000000000f3L } }, + /* 41 << 168 */ + { { 0xd3aca247bfebd246L,0x7d956df376ca7483L,0xb7b30bf20f3a19b4L, + 0xb062c4b5697de983L,0x31cdaf2d79521344L,0xb8daa6236ced6cf0L, + 0x73830cc4156e1821L,0x6364e445b8644ed0L,0x0000000000000152L }, + { 0xb26d84846704d687L,0x42ebf82d2c2b018aL,0x68b526dcdc65295eL, + 
0x145615d496530ca1L,0x3ece93877955b80aL,0x177d234769f38006L, + 0x19f33f4c84e14bbaL,0x01c98346e0704d10L,0x0000000000000082L } }, + /* 42 << 168 */ + { { 0x735eb443357ba15dL,0xdfa17a9a54409ca0L,0x6ef5378cabc39f28L, + 0x7936cd2a1ccd0467L,0xd85db56a5c9af9e0L,0xfcf9e3d12316133cL, + 0x3904ded86c2487c6L,0xbff674d2b8cc515cL,0x00000000000001ceL }, + { 0xbb912bd83ddb6e4dL,0x9d82f6936116dc27L,0x3b72986d952ee214L, + 0xc932e3fd03c36dd4L,0xd787bc68500decb3L,0x39092ea64b6dc85fL, + 0xb3cb876162e68168L,0xc4df21b060e2d9eaL,0x0000000000000029L } }, + /* 43 << 168 */ + { { 0x77cf38dd8b9bbedeL,0x7b0a344f7b110970L,0x18827ec691d85793L, + 0xea6a1f394d11d455L,0x2fcabbd4a81e87bbL,0xeae28f10dc775f92L, + 0xa3df2a95e8acef4dL,0xc5e112384901a4f9L,0x00000000000000c1L }, + { 0xda7885dbc589a008L,0x1054bc12edbebdf5L,0x24ffbe9e60727f53L, + 0x89be3122c2e1b79eL,0x28e91a4be68dc55cL,0x2232a01160a852c4L, + 0x578de9e994036a7fL,0x6e5b3ef4936e8e5aL,0x00000000000000e7L } }, + /* 44 << 168 */ + { { 0x8a7b02d872e5f18bL,0x29c3db48b8dd1860L,0xf3b0f47fa5b307f2L, + 0x6bc9ea8cbaf7b51bL,0x6651b8dfc59c6616L,0x11df28a9da6f33adL, + 0xc8879af48b93d7fcL,0xc119ade4e9b03431L,0x00000000000000c5L }, + { 0x6bafe0406631c4f0L,0x4ecf64f6c34562b8L,0x1ceb750c3c4c04caL, + 0xadff4c89b93e2106L,0x9e281057459e3e63L,0xbf3b3cdd4b54f2c3L, + 0x7547b18beb880314L,0x34d7af52e0663b30L,0x000000000000017aL } }, + /* 45 << 168 */ + { { 0xcf9abd89b41c4c8fL,0x5232bc8561dc8213L,0x02da5b3b1cbcb9e4L, + 0x43ba9d21f98b522fL,0xa061c2b6534bf7adL,0x5f375638a48855a3L, + 0xa0450b26113089e3L,0x9ce6a8484ff249d5L,0x00000000000000b8L }, + { 0x9fbdac93bedac56bL,0x4f95bd5bcdf67eb4L,0x657253778d668ae4L, + 0x762845ea23c3aa43L,0x3af6d2c21ff463b5L,0x6f2aeb6c0ec166d2L, + 0x134f65a49be5d26dL,0xeb728f6afabccaddL,0x00000000000001a5L } }, + /* 46 << 168 */ + { { 0x4a0927077c43553cL,0xabd2598f43690980L,0xab06313fa8572f82L, + 0x56e8f3ac4cb0844fL,0x0bd7554aa69f60b1L,0xadaa48b599f8ff55L, + 0xc7f85e160e0ed3d2L,0x906dfd3f77e510f2L,0x0000000000000064L }, + { 
0x1629167574a41facL,0xc679c38178d601bfL,0xee9838c2bd9fa391L, + 0xe1fabfc20c7f88abL,0x7759b0943183eaaaL,0x62bdc1f4cf96a23bL, + 0xaf85e735b9a8dd9bL,0x1be45d2bfe461052L,0x000000000000017fL } }, + /* 47 << 168 */ + { { 0x8add55157869f938L,0x8328e90a3567ecdbL,0x265f489d0ab7633aL, + 0xda46aaa44a9a5a8dL,0x725f698404b3b4a5L,0xd5cb793e1d7e98acL, + 0x77a2e141b724608bL,0x711415c04b81416cL,0x0000000000000173L }, + { 0x5d2381001f95cc6aL,0x8eeb20613f6f9261L,0xa9f048ce38ff1f73L, + 0x39ff65199978b00cL,0x1e81ccd1a7a52e68L,0xfd1103a50c01e3a2L, + 0xd533b3a501de60d6L,0x7746c9c4374412d8L,0x000000000000013fL } }, + /* 48 << 168 */ + { { 0xfbdd71d76da23955L,0x58aed61e4e9dca11L,0xc8f75ddcc41906acL, + 0x37020b118ae30a21L,0xf91295233fe85c4fL,0x201400fdbe4b3724L, + 0x9739962d3a2d4446L,0x917e35cda0f4b655L,0x000000000000018fL }, + { 0x99496b29ae9e0ffdL,0x98a9369f6dc1cb95L,0x5db5ca11cf116e11L, + 0x3df10d8508fb818cL,0xddc80cc991ddcd37L,0x8e8647da496af53bL, + 0xe244c394573a894aL,0x056a45e741b4de2dL,0x00000000000001a1L } }, + /* 49 << 168 */ + { { 0x84c2553f194efd8eL,0x15247b56051d3139L,0xc1e3437f7cdf7295L, + 0x8c154778be6ef662L,0x809b77523cdb4e9aL,0x573cbaf2cf155669L, + 0x083e1d6cc2fec451L,0x879d2b2408e53cb4L,0x00000000000001f3L }, + { 0x6ed740af3991a529L,0xcf5df2320ee78333L,0xd597b1865f2a7fe1L, + 0x169b4d47e1da55b6L,0x4b61da2128866db1L,0xbedf5c1851a98c39L, + 0xbcfa723bd4c47affL,0x293bc51c24bc0569L,0x0000000000000193L } }, + /* 50 << 168 */ + { { 0x2a3902169926b505L,0x8d78ef77e00be256L,0x1b4243060813f0f7L, + 0x7bbe8aea99e1bfc2L,0xee0d8bfb2c16d048L,0x0f52f62c16927281L, + 0xfa1eed898b0aab8eL,0xc960284b03aa8ec9L,0x00000000000001f0L }, + { 0xc0cea54084e2c7d4L,0x2c967298631cd12fL,0x9d363cee377d24ddL, + 0x8044dcd6a5143a2eL,0x827cd09de5438b71L,0xfa445ab5e086affdL, + 0x6bde5b7ea21ab95cL,0x473e55f7e1344c94L,0x000000000000018dL } }, + /* 51 << 168 */ + { { 0x1806b23055de5e11L,0x66181382a8b7907bL,0x9a53b2cd761b28ffL, + 0x99b04005e0d8111fL,0x2329d028af15774dL,0x0c69a9aeed7ab9b4L, + 
0x679b29e25ba9fb26L,0xc4da383c1109c306L,0x0000000000000096L }, + { 0x0c596d701ec7b181L,0x2320a21d38dd635aL,0x76fb5cbbbef074c3L, + 0x659c69f34c245d5aL,0xd4cb6c4f73910addL,0x7a9b35b02bcb561aL, + 0xe6a998c30b42e5baL,0x1d13f70aedffeffeL,0x0000000000000050L } }, + /* 52 << 168 */ + { { 0xb063202f31fe14fdL,0xbb003d9c8bbc840cL,0xf0e2d2cc3d91dd50L, + 0x09db1c24722cafcdL,0xdb3b82e189c2e546L,0xe719cfa86f470d17L, + 0x909871aeb4c59142L,0xbce979234a64f3fcL,0x0000000000000112L }, + { 0x95b635710d88533eL,0x778f74c30006844eL,0x2821500ad11a08e6L, + 0xcb91cf3a0d64bdc9L,0xe1ae9e56e2129232L,0x7f7c927ab11ff55aL, + 0x8db638e4416ac07aL,0x3a38dd3d1d2c2dc4L,0x000000000000002bL } }, + /* 53 << 168 */ + { { 0x12c5026f19d3a430L,0xebd36b518531be4aL,0x2d0eedc93dca12efL, + 0x441f0dec6cc652f4L,0x8b0fd67bf1272c4aL,0xd9b90373ecb89af5L, + 0x75514cf9029341daL,0x438f0a8da9e45ec9L,0x00000000000001d0L }, + { 0x79fa46f4b976862eL,0xbd0b6a77cc80c75aL,0xf37779ebb69d3f59L, + 0x77b1d47d8cfc2042L,0xab8c9d21eb6f9983L,0x02620b797cff3615L, + 0x4000cec1d74dafd6L,0xd5cc4e4a5cc17f2fL,0x000000000000000aL } }, + /* 54 << 168 */ + { { 0xde6d2f0250b19dedL,0x17e09363c883d0e3L,0x583f1980473a0d98L, + 0x669ec3ddc3666af9L,0xfc4c58c495d4ca2fL,0xcbba5d0a4125183cL, + 0x878a81f19cd646daL,0xc3fb9edf12f64d9aL,0x00000000000001eaL }, + { 0xdb504ffc97bd5280L,0x05aedba0a49b016aL,0xfdada5630d234e6eL, + 0xf7d210f9201efad7L,0x41e2fadcbad7f0c4L,0x8746684bf35f6af5L, + 0xebc37b5a8f816409L,0x5fe729b874ccb6cbL,0x00000000000000feL } }, + /* 55 << 168 */ + { { 0xfdc15977e34a2d53L,0x03a3ecd71eef5a01L,0x4a024948549c37d6L, + 0xf9b6ebba3572805dL,0x8468d745a3904985L,0xa446d8636fe46f66L, + 0x6425a52a6242e469L,0x0d8ea4349722c51cL,0x00000000000001dbL }, + { 0x11603f9e75d953e9L,0x424af418470a0f3dL,0x62dec6c2e5d8071dL, + 0xd26d146f32dad225L,0x99f756df8f286bb3L,0x4c0f5d7c1dd19cbaL, + 0xdb43a2d1dbe50905L,0xce07db2555d15d34L,0x0000000000000145L } }, + /* 56 << 168 */ + { { 0x060ad0b2473d5f1aL,0xba73eccaf0d23a2aL,0x38ff0b96b44913b1L, + 
0x551791f011ef6a24L,0x728c5333b5d8ff2aL,0xa22a2fb1f6c23b09L, + 0xfbf8a99a6442b975L,0x2433fcf020e16f2cL,0x00000000000000d2L }, + { 0xd98e567c33ad7094L,0x386b2d8e3689fe95L,0x842afb8d5d2e0b51L, + 0xb04c9c8d2f412733L,0x09acadadc3a3812bL,0x6ca2083c78c320ceL, + 0xdff84922933deb8fL,0x729eef0f270fa28cL,0x00000000000001ecL } }, + /* 57 << 168 */ + { { 0x0e70ba30d1457b21L,0x06d7b4372aaf52a4L,0xb5fd37195a0d8d07L, + 0x57f2d337eaca1022L,0x2fb19653b038b66dL,0xb7aab5519f1835a2L, + 0x183eb2b9e5565ff2L,0xbe91ace36018b442L,0x0000000000000052L }, + { 0xcfbd5ca05082d526L,0x4f358cf275761316L,0xe67eeb30dbf98fffL, + 0xf3719573a45ffb48L,0x1fe011d829cc0c3fL,0x640743f0507ad2f6L, + 0xe2e66fb4d225c2fdL,0x37933dcbf3f6de1aL,0x00000000000000beL } }, + /* 58 << 168 */ + { { 0x56d47bc171146be1L,0xeb3ec09f4e911458L,0xa9dff639184c52e8L, + 0xe68567f4b5d0a63eL,0x1c2ee8b7126e32dfL,0x731d04202898412eL, + 0xfc01fc84e0b2f347L,0x0599ae70d031d939L,0x0000000000000011L }, + { 0xb90a4fc12e48754aL,0x8c4c8dcccfc67435L,0x8986e7b11cd90cf7L, + 0x080af15a92813705L,0xf31e604f94c68b5fL,0x178bab1966ad70b8L, + 0x017bfa632a529227L,0x1e4d20d3e0e9d229L,0x00000000000001bfL } }, + /* 59 << 168 */ + { { 0xd7680aaa26e31466L,0x90f7d3a84ed9df83L,0x7ce63b23f4cc85fbL, + 0xc97e8e4d3d2b2ee6L,0x8e1b908f2f0ca474L,0x6d334abfe6f70df8L, + 0xc8ba8486ab24e38dL,0x21f8763be7ca8513L,0x000000000000018cL }, + { 0x238477ee82cf6a21L,0xbd6b5c610164a8f3L,0x4a931e61674dad92L, + 0x48f0a0a926e566deL,0x0e8886143f5f44c3L,0x42f5473b4302e1beL, + 0x322092857a5fca8fL,0x4f77cc8c4b4b7cb1L,0x00000000000000b2L } }, + /* 60 << 168 */ + { { 0xa1dc7875d24cc937L,0x8bcc5f83efc3446bL,0x26da651ee8126a41L, + 0xe784188a356f436fL,0xa5765424d774138aL,0xfc685d4cc8a96662L, + 0x036ac0f080a3fd62L,0xec41502a6dd04e6dL,0x0000000000000006L }, + { 0x6982079813ec2f1eL,0x52ceaa44e8f956dcL,0x9f8211bc6c221df1L, + 0x4cedf8cfdcd0689bL,0x915c690a05fbe468L,0xb745f676942032b6L, + 0x531dfaeb81ab170bL,0x63163f66951d11b4L,0x000000000000016cL } }, + /* 61 << 168 */ + { { 
0x59a83a30ebfe0068L,0x532358c703d578c0L,0x20e092c3d7cb9ad8L, + 0xea469bf62dec6c49L,0xcecff1a9d541249dL,0xe8346d1ff9970638L, + 0x90c33c61a2e00cb4L,0xaa2babde0c03ebbaL,0x000000000000015eL }, + { 0x6905928fd6423e1eL,0x68fd55acb4419fbfL,0x9475915a53727da0L, + 0x6d4d6b768dd0d218L,0xeca16dd535e418acL,0x67962c9e28ea6a4eL, + 0x7e5a60333071a0a4L,0xcaa404212bdbb2f3L,0x00000000000001cfL } }, + /* 62 << 168 */ + { { 0x24a1ffcd85febcc0L,0x4e2d4cce5fd15677L,0xa52aaecdf168ed5cL, + 0x7fc6ca94de1944d8L,0x7b058166dc2e18f1L,0x6ebb51366c4c7ac7L, + 0xa11f966fd06c33e4L,0xcc445c9f1e602e6aL,0x00000000000001e8L }, + { 0xd8622d0945fc989dL,0x48b4f9dfc820794dL,0xb3a22f12ceff02c8L, + 0x1fb5feb7350fc5d3L,0x6590d85b716bd79dL,0x923b166fb46d3df3L, + 0xae7d111bfb66e05dL,0xf869c3913e382cb3L,0x0000000000000035L } }, + /* 63 << 168 */ + { { 0x91592d60beca406fL,0x2d8ce8f2838ff992L,0x194caec60bba2292L, + 0x38833614c8d8769dL,0x04f36284a443febaL,0x3352e7afed8c2369L, + 0x431ee9dbda6ad421L,0x2ffd00f078f1f563L,0x00000000000000d8L }, + { 0x47fcd60325da5fc7L,0x5b55a33e45b44e69L,0xa8ea02a5be9c36d7L, + 0x510aa45fcd6fb386L,0x1378ce53b8adeea6L,0xcc015dd0847ef47cL, + 0x77f3813358f44061L,0xca4610c8de8eac82L,0x00000000000001a1L } }, + /* 64 << 168 */ + { { 0x0cfa67a640b0a066L,0xe97c179e8057a710L,0xb69d285227cce600L, + 0xf7dde8c5419687d1L,0xdbcb15185d5b039eL,0x3e47772fd99abbe0L, + 0x71c9c23ab2a9c6a0L,0x5af10b9b94496951L,0x0000000000000152L }, + { 0x0f96137f2c91f83aL,0x1f743fa449f15b33L,0xd43b3ec0ccb4c1b1L, + 0x895a3be3cfeea9b0L,0x27c3b4959eefee4dL,0xf1b4212712e344eeL, + 0x023afa76ff929c1eL,0x3078097686d07325L,0x0000000000000166L } }, + /* 0 << 175 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 175 */ + { { 0x48a2fc84658b96b8L,0x039af24c465ebf34L,0xa7a9b0144f404eb3L, + 0x80176ef0763ab5c4L,0xd4984b136b5617adL,0xa6a39ed89195ee81L, + 0x1ce2974905c2eff8L,0x78aeff6c1281fa56L,0x0000000000000199L }, + { 
0x40cf462310374f0aL,0x89212e99958219f6L,0xe7599b2027b97cbaL, + 0x1ae1cb659e551710L,0x62881658502e6670L,0x8f78ccf876e23a8cL, + 0x78e165ad732cc46eL,0xd6b5ab1cf7670684L,0x000000000000014aL } }, + /* 2 << 175 */ + { { 0xb342d85dda44ff51L,0x54ba0522d08596ffL,0x3da8e8ceb0ac99c2L, + 0xcebe6a9691f37dcbL,0x8ebad3d9c24d7d1cL,0xf11a4319c75945efL, + 0xd4dfe534ed1ac001L,0x745e27721a521e4dL,0x00000000000000c4L }, + { 0x745e5b41df65e0bbL,0xf8887b533b84ed58L,0x1a54bc479c0684eaL, + 0xfd4f1d36ce9c7672L,0x490bc265ce5730fcL,0x2fa1ad6d8430adb9L, + 0x0f117bdcb7db19b4L,0xbd339a789c246e44L,0x000000000000005bL } }, + /* 3 << 175 */ + { { 0xbfc7e834db350c46L,0x3c5690ac4efc9f55L,0x2fa3c78e3dac8271L, + 0x0a74acbe0c8572aeL,0x8923620856c68aacL,0x8fca20bdc305b75fL, + 0xb42c64d995be9334L,0xa738887613fa1dedL,0x00000000000000cfL }, + { 0x03c242e114b5ce0aL,0x89ee94768381d1adL,0xdd3c7dcb2d1ceb97L, + 0x64f13ad71ee7368fL,0xa014619a5ca7fdc4L,0x5a62e9bd3cf1e7aaL, + 0xb9e9288a45541059L,0x3008f4f6055be852L,0x0000000000000019L } }, + /* 4 << 175 */ + { { 0x5f8fd91ae19a881cL,0x355105b08cf8a2c3L,0x442b97782810e499L, + 0x930d835ada7972c5L,0xd2266522555ef4d8L,0x943d9adb64e05b06L, + 0x480bd27f42fc02faL,0x9265b289699e5b7eL,0x000000000000008dL }, + { 0xb60df9c855c7cd14L,0x62131e73270e2672L,0xe6983794672942b7L, + 0xbf8c55d1817db72eL,0xc3785f3608a5963dL,0x4a6646098f39218cL, + 0x95c868541b5b49ccL,0x045a0243178df85cL,0x0000000000000019L } }, + /* 5 << 175 */ + { { 0x52a05628990339a3L,0x5ef0637a72ee1accL,0x88b9dfca0eb46118L, + 0xc529dec836cbba9dL,0x0a9a77cd549d674aL,0xba2d3b7b6e878eebL, + 0x3585c2a2ed48127fL,0x79c54417a4fc7430L,0x0000000000000130L }, + { 0x70f7b87e0affc41dL,0xf0e0487fd5b72949L,0xd9b820bb6ad325efL, + 0xd3d96caf9a05b4d4L,0xb4785d8a951e1dbaL,0xcb985c219eb25fc3L, + 0xb11c96769313e51fL,0xd6e66bc87fc01e6fL,0x0000000000000100L } }, + /* 6 << 175 */ + { { 0xccc1bb64b666ff60L,0xe786f358ba04b5c3L,0xbcd5e56d384c0ad4L, + 0x5d22436fd0372bbdL,0xfeea9ed9a061a543L,0x6f81f168fa3d34beL, + 
0xf52059d24ba47193L,0x5ff055894ada65adL,0x000000000000006fL }, + { 0x346aded56d4cabf2L,0x65e4b11f4f1a8ee1L,0x1521718b07eba73aL, + 0x8a2f32bad16d7d56L,0x01d49595d3acbb17L,0x67625faad3f62095L, + 0x7c3cbecf613b6059L,0x3908f6506118c496L,0x000000000000016bL } }, + /* 7 << 175 */ + { { 0x98a6459bb3ccbfebL,0xf71c7a35cafe23f3L,0x7bb7dc9d87162f32L, + 0x547b6132e519e8a5L,0x3663ff8b259f407bL,0x5bf1ec1a75b127d3L, + 0x00cb4c43429402e3L,0x129914ec09801ce9L,0x000000000000017eL }, + { 0x4ce81e8a0e74a262L,0xd40155c7834f3480L,0x17b56afeb5575007L, + 0xd977fa3a692ceb5cL,0x6efadcc239b47d03L,0xdcbe2b844d776ac4L, + 0xfdc2c5f16ba4ae9dL,0xada2148824840515L,0x00000000000000f1L } }, + /* 8 << 175 */ + { { 0x06494428cab2443dL,0xc52ad2d86afd7840L,0x8ef6e0a6fce7b5f7L, + 0x6151d2b3a82ae7e3L,0x08abb38a14e3b825L,0x9782e6f9366cada3L, + 0x9f0127b25076c2ffL,0x2f29b7282d812ed5L,0x000000000000004fL }, + { 0x7142e7e004e049f5L,0x9338a870d7f99e76L,0xf7e4837b72a90177L, + 0xa7bff79911aacd17L,0x5442d6f2d1d10b06L,0x986e7bc45ecc3f54L, + 0x02cf381831286fe1L,0x5cdc56d1cd383eacL,0x000000000000002dL } }, + /* 9 << 175 */ + { { 0x3383252d2cc34f63L,0xffaab47215c2cf2eL,0x04fb57cdf89242edL, + 0x6b87076af9b1e42fL,0x9ee7a62c24682e88L,0x23d9fb1a99ffae3bL, + 0x24a055cdce125019L,0x33ba5705ebdee308L,0x000000000000019dL }, + { 0x911f2dd9f00fb803L,0xbc3c90d88ccecdf6L,0xac2a5eead4d059dbL, + 0xde8cfc4cc3dc4a6fL,0xf51a932dc565a8b6L,0xe0ac04382b13a372L, + 0x6f9dc1bf079c4979L,0x386c19b4d0c32e09L,0x0000000000000068L } }, + /* 10 << 175 */ + { { 0x7303ec69c80291f9L,0x0f2c64390c9ff7f4L,0x96c08ea0f3ada285L, + 0x3cb50f3ddeae887cL,0xaba571fafefe26dcL,0x4e4dba336c6f1071L, + 0xf5796982f02bda38L,0xa59c60b140485de2L,0x00000000000000a1L }, + { 0x28d7c281cab044f7L,0xa5eb04fe515d247aL,0x5b42d919ba014b10L, + 0xb150b4fbde221cc1L,0xb5cb8c3a22a5a502L,0x62c0da97cc37e34aL, + 0x4b9bfb1f9e827814L,0x96666256c97679acL,0x00000000000000e6L } }, + /* 11 << 175 */ + { { 0x0df173a23adf88d8L,0x519aec4c0cd5f80fL,0x83c410d4af5f84deL, + 
0x9cc04e5db1f7f73aL,0x1aec97b529db8865L,0x12748d28220ee9e1L, + 0x9fb6df0a4d6669a8L,0x02305a83b3714010L,0x00000000000000f4L }, + { 0xbd96e2ea2c8a86caL,0xc5436ccd0f9a14c0L,0xf428f7fcf0d34672L, + 0x952b703956697a68L,0xdffdd1770bbf56c6L,0x8b4ea6932b65057fL, + 0xdcef28d3630e47e4L,0xef879aca74cb48baL,0x00000000000000b6L } }, + /* 12 << 175 */ + { { 0x415fab9a8b894ac8L,0xb189914d64822449L,0x0abfdbf50f4b9444L, + 0xa333e872eb96cf1fL,0x91d3473db8c5318aL,0xbb5740e624d09cb4L, + 0xebe6ef7f5b5d73eaL,0xbe33338809f67393L,0x00000000000000fcL }, + { 0x9970d38e1ffa22b4L,0x00d1fb74003e55f9L,0x0d1ef824820f7bfdL, + 0x4352125173fc040dL,0x295f8a66e51b5870L,0xceacb67d3dbfe159L, + 0x0ee77417a218d2feL,0x25a0e0a9f3b7d44aL,0x00000000000000f8L } }, + /* 13 << 175 */ + { { 0x3e2e135a15a102e1L,0xa4b33d115fb3faf6L,0x12da9549693b3a58L, + 0x32c39adbee7132b1L,0x87da9a38fea6b800L,0xd32b22eb92074fa9L, + 0x3f8d05570673a898L,0xfededb49a5044d27L,0x00000000000001fdL }, + { 0x89658bcc94e1a724L,0x5ad21f6e48a02c0cL,0x4d17fd1d0c70669fL, + 0xdef09b95709fc4a4L,0xbbf206eb610ed18eL,0x4f454d2e2282af7aL, + 0x5c8334bb75660c93L,0x0037d62bd4064169L,0x000000000000006fL } }, + /* 14 << 175 */ + { { 0xc1f7698483ef0448L,0xc4d18389fb276974L,0xfef2c810e52836c2L, + 0x3687104bc09e4020L,0xec3248ef0cc4bf06L,0x38daa51bc401078bL, + 0x84bd9f6157bc72f2L,0x68b23e45003676ebL,0x000000000000009dL }, + { 0x22526e98d78b9d6bL,0xc4ccdc71c4f28366L,0x7dd12d941c646b9dL, + 0x0cc00136931bd51bL,0xfebf1e1e512ec8a6L,0x6bc281f6bdc888b1L, + 0x0bc271f3f6ab354fL,0x27d1ee7630c4e559L,0x0000000000000038L } }, + /* 15 << 175 */ + { { 0x98275c41fd5ea81aL,0x74f37478b8ebd833L,0xb91006f1e7e3e071L, + 0x6c900f222a0389e5L,0x7fecd4c8919e031fL,0x36625840d6596951L, + 0x3f43ae32a685759dL,0xfb2b004d44199a43L,0x00000000000001faL }, + { 0x92383e835e0731d2L,0x387ddec2cdb84d5bL,0xd4dbfa0ab10f9abbL, + 0x50172a7a4e02cd21L,0x6b1b8c40d0a95f48L,0xc47d694d5c2cd4f6L, + 0x4b0402695e712406L,0x4fb68605536a77c8L,0x0000000000000041L } }, + /* 16 << 175 */ + { { 
0x12427d324acccf77L,0x3194664ffb77f869L,0x07e01cac61a58e24L, + 0xc506637b91bf22b5L,0xce7d0251dd2d701aL,0xf9a662d4befc0d7fL, + 0xa54117ba2c6c9454L,0x31864e416978e919L,0x000000000000007cL }, + { 0xbbe419029d971044L,0x68742001f8f24116L,0x6d0065933e4a40ecL, + 0x9fc1b8e282771429L,0x59b2f9342e7968caL,0x4817f90b470dd353L, + 0x3d32e10f7e2c1429L,0x43e9474e00206509L,0x0000000000000076L } }, + /* 17 << 175 */ + { { 0x30a937cc52c7da7aL,0xe5ea72ba19c1f2b9L,0x6bc71d9c417aa5aaL, + 0x10aab323644f9edbL,0xcb37ff967bfdcf69L,0xb0a66538b7e9b0cdL, + 0x40d040ead2138d03L,0x8aec105b0bb5c9fdL,0x0000000000000074L }, + { 0x19ed6f4d481e74caL,0xc36fb9f46f0a3b87L,0xb010d6af88bae63fL, + 0xc7eb8196dc29e08bL,0x4a7e448713523497L,0x9c01c48c96c052d3L, + 0x71342956f6210924L,0x8034186347f93f2aL,0x0000000000000099L } }, + /* 18 << 175 */ + { { 0x2ac928fa4ffa68bfL,0xafb482236b647009L,0xa7b54f134cfae9cdL, + 0x8a271e4bbb3f772cL,0x78faaf7ebf953d68L,0xb5f12134519aa8e4L, + 0xef343445359ae969L,0x86e25aad76816a06L,0x0000000000000110L }, + { 0xa0b1814536a88707L,0xdd48d1f21f9a1284L,0x48670dad845e01beL, + 0xc13d5aa9fc7fe6a9L,0x611969b964757542L,0xa4529c72689bb1c7L, + 0xd7c3f96a06f31f72L,0x8b09a45a5418d15eL,0x0000000000000121L } }, + /* 19 << 175 */ + { { 0x0b8c943bc8c379e3L,0x00e426a148453fc4L,0x15fdfec387f6176dL, + 0xd298cda5d2efc806L,0x8be18a1c7d81d810L,0x45580c064b172b23L, + 0xac5fa9093b7607d4L,0xcac47041e73d7361L,0x000000000000001cL }, + { 0xb2b2c5bc0eed9820L,0x2cc057538ff7e335L,0x35caa1845f2063a8L, + 0x342669b5a43751d7L,0x222d457413260988L,0x2bd0633f296a6b04L, + 0x3c803e22f5904b86L,0x1fd07d57a93673c9L,0x000000000000019aL } }, + /* 20 << 175 */ + { { 0x58170eb934fcd20dL,0x8205ac09c669b90bL,0x2dd08c383d1c1595L, + 0xea1217ac91da182aL,0x3c9275359d2cff8aL,0xe8223f7a2a4a396aL, + 0xd206d341db22c77fL,0x6e03fe3227864348L,0x0000000000000168L }, + { 0x3a717c4665d9bb06L,0x234158eb1431d7afL,0x389290045a27a3b0L, + 0xd745267181008744L,0x7961f9c3055610e7L,0xe260f8a1936a3625L, + 
0xc05dfba284b88f57L,0x930747e71f67fd22L,0x000000000000002fL } }, + /* 21 << 175 */ + { { 0xc7079fd7a4e9afe7L,0x43bbec79f95430a2L,0xafb58ef46bf066bcL, + 0x074e17d918835bd8L,0x9fa64f88ec35838fL,0x627686e0a6a44e92L, + 0xb2fa7adbf0040657L,0x064a31740debb6c9L,0x00000000000001a8L }, + { 0x37d6f29579c093ddL,0xa72f5b6a3288b841L,0x46c586707cfe2a15L, + 0x221b3107d1f7e80fL,0x0e332aa17826dad0L,0x38c9217328c7b27fL, + 0x522ac38d4d0aec11L,0xdc271d8fcef88378L,0x00000000000000b2L } }, + /* 22 << 175 */ + { { 0x886c7f2258ef1600L,0xd581f2bc4eb5bba0L,0xd6c7b858e64666b2L, + 0x114250884e813bb2L,0x428e86f000c98e08L,0x3ac9b40376eb5c59L, + 0x904f313961bd2db8L,0xa3144a05c71cfa5dL,0x000000000000002aL }, + { 0x002ee77cb69fa450L,0xa1f5050c87240a19L,0xd7df65abbe333836L, + 0xb64362dcb8cd4a96L,0x058d9bc3d814096bL,0xad04c7e39687901cL, + 0x67aef9e2f5527030L,0x20dae8096f375a8bL,0x00000000000001ddL } }, + /* 23 << 175 */ + { { 0x6cdcb4f4d7358b2aL,0x721a661239de828fL,0x90fe45b87849fa10L, + 0xcbf025e4f5f3dd29L,0xafc867ddd5e38af7L,0x445f0caec89ee1fcL, + 0xa1bb95133a94995aL,0x9e2677e8a7fc2cc0L,0x000000000000013bL }, + { 0xfaac6a7e45067fe6L,0xf6e4b6c3db3c4c8aL,0x9505248205472bc2L, + 0x99a92d49b96169dfL,0xe9f7dcdc77a9943fL,0x5962e40811c6f013L, + 0x9f2209abb25ab9f6L,0x9a3c6ccf1c4e31c0L,0x00000000000000abL } }, + /* 24 << 175 */ + { { 0x2124d45c3f9900d0L,0x2452fe1db6a32111L,0xf12b476586f1f700L, + 0xc04e44ec977316f6L,0x582ca8509705fd58L,0x51ef29493167cf6aL, + 0xca75486404f204c8L,0x61dec7f3a46fd945L,0x00000000000000b9L }, + { 0x78114ed6af6e04d1L,0xe1ec56f601a4b6d2L,0xc26f5416ba41c631L, + 0x76b8204b7d9b9f82L,0x665fe0fec0565f61L,0x7ccd8aa573c27925L, + 0xcd6756d82f58e901L,0x6e932418e8f75a4fL,0x0000000000000019L } }, + /* 25 << 175 */ + { { 0xf40476e4bcea293fL,0x7b36fc1824605173L,0xddc9d807136fecebL, + 0x07e3f62b80bde5c8L,0xff1a3e28f192162cL,0x6e38386668f587e8L, + 0x7f14a26b473ca679L,0x8370dc18bbb2a08bL,0x00000000000001abL }, + { 0xc10876849fae944bL,0x8b7105dd319085b9L,0xa81fceb47e689653L, + 
0x9a69e78c74054550L,0xc13a53b0d742d36cL,0x57e2bd060cfde494L, + 0x2d7eb0afe06377e1L,0x21f439a8df91f422L,0x000000000000001eL } }, + /* 26 << 175 */ + { { 0x11113aff28dbbef1L,0xac6ac5329515632dL,0x1e88244e42a245a7L, + 0x30f28f4a9b0e20e2L,0x8bc178722b15e0ecL,0xc15e2549ba5a885aL, + 0xb42f06201685ce7cL,0xd80f13b9df452e98L,0x000000000000008bL }, + { 0x39df7724f9a80bbaL,0xc734b0bb28dcf06dL,0x6e09c92bb43a5cd4L, + 0x175127da8c0ee83eL,0x50bde213f88485b4L,0xd9f6c90124a0ef83L, + 0x66ce46bfc205d20eL,0xd6f1c04fde4968adL,0x00000000000001a2L } }, + /* 27 << 175 */ + { { 0x0b0c0c46b26e8efdL,0x9afae2cbef3aeaf3L,0xce90d21ea18d8d77L, + 0xd951a21fb7ae720cL,0x33317bc9a460d39cL,0xe5be6b979a5930baL, + 0x9ea57837ae7f0e7bL,0xeec5b7efc747589eL,0x000000000000007fL }, + { 0x454e0e744ac962c6L,0x3b802e2b396bb8c0L,0xd3aa625a4cd0c242L, + 0x44ea67800cf000fcL,0xce7a84112be0e87aL,0xa9448c66754f626bL, + 0x62c789ed9e6d6832L,0x0f5bd24ad7463e7dL,0x0000000000000096L } }, + /* 28 << 175 */ + { { 0x920e0cf981414e26L,0xb45cf442498a580dL,0xd424e9e24cbbfa67L, + 0xe64390071decf2e2L,0xc7fae004d9051a61L,0x9fd02f2898e66be9L, + 0xcc4afb0b419b7e92L,0xfd421f03ef8af4b3L,0x0000000000000126L }, + { 0x7b45c797e0e5eaccL,0x04a9b25f202041aeL,0x27888ac85849c309L, + 0x9325ce353f58628dL,0x90e98c4a25ef6fdbL,0xf4ff673cfd3dfdf2L, + 0xe56aad1a32ccfb2fL,0xa6c6d2caab9d6f10L,0x000000000000011bL } }, + /* 29 << 175 */ + { { 0x4a4d81339e5e7cc1L,0x80abf8ea31cb6f3cL,0x098274246ffe6f1cL, + 0xa164acfc5b814af3L,0x5141097ba9b76b27L,0x1693d5fc6e26a0a5L, + 0x70d5e6e75223f787L,0x417a7ddfd3686b7dL,0x0000000000000082L }, + { 0x9315994a8004c24aL,0x1bb50e549423011aL,0x0c8b3470ead2f81cL, + 0x9afa2a0e52c11f14L,0xc7094b863b019170L,0xc3405dacd8518bb6L, + 0x3df2a5c805b718b3L,0xe0dbf810b65b9d21L,0x0000000000000038L } }, + /* 30 << 175 */ + { { 0x13f45d45ed942a64L,0xfcafee83705cbec8L,0xbc9a11d052186f58L, + 0xb5ec924941ee3189L,0x71d770cbc95c6c4fL,0x7f348c27d610f241L, + 0x9116747564ab476fL,0x30396f906d8350abL,0x00000000000000b6L }, + { 
0xb866ca72a9c8b74bL,0x26aeabdb3d3df97bL,0xd4ea56a365cae75aL, + 0xc873fde088facf27L,0x4d4211cedb995516L,0xa01cf05289799b04L, + 0x55d85824d0f0c2a4L,0x434682b0dd83ded2L,0x000000000000011dL } }, + /* 31 << 175 */ + { { 0x920437b6b0463ae7L,0xed6c97665155b076L,0x06ad4f3c0a2cdb79L, + 0x98cb88f2485ffea3L,0xc8ba1b252988de4dL,0xce3a27f73529a997L, + 0xb1440b30b2796951L,0xb810edd37887e411L,0x00000000000000c6L }, + { 0x22d9308a6c094e06L,0x01d5e2e57cb1cafeL,0x2587378cc3de4bbcL, + 0xf60e9c3d774d4435L,0x689cff22147428d4L,0xc3dc80c093139fcdL, + 0xb3c4ba93c526a089L,0x623be8e82e6e4891L,0x00000000000001d3L } }, + /* 32 << 175 */ + { { 0xc03c182d851368eeL,0xbb94a33d57c919f2L,0x9cf0c767e333ace3L, + 0x037c7d47d974de7fL,0xe96e02cf8b18fa5cL,0x6f1a3e9126cf35f7L, + 0x47affdc033486c59L,0x2c370bfeec8c2a2bL,0x00000000000000e1L }, + { 0xdce5de2c18a3f84bL,0xc4bb672e2ad42ca2L,0xb6581d7edd0731b2L, + 0xb6ca46ca2809ac7eL,0xde8edba2189b0ff8L,0xa72ee3e81ff4631dL, + 0xf44a8fc6f1004bcbL,0x103dd1b9c8878531L,0x00000000000001caL } }, + /* 33 << 175 */ + { { 0x4d01b08c332bfc6bL,0x8223c8936205e95dL,0x4d003a46cb91bfccL, + 0x08316e3431e111dbL,0x30c85a86e0c13d49L,0xce3c5846b4fd3073L, + 0xe4b316bc05c18409L,0xd7e8304cc72b6cb1L,0x00000000000001ddL }, + { 0x9ceb7fccaa1fc1eaL,0xcefb89e96f61cf07L,0xb1728fd5a3daa487L, + 0x2476192440ab1f39L,0x23d5fdba2e9e2328L,0x0ba18c70f81dd4c5L, + 0x61333b1c10884f58L,0x57ef145f6496ae54L,0x000000000000014fL } }, + /* 34 << 175 */ + { { 0x5bf22305ea6d16ecL,0x1a13c5bb3ae82195L,0x622ea9454f1c43f7L, + 0xdf1011f675063cf4L,0xbe98aca99eec62e8L,0x2face80fd88258b0L, + 0x9e3dd0c4bd532a0aL,0x1c13334252ad512bL,0x00000000000000faL }, + { 0xe7004937964433d2L,0xf15f8f25b5d0a434L,0x392242bbb1c07babL, + 0xfd3c197437177cfcL,0x611a58203051cf50L,0x4a830e19f9c0b4ddL, + 0x2675d3aa2d92d1b7L,0x7a2ae9dcb649859eL,0x00000000000001e3L } }, + /* 35 << 175 */ + { { 0xed601d01372f2e61L,0x989f14361e886ed5L,0xc924db9cee004808L, + 0xbe07b40b7469977dL,0x2be5445a075e3ed3L,0x2dcb8edfb00fb21eL, + 
0xbc859cd63ee23c76L,0x484c8988125dd98bL,0x00000000000001dfL }, + { 0xf6b5b6871f25661cL,0xc95ad2c580939d4cL,0xfa12444f920a40aaL, + 0x7880e344a99e78a0L,0x6539ed2251356672L,0x41f6c73ba4910f50L, + 0xb6372d7ea09afb69L,0x6e7a691b1e8c75a4L,0x00000000000001adL } }, + /* 36 << 175 */ + { { 0x659f832f1cc592e3L,0xd9c86a4591c3bf97L,0x1b1166b918c82b12L, + 0x82ad18e294eb33ffL,0x0678927a15699561L,0x7cd0ef8016f7b8a5L, + 0xfd885fe19451b7e7L,0xaf7de79539a9fc7eL,0x0000000000000052L }, + { 0x6f2206e984cc8626L,0x71d11d08a976b5b7L,0x0ac59e61b66812ccL, + 0x1b40ab0a9c675e35L,0x6562f93810775f6eL,0xc2da3006def4955bL, + 0x89a9418346554627L,0x2972033edfe70a84L,0x00000000000000f2L } }, + /* 37 << 175 */ + { { 0x985f25c699200f88L,0x262dad2bfac98496L,0xc9274cc72ed4811bL, + 0x413e0b2d02b34d7eL,0x57462287f3201f03L,0xefa6d0a869264714L, + 0xc052453a9baffce0L,0xab1111b0929171a0L,0x00000000000000e0L }, + { 0x1358d4d4112c22ffL,0x2fbbda6874617bcdL,0xbe2bac8e85a767f9L, + 0x5a30778114862b96L,0x82c0ea86a58e1ec5L,0xe096587186208bacL, + 0xda711c4c6dd71c57L,0x8babe7ae242273a3L,0x00000000000001e8L } }, + /* 38 << 175 */ + { { 0xfb985617a96468b6L,0xc71a196944df6e18L,0xb8f7a7d810c008aeL, + 0xdfa67bd1372d99b5L,0x328e967977169ef4L,0x20e3b7e505134517L, + 0xfa7cac595097dc00L,0x09631199122797d8L,0x0000000000000003L }, + { 0x6ef792160862ee06L,0x71996694a81f02cfL,0x133519becbd2e915L, + 0x193f486759e47728L,0x90195620a28aff0fL,0x5da4a9d642049561L, + 0x60372d2126350d3bL,0xa7b159f3c9f56af2L,0x000000000000003eL } }, + /* 39 << 175 */ + { { 0xa58f7256ccd817f2L,0x0465664ef4fbcf70L,0xad43a3ec1608a6a2L, + 0xad7caae01bfe0a90L,0x3f2599a0b3a447bbL,0x4ddd47b8f61936f6L, + 0x61a3607779e2c9edL,0x6cbe8278c92cb5b7L,0x000000000000013aL }, + { 0x3c882911139061b8L,0xd6d348d221d48350L,0x5979da6b4860b3fdL, + 0xd051bc321869311eL,0xf3ba42446ad422fcL,0x3b84c1810d1d093bL, + 0x2f29f5a6a96ab441L,0x8f9c5d395bf8399eL,0x00000000000000e0L } }, + /* 40 << 175 */ + { { 0xa3b9aaacdc1c3f86L,0x529872b63f893dadL,0xff3e74c2e604ac86L, + 
0x39652c96857a9dd4L,0x494455e9fa60a469L,0xa11b7cbf1320b873L, + 0xeb26217c6ee60a53L,0xf732544484560f96L,0x0000000000000125L }, + { 0xdd6e98a1c4996f4dL,0x5fc566d07e9e70c1L,0x59e3c611dd9ee515L, + 0x4d60fb0795e436c0L,0x50e75e0cc096c8c2L,0xaa7d04addebafd21L, + 0x2fd1b14c7ea109b7L,0xd72ee3392594d6f8L,0x00000000000000c0L } }, + /* 41 << 175 */ + { { 0xff378e0a242ee38eL,0x38d26881d4264551L,0xca783b631e1df843L, + 0x019aeefff9c95555L,0x2fcd1e94eb753522L,0x16c244eec1225950L, + 0xbc7a6a75685338b9L,0xae67fa3b6215cf68L,0x0000000000000012L }, + { 0x53b4e8f437defd78L,0x2263cc33e9d41961L,0x62814a2a2119dc07L, + 0x50dd22ec96b76d54L,0xbf0b2b8727c615d8L,0x66c9c5f1730ae555L, + 0xc04c86a937cd7d89L,0x451915bed4ed94d0L,0x00000000000001f5L } }, + /* 42 << 175 */ + { { 0xb3e69f756c45239eL,0xa015685259815529L,0x37951fa13984661aL, + 0x8f7699e888e8fd3dL,0xae10f84b6ebf1690L,0x7f2baff5ec3175edL, + 0x2d6f42f53266e09eL,0x94482ac1273e04a6L,0x000000000000015aL }, + { 0xa3ed1424e85317c4L,0x7d9afa32db2a9492L,0x660fc8e609558105L, + 0x611bbeb9a8be7a44L,0x8d66417600638075L,0x4685c94a4d7a0954L, + 0xb23ae4dd1ec88785L,0x766a2eddccb991f4L,0x000000000000016bL } }, + /* 43 << 175 */ + { { 0x1c24478a341f455fL,0x8132a6b905502538L,0x5c23c4d9528d2865L, + 0x6fd3bc4e93b35d0fL,0x88838b49f6f4768fL,0x60c0a07f18a8ded4L, + 0x4fb44a8e0d71dcc0L,0x4b41b64efbd17fc0L,0x0000000000000198L }, + { 0xdb828f7a2f01d067L,0xad69f37320b0b060L,0x650cdaf51f7c4be0L, + 0x00b810e8ef9f47e6L,0x924e5a636e908d72L,0x4836dd4ef82efeeaL, + 0x91855928e8ee36b6L,0x747f47d578e6199aL,0x000000000000001dL } }, + /* 44 << 175 */ + { { 0x7844a30e11f5f4d4L,0xbd36ad4cc9663248L,0xaf697c63af7b4892L, + 0xe1f94853bb03fb6cL,0xd5da7f43e368631aL,0x970c4f6c783be7a9L, + 0x2321bd2643f90845L,0x5030a262966a8c34L,0x00000000000000e6L }, + { 0xf12d877ff7a82150L,0xbcdd93fea5ce6cbbL,0xe6c917b93c59668fL, + 0x4184cf3046222b1dL,0x24b27ee14b67d28fL,0xba7e6f7345e0b96bL, + 0x64dd699bf935b686L,0xaf0be69e62eb62c4L,0x00000000000001cbL } }, + /* 45 << 175 */ + { { 
0x74ae258fe63f6b8fL,0x2f58e84b09144f8aL,0x858165d0064af3f4L, + 0x77ecf07c5a2081f6L,0xd9e9be22e211fb05L,0x3758f5b14971cc28L, + 0x68ca7d8af535a5d2L,0x2addd4138398e52dL,0x0000000000000046L }, + { 0x442dfac0dc04d140L,0x6abc234ebae00092L,0x63209027118de0d8L, + 0xb2510bc14b1bb070L,0x6bda803e3b104f51L,0x1821f785bcf2d58fL, + 0x506f07e25785c8deL,0x9d78c916b2e16847L,0x0000000000000028L } }, + /* 46 << 175 */ + { { 0xd3bb0945ec04d3d9L,0x3a706fed4eca5fecL,0x9b228ea2c93eefcdL, + 0xd412dfcf0a627f68L,0x906c39a812f470a0L,0xa115f5899c93c4ecL, + 0x5f22b7175e649e76L,0x83ba978a159e8642L,0x000000000000014bL }, + { 0xde363c41c76cdb50L,0x909b66c948f7ba77L,0x30d1e4ba7d0d3b17L, + 0x39336ef38b390c4eL,0x8cb65cef53420b4aL,0x5c811baa11b8a142L, + 0xa4be4bebf8eef3ffL,0x4a11fd509bc39344L,0x0000000000000097L } }, + /* 47 << 175 */ + { { 0x8474dd63efa97351L,0xa12cf18a58d6b638L,0x3bd605e35d890e6cL, + 0x21cf4525a2127369L,0x7db4aa6a5466e713L,0x3f034b4fcda4954dL, + 0xce44ccb88ede8849L,0x6e5ff0eb6fe75314L,0x0000000000000191L }, + { 0xa700e7fe9dd62d65L,0xa2be6afacff305a6L,0xfc61f045f675a7e6L, + 0x69be21693f984612L,0xb5e140abaabd7748L,0xcdac92d519c004cfL, + 0xe28bf3bbd7164a07L,0x1ae1ffdfbe784423L,0x000000000000002eL } }, + /* 48 << 175 */ + { { 0xf57c47836c0f67a6L,0xa5704e7dd3a38f5aL,0x24fce48d4f9422eaL, + 0x0dabc5a4d9982f3dL,0x6bf4f9eb3f09226fL,0x944ed407a196f481L, + 0x949495bd7d88490cL,0xbae973645634cf1dL,0x00000000000001e9L }, + { 0x5bebd8ee1e5dca35L,0xbac77371b329d451L,0x124ff920b7a9d67aL, + 0xf7e8e1f69616c1beL,0xa518deb38dbc7e17L,0x13ffa792d52e3e76L, + 0x6dc6ef5d93109227L,0x5ba160b49ce19c4eL,0x00000000000000ccL } }, + /* 49 << 175 */ + { { 0x1dfa85794d2a0cf8L,0x48618e4b2bfba0c2L,0xf266f3b7177f1fc5L, + 0x0f12b884cf1720e2L,0x0ae8e5a8ddd5c9f0L,0x33a683cfca4456c4L, + 0x68b15ae27b153cf3L,0x78c717e3f483a724L,0x0000000000000135L }, + { 0x3416113cd38af77aL,0xd5063d39fa9c36dbL,0x97daad1fc1a854fdL, + 0xf7b1efa6f2599bffL,0xaa3314678e129857L,0xb920da73a4b7c9f2L, + 
0x6aa551a06d96f499L,0x2b7f6f1fdbcc5e09L,0x00000000000001d9L } }, + /* 50 << 175 */ + { { 0xd823b9961d977aa9L,0x88decc8f402a5bddL,0xc7b191d8368c7e5cL, + 0x556e9ec0f5ed5110L,0x4a81febdffecce67L,0x75919f76814bb429L, + 0x04d08334849762c4L,0x9a037df8ab24edb7L,0x000000000000004bL }, + { 0x9cab359f9592c8e9L,0xcf5dd731f35f9450L,0x49ff6c9083f04d04L, + 0xce238fe14f50c7adL,0x6a971ec477442addL,0x7aaff382eb6dada3L, + 0x14b43e873b7f4447L,0xc9e21cc7c67838aaL,0x0000000000000032L } }, + /* 51 << 175 */ + { { 0x3c5764d9e652d9e9L,0xf6d664d15c58afd5L,0x6f72c39c5263ea19L, + 0xb7c2a994e740eb02L,0x24868934ecf6cd38L,0xeb5aaa0ab07e9b54L, + 0x76893a641cf1a8dbL,0x13b31739fb9f2e6aL,0x000000000000012dL }, + { 0x185abf0f52104f6aL,0xa352e81acf2e3361L,0xf3d248cdd9a21f71L, + 0xdae16ec865f3686cL,0x42ce080c6bd346e8L,0xd0354599fab70983L, + 0x9259e6c9a24aa4fcL,0x816c3c813b4fe5d3L,0x000000000000007eL } }, + /* 52 << 175 */ + { { 0x4df0de9441fb96a0L,0x4412561574225d62L,0x02c82702def80205L, + 0x7611d83d8585c7f3L,0x4b30c7c70a4a0380L,0x5f2089fb0d1c3a3aL, + 0xce0fd7a169b754f7L,0xcbcf956d7b840647L,0x00000000000001eeL }, + { 0x9254d02e9152b2f6L,0x0004290f5a55c8eeL,0x1e65776a06eeb968L, + 0x1aa40ca875898404L,0xe5f6d7e1082b1b3dL,0xe8a4447d432c62aaL, + 0x5c988bde7f251463L,0xf1a617221e477a2bL,0x0000000000000082L } }, + /* 53 << 175 */ + { { 0x6e94a400a12e0ed3L,0x7ec7f137693f0b2bL,0x3369312a64dcb97dL, + 0xb6ae9d92bb174be6L,0x4343763fb49f4545L,0x2781f0c93988b19dL, + 0xae34e2bfa54bfd46L,0x19a6243e347e1992L,0x0000000000000135L }, + { 0xab693a17f339cc98L,0xcda5aa8a306031fdL,0xcdbd42722daafd10L, + 0x8fafb43f6dd6c153L,0x1eb19e412ba28ddaL,0x34a0298c447fde55L, + 0x7872662a05fc38b9L,0xa174d1070222688cL,0x0000000000000032L } }, + /* 54 << 175 */ + { { 0xbe5631d2d56dc535L,0xde5e0ef7f0485a91L,0x848a5d3836185a80L, + 0x61dbcdebc44e4ba7L,0xb9a36cbf13524447L,0xb10894ae6417cda2L, + 0xfdf2be12c4321744L,0x604734309cfcb161L,0x0000000000000031L }, + { 0x13f91ab30bb077cbL,0x27f4d378846d00deL,0x3a75d114c18b6495L, + 
0xb33cd9d3cff1421eL,0xf72e1ba92e077183L,0xe6d74ff2fb4bc174L, + 0xcd5a5f06d2ed2ef7L,0xeaa9a4d132ead477L,0x00000000000000adL } }, + /* 55 << 175 */ + { { 0x208c52b387c78c83L,0x8a814dcb41c862e8L,0x278976d32ca53494L, + 0x488caf4bcd7bf0ceL,0x2a07f905074b5f09L,0xb53383af0fccf4cbL, + 0xcff62835deefe7b2L,0xbb0afbcbc2febcf9L,0x00000000000000c0L }, + { 0xea3673b6d107b949L,0x9253804482d5a087L,0x52da8af43c18e3beL, + 0xcf635990b6fe0565L,0x7be60be52dd319a4L,0x246d7e44be349ee4L, + 0x395a48efc0cb8352L,0x3013388d08100678L,0x000000000000000cL } }, + /* 56 << 175 */ + { { 0xf43f57f1644be376L,0x5ead8d33cf66858eL,0x40d9ab0358ad4128L, + 0xaa6ad5b5707e7c69L,0x52d2bee602b4e5f0L,0xb39b48968b1a1287L, + 0xa9f1e0ecebc32b16L,0x7ff75a69f442c70aL,0x0000000000000039L }, + { 0x118149e893cee35fL,0xdc281d8fcbb28a72L,0x33055e64e8a60d55L, + 0x8c247f5a89367610L,0x246297062f93e57cL,0xff8dc0ef68104540L, + 0xea50e01fccda4787L,0x18ae4a7472e80504L,0x000000000000015dL } }, + /* 57 << 175 */ + { { 0x974295ab6f7aa51eL,0x5f1f806d43926d80L,0x803b6f7cdc56e092L, + 0x02a052652cbe5bf6L,0xcec50eba11dcac2bL,0xebc5da4767993ba7L, + 0xeb38f44945afb69aL,0xf84fc77f0e5d3809L,0x0000000000000103L }, + { 0x31bbb0ec544fb496L,0x3d31c7f9675994d0L,0x8c77b0c4c5116ee0L, + 0x8f212d31360d5cbaL,0x133fa56c95775eb7L,0xea09f1bbeb5fa46aL, + 0x50d1ec5096379da9L,0xe07b4ad7d54e7be9L,0x00000000000000bbL } }, + /* 58 << 175 */ + { { 0x6e83884fcc138ec4L,0x776f59d1af876d2aL,0xe884ba6e1138ec1cL, + 0x1f5ac35fb7ee8a3bL,0x411a904dc6f0ca3dL,0xa0e25f75698458f4L, + 0x8ab4b3f1270abf22L,0xc45f0668afd72a81L,0x00000000000000ccL }, + { 0xe93b101ab1428555L,0x49c43cefe1110034L,0x5d33dd3532cec22dL, + 0x6c96474469e47c69L,0x2a3af875eee1b050L,0x9ef7e37c53339d75L, + 0xf7b4e7cc69f70379L,0xe52c507c005d6188L,0x000000000000016aL } }, + /* 59 << 175 */ + { { 0x900a407c16ea7e9dL,0x7472498ebf1e2d4fL,0xc41b8e8f47d6ffd6L, + 0xd93cdcdb567590d9L,0x2361492cab650660L,0x0ada3e50975a5b7aL, + 0x4342909767613b71L,0xa8d9a32cd90f479fL,0x00000000000001ceL }, + { 
0x67c544a1722a95d2L,0x68312626d409ec5aL,0xed244c5c2738cbb0L, + 0x491e21fa8fbbe571L,0xbc6431fdbe4b131dL,0x8b84091b8eb05fb2L, + 0x7d82a00220103d94L,0x016cc8bc6ef8af3cL,0x00000000000001cfL } }, + /* 60 << 175 */ + { { 0x5e2b4a26cd5ccc7aL,0x44e349def1b8db05L,0x034d54b8efeedfb9L, + 0xaf410c3eb0de1cf0L,0x0e6cbe44ed9148e5L,0x752ad823e9cc2a8cL, + 0x3917cc72e9af3244L,0x8639e4a393e23c63L,0x0000000000000111L }, + { 0x4c0d05a46faf150dL,0x2d93d0d2d8d42497L,0x70669c4383f3c667L, + 0xea34f74cf766b202L,0xcfd2dfc1a8cccad3L,0xbe520b4002cb38f7L, + 0xeeeebc97677939b8L,0xc7a734e82e86cf07L,0x00000000000000d8L } }, + /* 61 << 175 */ + { { 0xf72c72940d75ed6fL,0x3135f686ccf5cc3bL,0xe971c1f6cb3219edL, + 0x01aeed44d7c35486L,0xb9fe395d13a4d803L,0xad124ee40f47fdc0L, + 0x113d1117ee46329cL,0xe252a70e8ffb9c24L,0x0000000000000072L }, + { 0x88f443817efec9e2L,0x2444c821133a819bL,0x558382008114f80eL, + 0xc3a4e191088f5c42L,0x222c3389f8775660L,0xee7e2c8b0329f914L, + 0x3a4fad9f45c18e0cL,0x3ace9e4b3309eca3L,0x00000000000001daL } }, + /* 62 << 175 */ + { { 0xd343830f2ab0160aL,0x731c5a9bd81bd1a9L,0x4443103457b8df66L, + 0x2af1ef67433ef550L,0x9e84336c278ea0d9L,0x53f2c6addb1c8881L, + 0xbdcdeadf12be675fL,0xfca45c98ffb39ecbL,0x0000000000000109L }, + { 0xd7e752f42cd3a337L,0x245572edf0c5ccafL,0xad130568627281c2L, + 0xf1948b1494920647L,0xc35f002381b60cceL,0xc99bf95e4d6d439fL, + 0x827a29de2c19ed5bL,0xbefcd94f65737517L,0x0000000000000157L } }, + /* 63 << 175 */ + { { 0xf54cd73b46227f96L,0xb6beaa8d1a062ddcL,0x1d6ea57dca017154L, + 0x47292049f2c3f854L,0xe0b35769eb41db25L,0x58573070cdef8384L, + 0x8cd35c89d8eb4e66L,0x883e914d4fdc8982L,0x0000000000000056L }, + { 0x0e47c705af242c24L,0x864a4cc18e09b24fL,0x32ee11e88747f144L, + 0x72d496348f666496L,0x74a0a314bbf0720bL,0xabe29f1db6ba8cc1L, + 0xae92d4e7c20e7e84L,0xc2e5dd9336f6c9f1L,0x000000000000005aL } }, + /* 64 << 175 */ + { { 0xcb35df1c0a0eb364L,0x6d0479606efaa084L,0x2ae2d2833735ce56L, + 0xe0b9d7e8df82fdffL,0xff7a316c39cf5ab2L,0x23254f4e26b6c05fL, + 
0x4751cb2de5c125fdL,0xfa059629894e9a61L,0x0000000000000148L }, + { 0xe64ca47b49112eccL,0xf4f9f08cf5fd0dd6L,0xad1c8c7274bdc4b6L, + 0x34660962bba001e4L,0x346d7a407e2ad36fL,0x68c8b34bd27181b2L, + 0x8aeb7ea43b424e34L,0x257ab659d4498d11L,0x00000000000001f2L } }, + /* 0 << 182 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 182 */ + { { 0xad57280abb5be7e6L,0xf102ff10066e97a7L,0x5308cb8e61c86340L, + 0x11b53155d02e15c2L,0x50ed7916ef223374L,0x8c48f8371ee821e2L, + 0x0b3010cf7931b97cL,0xb84cd0142062ec30L,0x0000000000000007L }, + { 0x05a7a5a024b6e1ebL,0xf8364d18b5a7402dL,0x2b9ec6c5f9eb3538L, + 0xf363dac905915b41L,0x2a567784a7909cb6L,0x5b386ead22fa3badL, + 0x646554b91e873fbfL,0x99f6ad72b88ca34cL,0x0000000000000077L } }, + /* 2 << 182 */ + { { 0x8f37e861fc3d5a8eL,0xd09d8e74901ce1a2L,0x212e32a992f81574L, + 0x38ac8ba60d96a933L,0x3d990b98b84c99d4L,0xb97f4e73df58647bL, + 0x95ae93cc7641888cL,0xd7bdb083a0969dd3L,0x0000000000000182L }, + { 0x6531530c17830426L,0x9db488f48a57b683L,0x9452ca0f0d87eca3L, + 0x19b198f08b4be2a7L,0x17939ab22775f0fcL,0x7f662a0625bc5c0cL, + 0xdf04cbef428884f3L,0xa4d202209f9ff142L,0x0000000000000090L } }, + /* 3 << 182 */ + { { 0xd36d95e98d20af1dL,0x042b6b721475a01dL,0x90de7dc34e5322d2L, + 0xd7201c9c8085753bL,0xb2467afb2a41a80eL,0xde0b994dfcbef3eaL, + 0xab46bec5219735a2L,0x5c6efc5a9f068a58L,0x0000000000000167L }, + { 0xad80498ffbe3e605L,0x0e49b878b58b1af1L,0x5a90b3d5f7467e30L, + 0xb6feeb53d872561eL,0xd33ca82162456735L,0x6262f717cdf5cb42L, + 0xb7fe12752d9b039fL,0x3d76868940af9134L,0x00000000000001efL } }, + /* 4 << 182 */ + { { 0x4585d3aa31ff6e80L,0x8521ac35c19ae65bL,0x983e0c6af9f78334L, + 0xc4131279435db0f0L,0x0f2d350b51bdb67fL,0x1fea5391d65d265fL, + 0x7f0b55f7798e80aaL,0xf322493ad549eaa9L,0x00000000000000ceL }, + { 0xdf7b80aafe9e5dabL,0x5585f93a1861e785L,0xb70fa9abbbb5764cL, + 0x23ce8aed3a39ef40L,0xa719329a14920ff9L,0xa74511124a00e5b9L, + 
0x48b0001bff5c3588L,0x8dcb5fe10e0551ceL,0x00000000000000f1L } }, + /* 5 << 182 */ + { { 0xa31575ca4b4c5966L,0xae86a8c831b5d540L,0xd19ce0a2386fc523L, + 0xce86998b4540a480L,0x7a4897ab0f0eeee5L,0x0dea1fa0042b6dc6L, + 0xd1c403044ff99780L,0x3d882bc861fb0837L,0x0000000000000023L }, + { 0xef942ae4bd2b6cedL,0x7a91fbca038d1540L,0x644571d23631a109L, + 0x865061c09289c9a1L,0x9f9d809df67c8866L,0xa73777bb75b91533L, + 0xd4128ce73ee99cd6L,0x2c83701571d8a67cL,0x0000000000000043L } }, + /* 6 << 182 */ + { { 0x1e318c6a5ab019e7L,0x83c490b03660eaccL,0xe464c85856e4b9edL, + 0x6724aedbecf9b627L,0x83f9312e6a28f322L,0xe1f8314f3e3758e7L, + 0x8ac29afc1c0639f4L,0xa0390923591fb450L,0x0000000000000132L }, + { 0x0188fc767dab76e1L,0xcff3c75f570fa4c0L,0xd1a1db3a4be7e55eL, + 0x6f97be3c575e816aL,0x5ea0e3240a9acea0L,0x05699068a9be2cc7L, + 0xbe8c58299391ceb6L,0xcb55ac5d672c1fcbL,0x000000000000013cL } }, + /* 7 << 182 */ + { { 0x82320b25e4eec241L,0xa93543d00c54433fL,0x5ff474b1024c6917L, + 0x70337516d2e8c7c0L,0x5d97d6b2eff507c2L,0x415cc09eb642c29dL, + 0x6234cb715a3b675fL,0xf4721ec9bf150b2eL,0x00000000000001bfL }, + { 0x5e4120da2a377b63L,0x518a3bf81a851ddcL,0xe2dd2b95dcc9ee26L, + 0xc2b153998fd7b647L,0x1b4b43df7eb436e8L,0x7c354da61ca50fa6L, + 0x2997e27dd1f1b4ceL,0x89b755a4f0e78f01L,0x0000000000000097L } }, + /* 8 << 182 */ + { { 0xdaa72ebc797ee80dL,0x5ee6de5e13bde073L,0x8f21fd78a69b5019L, + 0x5a8fce8c89191df9L,0xce39466d725f4f61L,0xe1015721d258c98eL, + 0x22a2f6a5bfb1acc6L,0x5dfef1da97265eaeL,0x0000000000000164L }, + { 0x4697804882470cadL,0x5a62de6c92e3daeeL,0x95cbc0199163a136L, + 0xd1f51a60f0ad7fbeL,0xd300b7c6f2380828L,0x3fce67f4b1246df8L, + 0x6aec1313466ec418L,0x95ffd2856063ce6fL,0x0000000000000080L } }, + /* 9 << 182 */ + { { 0x082a54184ceb12b4L,0x20d67a8ce6889f68L,0x5beee4dcf78c7ee3L, + 0xe234bc07a7c70d07L,0x57957765845cd20bL,0x9f16fc18ce9f8562L, + 0x7793805865d54d1fL,0x244d5d5c0a33ac80L,0x000000000000000dL }, + { 0xa7642eceb3d0302cL,0x7553b599438b82ceL,0x35db7e6e3bca2614L, + 
0x0f549f7414c632c4L,0xe9d8a3122cf8bdaeL,0xa27062b3281d7c68L, + 0x1023e6791155cd46L,0xa2d781fb734a6f3fL,0x0000000000000017L } }, + /* 10 << 182 */ + { { 0x46cd5849f6ef2fdaL,0x5b3fa640b1d02cdaL,0x0fb696664e1222b9L, + 0xf7c6c4a888c8de4cL,0x742c5f24ef16cc79L,0xa4866982c0032997L, + 0x076035dbbf0a2982L,0xffa69b49779ef790L,0x0000000000000068L }, + { 0x74a20b1174e76a5dL,0xb9bf4b3f35722dd9L,0xad2de2d42bf57ad6L, + 0x413223308bb9d811L,0x6e7574a05ee8d6c2L,0x16253e6b439a3683L, + 0x8ba8f866d611da87L,0x11f5f946c9e2df24L,0x000000000000006aL } }, + /* 11 << 182 */ + { { 0x1fa5a29708431e9cL,0xa400cf7ae43aea69L,0xc197afcabd8182b9L, + 0xd8a48cb691ed64e2L,0x45b3b155fcd39e10L,0x4951cc79cbe71480L, + 0x939e1a833101aae8L,0x4c354aa4c1bc4f13L,0x0000000000000042L }, + { 0xc21a1cab3571c9c2L,0xe85b99fb54b5d704L,0x0b26a9e0e740e33cL, + 0xcc33a25ef6780727L,0xaa203d30c2871675L,0xfee96fc8735015f8L, + 0xea5f0118fb6c489eL,0x1ee87d166e566a79L,0x0000000000000012L } }, + /* 12 << 182 */ + { { 0xd3107dc42eb6d453L,0x3897a60ba3f6b312L,0x5d8ae4b9a26ebd5eL, + 0xf83e1efa445f3ea9L,0xcdf3f2d6e9c5ae62L,0x43624a7ffe4a7f11L, + 0x9169460f6634235dL,0x642b8b112f75464eL,0x0000000000000186L }, + { 0x7924b1cdc84496a4L,0x0a5eb42c5f8f5f30L,0x14068d49492cd04aL, + 0xe0824856f2a5f227L,0x6e146cdead328ed4L,0xc63a68c19118fabdL, + 0xa295b4127e56624bL,0xc1d31ed187055125L,0x000000000000001eL } }, + /* 13 << 182 */ + { { 0xeb7242709aeaf343L,0x51e4dda7521d1303L,0xb0867bcf4b6d79ecL, + 0x9f3769280dc1e1c1L,0xc807e9397ce09227L,0xd46d4ccf18f02ee5L, + 0x4f50c46e7cbe5fbfL,0xee0d19eb8464a6c6L,0x000000000000008fL }, + { 0xf9eabcaf838eb681L,0x3a04860e43b0649aL,0x3ffc96f3f6f4a93eL, + 0x0e1cefca2689da24L,0xa5c0b7cc6aaca275L,0xa5bbc5ab87f572dcL, + 0x4c577d4f315e7e93L,0xcdd104b0be10ef14L,0x0000000000000065L } }, + /* 14 << 182 */ + { { 0xa267789abceb62c0L,0xc3536fa5dd342598L,0xa0a158edc66bb97dL, + 0x2282d372413001f7L,0xcc6365d7bb80b2b0L,0xcfcbab5a75e3e8e9L, + 0x0b0c89a9d832c899L,0x4c5f3c139109e60aL,0x000000000000013eL }, + { 
0xe9437990177cdf1aL,0x0ce91379f9a4d1bdL,0xb3a1a40968b5bdb7L, + 0xf9a22cc775573480L,0xd05f00d5bdd43f32L,0xc65e107c24cd9011L, + 0xd3c7f981782fd8d9L,0x4b998ad9771b4074L,0x0000000000000153L } }, + /* 15 << 182 */ + { { 0x3829eee292c5afbeL,0xd6ceab835f385af5L,0x80feac24ad9740b3L, + 0x2795a4aa1425ac94L,0xf8a2e2fea345fd1bL,0x83e9ee2e41cf9386L, + 0xf384b56b675a9faeL,0x8dd8fd07ad024c34L,0x00000000000000b8L }, + { 0xe5e8b6b647baac01L,0x0cf5cdaeebb4627bL,0xa0c05d92d9ece132L, + 0xb778b8723138e22fL,0xf15d930b482fb046L,0x4420d1953ef00d49L, + 0x07c2bce32acdfd2cL,0x5f0932a7df5d86deL,0x000000000000008cL } }, + /* 16 << 182 */ + { { 0x241872e36910d714L,0xdfcd9db5cf96f106L,0xd32e27facb3a51e7L, + 0xa55ecaeac4a051c2L,0xfc54759af8944204L,0x569d9efdf938b503L, + 0x8882ab15c54bf05fL,0x9a8869ed0cab8124L,0x0000000000000007L }, + { 0x1f5995bf361ce52aL,0xda53a556b51ee176L,0x735bfcd6cb664a03L, + 0x4d8e405d38486de8L,0xeeca6379a807c481L,0x9a24ac8a4d456d2eL, + 0x159b9f4c3b85ab21L,0x77524d58f256c82eL,0x0000000000000125L } }, + /* 17 << 182 */ + { { 0xabe54ca59d266057L,0x10173cf8ad5590e8L,0xd52929fdc2710408L, + 0x75164c9941c9bbd6L,0xe2042c2e09ec8272L,0x244a305e411aa6ebL, + 0x004b6a2930ab6539L,0x58d67a63bb2284dbL,0x0000000000000190L }, + { 0xd830c66efcd95a1cL,0x0e023ec73b20bcd5L,0xc136f25c50b4c7a8L, + 0x2e36e65af69c7a3aL,0xfca3db62e46a0699L,0x2615d7fe828e0fdbL, + 0x4bcd0acac894a9f6L,0x1f6a011bc266e64cL,0x00000000000000d6L } }, + /* 18 << 182 */ + { { 0x9624aea7c3ba79a5L,0x303f5c44356b4d55L,0xb266745f4420746aL, + 0xd554d641ab2157c3L,0x270e293a89a1b1c6L,0x73541da2e81fb3d0L, + 0xa3af4df99d28ef8cL,0x83e20fe57f159a0bL,0x0000000000000075L }, + { 0x6d4032ad216ce79aL,0xa979ded996dc5993L,0x0653ce03a77cf5e7L, + 0xd4c7c0f8e612cbf7L,0x664b14144cfd535fL,0xfc1127f460795eafL, + 0x3c612b6b68378b1cL,0x023e398ec1c7d949L,0x000000000000013dL } }, + /* 19 << 182 */ + { { 0x6f1d92ec3d4ae696L,0xb86106f398fab4fcL,0x822b872e3d5b0645L, + 0xe99b1302745556efL,0xf7d257a7ffb5a1f3L,0xd1c72810c5469b4cL, + 
0xc023f597f796298aL,0xfe1e4d4c37019403L,0x000000000000016dL }, + { 0x0afd0124d8c8e429L,0xdc549ae033397fcaL,0xcb6b425ffef20af6L, + 0xf179caa3d3ee3c39L,0xa2bafb4ae2cd0281L,0x6ed7124a8c6764beL, + 0xe347fd98c891540aL,0x6b531daa0ba5c525L,0x0000000000000176L } }, + /* 20 << 182 */ + { { 0xd83376b06e0604e7L,0xf3da2540b9bdabcdL,0x8a64129ab0069b9cL, + 0xf98d8a694ed21f8aL,0xcd7950fb17c1fdd1L,0x988ac822a1b303f3L, + 0x2d6dc22444468a4aL,0xed780eb123f65549L,0x00000000000000c2L }, + { 0x38e4f1e17c4f29cbL,0x030f563663776af6L,0xea357f378032b066L, + 0xbc96afa12141ebbaL,0xe1396e9f7cacbe45L,0xdc5e9f07c62172a2L, + 0x00ca038fa26039a1L,0x23796edf96c93fa4L,0x000000000000014bL } }, + /* 21 << 182 */ + { { 0x016e72db586f95f7L,0x7f9e2c36c2400728L,0x052457cf2bf04972L, + 0x029911681fb30152L,0x5be915ba80cd0439L,0x5e151e1c6faa956cL, + 0x21ab062272dcf9d1L,0xba0e51b09414b9b9L,0x000000000000014eL }, + { 0xf0a1bc7a201e1ca9L,0x768f5444c7ce120fL,0x2961c06b496c605aL, + 0x3dfd88f3c22605eaL,0xca174491e4a89909L,0x71319e378b5c6ba1L, + 0xd56268a9e975dbafL,0xb2e5ff36f38511b5L,0x000000000000014aL } }, + /* 22 << 182 */ + { { 0xa943c94277e540eeL,0x9d13fef6443f5f5fL,0xe9c360fc9ef01a49L, + 0xf2e1fb0698f8ab08L,0xcb966ce4cc21b8e9L,0x212a01e81f1cecdfL, + 0x800a4f8be95f2409L,0xa0a6b162fa1a1310L,0x000000000000009aL }, + { 0x23b1052ee4bde75fL,0x032a7dcbb01aa17eL,0x5e4cb05ba641f2b7L, + 0x6ff29dd715184d0eL,0x29181004f479238fL,0xc3f7b9466a04f7f9L, + 0x12db296a2c9e0a68L,0x323542c22f7f2a7dL,0x0000000000000108L } }, + /* 23 << 182 */ + { { 0xd9cf8b726028a775L,0x3850c15741cfa8ecL,0x6b4bc9b606915137L, + 0xae55a728d1c99839L,0x045b7719e23ff638L,0xcae591faf3269ec6L, + 0x04d5743ff130730eL,0x53372a0e6208ad0cL,0x0000000000000036L }, + { 0x7db65e4e4af7b7f8L,0x9dfde4aee67bba08L,0x62dda13e2cc2e030L, + 0x4ffd3135a4080e00L,0xd06a20444b48aad0L,0x9442188e59609700L, + 0x10ffbebaa9ce37a8L,0xe22428600ecb7346L,0x0000000000000005L } }, + /* 24 << 182 */ + { { 0x20f7c6ee1bcf9917L,0x1c7d940f8cbc7d2aL,0xc2a1139ed592def0L, + 
0x0b6b644a936c827aL,0x1b16bf59abb4f3ebL,0x1693fe7916b8b498L, + 0x4b81cf5badbf552eL,0x620ef3be7e0cb980L,0x00000000000001a3L }, + { 0x6106878f2f6f9a16L,0x15c2303e6cf02778L,0x18c605cf7f7e7e27L, + 0x6bd45dc798a8de3fL,0x5874172095869ddeL,0x621d8a4b2de20d0aL, + 0x7632703cdca67562L,0x9374d05536957eecL,0x00000000000000f1L } }, + /* 25 << 182 */ + { { 0x291cad563c81c389L,0xfa1b6f768a663dfaL,0xc23bccfb4d4e1bfcL, + 0x14218a8881c16ee9L,0xf4c48023fae0c87eL,0x58e978728e9874dfL, + 0x690b375959dabf0aL,0x45e8790a2f13f6bdL,0x0000000000000156L }, + { 0xc1070b3f17bd5f8aL,0x4bb893cf246d0744L,0xf5e5d9a855e6f814L, + 0xfea75ee5a20ad845L,0x000d0926d4e9a182L,0x120ef85508b102aaL, + 0x495db2342947588cL,0x5101834a455b2626L,0x000000000000011bL } }, + /* 26 << 182 */ + { { 0x2a8ebbb27efc758eL,0xc14880f28e3bd85aL,0x95ec9fa6536a42e7L, + 0x07a26257f6cf0da7L,0x67976b68d1a6a137L,0xf143f97bfcd0179aL, + 0x99645da231832f2bL,0xbe0940ca0955eb50L,0x0000000000000076L }, + { 0x6d01c753e3acb83eL,0x9cb6f41194373f4aL,0x935f9fc2e758ddffL, + 0xddee3e64b16c8ae5L,0x5b16820748e77dafL,0x2c0a5f0862a3cf00L, + 0xf47f143a79315466L,0x2adc4a4853a7d9beL,0x00000000000000dbL } }, + /* 27 << 182 */ + { { 0xd2d98879bb29fe08L,0x261ca47e9e1e47cfL,0x349da8771951592cL, + 0xe6da4080eb147166L,0x036929ab99d5410fL,0xa870f2a4ff40d8a3L, + 0x6e12a9c78b775ae9L,0x0c493e092de6fac1L,0x0000000000000193L }, + { 0xea12c2dec0522680L,0xb3a5c0e533afbcc6L,0x5e7da51f049aab7aL, + 0x5dba87587f2dfa4eL,0x4e931594e3339e83L,0x100c2dcd0c75a4c3L, + 0x6795a18e562cd1e0L,0x3778c7509aa010f2L,0x00000000000000e5L } }, + /* 28 << 182 */ + { { 0x2d0e0e8c3a12be03L,0xa4febeba75aaa59dL,0xe5ac02d67e8056d8L, + 0xf3788c67bacd030cL,0x4018ae3921fda504L,0x690259dcf16aa240L, + 0xf51723b291ad5d2fL,0x9cf893221f0aa525L,0x0000000000000166L }, + { 0x0f4b0c2a09388e2cL,0xb8308ceff40b8763L,0x2462458061a66221L, + 0xb67a79e8165aaa95L,0x94bc0b4708789bc2L,0x8b234c8708114d2cL, + 0x1ee26556bec8d5efL,0x22e3055df315d134L,0x0000000000000118L } }, + /* 29 << 182 */ + { { 
0x46cf9a123b3ba75bL,0x1d186c661adb1a26L,0x56c777349bf1316cL, + 0x0f3e2f5ed8304840L,0x01baabfbe6070ce6L,0x503f8d930c9102d2L, + 0x4dcd3adacd453ef9L,0xd1a8baf5fc6aad57L,0x0000000000000084L }, + { 0x1faf5d6dd292a0c7L,0x910d30cb74f8c530L,0x442d713a6c0fa806L, + 0x0a18ec4704b1a792L,0x880b2e809501a8f2L,0x2f074b43aac9e8f5L, + 0x44209c655e1eddb7L,0x5916efadee7e28f3L,0x0000000000000131L } }, + /* 30 << 182 */ + { { 0x91e1ced21c8781b1L,0x1b160a6cd9eaceccL,0x32169b6de05009ccL, + 0xa6e611cbd398f69aL,0xce953c71b0eee5fbL,0xf0b9112de9cdb766L, + 0xad581159c871f5d8L,0x0ebe554be2279127L,0x00000000000001e4L }, + { 0x89976adf1ab77e95L,0x33164f3df139a517L,0xef133446a1c4a5aeL, + 0xdcbd304791d5782cL,0xdbd5c67c9750d5e5L,0xa23fc0fea185893eL, + 0xe094fe8ec7f91de6L,0xf9e3a96a99faa7a7L,0x00000000000001bbL } }, + /* 31 << 182 */ + { { 0xd116cbade405020cL,0x7c108b67c633aba9L,0x2b7856f5e8aa0582L, + 0xf8eab5a93ec464a6L,0x0e90e325e59632daL,0xd4dee596e85e20bcL, + 0x4cf2c91482dd5124L,0xdda35341a6160911L,0x0000000000000073L }, + { 0x4d225d4108a2c6f5L,0x481686652c17247aL,0x65877f1497b579b6L, + 0xda95b1fa53d48dd2L,0x43fb7079ca44e73aL,0x7b69f069e3ec902bL, + 0xdfd35aa860d9ab1cL,0x3457f579b3e5b6edL,0x0000000000000041L } }, + /* 32 << 182 */ + { { 0xf43e7556bd385e53L,0xbf8f9681875c0190L,0xe3d90dee555bd31fL, + 0x3baf65546edfe5ecL,0x3d6df218960e906dL,0x7b6635cb873d0635L, + 0x98a44993ded25371L,0x918b7555ad8e24b4L,0x00000000000000d7L }, + { 0xb776f9146553287bL,0x7075a0a36fa9fa1dL,0x7bc1d77c11d5513dL, + 0x789a19867993f668L,0x6162a249612a7f5aL,0x04348eb04ee7f938L, + 0xf7102ceb56583274L,0xae423442e356e7bbL,0x0000000000000002L } }, + /* 33 << 182 */ + { { 0x523d61302f4ead44L,0x201058f04e5b8666L,0xf7680d4267d123adL, + 0x0884457684ebdfb4L,0x84274505abd16ac3L,0xb9fafd02b3ea127dL, + 0xbbfa958703e32460L,0x291a5eb015781790L,0x0000000000000173L }, + { 0xe91a30aee53f10c3L,0x9584ef2bfd5dd433L,0x108924ba581b9fcaL, + 0xc2fd60ccfd8e8fa8L,0x318e224fcab2784eL,0x0c81392b9d31ae14L, + 
0x9b2311108aa8130aL,0xb270d685276f3462L,0x000000000000010fL } }, + /* 34 << 182 */ + { { 0xdb7ce23edde05ffcL,0x3498ab21c6d0e52dL,0x70c111977b0e5286L, + 0x2397818f56a3293fL,0x8e5190030f16f124L,0x42f9ae77dad7d391L, + 0x5b6d52b49000eaadL,0x0250415c53e5d19aL,0x00000000000000a4L }, + { 0xd5134d31720c627fL,0x5225579c2ace2686L,0x43db384eb8a3ec7cL, + 0x3f857459f0f7a16dL,0x897549699739d8dfL,0xb1a377181c8ce480L, + 0x81c5b1868267d330L,0x3f45401b037f89c0L,0x00000000000001ddL } }, + /* 35 << 182 */ + { { 0x4e66abd437417d43L,0x9514ca813998603eL,0x3fd620ece6ffe0c3L, + 0xc72d522ec2371320L,0x9aa50a59acf05fbfL,0xc3e9213306fe5a5dL, + 0xc1d244d73c56f206L,0x7c22b8ea72b4d2e0L,0x0000000000000114L }, + { 0x2cd8c20869310665L,0x41bc43a56af1617eL,0x41acd532d54f9707L, + 0xc3fe18ad05c56c89L,0x706756a536a5189dL,0x63137c9f3815ce6fL, + 0x2ff051973eaabf9bL,0x0c3118fe365d4a01L,0x00000000000001d8L } }, + /* 36 << 182 */ + { { 0x5d07a2964e588df1L,0x141993bb19b144a5L,0xabc8b7f17aef2cfcL, + 0x6a447e47adf5f260L,0x3617bbf9413523aaL,0xae2df3568658871bL, + 0xb73a2681eca0d3d9L,0x9faa02a845905049L,0x00000000000001daL }, + { 0xcea9672c9249e1f3L,0xd6030dd792b80377L,0x1731a9b6b430427aL, + 0x707319a7dd23ecd8L,0x7497f2d3137eac72L,0xfd455e149fc57ecdL, + 0x1d5be9f6f7a02326L,0x58127f1635ac3188L,0x00000000000001daL } }, + /* 37 << 182 */ + { { 0xa465edcff4ff1298L,0xf6875a88fb3ce39aL,0x0f8c3e4154571e9aL, + 0x9750c93b2a33bcfbL,0xa4dda6a9c95cd2d0L,0x09512242e236cfe6L, + 0x7045f4a1f8b5241bL,0x6cfba4be0c875c33L,0x0000000000000053L }, + { 0xbbda856f057cc244L,0x039c06c1a80b8883L,0x909d27543627186cL, + 0x02163479f6b0fc43L,0x46c3962436cc44beL,0x83113b35d0abcaadL, + 0x0d62ed6f0ef103ffL,0x04100f40c2c14b5fL,0x000000000000000cL } }, + /* 38 << 182 */ + { { 0x5492babe8e507a53L,0x01ef00154f5fdf0cL,0x5b07f050e8bf3b02L, + 0x06205d089907fdc9L,0x4ce4ecc11ad754c1L,0x3d01c3b63723fdafL, + 0x77064d779065a5a2L,0x5de5e1cb725ff125L,0x000000000000006bL }, + { 0x32d6e29d839ddf7aL,0xb5908a08f6518318L,0xcaedb6a5e99f9497L, + 
0xbe22eccb8509addaL,0x46511cdcdd3a6f5fL,0x74e345cb75b8d2e3L, + 0x041b65006c166683L,0x23c654ead41aa178L,0x00000000000001a4L } }, + /* 39 << 182 */ + { { 0xd79f98edc740422aL,0xb365a02f6f658438L,0xcbd68ebb726bb3d3L, + 0x1a74279568c9ffe1L,0x173b3d054253a108L,0x2873faa613d84b25L, + 0xba2b605b705e1276L,0x35aa0bfe1d1e7e13L,0x0000000000000013L }, + { 0x54a4ac10ae28e669L,0x392bcb0022d8b070L,0xf15f6b0819e99d02L, + 0xd3019505f97077efL,0x8ba03e9342036fc1L,0xf1b3b093837e9fd2L, + 0xc92b110d891b000bL,0xb99d4924e628c1deL,0x0000000000000169L } }, + /* 40 << 182 */ + { { 0xa62acac89702711bL,0xfa8afde1e5a60500L,0x6d0b6ec9851d3077L, + 0x2928a849ddb4d927L,0x40731d2fee9c5656L,0xf53c199c78402e28L, + 0x8d704153bf178d27L,0x67bccd2a3f735c1fL,0x000000000000008fL }, + { 0x1fb2240d935c8aa5L,0x1fa6b65f794daaa0L,0x30aa4bd3c31a7abcL, + 0x7b571e77f50b1a73L,0xe5105e178d05b874L,0x224ce4ac6574a6a3L, + 0x64f2c14ba2d9fba5L,0x30e0f151c304c3dfL,0x00000000000000feL } }, + /* 41 << 182 */ + { { 0x7400571b0014829aL,0xc976d3db3d15b8adL,0x2d3780591335164cL, + 0x57e68ee0e2e843bdL,0x0df368cc40341b75L,0x004e99f63666a071L, + 0x80898faa2ded5ebeL,0x2d5e2b8d5d64a7c2L,0x00000000000000a4L }, + { 0xb54da69bd389bb74L,0x3f2bfe1ddb6e2d4aL,0xeb09473dc831acffL, + 0x64455b0c0ad6afc8L,0x38ac4f0c13088a99L,0xcd44c67980602e36L, + 0x6b28f3b36c93cc94L,0xcc9ed6f2649f16c0L,0x000000000000001dL } }, + /* 42 << 182 */ + { { 0xcb8db302d5aaec94L,0x58fb71700e303300L,0x0a78247a8b0e0ddeL, + 0x2d30370d6c007b9cL,0xb9d1fd916bd6d93eL,0x8e4be06b02362bd0L, + 0x373af6632227469cL,0x1a1b76090bac6e9aL,0x0000000000000171L }, + { 0xae3c93ace2962773L,0xbd96ef35232d4523L,0x44b5d53f254b07b0L, + 0xbdc86e94658ea74eL,0xbde3eada9d758615L,0x267b46acc55f69d0L, + 0x91fc9a2d4f0a8474L,0xc3757adce7249118L,0x000000000000006cL } }, + /* 43 << 182 */ + { { 0x18b7327734169522L,0xc80df3cade9f570cL,0x2248ebd72ee0abb1L, + 0x719461e77ebb6b43L,0x8432cb62cbf4f4e6L,0x031df9951dcda661L, + 0x1889b33b9e588c1dL,0x1d9ba4a04dee2e11L,0x000000000000017dL }, + { 
0x57ee376f719a392bL,0xbc128291bcfce4c0L,0xde788ed68be2ebb3L, + 0x865d088f0c72b0a0L,0x720f285ae7b5af3dL,0x716d0d401f6564faL, + 0x68b5093e16b7733dL,0x3ce4e5a3544922acL,0x00000000000001c3L } }, + /* 44 << 182 */ + { { 0x3e157f2273ee88f7L,0x1c0630464384bb96L,0xfa8ccff1163124abL, + 0xf2a63516e1c82e20L,0x06f7a495ecc52cbdL,0xd00255d7d546b8b3L, + 0xa4e0ea97f2c1bd00L,0xc279c9b9d8367846L,0x000000000000018cL }, + { 0x493ca0d2f4846640L,0xf2f65176836101bbL,0x9a2131507c4f5f62L, + 0xe7a4d6cb33ca1654L,0x6705565959e1757aL,0xae2233738bbdda8eL, + 0xe915be3be856975eL,0xcd6117edb40db96eL,0x00000000000001cfL } }, + /* 45 << 182 */ + { { 0xb4e8c9370aca7d71L,0xaac0761b9812c1fdL,0xe8596d4bee0886f8L, + 0x9bcbfa589fc12093L,0xb9e4b19c06967ae7L,0xc0e4f5c98172cd32L, + 0x94098a335ccb77f4L,0x8996dc27bdfdee8eL,0x0000000000000129L }, + { 0x81b4d7ee284d4731L,0x263845cfdbf96ca8L,0xe773b3cd3a19983dL, + 0x7e16a7d17e7d7707L,0x7270b36600cffe6cL,0xef2ac9029c2de9feL, + 0x3cfae8b0c875b52bL,0x8bf88154f21e1d82L,0x000000000000002cL } }, + /* 46 << 182 */ + { { 0x9c765f620a5e3ebbL,0x970ccd4e818fffc4L,0x9522877a70bbf4fcL, + 0xa720852dcd9f1e17L,0x50dd2c7e15b0005dL,0xd5ff9c20842795ddL, + 0xa942f53eaab55facL,0x9c8350e25cfb4548L,0x00000000000001bfL }, + { 0x8afd20bb94f7a376L,0x17de2a98a5c8cabaL,0x97d74ac55ae239ddL, + 0xcf5925c5917be399L,0x0092fdd6185c43f8L,0xbce625310c2334a6L, + 0x264385c8688c7ea4L,0x09b75046c0a5b76dL,0x0000000000000000L } }, + /* 47 << 182 */ + { { 0x1569eaf24cb224cfL,0x269fa0409f146821L,0x76debe784598e444L, + 0x55dbf9b1dfe244a5L,0x16cbf26eb67a835eL,0x59af39e01281e580L, + 0x71eda217a229a7a9L,0xbada5b1369432cfcL,0x00000000000000e2L }, + { 0x59356f88eb5b5f1fL,0xac200a20ee82718cL,0x870cbb11bd305f9eL, + 0x6ef0a89a969de1ecL,0x89c705ef0110fd77L,0x2c944b594c73e141L, + 0x4bac7679c140aaa6L,0x910125524b4d3534L,0x0000000000000024L } }, + /* 48 << 182 */ + { { 0xb77b2273b687e9e4L,0xd0c9629b158071baL,0x7b0d0576bd865783L, + 0x47ca9a1b48334138L,0x22c0b777990beeffL,0x3dae022d7e8dbe92L, + 
0xdecf568567233c8bL,0x96eadd8a999e9c94L,0x00000000000000d6L }, + { 0x8aefeaf1b8755eecL,0xbfd2bc88789362fdL,0xa2c3bc994e34479dL, + 0x5025831148df5301L,0xd392cb00c0e07947L,0xc8c4c6a5e4a0c2baL, + 0x315a9be295c84fd7L,0x12be251d40cb144aL,0x000000000000007dL } }, + /* 49 << 182 */ + { { 0x600281fafdfb666dL,0x89b2e2ee9bee6f0fL,0xe8f208e3124ff0a1L, + 0xd674853a97ef8785L,0x333f682b7c613cc0L,0xa7bcf59549dc2ea1L, + 0x3431a30d26a67e52L,0x1023799fbe4720f8L,0x00000000000000abL }, + { 0xf98a30432480bd59L,0xc46d6cf7fdffea8fL,0xf6f7c7e58a9d667dL, + 0x2a7fac095c65f054L,0x389cf284df19c157L,0x7b70ba00b35cc84dL, + 0x068cc13ee30d0abcL,0xca24da284f1db63fL,0x00000000000000adL } }, + /* 50 << 182 */ + { { 0x22becdcd9c071a34L,0x66bca042c09cda4cL,0xf701267bf0732913L, + 0x25c3b4b5a6a84d63L,0xd20b654f96a4bccfL,0x649bf2c16963824eL, + 0x3e173b274d84f899L,0x0573e68b5ae76effL,0x0000000000000193L }, + { 0x850d688aa7578c52L,0x9e967e16cacaef52L,0x24e39bf84daec11eL, + 0x9ee4a66878b0072eL,0x3255f3f8f17b5e45L,0x1d8b5306c11d1f63L, + 0x79b70aa80fd0248eL,0x1e15707ccfcc577eL,0x0000000000000095L } }, + /* 51 << 182 */ + { { 0x6e9477a46b25f458L,0x808fe2c599cb8c78L,0xad2dccf34619ad52L, + 0xf2424ad828e3efdeL,0xb4cf55c2bdd4b5b1L,0xf03879f1d1735445L, + 0x74fb4795f9feaf2dL,0x588c3fd6312ca1eaL,0x0000000000000046L }, + { 0x13b13beed8007d8fL,0x5c5b92246fdaacf9L,0x84ab74efc221e90aL, + 0x37384eb8758581d9L,0x923205a320eaa26aL,0x0cfc39ffd0995704L, + 0x8b3bd7a2d4ede2ecL,0x4a640b0002c020dfL,0x00000000000001a2L } }, + /* 52 << 182 */ + { { 0x9649b0c126bd0385L,0xdf659955ac77dd42L,0x512cab0f5f1d946fL, + 0x3365c06cb8e888ceL,0xd5b139364003ad9bL,0xb9ddf7422e777936L, + 0xd882a2e7f0a2ea17L,0xca25477d329706f7L,0x000000000000002cL }, + { 0x109144281f14b985L,0x8c6727485f7bcafbL,0x43da81611a6b62b8L, + 0xadee9242ffc82eccL,0xb7dcdb418f391fa4L,0x1fbbf2e0593add60L, + 0xfb551a699ec74127L,0x2ae9b20e7443a903L,0x0000000000000116L } }, + /* 53 << 182 */ + { { 0x1ef97491410cd077L,0xa8206904f205d56bL,0xdfea5b4c334d1427L, + 
0xf1982219bef52120L,0x2f5de7f02af4cbccL,0xb80fa224b097d873L, + 0xe7ebce309b3ef192L,0x34043ba58508a38cL,0x0000000000000044L }, + { 0xe73a1b716e77eddbL,0x422356ea070ebe74L,0xd1e293e6b5fe5ce7L, + 0xe8897492231d1036L,0x04241a1ea9676b41L,0xbfe53ca26833593dL, + 0xb52a531006b71326L,0x09b13c8e66a34460L,0x0000000000000197L } }, + /* 54 << 182 */ + { { 0x65b70bc6eeae508aL,0x78b81935b70e19c5L,0x9652937bee8c6b85L, + 0xdfb53e2a9ba55188L,0x9bde675e8ce3f3baL,0x021519ed8693d863L, + 0xb64130071fd9ac55L,0x428560d4845869c4L,0x0000000000000170L }, + { 0xeae5f3757b7ec622L,0xe582b16f49a0d23aL,0x23c4a6c413352ff2L, + 0x29ac206336a323c6L,0xc1be1ef1f7fdd1f1L,0x490e35a0a7d81d3cL, + 0xe6fce48a29caea09L,0xa8b619aaf3969db2L,0x00000000000000f5L } }, + /* 55 << 182 */ + { { 0x1870f45409e7211fL,0x63654bcea1a5ce06L,0x82f781ebaee02970L, + 0x704b05b98e6cfb1aL,0xda7779919e75ee2fL,0x9c311b5c8119aee8L, + 0x878920321ec5731dL,0x45db4af638e2efbaL,0x00000000000000f7L }, + { 0xc9da8fd8e60fa998L,0x2a908c8fa661efbfL,0x948c9b3fca468a51L, + 0x485238d4944e68b9L,0x4d696e8be223a143L,0x260195a48ab78084L, + 0x3c53be3a2fda134dL,0xaab965b37d24899bL,0x0000000000000028L } }, + /* 56 << 182 */ + { { 0x2698187d69d2a365L,0xceac05a40b4636e7L,0x508d87efb39d959bL, + 0x7dadb94ac08ac0caL,0xbb5161f9df1dc5eaL,0x9f9118e6ec7a8f47L, + 0x5f7d5a7ee578d6bbL,0xd2cf3964beb0c4c3L,0x0000000000000076L }, + { 0x312ccd712dd174e0L,0x84bf3b4bf1b3cf22L,0x4215bd31f0927edbL, + 0x062cfeefc2aafa52L,0xdd22d90155eec990L,0x18e19734057e44e8L, + 0xdfc288d25f2cd2dfL,0xbfbe2cee43c3cfcdL,0x00000000000001b0L } }, + /* 57 << 182 */ + { { 0xede9dba37de1996dL,0x5cd619af1e5e92b0L,0x10a0c857f0d58153L, + 0x61315a1882d47722L,0x66dd97910865fea2L,0xd2de59878c9ba9c4L, + 0xa5824d290e0820d6L,0xe5d76f638dfb9646L,0x000000000000007cL }, + { 0x50c9cf139ea56fc3L,0x6da6381cbdb8339fL,0xb032c6afcd108fc0L, + 0xfa7e5df3f3b77a78L,0x5341541b5164eb5eL,0x155b51417cceb4c4L, + 0x4f553c4b4beeb913L,0x6843d58366d8e171L,0x000000000000012cL } }, + /* 58 << 182 */ + { { 
0x46edae7d7392fef1L,0x13c81fa327e3da29L,0x131235e48fc31b9dL, + 0xd45727f4b5da6124L,0x01da47fdef43cb27L,0x5bf5a111ba1b6f30L, + 0xd139d86b94e2ec2aL,0x4509851b27a5489dL,0x0000000000000050L }, + { 0x8e15e7da4e69630dL,0xb108240ba9cd3f1aL,0x3a17d2e7e78db971L, + 0x5c5d774bbb9907c6L,0x28a2b3d988c7b081L,0xae4db188a3b1b36dL, + 0x2f3dad86d87987a6L,0x9a5bf38f3ab24f4eL,0x0000000000000158L } }, + /* 59 << 182 */ + { { 0xdcf8bff495b0e157L,0x53222102015e65c5L,0x26806b4211c7c94bL, + 0x5d3b14efad4a65f6L,0x5b3c27aa8f1803e1L,0x9903b6938f835996L, + 0x459cd77f13b3933bL,0x895307a086b8393cL,0x000000000000004aL }, + { 0xc82810de845e5630L,0x8eeb40e14c46edd9L,0xd71578b209ff3691L, + 0x5d4c8c8d7d9b5b99L,0x27751925ddbab2dcL,0xaa917a7c563fc5f1L, + 0xdbeea6a290fd3955L,0x758fe38ec8b0effaL,0x00000000000001bcL } }, + /* 60 << 182 */ + { { 0x3069b9d66e77aef9L,0xd042ac052eb93fb9L,0x8335fe4212830002L, + 0x293146e6e1b614fbL,0x652317e76ddb3ae5L,0xecf47236fefba037L, + 0x24b51adf58eb0b7bL,0x5109f7df4874fb35L,0x00000000000000d6L }, + { 0xff02ac885f1d5485L,0x8a993daf530607feL,0xc63139c93ba41f73L, + 0x934a2f9445f411a8L,0x2208220bc95aae6bL,0x96710a9c63292771L, + 0xc1b102e35b2a77a3L,0xc8177ecba35d5e23L,0x000000000000014cL } }, + /* 61 << 182 */ + { { 0x9203060baa7b98a3L,0x07051a435181b164L,0xd3fa67c3eaf21604L, + 0x088e3498db6572d5L,0x7bab12be9fe1247fL,0x3c0eccaaaa9ff72bL, + 0x57f2e32204b15e7eL,0x633bc218494c1a0fL,0x000000000000010cL }, + { 0x773b7a085bd8eadeL,0x8f844710f88f009dL,0x13dac3f48190c4abL, + 0x48d9eb6ea3f2ae28L,0x81f21ad313664e39L,0xd4110b6316c4b065L, + 0xa11b1afa2b8e957dL,0x0a5f65cd9bc85cb9L,0x00000000000000a6L } }, + /* 62 << 182 */ + { { 0x8739e462a1a8188eL,0xca1226f5110b4497L,0xce536f0a7d506e8cL, + 0x9f0e49dc8a1e00f5L,0x8abd40830a409966L,0xee494417b4ff83a2L, + 0x6a2c14d4c8b8e02dL,0x2eba1a0242c27ed3L,0x00000000000000b6L }, + { 0x4991518dec3148eaL,0xa516810f34200143L,0xd15d96f86b6513deL, + 0x359a3d9657b574aaL,0xa689c45cdeda4bdbL,0x764cbd196bba4697L, + 
0xd9d7997f82988c9dL,0x24dddb31d7554449L,0x00000000000001daL } }, + /* 63 << 182 */ + { { 0x51bc4616753f0ed2L,0x7c3c6eee58b2a1c0L,0xcf37a0748131bc15L, + 0xf06bd740fa7909e1L,0x2d7984b54b800a3eL,0x57f583984ced2487L, + 0xdc68de17f0f308caL,0x813c4b8e3a16b769L,0x00000000000000ffL }, + { 0x26daebaea97c43c2L,0x258edf708fe82e3bL,0x64847056879ab94aL, + 0xfab3c35232f43e05L,0x01aa155831a2f7f9L,0x0ac01ea817d8b3f2L, + 0xa7e3667b67cf00d7L,0x83e9a2ef50ccf5eaL,0x000000000000000bL } }, + /* 64 << 182 */ + { { 0xdbeeb421cfd387adL,0xd208fe085bb4d523L,0xff2282dbed24edf7L, + 0x30556ff9c806b492L,0xe7716c3cd5b5bf57L,0x847e3a43153fb3b2L, + 0xe9a8fb3c391809c5L,0x52b4a72992596244L,0x00000000000001c0L }, + { 0xe49b344e59d9ae12L,0x5a5e32a1c925b6daL,0xe81d38d1f054a1c6L, + 0x4ae4fe0cf15fb6d7L,0x6ef46dcd5cf03296L,0xbf46b97612da300dL, + 0xfb373fe835fa7fddL,0x0f78e5ee62bcaf82L,0x000000000000003bL } }, + /* 0 << 189 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 189 */ + { { 0x9f3034247a56433eL,0xbbfa0fd960307947L,0x21c78cd331db3e3dL, + 0xa837641c0a2a5f1fL,0x28338efb56b8bdf5L,0xc0b66594f0600d27L, + 0xc07399315cb9a3bcL,0x95eac1715267267aL,0x00000000000001e3L }, + { 0xe9ee22d1897f04b2L,0xb6002a034cde7dfbL,0xcbb7be483c3b494cL, + 0xbe6c1a98670f740cL,0x9e9d0b911fc603e2L,0x532af80c351c0915L, + 0x0bbf9ea379802339L,0x96c16ecadd33d50bL,0x000000000000002bL } }, + /* 2 << 189 */ + { { 0xd7ba67b2dca5c86fL,0xa2ad0453e4c65e75L,0x77cf1258e190bc9fL, + 0xb3f630142c69bc9eL,0xd962eac71689858fL,0xc1b865c669336a7cL, + 0x3f97565d9b5a5216L,0x93ea967c1a73a39eL,0x0000000000000083L }, + { 0xa8e4af6b6ea73ebaL,0x3ff8e52dbe248173L,0x28a4e2bd3f3d34acL, + 0x5d52213e1be61adbL,0x8ebaa43d02968a4bL,0x735958e06fadce98L, + 0x2f143cd6c15f7644L,0x533bec76a7fbfc74L,0x0000000000000061L } }, + /* 3 << 189 */ + { { 0xfbd6e794814311efL,0x1751996b26cc272fL,0xe6a2e157a4d9ffa3L, + 0x10fc93529a7d88f4L,0x02545d80395d7365L,0x56b7ee84dc6c4d32L, + 
0xc3dea31ea455538aL,0x582e311bb82cd5f3L,0x0000000000000151L }, + { 0x1eadf021b3bec16dL,0x3406d4109ce73860L,0x6785be85eadb4c7aL, + 0x68adfdc3e1e59a9eL,0x131ead3cab13f561L,0x0b2c0c4925936137L, + 0x6c318ebc0f35b6f1L,0x0abe8c27f819e825L,0x000000000000015dL } }, + /* 4 << 189 */ + { { 0x44b3f23919fec274L,0x1dc89875e3a66eccL,0xe36ec60985eab4f3L, + 0x3713bcd6840458e2L,0xccea99d32bb88a8aL,0xebd3d4bafd21e802L, + 0x03437c286ffc588bL,0x3a21efba413b06bfL,0x00000000000001d8L }, + { 0xd9afa45c16355e69L,0xe9b28f5b2502acc5L,0xef1999b91711da7dL, + 0x4657efa14eaa0320L,0x0729dac2badbe83dL,0x45b0acd9b9cfbc6eL, + 0xa014eab6bb296d4bL,0x29f6bef363b0d362L,0x0000000000000101L } }, + /* 5 << 189 */ + { { 0x30016ee322b152c0L,0x70de4bbf114bfedcL,0xcbdbef831e26173aL, + 0x7614a523370e3b92L,0xd7d0c5f4d3e9e924L,0x221af66abbea4a57L, + 0x2da2bd107007e4f7L,0x17d2a5bacd5b3b7eL,0x000000000000012cL }, + { 0x6e8482436639e4ecL,0x9eeab903ee4d0989L,0x0bb3fb64dd0b4efbL, + 0x07089df4ce8f6909L,0x7ef71f65545a7c71L,0xe91cdecc6884437fL, + 0xe1d60e1401c4ba6fL,0x398484de2ab50390L,0x000000000000011fL } }, + /* 6 << 189 */ + { { 0xebce4a115a224092L,0xfe887cc974e91ac0L,0xed3beff1626717d8L, + 0xd6915dc097f9e7d2L,0x6c5aeb18a6de7e4eL,0xe18c932268e302a2L, + 0x4ef08d0ee3a4fce4L,0x7524f6f1a36161aeL,0x000000000000001cL }, + { 0x096e3563fe82f5f0L,0x54cecafd13216b13L,0x51d809421d3cde0aL, + 0xdbd61ad0c2608747L,0xaf378b29ddc6c9f1L,0x09a4057360507871L, + 0x755fb6481251c43eL,0x7eeaf993a14377deL,0x0000000000000035L } }, + /* 7 << 189 */ + { { 0x354d6470cff89fc1L,0xd5880e0184c3f4a5L,0xe8cb2497dbd934d9L, + 0x80244f291dd7a63cL,0x87c983b43117c661L,0x72361de291c17b69L, + 0x2c94190b2ee78d13L,0x995a4ff57d91fc16L,0x00000000000000f9L }, + { 0x98b17d2525c080a5L,0x3f2403b05b281b5aL,0x30f4915af7f4c45bL, + 0x01310b2ab9189ec2L,0x3dd5f6adef36c514L,0x39c3a0e711fd5062L, + 0xcb2063dc862bd28eL,0xe1a75d8e90a76270L,0x0000000000000037L } }, + /* 8 << 189 */ + { { 0xfdf7681d6debbcf6L,0xe21aef25600ea3c3L,0x181fb048bb30b75bL, + 
0xf9d8c43d8d8b7b4fL,0x3d3bc6deca68ec32L,0x6d469252af1641ebL, + 0x1be3c9ba10252291L,0xb57f2c328fa4c10bL,0x0000000000000155L }, + { 0x599e10e59ea96c94L,0x4426deb740593ce0L,0x948d92862d48d29fL, + 0xd3ad8e54c03f5cc5L,0x53fe15e4219cd0d4L,0x03b914210c8fc479L, + 0xa023c978e5aa96e6L,0xdee00502edb69f81L,0x00000000000000b7L } }, + /* 9 << 189 */ + { { 0x2cd49083e79f9778L,0x7f9946f671882e65L,0xfb08405c07d0fe20L, + 0x5ed7e04781c7bafaL,0x01aa5ca253de8921L,0x71ada196b70cdb9aL, + 0x2db79a24e4149028L,0x2d2a7f5c94c21733L,0x000000000000016eL }, + { 0xecf6e42f2cbeb376L,0xe632d553b396294fL,0x5cca8cb36fd4084bL, + 0x92bbab151b2a587aL,0xb2e240be130d1e99L,0xa0672abac859261dL, + 0x84cf5fd413d5c102L,0xf470027bb2cdd4ccL,0x00000000000000dbL } }, + /* 10 << 189 */ + { { 0x27e1ae748e84955fL,0x09aaf5ae9c4f7a26L,0x4ec5554621e1e993L, + 0x800515f0370d3efeL,0xc32a04e6c4c06f83L,0x5483476dd5fab42cL, + 0x07278cf44de2d405L,0xad26e4ed4e6413ddL,0x00000000000001e4L }, + { 0xbb9628ee17f12587L,0xaa5ff06f578f0bcaL,0x5c7a8d11d465f293L, + 0xbf2955e83f6e5394L,0x262cc5a3affc8451L,0xc35a8e8a722ffad7L, + 0xb93e5c2aa09cc5e1L,0x4cea80e846301158L,0x0000000000000185L } }, + /* 11 << 189 */ + { { 0xc18386a836ae1465L,0x7f620b6851f49485L,0x35ff5db59ac12858L, + 0xa24cb377e1b66ef8L,0xc5a5f120824944e6L,0x6ba1777e2c75d5a9L, + 0x9b02b8a4205b5e9dL,0x06a6688c47c4473aL,0x0000000000000039L }, + { 0x17801becd6b7a9eeL,0xf99177ee7266ee48L,0xce538f55eb961569L, + 0xeaf8059b06dbe1e2L,0xf136975608a59309L,0xd86d2a94532ac00cL, + 0x64467ba65380bcbaL,0x0301f9be27d7e1ebL,0x0000000000000164L } }, + /* 12 << 189 */ + { { 0x29d39a4a7807bc2cL,0x835ca5afc2a6dc42L,0x26bb5781ac817836L, + 0x3def5563525f97d2L,0xc6cf91e9237a68b5L,0xe1e954d9f9f587a7L, + 0x7854a450126484d2L,0x0690c82dd3ae3825L,0x000000000000010fL }, + { 0x3d46b028271428d1L,0x278c1fafd766ea34L,0x9c59b166355eec06L, + 0xd6570ec7a36f1efeL,0xb64c51d48e8de46eL,0xdc4c58e2b39a5f35L, + 0x1c46808b966c687fL,0x0d045d3e28f0fa4bL,0x00000000000001f4L } }, + /* 13 << 189 */ + { { 
0xe1f8f593e4035d20L,0xf3115ca872b27c29L,0x6f021dbcce8a79a5L, + 0xc9b99de992e812c4L,0xeef9605ed479ced8L,0x17bf75d1ed230e0aL, + 0x1c37a667604f45a0L,0xba2e8685455b00cfL,0x0000000000000159L }, + { 0x8d9901318b4e88e5L,0xfb0085e29fa679e9L,0xd4b5a294d35bc826L, + 0x65e3da16624f4c16L,0x28dfc963738be5c3L,0x093e9cb0de3d8c84L, + 0x83ea7628f32f052dL,0xe095054317da9fb1L,0x00000000000001d0L } }, + /* 14 << 189 */ + { { 0x55b440bb989b4a3eL,0x4a558cdb7ca65342L,0x0243665f6848d1afL, + 0x46e00dd3eb611d13L,0x7cdbc1eb485f070dL,0x0b6b2a4e37d9317cL, + 0xb86c7c90d9212b88L,0x0a5a39d8f0436535L,0x0000000000000185L }, + { 0x0d127ab6cc7ad679L,0x87e9e7400651b018L,0x4c87124af0507f1fL, + 0xc89734aeae2e207aL,0xba7939ac576d7e51L,0xb95bd1af45eba10fL, + 0xc1674d96f2530deaL,0x227312b48e61b9f4L,0x0000000000000066L } }, + /* 15 << 189 */ + { { 0x1b31108442459f4dL,0x7cae90dc5e878116L,0x127607a98c0a313eL, + 0xd2c39871a55fd972L,0xdc51776a077e2127L,0x34b0a44a2fdcf34fL, + 0xa79ff2c28333072cL,0x384befa6bf1e4db8L,0x000000000000014eL }, + { 0x1163018471e4da59L,0xe8eb4bd16da9fa16L,0x169061c82cf15f97L, + 0xef24bf4b32753221L,0x79cf9b4e978c0e94L,0x4ee31d9fb2e8c08eL, + 0xa9999a9b491f866aL,0xb40eaa6d61d44be8L,0x000000000000018aL } }, + /* 16 << 189 */ + { { 0x28904911200ad609L,0x6fb2b6633505c98bL,0x1bf6e6b517ad4abaL, + 0x6d0e654fdbd8e9b0L,0x8db772c1f588fbb6L,0x5c2b736fa292489eL, + 0xeac6e04ab902a7c9L,0x249c84e7b2dbafadL,0x000000000000010bL }, + { 0x2cbc039029de2306L,0xac01d1a5bfc36a57L,0x89f1a059bacd8b95L, + 0x2e1e095a3e92c99aL,0xf172bad607b4e976L,0xdce0b2e435885013L, + 0x9a0c55567fa11b87L,0x1df0dfd67eab7ad3L,0x000000000000008cL } }, + /* 17 << 189 */ + { { 0x55c1cb327ee5e091L,0x9b3b38b6a4f1553eL,0xa93f2ab9ca3703b5L, + 0xe153206da14864aaL,0x53cbdcbff2500ecbL,0xd70649347984ffaeL, + 0xf0193f44c239a4cfL,0x6bfc462f1567d5deL,0x00000000000000a5L }, + { 0xcd6b480b381c54e5L,0x17190f5f936eeb1aL,0x55514c1aab4dd2d5L, + 0xff15013f4d378b6eL,0x086bca23e25466d6L,0x84b630fef5c92761L, + 
0x6909e2adf9aaf597L,0xea3054a8e1b21ec2L,0x000000000000003dL } }, + /* 18 << 189 */ + { { 0x5225f56496479004L,0xfe7f6ae01d60f576L,0x1215e4a2885f56a8L, + 0xe70f09235d1e6df0L,0x7cbf30fb66614db3L,0x346de189209d9a42L, + 0xff0cec30249c0bacL,0x76488efe69c479beL,0x00000000000001a1L }, + { 0x4ad5db4a81213654L,0x748c9e51a3b0203eL,0xd9c64583acb72ac8L, + 0x3926c7c4856aab8aL,0x62eae82ca80385efL,0x74d7d9713c061721L, + 0xe647d75b3a7ddbb3L,0xab2b33724d872279L,0x00000000000001a0L } }, + /* 19 << 189 */ + { { 0x563ada4da040a5ffL,0xea621ac31fff0784L,0x28e6ff0875ce88f5L, + 0x09769ec371f3aeceL,0x0ff40dcb3d45fb95L,0xd7b33147eb09a022L, + 0xf62ccb4a81d97a19L,0xe45cc4d83f51b39aL,0x0000000000000137L }, + { 0x79aa7c43e9a9779dL,0x36b108fb70f8c585L,0xe1736644af50cf32L, + 0x820b16320c429c35L,0x442c9d21b01595cbL,0x28c0c7683c79c26cL, + 0xde4901b3e59608caL,0x0a19facf0b39089cL,0x00000000000001e0L } }, + /* 20 << 189 */ + { { 0x1b5336d7e43ff8d0L,0xc6586c542375cf3cL,0x751a95b05d372296L, + 0x95517fa7db98d315L,0xb7671c0f015782f7L,0x467c1fe893c3fe96L, + 0x2a6c0f17f284f967L,0x76bff3a612aa02beL,0x00000000000000d9L }, + { 0x3242434e39647784L,0x4cfb956b92a3ffe2L,0xebebb9bd37da53caL, + 0xb0c81cc581b8ee8bL,0xe3f6dd68212e7cb8L,0x7bc578594c1c5556L, + 0xc053d123d233497aL,0x599caf98bd4c5762L,0x000000000000017aL } }, + /* 21 << 189 */ + { { 0x461e855e310573dfL,0x54014992600cd5d8L,0xf59622b38d9f5217L, + 0x8a12593ce712e5d9L,0xc3ec3c3adfefd672L,0x424c770e4264f3aeL, + 0x08be0d0020979dd4L,0x5d78b2e57c42109aL,0x0000000000000147L }, + { 0xe0136d93f5bad4bfL,0xd06d7d2e2957b516L,0x43aec5cba5b37693L, + 0xfb83976ee625c1bdL,0xf244bc273bbd2729L,0x82b9d28ff5aca07bL, + 0x0accd6d898b95aeaL,0xad438298370aaec2L,0x00000000000001f8L } }, + /* 22 << 189 */ + { { 0xc7c33c5eecca4b24L,0xf9044a18db7e92a0L,0x33676183cba7445aL, + 0xa9ae5991d5766cb2L,0x77c6ccfb59467537L,0x7f0b64d45a680fc8L, + 0x942a29960cb827b1L,0xec6fda25fe4e54e9L,0x000000000000003fL }, + { 0x89d45bbf50285e48L,0xc367bbc6bbb6d77fL,0xcea1ade71e45ff58L, + 
0xcf02f6dc094f0616L,0x87e28486305c7815L,0xb1509ee1bb11a8bfL, + 0x17d6e7d1b32df112L,0xc8c5a7f784aced31L,0x0000000000000038L } }, + /* 23 << 189 */ + { { 0x1dc8bdbe227db0f6L,0x1e425be636244ac1L,0xc6c7f5b0164da7f4L, + 0xe2aed98497ff7952L,0x92e74200872554e1L,0x15004c56b9be5350L, + 0x402804113966ca44L,0x5ac0cfaa80173c93L,0x0000000000000149L }, + { 0xbbe3e8071236a9b6L,0xf1699483b8a492bbL,0x7a217dfae96083dcL, + 0xfcae468c2ee8724bL,0x2a1ced809815ca66L,0xc8a7078d0644ff4eL, + 0xbda30ba562fb3e52L,0x7a487bb502077ec4L,0x000000000000017dL } }, + /* 24 << 189 */ + { { 0x3555f75c6a7e545fL,0x1ea37a8e7a15bd6cL,0x188207a366f5c712L, + 0x0fc45bd94926901eL,0xc55d2d8cd34b1e01L,0xfc75e1afd84f7268L, + 0xded24e68bec74ee3L,0x0f611682b82806baL,0x00000000000000e2L }, + { 0x5454065af1943440L,0x88291bffd01b257eL,0x6eaca283c6ae2502L, + 0x88f29ce08a74cbe8L,0x6ddb3ae9adce5e80L,0x93df4593bd911187L, + 0x16bdc203a6b62e07L,0x3a0ad818e8eaac95L,0x000000000000011fL } }, + /* 25 << 189 */ + { { 0x28e370db59a880c7L,0xa84aac2c2a3c718eL,0x4f9fc4fab725af9dL, + 0x52ae3e8947f0e1abL,0x4684a83853068039L,0xa3f5b78f6eb5b99bL, + 0x631dcbf590a53bb1L,0x65924aa0fbbc4453L,0x00000000000001a8L }, + { 0xe12adaad50519774L,0xc9899fa49227bb12L,0x496631535bfe092bL, + 0x7ddad74485659edfL,0xd527efb9d5a37c69L,0x52d5d81842abf762L, + 0x8a41c43b2d11ba2eL,0x97bf9f89a7abf282L,0x00000000000000c4L } }, + /* 26 << 189 */ + { { 0x3f21fb13ef8235fdL,0x488567261d6a5b98L,0x6ba7b7667852a2adL, + 0xcb927631373de0deL,0xcae220066eb7e6a0L,0xe58ea13dfac8816bL, + 0x4dc4f35b4b2cf759L,0x7a8c6844737f6e13L,0x000000000000016eL }, + { 0x0ae2eab434c605b7L,0xd78433a0fcd4c618L,0xa7596dae82b46a25L, + 0x35dcb1ff9d1608b8L,0x6380a771432f758cL,0x73bf36ed012ea66aL, + 0x923c12bcdf4a3ba1L,0x4cfb1a1b12249dbfL,0x000000000000002fL } }, + /* 27 << 189 */ + { { 0x21c6dda909afafd5L,0xc3ffe02f54ec3531L,0x81c46e852f5b6b5dL, + 0xfbca5691640e9b41L,0xf3a8c5f270a6f613L,0x47f8636a333304d4L, + 0x9619088688f84bdcL,0xa74c957089b07f7aL,0x0000000000000003L }, + { 
0x8ac2444429053fe3L,0x5f016a56f3422459L,0x3957735f6aba2e10L, + 0x1bf31ff8ed5bdd25L,0xc4c8ee2ef7e5572dL,0x6c36f06fae94f876L, + 0x5abeb8b11cf49d8cL,0x9226e08369e4554aL,0x000000000000015bL } }, + /* 28 << 189 */ + { { 0x6b89e2e509ace912L,0x5f4e515d7b067334L,0x6aa5259aec702301L, + 0x9cf73f7f688485cfL,0x48a5f3410659825eL,0xde9eefd6a1d3be96L, + 0x34a432aafaa769f6L,0xd1f8f08434abde10L,0x0000000000000025L }, + { 0xd9b076e3bbac8db4L,0x85858504f72ee1b8L,0x451a18815ebc2846L, + 0xc8cb018a0ff63a36L,0x5256a51f13eebe02L,0x85058bc0160ac671L, + 0xd110ea4ed0d4a62aL,0xcaad594b2b033a9eL,0x0000000000000023L } }, + /* 29 << 189 */ + { { 0x1d34963913c1a7a6L,0xd980f79ea896afb9L,0x701959597495f839L, + 0xf0615616e8aa79c8L,0x80061bd87a3147beL,0xe5d69e15f6973e6fL, + 0x2522bad76030ef18L,0x8acfbac4495c762cL,0x00000000000001acL }, + { 0x5cd25ef7321a2280L,0x95b9b0d0a0458db8L,0xccf4f8ce8ac8b7deL, + 0x904356985aa93f11L,0xe50024d1e7df2595L,0x09d83948ac09ff49L, + 0xfbaaf1d19abb573cL,0x28e4b4cd92bbe12aL,0x00000000000000daL } }, + /* 30 << 189 */ + { { 0xe5ca83fa634ddcb6L,0x5460a43245f882c8L,0x01a701ff4d1b379fL, + 0x743801734316aac9L,0x5e1a0761c8e815d0L,0x4f129ade260d2619L, + 0x3dab946d6a892a9dL,0x6ec73a540d8efc75L,0x0000000000000048L }, + { 0x7d8189195a78b6c6L,0x110936717cd52f20L,0x42d3bed515e9988fL, + 0x1c55dcea772def70L,0xcbdacd7c334ec5e9L,0x0686fd8a7201506eL, + 0x6148b0ee02462ea5L,0x55acec514eec4f0dL,0x00000000000000a2L } }, + /* 31 << 189 */ + { { 0x009751a569eb5133L,0xda66bc95a61f94abL,0x839c283d21b8e5ecL, + 0x77b3814771f6e8b4L,0xf08e62f140cbe8fbL,0xf1e7ec5e33993050L, + 0x3e540919a5c6f5d6L,0x143d5a48b948ae50L,0x00000000000001abL }, + { 0x227237fe6d7c37b6L,0x12b782ba91c864faL,0xe770b68f8bb0141aL, + 0xc3059e3f470cb922L,0xc55ecc13e3494d25L,0x7c7ad20198ead855L, + 0xed29220cdbf56bcdL,0x55607debe48d3158L,0x00000000000001bbL } }, + /* 32 << 189 */ + { { 0x1206ea28cd7bbb6aL,0xeff556d0c63062faL,0x8ad321fadeb86a3aL, + 0x9ea9fbdaf78fcdc3L,0x3ef406cb0e8c80a8L,0xc168b5970f593d36L, + 
0xd6456267c352a3a0L,0x303bd08332c5a9a8L,0x0000000000000102L }, + { 0xbf911f73e680d0f9L,0xb12fcca8bc1a1b47L,0xb8264577ef8024f6L, + 0x9d17b8b7f66ae119L,0xb135dfc275b33227L,0x4cbf0d6d96e667c2L, + 0xfff742beba373b24L,0x218e787e776cbd3bL,0x00000000000000ccL } }, + /* 33 << 189 */ + { { 0x6ea795ecf0b3e8e9L,0xa8d0f3ef415b14e0L,0x388d0b9eb2b8bdd7L, + 0xbac3606b47077e99L,0xf27e9a49fd56307bL,0x879d0ad938cc9279L, + 0x2b33ad7acd8f229cL,0x948444d5577207fdL,0x0000000000000164L }, + { 0x028ff3d3b9b6280bL,0xec12413d8aba1800L,0x8fa64506ac21d32cL, + 0xfb3b64089ad6c573L,0x4e558b8146d75117L,0xdd4a2eea3159469fL, + 0x7c9060ddb61dbf21L,0xadb892fb4fe85705L,0x00000000000000e6L } }, + /* 34 << 189 */ + { { 0x610cb28654137f6cL,0x1aede3330753a5c9L,0xc67e4426f48014beL, + 0x85b1131f733f87ebL,0x4b5b882d2cea996fL,0x29c274a0859aabbeL, + 0x822e16883647b4a9L,0x484e63777cae42c5L,0x00000000000000b1L }, + { 0x358f911aacf0fda3L,0x0693edbb8fe82648L,0xfbec08a1c04f3fd0L, + 0x45316917607c7c60L,0xc8ea888ada6dfc1cL,0x2121c681c65bf16dL, + 0x00ed364fa1da19afL,0xa28b1de4514495d3L,0x0000000000000184L } }, + /* 35 << 189 */ + { { 0x2999ad23a2dc7f0dL,0x77d0c3861314df56L,0x01f67ce42a664882L, + 0xfe134b6c5787a492L,0x2bcd1d1996126d18L,0xf190abf67aa5602bL, + 0xcc21b699f42256adL,0xc629989d7e2510b3L,0x0000000000000001L }, + { 0x7aabff2fa2c86a01L,0x8f5272cc0bbe44c5L,0x86c041b44308cfe8L, + 0xb14043018862f6b2L,0xbe9d148d51fa122eL,0x651dfd87b3519fd0L, + 0xa0610f30d9e81817L,0xb6976414ed2a30c7L,0x0000000000000110L } }, + /* 36 << 189 */ + { { 0x386cb3067bdfc975L,0xb8ab848e10483147L,0x7fbbc54074dde708L, + 0xd58eb2b596761accL,0xa885a6e8b8a0f062L,0x2630f19117e92bb0L, + 0x7169a8ed1258ed15L,0x471e6bcb40976b29L,0x000000000000007dL }, + { 0xa6d493e2795c47a6L,0xe118ad5806746dd9L,0x47ea093ba29cee54L, + 0x2a61aa20f7d8e228L,0xd78ec0e1fa70eda7L,0xf9511b38e1f2734cL, + 0x491fb14ec91629c2L,0xa3379d598e002052L,0x0000000000000170L } }, + /* 37 << 189 */ + { { 0xc505a9bdbc1a0a26L,0xb85a1e9478d9ed45L,0xdea7d140b7a562f3L, + 
0x9930f6d4e2ae8f4fL,0x1159929213476304L,0x13e811ae421ff4f6L, + 0xd9e2eee649cc4771L,0x73c04b328b036469L,0x0000000000000180L }, + { 0x4664a7a522123d30L,0x053c5233f9cbf61eL,0x0c6d121442902bb0L, + 0x61a509855f6d2484L,0xb427545c1f3fb40dL,0x55e444a4dbf4c320L, + 0xa8311978c13da776L,0x4623001ddb288ee1L,0x000000000000001eL } }, + /* 38 << 189 */ + { { 0x1cb6ff10734c362eL,0x3ec3afdc195a00b1L,0xa1ea59124f0957c3L, + 0xd9fc3ff63a5f5203L,0x9949e8d68e8a558fL,0x252995ac0e4cf7d1L, + 0x517b1dfacc935cd7L,0x7d8bdba8f0db0cf8L,0x000000000000005aL }, + { 0xe8ce78b579f92ab1L,0xd9fde900ede42817L,0x5a6add0bb9b4bf72L, + 0x845e8b94a03706c2L,0x3bb16f047eaf2758L,0x29d823930580ca79L, + 0x5930385b7bf0462fL,0xd69171b381b47f3fL,0x000000000000007aL } }, + /* 39 << 189 */ + { { 0xb054987a59b57345L,0x690f4ea00df9adbcL,0x709a2218b262038dL, + 0xb5df8e93e44eaaedL,0x3e1580e4b2db7099L,0x1ebefedb2b3342e3L, + 0x6e0ef1246521bd90L,0x767b444de97f9531L,0x0000000000000112L }, + { 0x4fa11e48d4e8e902L,0x804eb6b4375336b0L,0x0d79d368f96e7400L, + 0x66fa057582576c82L,0x514fbfc45ec62c7fL,0x72eff87ed38bd85aL, + 0xfe9185f4d8d6cc9cL,0x47a733213fd2ed50L,0x0000000000000171L } }, + /* 40 << 189 */ + { { 0x1f8215f9efb91de7L,0xf3add5d08a2f74d9L,0x56433008a520d556L, + 0xb6df83c58628ac75L,0xf3c38ea27390eea0L,0xb1aadf0f6078a673L, + 0xa9e0832827b6b8fbL,0x22e64291f3b7ce53L,0x000000000000017fL }, + { 0xb3e7ae4c6e69500aL,0x740b9752c441e02cL,0x3d04d6dcf503d4a0L, + 0x22490a59862f3496L,0x0f4a62aab2472177L,0x668475b979720c85L, + 0xf6ebeb0bed9b4fd9L,0x8a5a6926914e8bf4L,0x00000000000000f2L } }, + /* 41 << 189 */ + { { 0x88714284d7f15d9bL,0x23bb89a963dae31bL,0x1dd869f770aad758L, + 0xade2a58dbc0c8b8fL,0x4e34cb29ecee1b5aL,0xffc36695a418cf27L, + 0xe6b995d08e90eb26L,0xdb6704209e7edb4cL,0x0000000000000030L }, + { 0x11a61ac697a71becL,0x01a7863ce7f9c778L,0xb33e274f0dc175d2L, + 0x5bc952b86bd31118L,0x3d3c7b5ecb4a461aL,0xb247e08560a81989L, + 0x98cead253cabd814L,0xcbda7ffe5f3774b0L,0x0000000000000015L } }, + /* 42 << 189 */ + { { 
0xfb865e6491e6c598L,0x88b4cbc94f5c3464L,0xc14f04519bba56dcL, + 0x283e370e9828f0baL,0x178381311807abb9L,0xbd9227dc91eb4794L, + 0x998862db036ae356L,0x64214f5650a6534dL,0x00000000000001d6L }, + { 0x5c07fc067be97a7dL,0xb83d3ca51f918746L,0x1fc4ac0bf600c884L, + 0x00c1c0643ff9138cL,0x37107da9fe8ae63fL,0xce43f9671a021382L, + 0x533716b2dcafef65L,0xa50b01d91c1ba0a3L,0x0000000000000140L } }, + /* 43 << 189 */ + { { 0xc8c61097bfe6c874L,0xe4a957c48c7b1695L,0x8975c3d5cd617093L, + 0xb55cf8b8a2aeb3c3L,0x210a5fa0412b23dcL,0x3b306039222fd5e8L, + 0x58ebec8802e8b2b8L,0x85a935b9175925d9L,0x0000000000000088L }, + { 0x20262a2acd9e1515L,0x23dd0510bad24e8bL,0x3f904417b39e83bfL, + 0x4767a90419e5a1e2L,0x4cd0316ec8b10371L,0x33f91aa8c3985efdL, + 0x2d2eabb1bcaf62e3L,0x61c39f69a9070a71L,0x0000000000000197L } }, + /* 44 << 189 */ + { { 0x2f548b4254bd23edL,0x9bdccde5e22e32ecL,0x01b0646f4d769b35L, + 0x749212ca1ddb13a7L,0x01aeb2c2793156d2L,0x1dc2dd05afed290bL, + 0x2dc90777f5976b90L,0x69b807201c24d363L,0x000000000000000aL }, + { 0x927ebc909024b3d2L,0x04befa23e7fba843L,0x179ed851cc504025L, + 0xd66e92d06f1e878fL,0x27604533a48eacd1L,0x25f304c0be099838L, + 0x12f01ab1ff150469L,0xfc009f4cb502c2c3L,0x0000000000000067L } }, + /* 45 << 189 */ + { { 0xd18e7cdd15b76157L,0xb8b81063545ee371L,0x18860c3a5685e78aL, + 0xd2d62d66fa06d7f1L,0x7d518c8ebe31f479L,0x49634d5aca4d86edL, + 0x73510393286743faL,0xac5cb2646b005aeaL,0x000000000000006dL }, + { 0x3ef1fb8c995ac4f7L,0xb1b476da5041fdc8L,0x013bba673ae16bdeL, + 0x8edf24bd989ab5d7L,0x27cbab12f77269c8L,0xd0df40b13ea9fa4aL, + 0xa0ef8425ec2bb20dL,0xe69c5d12bbb95ee4L,0x00000000000001edL } }, + /* 46 << 189 */ + { { 0x38d4b079f0269288L,0x024ba79538c84188L,0x8dce75a4f8f34375L, + 0x12ed9b998811a78eL,0xc43f9833966242c8L,0x1ebf2a2893a4de55L, + 0xb77ba54b5df20ecbL,0x0b59c76549fb4c99L,0x0000000000000001L }, + { 0x23d73eaa305df024L,0x17641a8bd4804cb0L,0x3d7e68433251d480L, + 0x3a7ca4126fb3aef7L,0x511263cf18747d97L,0x670de6bd5cd3b9c4L, + 
0x6cdd5a8ae71b247aL,0xd984e646528e3fbbL,0x0000000000000108L } }, + /* 47 << 189 */ + { { 0xe027d07b2f4be08cL,0xbcc10ac427c3e5ddL,0xf64b8a22e63fdda2L, + 0xf9df0d4046b68556L,0xc08d7d8e7e2240fcL,0xd854c44766a163bcL, + 0x5c52966b491bba4dL,0xb84134db9f066e7aL,0x0000000000000044L }, + { 0xb09bdb9264c613b1L,0x2ca2ccbdac59d414L,0xa97aa22e6436bcf3L, + 0xf22bc357227e1feaL,0xd8a7b25c7f842c67L,0xfb4cb6c88939f9f0L, + 0x28b488967f1d398bL,0x8de5e61a807c6f89L,0x000000000000000eL } }, + /* 48 << 189 */ + { { 0x4231da59e97ea880L,0x5d4c048745262614L,0x3718643736aeeaeeL, + 0x17229d1dbbaa66cbL,0x342daf59a5d41097L,0x5fc2cc015080f051L, + 0x657c05a20e6527beL,0x3e61c315949e36ecL,0x0000000000000008L }, + { 0x961bdc35d8a5319fL,0x5b8f4db874708cd1L,0x901d28564726604cL, + 0xe35ad211ccc10400L,0x5a9b120594004a0fL,0xdbf493bf5c8eaabdL, + 0x6ccc828bbaff6fd2L,0x83003f1077e9a126L,0x00000000000000b9L } }, + /* 49 << 189 */ + { { 0xd0396d365c605661L,0x736a0fba8c8a36ddL,0x120af17a26d18bcfL, + 0x2846334514e206a3L,0xd3690bd20f5657e7L,0x5f7879531bb3ddbbL, + 0xee94af0449d00002L,0x29f266ce19c87885L,0x000000000000005aL }, + { 0xe4c501899ceb0396L,0x065237a9181dd431L,0xf6b4de78102156d1L, + 0xfadd137e9466701bL,0x556194d58bad4752L,0x4d7cd2ed0b23e83eL, + 0x770c7e894b1329fcL,0x4af47a31fb173fdcL,0x0000000000000035L } }, + /* 50 << 189 */ + { { 0x111c4e8e15567b4dL,0xba2af2f3d1278cb8L,0x95e0ef97c8a0c184L, + 0x47e7053b2a46e1f4L,0x1e288ab774cb99fcL,0x2a5285bb17060961L, + 0x50bf49cf6921f98aL,0x4ce741821d7e1372L,0x00000000000001d1L }, + { 0xcf181e712a064bf7L,0xc534c01e49b3e010L,0x277fe91f939ef9aeL, + 0x0724737c2a40040eL,0x2a320ce8ffe77170L,0x391b6f9f8eff74edL, + 0x83b4195abe5b9daeL,0x937f0bcf4114237dL,0x0000000000000021L } }, + /* 51 << 189 */ + { { 0x41d6ef761ab2d533L,0x298cc67e04d17f47L,0x062870d3c680b409L, + 0xa74cd183e9685d65L,0xe2db43ef1eb8e1cdL,0xb0125ff3856c97e5L, + 0xe7d81e0b672a3ae4L,0xaf12df591600af74L,0x00000000000000d7L }, + { 0xc6a9d40b7bdc041aL,0x8e91290a49e7d705L,0x52beef55ae198296L, + 
0x5c4b1922f23fb61aL,0x79a2a393957a052bL,0x1452ec147c8cc360L, + 0xf168e6f217c63a38L,0xbe17a226707be9dfL,0x000000000000012fL } }, + /* 52 << 189 */ + { { 0x717f198fb091e5e2L,0x13a018ba5a3c1a39L,0x2f8745d8131531cbL, + 0xfc9c44e53e11e93dL,0x7240364643a6f330L,0xebbbe261babaef99L, + 0xc4f1941da2e2749dL,0x6bb03f4bdec5c4fcL,0x00000000000000ceL }, + { 0x321b501994a5c6e6L,0xc6b51804a3f6b8ffL,0x269f2a832561b248L, + 0x3b97ea7807238ba9L,0x2fe33d8bf290911cL,0x5d148537c8cb8e95L, + 0xd1a1305196d62b4bL,0x44121143649ad208L,0x0000000000000049L } }, + /* 53 << 189 */ + { { 0x18c67cee142f459eL,0x374f0e63c240a5c7L,0xccb426eed0b3c8ceL, + 0xc83a69c7018a50d7L,0x9a2e51154c61e39aL,0xec5edbee6dc14408L, + 0x45725abe624cdb91L,0x4c0a11373d6d72aaL,0x0000000000000154L }, + { 0x7accd15cc3b86af3L,0x78de4ff07ef41f2cL,0x3601519c309d2841L, + 0xa8712f8c378ff8f6L,0xb6e9045a0b8f9a4eL,0x2ef0a92ac1501d2eL, + 0xfff126325c745eefL,0xb0712d67b91b1b4aL,0x0000000000000165L } }, + /* 54 << 189 */ + { { 0x7714e43a0f7b0604L,0x225769c178d565bcL,0xedbd4a162759fc30L, + 0x5094390eb8daef23L,0xcac7cdb538737603L,0x6ec71d48993c34c2L, + 0x9eb0c65edf4ffd86L,0x9a9076f819830bd7L,0x0000000000000159L }, + { 0x43d60e2fafe174ecL,0xbc38a3da61f4d263L,0xcca7c7f7dd11ad45L, + 0x8aaf2e3342e975cdL,0x9d8b2919134c2420L,0x5f8c989ebd92fa2bL, + 0xb4ee181c72b418feL,0xaa7368d199f80763L,0x00000000000001a8L } }, + /* 55 << 189 */ + { { 0x7baebdad95697470L,0xf6fe566efb547be8L,0xb087ed3f2a8fb882L, + 0x1c4d32db206e9a27L,0x3b047777f384b8edL,0xf2a55b91a1dd6624L, + 0x949d913812f34dafL,0xdb09a6fe227067acL,0x00000000000000d4L }, + { 0xfeb27fb80160f0f1L,0xbc082e7c89550518L,0xea404d2ff506e716L, + 0xac5ac13f410aaeffL,0xa6463d178eafe900L,0x41febe7847b7f1b8L, + 0x7f6304ec189426d4L,0xf481347ff7b60b29L,0x0000000000000178L } }, + /* 56 << 189 */ + { { 0x3d91ea604c2a4cefL,0x8de8f69c3de838adL,0x9a725723538c37d7L, + 0x3923fa077948b4ecL,0xd1a25d23d23e67f0L,0xa2d7b9d7396fbc50L, + 0x8f8504e41d36f98cL,0xaa9e585159191798L,0x0000000000000147L }, + { 
0x8edbf0c2e3db7296L,0x677a299681fc8f30L,0x05e27b8db727e6aaL, + 0x4f5a9a6903113b4eL,0x9d4724e83ed42117L,0x06c0b8a572fcb859L, + 0xecdcf4d8ad040b87L,0xe6bb9a0e8cda9eb0L,0x00000000000001d4L } }, + /* 57 << 189 */ + { { 0xcf9d92912f2e8e0eL,0xa061e2d775701d6aL,0xf4a75abcb896c99dL, + 0x7b502799c595a675L,0xd703789763e618e2L,0xf076e4da3fbfb9e6L, + 0x3b7196719736bb0bL,0x0e2f898f4f343030L,0x0000000000000056L }, + { 0xa67c36c34ce58368L,0x42fc1d6643ec3b5dL,0x2bc8c76731b19f69L, + 0x66bb8a65b40529acL,0x2ddbd08a1bd5d8bbL,0x3358549130479ed3L, + 0x4fe81de647ea8842L,0x9d43206628fb9086L,0x000000000000000cL } }, + /* 58 << 189 */ + { { 0x0ab82b7cfd9f395bL,0x6a40f5c4b4edd9d2L,0xdff6705215b2434cL, + 0x52a16a6a7627d662L,0x42d0a2e3360868d1L,0x7bd61bc13135ddb7L, + 0x756baf27fd0b2628L,0xc66119f396b6d910L,0x0000000000000054L }, + { 0x1b35db2327a27e71L,0x3de3025d74802d8aL,0x1d1924ad6911cfedL, + 0x56b50c4fc3626e7fL,0x5ae02f9e48297981L,0x92c82e544102fd01L, + 0x0824a2f45de66a6fL,0x0e3e069689d53be8L,0x00000000000000eaL } }, + /* 59 << 189 */ + { { 0x31330a7b71c94713L,0x4311eeb630cbcf92L,0x6cbdc637c826f128L, + 0x07911288df0bb278L,0x13c1197a55ca042aL,0x7058c95b521d6793L, + 0xb4f7251008580366L,0xae2725c7fa6780eaL,0x00000000000001cdL }, + { 0x132cefa9e24b6b80L,0xb87b576fa20cfc85L,0x596c4323e05b359dL, + 0x2ad43ee0a8c677f6L,0xb4bdfd4283cb25e5L,0x0cb93226376be03eL, + 0x5617ae0aeb0a9712L,0xf2a2e92048e8e273L,0x0000000000000040L } }, + /* 60 << 189 */ + { { 0x20be8f1deec0daffL,0x06e4f56b826d13aeL,0xa4c7e5313d4e15daL, + 0x27dce66c40aa5d0bL,0x88778febdc9227b4L,0x64304c472cecbc95L, + 0xd9d8f0d3d393cef4L,0x3383e2dc872b3f66L,0x00000000000001dfL }, + { 0xe805eb457cf69310L,0x961caa7041f9dbf8L,0x3053e3c57e2d4b33L, + 0x4b877980a63a655eL,0xc41dc07cf1836df4L,0x6d1c3b9b2634f265L, + 0x368c4bd1062e70e4L,0x35c6e6c42b00fe78L,0x0000000000000106L } }, + /* 61 << 189 */ + { { 0x664ea3fa8433f73eL,0x20696c8e1e96d416L,0x8ac356ece1e2cdbfL, + 0x104c4bd5e64270ecL,0x8f25722b0c79a50eL,0x43523e2977d6a915L, + 
0x04bc5be29f26d4d4L,0xfb4a05a84542270dL,0x00000000000000fcL }, + { 0x908b94c28618efebL,0x99dceba2b5266591L,0xf3834ace94cf34b4L, + 0x69e6d2cf5a56d236L,0xf4030a4f05de63ddL,0x05ffad5422571079L, + 0xd5fff5da4cdc3814L,0xf0c143e2079d4e91L,0x00000000000000a5L } }, + /* 62 << 189 */ + { { 0x57f9bed894e9705aL,0xb2152f6c13e76e24L,0x35c88fe5d3293955L, + 0x2f9eae4d5ff3839fL,0x9c81a6c705d09a9cL,0xfa25a810240d9691L, + 0xb72c661177297890L,0x6181b2bd10c73788L,0x0000000000000101L }, + { 0x4339f177dad59376L,0x03decffdc272fd1eL,0x0e311e8c5e1915cbL, + 0x89e003b809e2d503L,0x4356b8d46f2fb5edL,0x459d3fe5f482888bL, + 0x8851c7086fafe565L,0xf29b97c5da78e2b5L,0x00000000000000e3L } }, + /* 63 << 189 */ + { { 0xdee86d864b8e33f0L,0x2db9e320e3d4a055L,0xefdc83bc93eda388L, + 0x8960e57a47eda00bL,0xb96aaddffff77657L,0x44b1deef8dc0dd1bL, + 0x5a154403cb477801L,0xb14a66c5a8f3ae22L,0x0000000000000103L }, + { 0x3acee5a7c5a696e7L,0xa833e7959e1e41b4L,0x1956f091705706c0L, + 0x0f37ea79ba39f516L,0xecc5e91b4a48092bL,0xbf42b417f640b7abL, + 0x2f32ddc0addbe690L,0xa136296a9903bfd6L,0x0000000000000153L } }, + /* 64 << 189 */ + { { 0x6287bbca1236bec1L,0xc16eabd8608e6b5cL,0x253193ef15fcd3c6L, + 0xd4ee71c7da5fe623L,0x62531449ac806cb8L,0x9152cf1690d7e00dL, + 0xa1e05703d20a1668L,0x6480d486bbf588daL,0x000000000000014fL }, + { 0x37f70ab3fe2753e3L,0x562f4a46023ebdf8L,0x97358d7d89caf63aL, + 0x387422a7ab4b1dfbL,0x80d654962c8c3421L,0x7d338b4bdb8e13cdL, + 0xa535529dac590192L,0x39a28761a6760430L,0x00000000000000c7L } }, + /* 0 << 196 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 196 */ + { { 0x1e226a938f1ef579L,0x8aeebec9138fedd2L,0xa7e39d7d1ca4c4e2L, + 0x5e53f86d49724f86L,0x9815987deb759db4L,0x538c9f700e9cd47aL, + 0xfffc98c71a7fac87L,0x3195776e7ce980e1L,0x000000000000019fL }, + { 0xccab7b37e33cdcbdL,0x561d18f2855619d0L,0x56aa5963818f21d0L, + 0x1413193b571c3c5eL,0x9b30214cfd748c72L,0xef0bd8d48eadf936L, + 
0xeb6d831fd1964b92L,0x4b37d5e09bc42bebL,0x00000000000001efL } }, + /* 2 << 196 */ + { { 0xb854567b979551eeL,0xf97b8cab3f3f499dL,0x3e343aca603fe8e0L, + 0x2c4167f8813abd6bL,0x6b23eed8a74bfd77L,0xf88b1cecd47dd491L, + 0x9e613c3226b2fd87L,0xc1b1ac19fe54bd69L,0x00000000000000afL }, + { 0x12bcafbb43b4df17L,0xbd0829247266cd9bL,0x9d1e0eb35a197805L, + 0xf44ac91276cae6cbL,0x7d4e90e630d68179L,0x5407a9a02ad9609fL, + 0x02cbce1c921ecca2L,0x058ab973a246d665L,0x00000000000000c9L } }, + /* 3 << 196 */ + { { 0xf31853c99a73eba3L,0x3fd889550e6e4613L,0x0f685bb932a3d4acL, + 0xaeab0cd88cca35faL,0x8f65c0f7ef18d468L,0xbd45dd2509f0f948L, + 0xfc2ea6e46ba83339L,0xf6f79e60ae6934ceL,0x0000000000000158L }, + { 0x3d279791e60d9028L,0xc2ca4234f0c3cf4fL,0xf21ec8f085ac780aL, + 0xc2d9b19ee6fdb32bL,0x9e2811fe0827c781L,0x42e1538749e61bdbL, + 0x936b8e23414d6887L,0x7fe54043d7edc198L,0x000000000000008aL } }, + /* 4 << 196 */ + { { 0x61b51bb04bee80d7L,0x0e1f6a1f7692de69L,0x8379e46ca0ebc3bdL, + 0x1c0bffa7930644f0L,0x97c67b87390db077L,0x095c33e1fada1ce9L, + 0x3c500addac54b512L,0xc231d360d3118656L,0x00000000000000b0L }, + { 0x0628929839bcab2fL,0xc0c0678064dd220aL,0x062f6084763dc2a0L, + 0x88e9da731938c3e3L,0x69be8f2d52e46eb9L,0xe55c8d2d6a5de0fdL, + 0xf3a3fd63db2c0e26L,0x899c6d9f1e4bff57L,0x000000000000014aL } }, + /* 5 << 196 */ + { { 0xef850c0a886b7b20L,0x8f635025bb9943c6L,0xb81d4c7d73ea21aeL, + 0xcb8f069f99d3a6eeL,0x6cf775a6fa33eeb0L,0x4e222f778f96805dL, + 0x143517aaa6141ea6L,0xe38a3c64167210c0L,0x0000000000000116L }, + { 0x0fb62f621fc78dbbL,0x2c6410236cab326bL,0xa72ea35af1549e38L, + 0x537f6ea3830f25ceL,0x9e079cae33eefb84L,0x38c173bab87ddbf0L, + 0x1b4373897184af77L,0xf978084cb1868bccL,0x00000000000000aeL } }, + /* 6 << 196 */ + { { 0x37456b07d454e1cdL,0xca872026e651ca99L,0x66d075726a03d9a8L, + 0xe6e9ba231d31cbaeL,0x2b720e01a80f7b13L,0xb6d7f827615f6ca5L, + 0xf88f8cd69012b0afL,0x5024d5de7a8e112bL,0x000000000000019fL }, + { 0x24ea6d08a8749354L,0x1143855e290612b8L,0x64cfb996e2edb77eL, + 
0xaddfbf4f77a5f775L,0xdd57f915dd64fd57L,0x89ce3192055a07d0L, + 0x9323e3e13cd0549cL,0x7cc109cdd89fc00cL,0x00000000000000dfL } }, + /* 7 << 196 */ + { { 0x09dac9edf4222533L,0x241333a9fd65cf38L,0xaa15ecbaaf69f2f4L, + 0x680cc1a0e6afa6e0L,0x581099c95f6f8c82L,0x32f948104dcae40bL, + 0x0fad007d4ff8175cL,0x5dfd36385450471eL,0x00000000000000bbL }, + { 0x57a1bd6b9bedad5eL,0xb187ede777139166L,0x66ab56bbd66ab437L, + 0x8d67305e0875cc7bL,0xd1221e1d2130fbabL,0xed4482700a435b57L, + 0xea5da47bf4089497L,0x280b2cd4e8bfb1f6L,0x0000000000000115L } }, + /* 8 << 196 */ + { { 0x8f33504dedb74cb1L,0x1edd5d969d2a2437L,0xb4a43aadbeced5e0L, + 0x94faee53a6679dceL,0xda5ca04ff6d1589dL,0x4e55494935d8be40L, + 0x3df8d236d5311f54L,0x46f9c2d24988fe68L,0x00000000000001f7L }, + { 0x4d0d9475fb2327cdL,0x552291cef0b8faf0L,0x3e3f4a98661e3c78L, + 0x2c452f05bda95549L,0x53276d708bb411bdL,0xfde0440e539322f3L, + 0x02d8d55b20961bf1L,0x3c1e105518739dccL,0x000000000000017cL } }, + /* 9 << 196 */ + { { 0xc03f25c3c4aa5646L,0x86c63161f5058946L,0x524d9f422c02da6bL, + 0xffd709d2d08cfc8fL,0x1e8214f8bd67d9acL,0xfdb0716ea4953705L, + 0xcee198f0637d647cL,0x0eda33411fc50c8fL,0x000000000000014cL }, + { 0x3771081ba26d80ecL,0xf495ac4f7ee1f2e6L,0x458ecda5cc9f8790L, + 0xa98f84aa11d6c1d8L,0xe9d067997b694bd4L,0x510a0f753eb0c3f8L, + 0x9747f31b40f2693dL,0xa08798275c3a8fdbL,0x000000000000014dL } }, + /* 10 << 196 */ + { { 0x2588c869678be8ecL,0x643c5bcd70a17049L,0xfa8ac412627e6964L, + 0xc9e2c5e1d12b30bbL,0x6189df6fd7f8b20cL,0x7fd5df1593b30b2bL, + 0x30176abaaa08dc30L,0x12c058c36722406aL,0x0000000000000166L }, + { 0xf11e3160e2a9cc34L,0x839d94196b6ddfb8L,0xae2e884680035a3eL, + 0x8bc4c61bd1f3c45bL,0x128980af18fd9ad6L,0x4210b25ee59cc085L, + 0xab245ee01ef3a74bL,0x256fac53af76d59eL,0x00000000000000d9L } }, + /* 11 << 196 */ + { { 0x31509afca5f6cedaL,0x147406dd5010591cL,0xbc35c5699341d9ceL, + 0x7ce9ace9ba35f50aL,0x19928ac5ea9234bbL,0xe036d3987fe039f4L, + 0x920ffca4004965e5L,0xd18e5104051a7e12L,0x0000000000000040L }, + { 
0xf5630cf3a22366a1L,0x52e3ff045ca6ab82L,0x98b95576053a65e4L, + 0x8e7f4a1b08b986a1L,0x7e4f3c04e03a5dbcL,0xca92d084a3cc2e38L, + 0x92ebfe5fefcd06f6L,0x88f060b35a709eabL,0x00000000000000d6L } }, + /* 12 << 196 */ + { { 0xab2a1492a5cba744L,0x6a6a4317986c613bL,0x9c8d1d01e872357eL, + 0xe7c3d762a1a8701eL,0xca296c88937b667fL,0xe5ddb632b105ffabL, + 0x602efda3db9d7cfeL,0xb72b8ee5075fa1abL,0x00000000000000f7L }, + { 0x420f01bf09297d3fL,0xe6cdd604ce02895bL,0x475de3990ec6827fL, + 0x2a44d697d96cd553L,0x889200f98c9d9769L,0x4fa1f222a7289c6bL, + 0x5a45ff976d4d1b4cL,0xdc9910a51e63aa2dL,0x0000000000000098L } }, + /* 13 << 196 */ + { { 0xb6a85b1106456038L,0xa422307cd48af4a7L,0x25c9835ce781207fL, + 0x61e1a5ac67c18dcbL,0xe294353581f948f2L,0x427f9ac30f7fb178L, + 0x2dc53c002e2bb161L,0xda0d1835b838b9f7L,0x0000000000000122L }, + { 0x2e4d3e46df6800cfL,0x66910d0655d86505L,0xae5a0dd7f0d1ed6eL, + 0x0f3f3d6a84643a02L,0xe9d03f79eecf2e38L,0x8a7c07d8ef3e2228L, + 0xba7786af35a3f709L,0xff8997ead43da7c4L,0x0000000000000044L } }, + /* 14 << 196 */ + { { 0x78fbe6ecff0fdb4dL,0x4625db146786490eL,0xd296291fdba395e3L, + 0xa6a1df669878e3c8L,0x0aa57c9655fe0afaL,0x9294f74589235066L, + 0x6fb4692d1d24d786L,0x7fdaab78e13f2a4eL,0x00000000000000ecL }, + { 0xced57ecbeec9bdbdL,0x5b935678813a87cbL,0xefed2811b9c45d4fL, + 0x17b12bf552c52dd1L,0xc8f5c7e0211123faL,0x387fa7dd245804d9L, + 0x2a42349809bc76eeL,0x5b95c590457daa36L,0x0000000000000089L } }, + /* 15 << 196 */ + { { 0xd2006fad8a90d8c0L,0xae1a509ab872540bL,0x774048a4ea281ecbL, + 0xa53efead7a84cfc0L,0x5d970194332cb313L,0x4b2736352866ed7eL, + 0xb646a148a3d68d32L,0xfaca3712cf8b5d7aL,0x000000000000011fL }, + { 0xc726fbc2c789acf1L,0xabf5095893f6d7f4L,0x5c9b593f8791335aL, + 0x83523e7de5e84484L,0x5dccab399f06edacL,0xcb72c6d6bc2cd75eL, + 0x5038af212638469fL,0xfa7eba1954b0dba0L,0x0000000000000165L } }, + /* 16 << 196 */ + { { 0xfd0261287468e327L,0x789e8dc4ee755906L,0xea0453e480218c94L, + 0xe263fe27088d7c56L,0x9219cee630277200L,0x293401d28ce5ad85L, + 
0x627036d3b960ebbaL,0x8be032fef92da0eaL,0x0000000000000030L }, + { 0x7ab52e739daf8d6dL,0xd6d75d4c3f789f6cL,0xc3b7560ebfc9a31eL, + 0x87b7f183d7db7ad5L,0x0f60a2db9c213425L,0xc2e67c9452c58981L, + 0x001cf3e81b6d0b43L,0x31efa39b78235b3fL,0x00000000000000ceL } }, + /* 17 << 196 */ + { { 0x82dcc474b926527eL,0xb05d073fc3255b46L,0x3a5d46c0a94534d5L, + 0x90e7d00664949c9cL,0xe4de4002925753d1L,0x68653da277145588L, + 0x8161ee0fb6d85753L,0xa76d05fcda37d8b8L,0x0000000000000030L }, + { 0xf0baeebad0cbedc4L,0x6aee7715d6cb0047L,0x9d0952da03cdda9dL, + 0xd90557e55fc2f41bL,0xe30c3f5a68e8a4deL,0x2393e17cc79fca8bL, + 0xb11077041c6c604eL,0x6bc9e1e163858b47L,0x00000000000001ddL } }, + /* 18 << 196 */ + { { 0xa27480914603ef03L,0x3f533f5d3c01915dL,0xfbafdfb8da43a354L, + 0x730d876e3f58afaeL,0x72a24af75e4a20cbL,0xcfb685270e8ac725L, + 0x98dfcd5a0ac3e4cfL,0x24183e26023324feL,0x0000000000000033L }, + { 0x47f328728f9a1a5bL,0x24813af407534a66L,0x928fbfd422744d19L, + 0x4f40a8ad4dc1aabfL,0xe39d2cf81946d1feL,0xf5ff40ea1ad5eb5fL, + 0x84e96cb1cc4f45c9L,0xb35d04bd39761faaL,0x0000000000000045L } }, + /* 19 << 196 */ + { { 0x0bd7313310603fe6L,0xb1b275b4ef3295dcL,0x1c6cf47747c52883L, + 0x950000c5904cba77L,0x36086feec437482dL,0xd59c1b42b7e63ab4L, + 0x66106e22b1b9479aL,0xd0f2f68620df7528L,0x0000000000000180L }, + { 0xacb366106e9a8384L,0x7db709202b9b5667L,0x4a1fb3fe1f19a121L, + 0xe5e0295f9e62333fL,0x827ae3f70cc830aeL,0x4546f4aa82d17628L, + 0xbc357b50afdcfe1fL,0xd5287e6e8ed152d5L,0x00000000000000afL } }, + /* 20 << 196 */ + { { 0x829ed3209592cb28L,0x86b85a02f9bfea7cL,0x895d6cf2a22a9de0L, + 0x9cabe2bcbfd6d129L,0xb1c051eb4a792e52L,0x6684d6d90fbf1905L, + 0x77f1483ccb09e7cfL,0x8e04529b9e4cc9deL,0x0000000000000103L }, + { 0xb4e2ff1ee3817349L,0x5486debd159eed89L,0xd58f04d8f1b7cfbcL, + 0x825120be1fd7a7f8L,0x3db1a0c20fb0d63dL,0xda92a7911ca49429L, + 0x21772918dadc1761L,0xff883825e3d1c3daL,0x0000000000000151L } }, + /* 21 << 196 */ + { { 0x6e8d3a7747c54a44L,0x2fbbee944c70d069L,0x3b0b5d7dcfce5abdL, + 
0xa1588de81dd88c6fL,0x8bea3b2f3970b25aL,0x9b57781fc0211a66L, + 0x41559a91a0f70d7eL,0xac02eea95d7a7d8aL,0x00000000000000feL }, + { 0x7fdaae745ec7c1d1L,0x21301b2354323760L,0xa5575e5b951e014cL, + 0xf6ddf2aea1689a42L,0x9f9b01adda3ec528L,0x21dfb4c20341571eL, + 0x9f2734abcda1a75dL,0x794ad1e16d2418bfL,0x00000000000001c4L } }, + /* 22 << 196 */ + { { 0x2b0a6d8949380b2bL,0xb6be08f3d53dda84L,0xf12421a02bc75f54L, + 0xc6aa94691172bc8bL,0x9ae6f245bc269c6fL,0xea55b4def0069449L, + 0x2efdfe343235c756L,0x48d89a45a7f9471aL,0x0000000000000006L }, + { 0x6aac03fd019f94c8L,0x6cc17e953487dcb4L,0xd3be9f21dac7ae3cL, + 0x85c706c50cfe0d60L,0x15cdd8e82e74e362L,0x043cda214f1829b3L, + 0xaff541f04b50619fL,0x27f7440b29562dcbL,0x00000000000000aaL } }, + /* 23 << 196 */ + { { 0xcee0dd4d2d4aff98L,0x8cd8c37c025d56dbL,0x2855e53f95b39565L, + 0xfd72a2f7a7cba4d4L,0xc36a463b8758cef7L,0xaacda7d98488a435L, + 0xb77cbe7dbdca5af5L,0x019c571e46e70a6cL,0x000000000000008eL }, + { 0x37e15e7801d4a3a0L,0x5ff7813ccd35c388L,0xcfea3bf2ac8378caL, + 0xf8b1ecb62e32328bL,0x2930b5ffe4c83cf2L,0x489f0f1ad6f765cfL, + 0xc8be93137d7c2d1bL,0x3ee2ba463c16e2c5L,0x0000000000000084L } }, + /* 24 << 196 */ + { { 0xbbcf2dcf3deb0321L,0x999e365320b9a035L,0x80ef540546c0470eL, + 0xe80ed2ab9934bc72L,0x512c5cb9f684a11dL,0x3d5b66118185f3a7L, + 0xdf159ef4885ea75dL,0xa3ea9b394ed4cf9fL,0x000000000000013eL }, + { 0x7b66126b8c20b4d1L,0xc4ed6bc1544ac0e0L,0x279f02aab2923004L, + 0x0b40e88bf0747e04L,0x05c9e6d27312dc2cL,0x6294972e1b7860ceL, + 0x14a57cd533881463L,0x8b82ededa1248b49L,0x0000000000000185L } }, + /* 25 << 196 */ + { { 0xd7d62aa27fb8b741L,0xcc49ebaa22c2ceb0L,0xb7bfb7a6d783b548L, + 0x38bc7436ce389959L,0x8b0f8386da43e59fL,0xdfc845523c0eb486L, + 0x45ec3ccb01814a76L,0xe962b5073a441d4eL,0x000000000000017dL }, + { 0x2b9bf5d3e7daa36aL,0xf3db2ff2de01078dL,0x8329d82a2f10cb32L, + 0x8a933f560a0240efL,0xb05ac0fcf0ab8458L,0xdc32a5492dd7cd0aL, + 0xa53ce7ea66018a68L,0x8d453dece96184f0L,0x0000000000000017L } }, + /* 26 << 196 */ + { { 
0xfd28fd2f06a1b72fL,0x54d721eb28ab431eL,0x131944e6449a31ceL, + 0xcee896aaf743b99cL,0x5a22f9b61f309b87L,0x52595371d6f002c9L, + 0xec1119fd2f9c5506L,0x685f82c7228d2e82L,0x00000000000000d2L }, + { 0xee614493cbf1b3bbL,0x44d7be7af3861222L,0x4b01c72974a0b60fL, + 0x8272691f411bef96L,0x276d9a53b6a0abc9L,0xec66bc5e0495166aL, + 0x0c3802fb8127af16L,0xe1d8c99ccfd1e4a4L,0x0000000000000021L } }, + /* 27 << 196 */ + { { 0x4fe4d6cc0345e12eL,0xfc55a162bd0ba258L,0xf5945aba4fa7fa6fL, + 0xef5b2f368a815568L,0x345ed8ff6f8d1f91L,0x57a9ecae13e62497L, + 0x121b4b4054ea607cL,0x2e33b7eddc577373L,0x000000000000015bL }, + { 0x69b188f030164779L,0x5a5c8bf1b5595d69L,0xa5ddf2d33c99af05L, + 0x5421f999e5f631a2L,0x75741d7fc9767800L,0x9f3f1a7c1e91e805L, + 0xed1f12ac428cde8aL,0x0cbc29b61258e75fL,0x0000000000000179L } }, + /* 28 << 196 */ + { { 0xdb0b25781169daafL,0x6136642d2a9d17caL,0xfa6bc4e8f72df335L, + 0x1778a4c225b9ed79L,0xb633fd4f4310dbcdL,0x4b13b4faafd58778L, + 0x4522514b00e07b54L,0x559f9ab9e477fe71L,0x000000000000011eL }, + { 0x5c8d43b07ddf0054L,0xa53226de46f8cac6L,0x065c1269f2f221b0L, + 0x9cafcd4a9956f22cL,0x982ac10bc519cd6bL,0x15318af6799774cdL, + 0xe71307981ac986c1L,0x73d702b5045f1c18L,0x00000000000001e3L } }, + /* 29 << 196 */ + { { 0x8eadf42faf4686cdL,0x1d08e2137ab97161L,0x0f210dfa3f500b83L, + 0xf1b2e0861ce61acbL,0x7eaede2891fd68dcL,0x895d7935745fb13dL, + 0xe01b00823880eb43L,0x6f6f1e0287e35884L,0x00000000000000a3L }, + { 0x54e32082e6dbaf6cL,0xe7a3aaddfb20f209L,0x9a0378062a9ec5d4L, + 0x03e8f27aac9a81a8L,0x2a98abdec88a9bc4L,0x71ef89d99dc10283L, + 0x4f0489fad411c02dL,0xff054fe1b63afa4dL,0x000000000000002aL } }, + /* 30 << 196 */ + { { 0x5c9de5a4256a761eL,0x683f423188f0d612L,0x2a78b4b8280c04f5L, + 0x57b2bf75e3f6518bL,0x477eea36ca8677f5L,0x71035e47b0bb0034L, + 0xc12117e2aff585d4L,0xfc2faeca0c076757L,0x00000000000001cbL }, + { 0xeaf9bdfa3be3d035L,0x8d68f8ccf1ff65dcL,0xf293f7cad465c674L, + 0x89ba6110617b8082L,0xe20a69b05a81151cL,0x6846f5bf5cdee0ceL, + 
0xc152ffdd86d5304aL,0x0b309e4ff1a6e86dL,0x00000000000001c5L } }, + /* 31 << 196 */ + { { 0xd0483c82a8530876L,0x63098f1b0e175f10L,0x3f2b2af973cc5d1bL, + 0x1e1a0147e2868b4bL,0x2c156d2e0a5b21d5L,0x58954ed044ca50d6L, + 0x66c0fee44271ccf5L,0x4671b5ead83255c9L,0x0000000000000072L }, + { 0xc96b825264cc2271L,0x79bf4ed687d0b7d3L,0x90aa275e308f708aL, + 0x556639c530812a17L,0x926fb02487645ec9L,0xec979253102ed54aL, + 0x5317b9e51478db12L,0xbd141acf0110f601L,0x0000000000000171L } }, + /* 32 << 196 */ + { { 0x04e8dc9fbcb97cccL,0x4652847e88086fbaL,0xe5ec071965411266L, + 0xaf72b4a169e1c843L,0x48e75c318161e951L,0xab6457928a39806bL, + 0x3fa10b38a2a97512L,0xba9e2ee7fc3a693aL,0x00000000000000ffL }, + { 0x3b96e7c33b70d15fL,0x87c3f3dd4ae9e52cL,0xa256e7e864ecc9f1L, + 0x541d8b6d07c90736L,0x2367141f6c2b4160L,0xe32aa363cab770f3L, + 0x51e2c83261282e7eL,0x194b06a2f3fa7817L,0x00000000000001c1L } }, + /* 33 << 196 */ + { { 0xe8a62c0de1d32dbfL,0xf1f616421cdaf3eaL,0x70881e0fbf70d93dL, + 0xf031d4ea8bca2111L,0xa19a22ea7037de31L,0x37018144040b9b64L, + 0x6890d852d77984a3L,0xe460fa7552eda9feL,0x0000000000000105L }, + { 0x6446173460a056fcL,0x708f17630b561251L,0x8e32715ffab9f6f8L, + 0x562046fb8987e3bcL,0x6fe352a8e555a225L,0x9a1db7632433a38fL, + 0x81887733cbb91ebfL,0xeff8a2c627793c77L,0x00000000000000e9L } }, + /* 34 << 196 */ + { { 0x3ca6f47973e4c097L,0x6ff9fa2ae38e7294L,0x1289d5bc9ea599abL, + 0xf2a37ae7a8928130L,0x50f2f989f1a6c895L,0x8757f302d7d6cbbeL, + 0x4196104d4e40f53fL,0xebbe471fbd519cf2L,0x0000000000000107L }, + { 0x2ffa710db24b0064L,0x02cddcf4aee5996eL,0xe16b2968e5d23a32L, + 0xe5c9be9072f414d2L,0x8a5c1db065de7f6fL,0x2130bd6b0ccef750L, + 0xdd3df13f4f42f281L,0xc13fe304695f4b82L,0x0000000000000084L } }, + /* 35 << 196 */ + { { 0x93e7f4ec605cbaa3L,0xae4407b383e3f52bL,0xa2097b0241a92aa1L, + 0x9e7466e3deb06bacL,0xf8f92193bd45255dL,0x8b5f880087cc518dL, + 0x3f49ebef9ff8b60fL,0xde7fa84a2963894eL,0x00000000000001bfL }, + { 0xb50f8ad28a71215eL,0x458e00b689d5ea0eL,0xd7713a988a12ca3eL, + 
0x3a87bb016251a8ffL,0x9628caabf1b37745L,0x33b4c2ffd0d6c193L, + 0xc3ce2cd7168ba8e8L,0xcf2c2dd14078fec2L,0x0000000000000147L } }, + /* 36 << 196 */ + { { 0x8bf7c9c33cf23534L,0x5653e5d9a1868c02L,0xbfec2eeac365617bL, + 0x2b6f61f1ecc8298dL,0x40396d9f1a169a31L,0x42eb3d8659bd4b25L, + 0xa1b8cc7d241065d7L,0xa6e89c216fc1ace9L,0x00000000000000e7L }, + { 0x10d6248e72bd3d50L,0x1a6446b3c4671f34L,0x6734a9f04bfde2edL, + 0x84afb994938bdf38L,0xa122be493fee450aL,0x9b83520e500ee214L, + 0x11982bd9dd8ad22dL,0x3bd3f748c9839b36L,0x00000000000001baL } }, + /* 37 << 196 */ + { { 0x621af6617e7b439cL,0xd6e8e2b815afe413L,0x97c6b17c7b245ddcL, + 0x289e61717feb0b73L,0x98505d821c5e51f0L,0x33cb22cd631d3314L, + 0x723a81a5e1e6b48cL,0x8e32d81164218c3eL,0x000000000000014dL }, + { 0xf98fef26fdc0e56dL,0x0e9cba0ad776149fL,0x896f5700cc8b41d0L, + 0xc2361bf783699489L,0xad6795f43e5d1c59L,0x717681b64d2c7cfaL, + 0x5fe3591eeb9a970eL,0x47c95b0d8350b6e6L,0x0000000000000184L } }, + /* 38 << 196 */ + { { 0xbf0616bd5ddc2698L,0x765ac3d4be906553L,0x1f75a167f5494395L, + 0xf81d9ad94284513fL,0x9d6e5bdc7d63f117L,0x64311ef80dd9a72fL, + 0x1e03cb88a093fd4cL,0xc044f4baf45dac7aL,0x000000000000004dL }, + { 0x01c2507bcd22efaaL,0x62a24322e919f292L,0xb1b116e684c75e56L, + 0x8ec19402f1471232L,0x4b206e3ba7712367L,0x91ef56c2038e4419L, + 0x9facfa3e5ab207beL,0x4fdd07bc11203cd6L,0x0000000000000106L } }, + /* 39 << 196 */ + { { 0x4d0117ffbfec68f5L,0x4cd18a0033d8d3c4L,0x3d63af1e96480162L, + 0x8b2bf467fa9ff1b7L,0x74b4ad4943c77644L,0x35725c2f089a359eL, + 0x639a66f4df226886L,0x50b29f65fedcac88L,0x0000000000000018L }, + { 0x0afc4f29bd92ab9fL,0xbad68d8a47163560L,0x1ffa046d37e10fbcL, + 0x2b49d323ec249cbeL,0xac325052aec86c45L,0x3a7519254b6a83a5L, + 0x22ede30b2f277217L,0x8f88e2e854d41982L,0x00000000000001dcL } }, + /* 40 << 196 */ + { { 0x7278584f6efe90ecL,0xffe6da80c2971903L,0xd05ae9ff0e5b498cL, + 0xef414b291a1027b3L,0x2f651f532ca82e24L,0x2f0da56297e1b677L, + 0x7e984b0792315558L,0x30f1b1f81c3e6b0eL,0x0000000000000183L }, + { 
0x273edf2427a65a6cL,0x6ff4b6fa7b16c778L,0xfedb4e3731fb5dfeL, + 0x181e43b8cf9d4e15L,0xd60dc117191c9415L,0x9daa6f3583e150ccL, + 0xeff60ac45cf670adL,0xa75fc519da594d74L,0x000000000000005dL } }, + /* 41 << 196 */ + { { 0xe20b5aec250f9de7L,0x1aae76fb3fab5e0aL,0x0becc02b33318cb6L, + 0x3238d3a763f85fb8L,0x32b6b02f81279952L,0xfbd0321d4463d8dcL, + 0xbcf5583706ba9c4fL,0x215737ab3e66a0d2L,0x0000000000000047L }, + { 0x0630af2f21485b77L,0x98d384749422760fL,0xd5ad435706253457L, + 0x3d72489a60164b41L,0x1eceddf2318658e2L,0xc01ea9d766ad11e8L, + 0x85381580468ad1daL,0x6fbf66bb215835b0L,0x0000000000000000L } }, + /* 42 << 196 */ + { { 0x89733251bc6e092dL,0xceb697d37f36c9faL,0xcdd80fe333cded69L, + 0x4e1dacdcad9308baL,0xa2ffd125033c2941L,0xe8d009e6974bb3ccL, + 0xd72c5fa93368b9c9L,0x8d7ed4ff5a1331c7L,0x0000000000000189L }, + { 0x4813b528e8edc388L,0xdc17a243e0c0af22L,0xf1bd51fa74495d32L, + 0xec53b061b286aa96L,0xa6abdf9619068cc1L,0x820646d9e4207cdcL, + 0xf74a8d2d1bcfa5c3L,0x8633342d5f097f9bL,0x0000000000000078L } }, + /* 43 << 196 */ + { { 0x0c95ca695ac18348L,0xf75ca2fa5b052634L,0x28d4424b71bf0289L, + 0x0d40063f31653756L,0xc814832deb0bcc02L,0x0158f9995e70861fL, + 0x05d5a762936ad90aL,0x434a7f74a144b3f8L,0x00000000000000dfL }, + { 0x6ffd78fee3f669f4L,0xe16f72d982c47401L,0xe6e5963f36c8f97aL, + 0x05c42226cf7de123L,0xd4101edcb94ee7c6L,0x6b3e2b62daa047a6L, + 0x4d55db3b99607484L,0x70960806e3c0818bL,0x0000000000000055L } }, + /* 44 << 196 */ + { { 0x0c19aae6b0dcfc0fL,0x9e0eb373c5c3aae8L,0xc161fd1c4a50f55cL, + 0x1ee02a2b9e0697e0L,0xd57f6a602ee4267bL,0x5d336ac44a0ddf2dL, + 0xe695c75e6e4a7d7fL,0xa79a0ff3fb055923L,0x00000000000001d1L }, + { 0x2e7ea0098188fce2L,0x81ef9870c12184a3L,0x4e03c68fb1e88e83L, + 0x3b5ebc5d7e85694dL,0x436f1044bbcd787fL,0x56c1bc0d2d3e5789L, + 0x23c6542d6fe9f054L,0x8a1bf0c65ffd53ecL,0x0000000000000162L } }, + /* 45 << 196 */ + { { 0xee9e0619502108a8L,0x4f1b33366a1062e3L,0xfe009ec650a122d9L, + 0x2d8fcb69c9e48f99L,0x808241ac63c706c2L,0xc91225fdb524a19dL, + 
0x4a8e5dcc9ec80451L,0xa03ef54586005457L,0x000000000000015eL }, + { 0xac3efcae7fd50c83L,0x7e0b922f431aa19dL,0xa715c2def3d56a91L, + 0xc92deab66be3d952L,0x1c388d660db1584fL,0x65d59dea3102a4c9L, + 0xaa23da841d251b83L,0x12234fba7c16c390L,0x00000000000000d7L } }, + /* 46 << 196 */ + { { 0x908fc9cd1bcbb1ccL,0x1c8e79c605c7336aL,0x3be79537e9943726L, + 0x50fbdd811d9cea24L,0x0bfab19d8c2c7fbdL,0xc5549a0ad1ce3a2fL, + 0x9b9d925f8cd9652dL,0x9897484a739e0cd3L,0x000000000000007dL }, + { 0x393991e454df8672L,0x3182ec23505c2450L,0x1a6d66342e010ffdL, + 0x4862c7f13a3a0457L,0x04128c4709ce1141L,0x7665a44691ecac9eL, + 0x4d35845d698e56a2L,0x2bd11b9c63edd2e5L,0x0000000000000069L } }, + /* 47 << 196 */ + { { 0xf005c9a8b9a2702cL,0x810498cf2c1e6de2L,0x6befe7b4e18f6d56L, + 0xc6589cf794b8d39fL,0xbf8d443c00cbf65bL,0xc7c9879f45df1b9eL, + 0xccc79b076de50858L,0x929bea822826a4e0L,0x000000000000010dL }, + { 0xb296b3eca13db6f7L,0x7e5d7795faf5b85dL,0x2ff9b0b266854f2bL, + 0xc25523050a9a844bL,0xbe3c89f03882805eL,0xa9cf3ecf3b332aa9L, + 0xbe5ed1a69fad543cL,0x0f7d29f52bec4b80L,0x00000000000001e3L } }, + /* 48 << 196 */ + { { 0x22d8c3c4d2a1f241L,0xcb6055158ed2c14bL,0xbd634a2e2939f855L, + 0x30b2b6b91e9519b4L,0x19911c0008a31d22L,0x60b0713b979cd33cL, + 0xf41a8abdb94f9b61L,0x3f2d8c91ef72cf1eL,0x00000000000001b4L }, + { 0xf0acf9b3d78b56eeL,0x12112d24e915a265L,0xa18c93adc1d7025fL, + 0x279a4db82e652107L,0xbca73ad02bffb5daL,0x2cdfeee4e2d168eeL, + 0x6d83a77f5a4aca12L,0xc203514b1d22e8f4L,0x00000000000000f8L } }, + /* 49 << 196 */ + { { 0x91055288c64e6ec6L,0x8c16e01acd04e9b3L,0x4036627a86571434L, + 0x881ed57082957000L,0x310c958bfa5d4e68L,0x06baf7e9bb2d87a9L, + 0x4fe3ea057c2f0a2bL,0x38bb26bfe948412eL,0x000000000000007dL }, + { 0x66b2c752f3e37ae3L,0x0dd0ff8df7224205L,0xf90ce316e12378caL, + 0x6491f235c3d6855bL,0x4fb4f58faf02496fL,0xde97dddce1cb8cd6L, + 0xd92c7227fa3d595fL,0x9f6b1ce8fceea6b4L,0x0000000000000180L } }, + /* 50 << 196 */ + { { 0xbd22be71b0de2450L,0x2e3f91b14c3a57a3L,0xa977e3772a4ffb4aL, + 
0xdbf6fffe5aa5cd91L,0xbc48435eee1e3920L,0xa563e4307d378540L, + 0x62f6de0dfb41e34cL,0x8011c6cce38ee8efL,0x00000000000001baL }, + { 0x005b98bcd74a67ceL,0x8befdb54ccb7e590L,0x9f161fae5663c8aeL, + 0x3693d9d550eb0d37L,0xafb1e38ebf1d75d3L,0x087fcc9c5153565fL, + 0xf43377b8105862c2L,0xeb84439f6dec3a5fL,0x00000000000001dcL } }, + /* 51 << 196 */ + { { 0xcde6dee7eca8f051L,0xe3f3cf361cc4d62eL,0x25ff2aa03b1c36b8L, + 0x6e72443eafdc5613L,0xf893f5ecb9772bc5L,0x38371f1262192ac7L, + 0x9c1813baf309d94fL,0xb449174edc9a7a48L,0x0000000000000111L }, + { 0x3b0e446d09c82a82L,0x6f50ae7c89493d60L,0xf1903236a6080476L, + 0x5e3827be27267e80L,0xc24b7fe32bc49a51L,0x616056db7c842855L, + 0x97ba566f1e70d7b8L,0x4dc726365a13ac33L,0x00000000000000d0L } }, + /* 52 << 196 */ + { { 0xf4f43b19e0e59b2cL,0xc6e2be38afeb7cdfL,0xae8a39acc29cebd1L, + 0x6efba10c45c2e785L,0xf8e52a9ab2641116L,0x40c2f2414196bec3L, + 0x60a7d1e4b1d839e0L,0x5d7d06d259f62c00L,0x0000000000000130L }, + { 0xea760695ac9c3c53L,0x5986bea02581f9d6L,0x59308ce3cfc0fd55L, + 0xea705a6119235af9L,0x443894840718acaeL,0x836bf3f6328cc902L, + 0x235deb73230f3339L,0x8e0c21f18ee4baf2L,0x0000000000000164L } }, + /* 53 << 196 */ + { { 0xe926ef80c0a99509L,0xed1649689ccfc4f2L,0xe99b4885e39ff73fL, + 0x8ccce32809d0e1beL,0x805d18ad17806543L,0x10726fcfd6d2a9a6L, + 0x0e3467645de89209L,0x9b900651642b18f4L,0x0000000000000027L }, + { 0x2b74d221bc1b36e9L,0x7e989f8c6caed8c8L,0x1e25efb759d92e98L, + 0xc6de531b1b5c7709L,0x26e58ed46b64af05L,0xd86e96d549e9e433L, + 0x4fecebf92c240df5L,0xe0d4dbf983308bfeL,0x00000000000000b9L } }, + /* 54 << 196 */ + { { 0x0e30bb6868a51131L,0x0989155b462c88c1L,0x548c6c52d03cae33L, + 0xbc452d0ffe58d840L,0x24200a5335a13957L,0xa601a8497546a30fL, + 0xe5d9c7e607f23c63L,0xfc71ce9d10a1c525L,0x0000000000000015L }, + { 0xf6a1299a1fe5ca30L,0x2f81c374ea5824d5L,0xe644122857981dceL, + 0x3df02523b1a2de8fL,0x54e40ac804cc4237L,0xa3055eea7bd400d9L, + 0x48872d662187a56fL,0xa69e9dee884aac47L,0x0000000000000181L } }, + /* 55 << 196 */ + { { 
0x75eefbfc42f8fcbdL,0x07eb8d597ba7414aL,0x84cd31bc2c8f2b79L, + 0x4d26c915d04becaeL,0x36868d1c210fb6c8L,0x6cfccace72f79b53L, + 0x3cdf02781ebdbe12L,0xdb4a114286fc40ddL,0x000000000000000bL }, + { 0x73f66dd28db9ed5fL,0x889959a887f6b0c7L,0x6704c74140878e48L, + 0x1090b3d446b6c3a1L,0xd7b7aca178e5c4b1L,0x2d612e96ad1a5598L, + 0xca5da9eb614b70ebL,0x9ca5c48a854622e0L,0x0000000000000031L } }, + /* 56 << 196 */ + { { 0xd20ea6d6f77ed05eL,0x4ba76f5ad4eba7fcL,0xb8f202d214b757d4L, + 0x4c9fe411a5fa5280L,0xf6e25d132839f868L,0x13ea010ba1cad300L, + 0x480c268bf85ea22fL,0x4d0cf6672072c1bbL,0x0000000000000023L }, + { 0xf1e5e5230b6bb224L,0x0d56223d8d239295L,0x3682808df54c3f69L, + 0x0c17e3ca1e59cb8cL,0x7d756e19ec2f17dbL,0xd836652462e7664dL, + 0x3768cbff8ac12886L,0x5e741a688c3d34f5L,0x0000000000000020L } }, + /* 57 << 196 */ + { { 0xc54fe29871847964L,0x8280a8fcfdf1f5dbL,0xd6a0534af822e863L, + 0xdc9d3cf826386f9dL,0x9bc6b98e0407ff49L,0x017784bd80755fe7L, + 0x213c81559a977a8fL,0x8dd3801d4e65da56L,0x00000000000001bcL }, + { 0xef919759c03c33b2L,0x75233bef62d3430bL,0x3f1cfea4e4cc6a49L, + 0x38ab926464e971bcL,0x423b0ed9d5eb0c22L,0x6624a340b79c6d63L, + 0xd6b3fce896fa51f4L,0xadd1d8b35d609eceL,0x0000000000000070L } }, + /* 58 << 196 */ + { { 0x35045126ad9eda52L,0xc2ea75df374dd433L,0x487538ab0373e6c3L, + 0x33614263e0eee811L,0xc58358ea57c10df4L,0x32dc5cc35073a6c4L, + 0xb045e46a13446f08L,0xa5732ad5b66c0656L,0x00000000000001baL }, + { 0x7a153e655b0966fcL,0xb603017fb8c3c1afL,0x1a39c8dba067a681L, + 0x3033a65440667020L,0xd7801a9db73a160fL,0xac1ee568789ccc50L, + 0x554288945893ec7cL,0x384002a000eda7e3L,0x00000000000001e0L } }, + /* 59 << 196 */ + { { 0xe8b72ba4767216dfL,0x2bd7cc62233437c1L,0x2ec3866c25595c2bL, + 0xad8e4cadcb1171acL,0xb3013d8151bd5ac4L,0x6b5abdd61b508534L, + 0xb4cc2718d3050696L,0x6b986ac7816e843cL,0x00000000000000cdL }, + { 0x674cf9e1a395c9f7L,0xba207b7ba99d5579L,0xb88e686fa1933b93L, + 0x12c6706d9df81c4fL,0x15e65e3946b23f93L,0xf35458c73fc0f44aL, + 
0xf1e31de2002e545fL,0x4af49917e9f03de5L,0x00000000000001caL } }, + /* 60 << 196 */ + { { 0x9dbe2571dddb531aL,0xcb512e4c394ebff0L,0x4b63c8a4a8d37bdeL, + 0x97beaa21bdb7f8f0L,0xf200e1e9b0a7a3f0L,0xdb7fe7da3a46a4e8L, + 0xa180445dad11a863L,0x74cf8239b2a4b922L,0x0000000000000108L }, + { 0x427c0b0eb74b8368L,0x55c6a2ea42e1ee50L,0xcecb56d06ee5f819L, + 0xdd52e9cef6d2ec2dL,0x1c990f3f9331556cL,0x9178c5a82f80fd9fL, + 0x75d32ffded631398L,0x0cedb14b019fba96L,0x000000000000012aL } }, + /* 61 << 196 */ + { { 0x69faed89822a012eL,0x53c0271bc1ad8f52L,0xee0d31f1937d338bL, + 0xa06b3a494041a239L,0x8b63fb07702f6ba9L,0x41d5178398b69014L, + 0x8486c40b544bbcb0L,0xcf2a64640917e881L,0x00000000000000ceL }, + { 0xeadcae05e83b1cf0L,0xc225b14edd855ea5L,0x1ea3000a0373f195L, + 0x28e08aa0bef5f415L,0x323f250c04f8b333L,0x5c2790bc8c761109L, + 0x49334e096d0e9ebcL,0xf82c8cd830e0fcafL,0x000000000000010bL } }, + /* 62 << 196 */ + { { 0x4c1a21f51222a135L,0x7ffc7eb85e187c31L,0x2b6d847fe1fe189aL, + 0x19dfc0372d394d0bL,0x55dc23a056e7e285L,0xeadb01c77fb7a5c1L, + 0x885f8312bd11402fL,0x7bb2f26fff2843e4L,0x0000000000000137L }, + { 0x4be6d277183e33d9L,0xdb4a534c64233c38L,0xf6cb271db58af85bL, + 0x41e0e7e5e334d256L,0xc91e2cea2d50cf26L,0x7819aed35fda0020L, + 0xe8f81026c0a98fb6L,0x109243793a618687L,0x000000000000009aL } }, + /* 63 << 196 */ + { { 0xac831cc6dd08552fL,0x749fb06f21e38f55L,0xffe1fa0503348862L, + 0x8181db69c8c3e026L,0x1404819e3dd3daddL,0xb065c5a303749cdaL, + 0x9f2ff76605fdc4e1L,0x6c73b8ccf67793efL,0x0000000000000080L }, + { 0x807f0dcbc1f25aa0L,0xa38b959a84dfac44L,0xa112f1c8427a1fa8L, + 0x8bbe3c9dca7866e4L,0x67777e1960dbae5aL,0x323e9a90a9f6aa43L, + 0x693abeca12ab7028L,0x262a90dad5667809L,0x0000000000000053L } }, + /* 64 << 196 */ + { { 0xad8f0fe4c4e24c8dL,0x81cf16fdd1eaa52dL,0x710a08a7d68864caL, + 0x8e736c0e7539de5fL,0xca6583377f67341cL,0x63892b7d900b74b6L, + 0x5d1d91db6e9be886L,0xa4da95649633a600L,0x000000000000019bL }, + { 0xcdae7a975c319d97L,0x873b4f53d0316874L,0x7ac186845c213ab4L, + 
0x4f0b5ae2ae9109c5L,0x1934b7ed7a661690L,0x0f98acd90707b518L, + 0x12f07c5534017e00L,0xe9b121a16ce4ad49L,0x000000000000015eL } }, + /* 0 << 203 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 203 */ + { { 0xf4d3bcb3e588a775L,0x63161619365983c7L,0xba7334ad66782f6cL, + 0x1f14dedb8783d25aL,0xff6fe73c50c67fe1L,0x711ddfe3f79a86a3L, + 0x7cbd7f3a027d18eeL,0x7f63d331c4ecd52aL,0x00000000000000d7L }, + { 0xf463a32bcb49037bL,0x66e4ce3de7c35c02L,0x1d175313116e0d6bL, + 0x3bcfa6722d307ee9L,0xc6f9bcdc7c79261dL,0x79bb33dc994649cfL, + 0xb491b05f6d4ee55cL,0x4bd0668e1e6846bfL,0x00000000000000ffL } }, + /* 2 << 203 */ + { { 0xe7bfc99fbdec6efbL,0x2bd3d26b67326c7bL,0x23f041ff594c614eL, + 0x5a778c67ec9b4420L,0x75b5618b30eaa444L,0xf8e2bb92ae6d50a4L, + 0x7278c60b1fdeb6f0L,0x5b8471dd97c654ccL,0x0000000000000139L }, + { 0x115a81123e959ca8L,0x13dc4aa38bed7eeeL,0x02340f86ef94c5fdL, + 0x519293a499a5d5ccL,0x09573381294349a0L,0xdefe063fda33b39fL, + 0x6a18640aa382a806L,0xb3e25fdbd227a86fL,0x00000000000000d3L } }, + /* 3 << 203 */ + { { 0x64018be514712d81L,0x5fafa8b67cdf8720L,0xd92dbefac4676cc7L, + 0x81b9d5d13ff37dbaL,0x2ecd7dde297a8d41L,0xc893b59f1f558676L, + 0x5d8d83ce75659922L,0xe1dc82f90559beacL,0x00000000000001bfL }, + { 0xa3cc74ebdce6c952L,0xa76fdacac95505acL,0x67ab4bddbfa02be6L, + 0x2a24dae8df7d00e4L,0xf82d0a5af2523788L,0x0b3db8532f8ffc37L, + 0x65ee0920549e630cL,0xb682bfc6e02b93afL,0x00000000000001ceL } }, + /* 4 << 203 */ + { { 0xe3169760d3a4d912L,0x5568005c7ba3a1a6L,0xb469884dd7e7b5d5L, + 0x6be70c098cecec62L,0x6300a32fc5d6363dL,0xc2fd66e44db63578L, + 0xa131ea2135ae7bebL,0xa82fd7fbd31a6f2fL,0x00000000000000daL }, + { 0x1280d69100deb4b3L,0x72865157a4ea0abeL,0xc87881678934a09bL, + 0x5733cc11a70dd871L,0x139e88760ffc8eb0L,0x8e9436e30d958521L, + 0x06d1a845b3059f58L,0x3c220ab3a81354f6L,0x000000000000000bL } }, + /* 5 << 203 */ + { { 0xadb4a7d18cd7b861L,0xa73ebcba1bf42950L,0xd952d5552c86dad2L, + 
0xe3f82d13b988894aL,0xc389b7497ad6f062L,0xef0ed10973ec36d6L, + 0xecd5d73dcdd4431aL,0x006edb8d828f55e5L,0x0000000000000144L }, + { 0xf19bb0ce05a40d99L,0x152e0fd650dd05b4L,0x057bbf2e9508569dL, + 0xa125e601f434a3adL,0xcfbf6d31dbd7abf3L,0x8a47f4fbabb7ec67L, + 0xba9b13e665be73a5L,0x06d9f4989a033039L,0x0000000000000113L } }, + /* 6 << 203 */ + { { 0x1499ff98ac49d960L,0x98f7925a3940ff6fL,0xf6c696dd743f2ccdL, + 0xe7c6c272ef310a73L,0x4029492d91ad1858L,0xa2e65af9483037ccL, + 0x080c3fd0d3453348L,0x4a1f9803068380c0L,0x00000000000001bcL }, + { 0xa39427ddd9dc2514L,0xe0f733ecb2a91d48L,0x0144d74f29219e28L, + 0x25ddcc92e3fc6c5fL,0xb90e371391f871e5L,0x3f763adb81a245caL, + 0x1fc6a1492f3e2323L,0x92cc2090b41f0b93L,0x00000000000000a7L } }, + /* 7 << 203 */ + { { 0x16fc10ab7052570aL,0xb8ba7dd0743fc619L,0x74e19ac2fdc9c41eL, + 0x8c377fa7ad8dc91dL,0x9c7d2ea6e0cfa9fbL,0xb1f1b0d571b1aa87L, + 0x912d33bc900e1282L,0xe8efdbbb0235ab3bL,0x0000000000000193L }, + { 0x2f42a780ecf05e45L,0x4abc067a5ffea26bL,0xd934e1bf3b8f86d9L, + 0xc622689102f42e09L,0xb828a02f94bdda3eL,0x08d0e0d8fd23164aL, + 0x49138165cd40abe2L,0x2cad4154325e1e74L,0x00000000000001d4L } }, + /* 8 << 203 */ + { { 0x293385edb0596774L,0xcdf619f0dc09fb31L,0xc4af9e6a90f76d96L, + 0x54b471b7cc8596fcL,0x18ffee0265bb2ef9L,0x4942b53fcf9d919eL, + 0xd6f9d2a9e553f316L,0xd83946cc6a58eaa5L,0x00000000000001b2L }, + { 0xa1d2daecdf8aa88fL,0x62cd7b320caed19dL,0x7c08bc5735b5b292L, + 0xe1a60a4454118651L,0xb1d83a9eaa8e4f5cL,0xc47b05fda30e7ac3L, + 0x9167e33e388daa83L,0x27a10be2b9685de6L,0x00000000000000bbL } }, + /* 9 << 203 */ + { { 0x61072e6115d296f8L,0x11fc465ebc5f7f34L,0x93eb012252ce572dL, + 0xe66fe1a8a14f9fc8L,0x0857caf11118519bL,0x0ec99efbce22fd7aL, + 0x9630984caeebd7b7L,0x06e2507983a6f11eL,0x0000000000000123L }, + { 0x6b5c0e4a5b5a2bf8L,0x0a0430a05250b970L,0xa547c2981f8d2d99L, + 0x942a92d7773b6ae5L,0x3832ae1f9b1776f7L,0x9ff2f6eaf2484715L, + 0x8e886adfed1ac349L,0x5fa46fc33779f565L,0x0000000000000112L } }, + /* 10 << 203 */ + { { 
0xf5fe39ed1fe5f835L,0xe17a6fb3164e9741L,0xeb9a084a0f1180e9L, + 0x95f55d56a1174f31L,0xc33fa3d103850ac7L,0x15a29bac6e0ddd06L, + 0xb80a0b7836f2f29eL,0xe5d8ddf467cde67aL,0x0000000000000133L }, + { 0x1767c74cc5ee0dd4L,0x1d61299bd7e4bddbL,0xf4d89a2ffa74fb11L, + 0xe8a63c4a0037fb90L,0x30fcdb047800ec4aL,0x49a1e298d490df3bL, + 0x3d3510a2da5acae7L,0x2d4184aa78ce9832L,0x0000000000000024L } }, + /* 11 << 203 */ + { { 0x461789e5a7e331b4L,0x94f5c063e056cf9eL,0x51c4cb0bafe86b1dL, + 0xa77f9298ef14a074L,0x077bb1b48fc7cda4L,0xe5463bbe3ec37500L, + 0xd8929e637a85c083L,0x99bde81e8a6a20f3L,0x0000000000000039L }, + { 0x3267c02daa1f7d63L,0x23d9787ed3ea992dL,0x8bd7ecc6eadfc006L, + 0x1dcc457818d40a6aL,0xf4e545c5c2837f18L,0x4936de14599552abL, + 0xf7ba407716ae7bc3L,0xb5a4143a28bd986bL,0x0000000000000132L } }, + /* 12 << 203 */ + { { 0x7af8ee911d17b705L,0x8e4e597421098f55L,0x0542a394d5cecacfL, + 0x4d8eb8395094f357L,0x8c86900c62d5a150L,0x7ad4081bafe653e7L, + 0x387c8a7e677a5bbcL,0x088120b9be7886d1L,0x0000000000000156L }, + { 0x986523706c398918L,0x7c9c49714f6c44c4L,0x443abe7820a8fa98L, + 0xf42a7ff4137f926fL,0xff4c7aa9f464b480L,0xf88b491a277d4eb8L, + 0x170a625fc3cab7f6L,0xf9bba62bcdcab435L,0x0000000000000186L } }, + /* 13 << 203 */ + { { 0x31b0c09c5bf0852dL,0x18aaaed5d0a908a1L,0x4895590f40ba0908L, + 0x5b3c1647b5ce8ae3L,0xf1b7f80893496465L,0xd5dd6bab9f019279L, + 0x85473bc14ec60138L,0x5e219e24c353225eL,0x00000000000000b9L }, + { 0xb175cd2ff10b3294L,0x8cdc3ffa902373d3L,0x49bf103f9b547ce4L, + 0x78437a78684806b9L,0xedf8cea1ecb2881dL,0x3f74415f802d7f2aL, + 0xf921f614ec2bd162L,0xc5e11e9860a14a15L,0x000000000000007eL } }, + /* 14 << 203 */ + { { 0xda963dc90741634eL,0xc2b6b3cc6a62e31dL,0x726ffdb0926a9817L, + 0x2bc8a0c6a7a05347L,0xb440bef6dad1f7a5L,0x96ae7cb3666ebfb7L, + 0xf3b41ea36955bba1L,0x2ed577ffb1869190L,0x0000000000000025L }, + { 0xcd5dc527b188b5e5L,0xa3fa5778ce6e943eL,0x463b9aa822daf81fL, + 0x2a0544efdfb44926L,0x7c78f0e5c5f56c83L,0x83a5552b87a0cacfL, + 
0x8a5a5cdbf491affeL,0x419ba8841b8398d3L,0x00000000000000bfL } }, + /* 15 << 203 */ + { { 0xc7a233d26a1409e2L,0x75613083d555f813L,0x57856cf213217cf0L, + 0xa98b1bfbf09af4a8L,0x43ce553426956b0dL,0x952672fca3623606L, + 0x6c5123aa9b1676b8L,0x7ea503cd2c03b708L,0x00000000000001ddL }, + { 0xfe97c8445f696473L,0x41fc8b4210c17454L,0x5988fe457d0bc836L, + 0x771a9260151ffcdcL,0xb20c19f78f31f140L,0x6a616316ea95a25aL, + 0x16ca637432de15b1L,0x428f650bfdd3aecbL,0x00000000000000fdL } }, + /* 16 << 203 */ + { { 0x4ac7cf5e23e6c204L,0x7b4459a48deb8019L,0x2ffd2583ebe4e27bL, + 0xab15feb705e6ae2aL,0x270a5d8fc902355cL,0x8d3df414faf419fdL, + 0x191b90b534ae3d01L,0x497282c4bb5abd9eL,0x000000000000011bL }, + { 0xd2232ae08c7267f4L,0xe1fec69b17b6532fL,0x1c6a586eb1a2d82cL, + 0xeb84318629128558L,0xbf24d2f13a80efd7L,0x93763ddbb22228d1L, + 0xf9da00393cb37b92L,0x780d8ba986cd2952L,0x000000000000018aL } }, + /* 17 << 203 */ + { { 0x85ce6086c42c91ceL,0xe3730ba1beff726fL,0x3a03ccef7b5cdd1bL, + 0x21f5dd5f74df90ccL,0x515f92541dd8a8a5L,0x32e144b04540ec56L, + 0xd5b0f4ce1bf59220L,0xf7475efc15b613a1L,0x0000000000000009L }, + { 0x9648786de09a8312L,0x637db864dec600f8L,0xc8d80a2aeff36b2cL, + 0xb35ee379748d7b89L,0x80480f180bc51ee5L,0x93ecb703dc1e3844L, + 0x96285408970354c9L,0x1d025481acdc2e04L,0x00000000000001b1L } }, + /* 18 << 203 */ + { { 0x11da16733b2b4777L,0x0da01f6bee05eca4L,0x4a39b2c1b5c102bfL, + 0xb1c398f40180ecafL,0x0f34076588c87533L,0x0c47914ac3c66289L, + 0x3696a6fa0dd34c31L,0x81e365a2a368c6d7L,0x00000000000001f0L }, + { 0x840b207fbd408342L,0x30261282fece17e0L,0x40257c9f3da41fb8L, + 0xa4cae7603b1ac31aL,0x5fbcff5f594f704cL,0xe351cefe85314299L, + 0xe22cb75c12b975ceL,0xb1f453d9aa5cac15L,0x0000000000000004L } }, + /* 19 << 203 */ + { { 0xc9ead8a3a32cf5c9L,0x7dc1770b97c506e6L,0xe7c79e2d42a4fbc7L, + 0xb04c9f61bd24d6a0L,0x3205b73d256b6b7bL,0xf16c9e4b2de3cb16L, + 0x227f1ff5d82a3f34L,0x0019b2fde6349182L,0x000000000000016eL }, + { 0x1f14191b7572f22cL,0x7302f1d0bf7099bbL,0x7fb16da069ac373fL, + 
0x3339a34583f442ebL,0x2e9f4d5b3748f434L,0x824a57612b80770fL, + 0xdbb71eccb3f15c50L,0x1682849bf4442e7bL,0x0000000000000195L } }, + /* 20 << 203 */ + { { 0x178d85d0416d2153L,0x4c5e98ac3192f6b3L,0xa2bf4a99f469bfb7L, + 0x0cf96dce79b1c827L,0x1f0c1c1426bfefe5L,0xb311eb1d1eed9d89L, + 0x740f6075a0e8edfbL,0xaf5aaccd20253b96L,0x000000000000004aL }, + { 0x383c53e0e2c5bcb2L,0x2bd6968fddf8bed4L,0x3f7fded26470a5adL, + 0x4262c3d7081745f0L,0xda4dd333cdf6dfd3L,0x3198e00505d05d05L, + 0xc90c1c96590157c9L,0x2ab65d5b805804c4L,0x000000000000018cL } }, + /* 21 << 203 */ + { { 0xc8b63533163574afL,0xd4411f669392afb1L,0x39cae3efe849fbb4L, + 0xb6905a8d734c3270L,0xb67c07e1cbbb8c06L,0xb255b4db9aac51d0L, + 0xf199d235564033e6L,0xa95f3a7c0f798793L,0x0000000000000188L }, + { 0x991b58c05d94e2acL,0x160c5d37fee15559L,0x34604e5ab17d3f15L, + 0xf5a0f8bdfb1b01f8L,0xe991821b060c46a2L,0xf7a38307a55a0299L, + 0xc356d8e03afaa0d2L,0x333a3c25f0970ad8L,0x0000000000000175L } }, + /* 22 << 203 */ + { { 0x127344e29f66bf23L,0xe5072714c3ff9b2aL,0x9aa6b001571e9d32L, + 0xe92e4ae6e6738a5dL,0x4616e315433defabL,0xe1309a4a0b892330L, + 0xde5f54f81acd85a9L,0x6615d532a0013a39L,0x00000000000000b2L }, + { 0xae7fe9461948c112L,0xaba4b1f25fc94eefL,0x622344602eebeedfL, + 0xe9794e6c8c0c8afeL,0xa2dbac68aa7d5990L,0xf59d5a7ad07e966cL, + 0xc301f1334490bb8bL,0x05102377fac91f14L,0x0000000000000076L } }, + /* 23 << 203 */ + { { 0x60373c888966b32cL,0xea1c42d6dc6c28c8L,0x18d4ad6301e1db60L, + 0xbdaaea6027297a7dL,0x8195b41927d1104fL,0xc48c92ed6b0c1bc8L, + 0xb73823906b31a986L,0x33294cc20e930151L,0x00000000000001c0L }, + { 0xc482e8e3c6e67f35L,0xeeb9cc9aee7cfe14L,0xff393c6df96fd071L, + 0x77f360224c240d36L,0x7aa6e6d4008cf062L,0x33f7ba5e334659e1L, + 0xb7bb47e3338f19e0L,0x24641529965a354bL,0x0000000000000081L } }, + /* 24 << 203 */ + { { 0xff3620b84bdbf53eL,0x9a92f844cfb096a3L,0x3b4c39d9d79176efL, + 0x5c23425d20c67ba8L,0x2a3a36c2aa571818L,0x131fbff694d33bceL, + 0x8a647ce852fc474bL,0x082f25d4708dee34L,0x00000000000001b7L }, + { 
0x0b83c1f0799941fcL,0x39efdd8895537412L,0x569503fdb5578a16L, + 0x858772ff0ac52d85L,0x1eb29ef86eeaed65L,0x416b2a4900efb022L, + 0xc34726fb98e0e612L,0x460f3c084eb7474bL,0x00000000000000b9L } }, + /* 25 << 203 */ + { { 0xfd46e1db27a1181aL,0x1ee50b461711b1f7L,0xa60af377813334e3L, + 0x8151c9876ce57ecbL,0xc75b6a7d140d4b4fL,0x1b8f031e29797048L, + 0x142493c798c35dc6L,0xb2cfe7b7cbe996a7L,0x000000000000015eL }, + { 0x80544a487edde636L,0x46353ae9fd5db792L,0xb20013c8220846afL, + 0x2416e5bdde7762b7L,0xe64be27df87fd397L,0xab712e13e2ffa788L, + 0x5193155a057b1e4aL,0x5469e2a4b847b16cL,0x000000000000001cL } }, + /* 26 << 203 */ + { { 0x5d942f48eea1b609L,0x5b5928f55d45821eL,0x4e902a63e4334d8cL, + 0x9487288dfd9c133aL,0x076583ec0562afa4L,0x15ac51ce847620a8L, + 0x75614dcd6bb7db20L,0x61b4f0ac79817a3cL,0x0000000000000135L }, + { 0xafa3db2917e1ade5L,0x34c874f1303a98a8L,0x46a0324db21aa653L, + 0x6bbd6e145845d604L,0xa08a64424c41da8dL,0xa86f0eced381c0bfL, + 0x680383de6177267eL,0xd8a8c02c2eb78f7aL,0x0000000000000182L } }, + /* 27 << 203 */ + { { 0x510c5be254c5aa6eL,0x814e5b33a2c96898L,0x5c5409cc0bc238d9L, + 0x5b5f3771d0f892eaL,0xc6d889cb5adb6dd6L,0xd69b4a965bcc3a28L, + 0xb005053762d3d8caL,0xb649554bbbed8aaeL,0x00000000000001ecL }, + { 0xeed46bb08eb806a6L,0x9b77f90339b297c4L,0x768412874c81b21eL, + 0xb9f15aae9f97d21fL,0x860ca9350ecfb6efL,0xe4199e02d03a9365L, + 0x4b4f34036b22befaL,0xfce4b34a3e909e2cL,0x0000000000000035L } }, + /* 28 << 203 */ + { { 0xa29eabe4613c2400L,0x6da11ad55fd4843bL,0x6c479b3ea02471adL, + 0x36f15bc6fbb5c3acL,0x4fb2678cb1d9580fL,0xffe413ff946486c5L, + 0x5907bfdfa12b55e6L,0x32f96e37446dbb1cL,0x00000000000000f6L }, + { 0xd371325a5f34ecbcL,0x4381c3cd2296b634L,0x059d9308e5fd27ecL, + 0xf6ce8b258e757bbaL,0xddbf8e7ade871b1eL,0xe66728521ed3e5d9L, + 0x6d20b61da5d76131L,0xb8902120ed8a464aL,0x0000000000000029L } }, + /* 29 << 203 */ + { { 0xafd1249ea0049ad9L,0x17e3f90da460a3f1L,0x9bf7805cad6c8671L, + 0x4de57997e5e4efdeL,0xb6d31c92436e52fdL,0x3b5d07d0e66dfedcL, + 
0xe8f61a8613b6040bL,0xdb120430540de2f0L,0x0000000000000046L }, + { 0x17a3a42b480fcae3L,0xe6724148df4383beL,0xd0d8e825e7398a02L, + 0xff5e909116413545L,0x80a83d1b9e9f32d7L,0x1e64d4128169cd89L, + 0xc06a950009862dd0L,0x5ab3d610135884b7L,0x000000000000017fL } }, + /* 30 << 203 */ + { { 0xb22b6df53fe8f109L,0xe4ae50e7c00edcd4L,0xbf891ce36387044cL, + 0x0716f49bd08fb385L,0x0efbe8c154934b45L,0x08e66176982f659dL, + 0x042ce73691962a9eL,0x1f08748ca2974528L,0x000000000000006eL }, + { 0x8fa7abdd6b9af5d7L,0xaa2fbf79978b04ccL,0x4ea61bae7f3136b7L, + 0x406b670e150cad9cL,0x22be2a1b08aa96f6L,0xffcc680d566c56d7L, + 0xe65207819a111236L,0x940aef1c69d2e843L,0x0000000000000062L } }, + /* 31 << 203 */ + { { 0xd129e91b896698afL,0x73b41a17181e6925L,0x97ce44352236ade8L, + 0xd75aebca34fb46a8L,0x14c944000e23617bL,0xcd8b9bfbeb166d12L, + 0xee040a4c3a905495L,0xb72ecd7ec410f093L,0x00000000000000dcL }, + { 0x061e964149500be0L,0x6bd1c08f37d923d5L,0x20ace6977d7c8942L, + 0x4b0799a17ec28340L,0xd78d922cc03f7a1eL,0xaa5ed16c52d72598L, + 0x9a9465fc536adef7L,0x1520c2f1b2190fa1L,0x000000000000014fL } }, + /* 32 << 203 */ + { { 0x0fc6c549c4c762bcL,0xc43915e34e5eb96fL,0xad88ee68d1bb0b5bL, + 0x484a9fad3f1f26a3L,0x0d93055c2bc15e9dL,0xa745abbb06d26bb7L, + 0xf60ed3b8464f0731L,0x963d8d94ea9058bfL,0x0000000000000039L }, + { 0x4608ac087b67b298L,0x3baf2ae6c85cf812L,0xb30aec550d25bedfL, + 0x581493507ce48459L,0xfcb39f45d3c41e2dL,0x48d241a0126e11a7L, + 0x7b425421909a1621L,0x9bb349dde746bf4bL,0x00000000000000abL } }, + /* 33 << 203 */ + { { 0xf78ef0177735364dL,0x4242a9b94f24972cL,0xaafb2ff361309582L, + 0x68159118c9fe5e6bL,0xa139c9d817d00812L,0x396e0ab8226a0decL, + 0x3a30e3c673ea4520L,0x18df75b8a9dd6918L,0x0000000000000104L }, + { 0x1604bdd3791132f0L,0x0d35e98dca54008cL,0xe9ceeb9b6c26b2a0L, + 0x4a4c2574b1f5b2a2L,0x69fbe01ff67ec174L,0x869a7c63315bc86cL, + 0xf08c67d6679659abL,0x5569ff11470dfd0fL,0x00000000000000dcL } }, + /* 34 << 203 */ + { { 0x3f0253b63cbfb254L,0x6ae0490dfbc31984L,0xe3211ed9838456d9L, + 
0xa777ce33d6dda841L,0x2477049c8192bf72L,0x272fd5e9a4c1ffceL, + 0x4d93c00b02a926a8L,0x1f2575f0e049893bL,0x00000000000001daL }, + { 0xee660c10a1ea2fafL,0xd873fc0cb8c5388cL,0x567dcbe9dca70a47L, + 0x9d40a36f11c8a42fL,0x3676748d0ac98534L,0x2bcbf2174d49d6cfL, + 0xe11f56cea7891326L,0x1ab209d7060afa57L,0x000000000000009fL } }, + /* 35 << 203 */ + { { 0x4b6d579ba05593bdL,0xc6285d8e3e95708dL,0x9241a96b5c289411L, + 0x25ca7d1a7ff046d7L,0x2590294a1361f71aL,0xb49a914ec83ad83eL, + 0x89aceb3a85fb27d8L,0x6776c7f95575641fL,0x00000000000000ccL }, + { 0x4b835dbc0613476bL,0xc94fdc0798cd4c80L,0x335103581a99e1e3L, + 0x904d83c4be2dc3aeL,0xbd1970b7528f5723L,0x6bd945cdf6821722L, + 0xe3f06e613361a4b6L,0x6dd9696ad6bd360bL,0x00000000000001aaL } }, + /* 36 << 203 */ + { { 0x060220b0d5aabf1dL,0x7ec07ddee8faef1bL,0x1c30e81f33c88f67L, + 0x3ab29b3b460c1969L,0xc884e421d4f18871L,0x3d76eba1e1d5f78eL, + 0xad2f04f5f5600f5eL,0x2e07b7c6fdfaa998L,0x00000000000001ecL }, + { 0x94aa1de0801495d7L,0x35dead394496468eL,0x3b3856ac27ab597fL, + 0x0d1781704ce8ab57L,0x32add9b6a50d197fL,0x251a00a000be8d21L, + 0x8c65dd58c2200093L,0x2da03bd64b7ac520L,0x00000000000001f3L } }, + /* 37 << 203 */ + { { 0x7d77930ffe885231L,0x7c1d348265515480L,0x1028d2f332001982L, + 0x7bdd01a2f88d1e1fL,0x08e5a9624fc0514fL,0x5fe3e2f0bdf8eb8bL, + 0xf4028d9cf33bf562L,0xfd0aac8b9adec91aL,0x00000000000000fdL }, + { 0x262500f4c7eecbc5L,0x8e1ea6e773113904L,0xb4fda953c677fb11L, + 0x15e2bec7f004bb4cL,0x0ec674739adf1319L,0x30c7b3002e1908c8L, + 0x72b9155302f8fe1cL,0x4c1ae8241fef5d3dL,0x00000000000000bdL } }, + /* 38 << 203 */ + { { 0x65afe7c36fc63e54L,0x6c79e1d949f2ee4fL,0xc8c051c698930252L, + 0x433fbf5bda5a1e42L,0x0f5fb506d2262325L,0xc3ac5598bf7bb66eL, + 0xb4080aff038c1d7aL,0xae7fdc8a879a0b88L,0x00000000000000fbL }, + { 0xc8633f62b2ccd88dL,0x7a8c4df946413bf3L,0xbb3a0aff154bc142L, + 0x0a25d6637f99f5ceL,0x2f9b556d91512e7dL,0x5856e88dd4fcbfc1L, + 0xfbac60880d590e7bL,0x3e74b8f7572a6a15L,0x00000000000001f5L } }, + /* 39 << 203 */ + { { 
0x2e92481a490bf8c3L,0xb234c92c44d07befL,0x23be000f6010e6abL, + 0xdd24b74db1f6e796L,0xb885f4d57f2b555fL,0xf884923f24e9d3e7L, + 0x12bb9608f5252325L,0xeb4d240142895b93L,0x00000000000001d8L }, + { 0xb7639fb2e02a475cL,0xbf16704256218563L,0xe8f52550b704617fL, + 0x35fbc89365a77f60L,0xb7cf34eb39d0d9f9L,0xfc9a3a37721dfea9L, + 0xfbdd63fe12b1bd09L,0x8319294c9b186376L,0x000000000000002dL } }, + /* 40 << 203 */ + { { 0x760be85ba57579c2L,0xe47a63aeea20d741L,0xf80319e5232ac17fL, + 0xe3a492894c6087d2L,0x3d8d470fe20d0690L,0xfb770c1ae9a1e231L, + 0xd7e1cba177a0824aL,0xc90a181ef7ab7398L,0x0000000000000123L }, + { 0x26bd2f3b59ec175aL,0xbf65beef751bd1bdL,0x242a83d6fe1b44e7L, + 0x9c16d231a1dccec8L,0x7ca438f66e6b3e04L,0x50b091d4c43e7f09L, + 0x23a893f32d886f9aL,0xb18a52c8b26c60c7L,0x0000000000000069L } }, + /* 41 << 203 */ + { { 0x9bd5bea7d2af96d4L,0x6fb5de3bbc502c9aL,0xb5fe88e904928dd6L, + 0x660accf4c03c98a1L,0xe59d202975520adcL,0x12c50027fe29eef8L, + 0x6d2739cf3758dc66L,0x49b418f24b9cb6a0L,0x0000000000000021L }, + { 0xdf55798a1a079e8bL,0xc7c4f956fe9f31c6L,0xca1c9c93604849fdL, + 0x29d32b26f33c9d98L,0x79f867bfd1cc0d6aL,0x776857b4bd3b05cfL, + 0xf445083cdc53ac52L,0x19f3a0e3be72024fL,0x00000000000000faL } }, + /* 42 << 203 */ + { { 0x4a5cee83dee4aed4L,0x670871eaaccf44e5L,0xd325710515daa7dfL, + 0x41ed4dee9573f014L,0x0621cfe9c7e9c774L,0xdb71a7db46ae0c35L, + 0x5fbbb44236d96fe6L,0xfc4ee442791497f8L,0x0000000000000025L }, + { 0xb58373bfda824d48L,0x2eba350be2b6feb3L,0xe732690e5cece104L, + 0x42f7dc57a0fade31L,0xb2ac02a777a8ea62L,0x2c39600f2730cc77L, + 0x71ec3499ab92a6ccL,0x857ac22e85b8f70eL,0x000000000000013dL } }, + /* 43 << 203 */ + { { 0x351b69db61e98acfL,0xb2699c2328ac4d57L,0xee2392c34b2cd5c6L, + 0xcb4914379838b68cL,0x11af2ea4a79b35a0L,0x0baaf41584ec5f87L, + 0x380dbda247432ae0L,0x5949262bbd9fecc1L,0x000000000000011aL }, + { 0x461c1a7c33de0a7dL,0xed6b4f1a2158de9eL,0x1b51b42f97af5453L, + 0x5dd95efc0b1f9f62L,0x43680178f2fe73b7L,0x43eb48a07ad44206L, + 
0xfefa59bbbc57342dL,0x92be4ae731e62d3eL,0x00000000000000ebL } }, + /* 44 << 203 */ + { { 0x50720100fd88d0daL,0x6c96ed6eff0c9301L,0x5cfee40f95d538bdL, + 0xb8a8dc1d8ce5618cL,0xc4f7326f15fc988cL,0x2c93679915b83096L, + 0xf99f2517cfa85d25L,0x822fcfae5900af2cL,0x0000000000000011L }, + { 0x4c87b89e2e424f4dL,0xce5f4f37eb2ebfe7L,0x9bea312196392a9fL, + 0x30c5e7742855b498L,0xa9fef9945a06b8f3L,0x9372653173d076d1L, + 0x3237fe278510f6faL,0x2889eaaa1ed4a243L,0x00000000000001bfL } }, + /* 45 << 203 */ + { { 0x2854d519527d1fa4L,0xec81e71f030bfa56L,0x7c7a6f6c74f9b14eL, + 0xc14b2e2fac46e139L,0x266d547234be2050L,0xc76508966ee9b75eL, + 0xbcb2a8ff31cc54dcL,0xd30db5283217cbb0L,0x000000000000005bL }, + { 0xc60582de3d8f4fe7L,0x3571cea717d2bcceL,0x51f2c889cdb6114aL, + 0x67599f6dd8034eb2L,0x39ea1fb81c73d606L,0x202f869715dcadd0L, + 0x75b193f9ed7493ddL,0x0594f438966147b7L,0x00000000000001baL } }, + /* 46 << 203 */ + { { 0xe8a247b9ddb773eeL,0xf011460b3a15d54eL,0x513260d9c6c9729aL, + 0x3141d40090f7c352L,0x08abd3ed3ad70a9eL,0xecc1c0b0d4416182L, + 0x378a698baf4f30d8L,0x0e16d0c80d60ca6eL,0x00000000000001beL }, + { 0x1dfd2561ff8f8008L,0x81ea4aa79a575caeL,0x7c786f74b483667fL, + 0xd8fed3f94ee8c195L,0x90fbadb4a6f48ec1L,0x60ffc1f0427e2ac1L, + 0x50278a1d955ae40eL,0x6ad088ec707c1993L,0x0000000000000040L } }, + /* 47 << 203 */ + { { 0xc5fa1b24da9e4707L,0xb2ed74501186afbaL,0x6a7b01ee5671cf3cL, + 0x533241c421e91105L,0x33fe3ce08e6c7277L,0x6bc05f002376d680L, + 0xe35268f1a828e28cL,0xbbe9432cb60d1db9L,0x000000000000002bL }, + { 0xc11785a77cd09d89L,0xa68d9f4cc5495098L,0xfd929e7877817c92L, + 0xfc91a43abb6c8891L,0xa931f7edb133140cL,0x30f07e5841125b2aL, + 0xff9ddb7427f03ab2L,0x0cb025851fd99248L,0x00000000000001b9L } }, + /* 48 << 203 */ + { { 0x9cf44ab408998910L,0xdb3fc2ed1a5c47c4L,0x7752c31eee0be9f5L, + 0xbd4bdfa943658470L,0x6fe2594572f2fc86L,0x237e1bcfef842a76L, + 0x74a857e47220a961L,0xd007b3e3c6bc996fL,0x0000000000000075L }, + { 0x133e7f3d64ae70e0L,0x31966cdb96444608L,0x88fe449aacf0d4efL, + 
0xddc0df233b47912bL,0xc19d556d5771cdb5L,0x122a6b4c834c9d4cL, + 0xf4fd9193128abd43L,0x42dde25bc55b3ec4L,0x000000000000004cL } }, + /* 49 << 203 */ + { { 0xfdb3c4406206a4feL,0x125e23203bd37671L,0x51afe42085c66b55L, + 0xc82746a7c940c824L,0xd2f7bb9f815d4539L,0xafc81b12635037a1L, + 0x52186ca4c7627416L,0xe1af5236303a7901L,0x00000000000001d8L }, + { 0x2f110c396dc058ebL,0xbe11880e4a4c8a7aL,0xd08d8ee763cc89a4L, + 0x609e4b127ff4eaa7L,0x34344cbe13acbbdfL,0x327f290902faa861L, + 0xa78fdd5ac3ebf0d0L,0x0bb5a4256c4d55c8L,0x00000000000000afL } }, + /* 50 << 203 */ + { { 0x42aad690805acc53L,0xa7493fc7dd905627L,0x487f9407d9ff6965L, + 0xdaf7232e6c45192aL,0x403d57cd8d6e7d00L,0x931d29065fd272e5L, + 0xfd7ddc1c35824aabL,0x587760407dd72d28L,0x0000000000000194L }, + { 0xa559885f94fb6cb8L,0x18dd399e1dfeaa2dL,0xa6f23d25eb1a0248L, + 0x4875945120e84064L,0xd481f3bae40bec92L,0x7bd5cf14464e0aa2L, + 0xdcfcce53ab13bd66L,0x47391f2ba2ec1297L,0x000000000000000aL } }, + /* 51 << 203 */ + { { 0xac5fad77cb3652b2L,0xf60c981b1702815cL,0x3f848d658c5451aeL, + 0x158ab410e63af4c6L,0xcd7bb3a5f46545efL,0x81dac1f4dd4f3e3eL, + 0xae18eb6b0ca68587L,0x3c11c12b0f2f0afcL,0x0000000000000027L }, + { 0xa770e6757e36cb2aL,0xdf6d0de92153e716L,0x84b953bc23da6df9L, + 0xa2b731d222921fadL,0x76d5d2bd6f721c04L,0x84826a176ccf0ba4L, + 0xc5876c6a0f44e9e3L,0xb215b5099f6f565fL,0x000000000000003fL } }, + /* 52 << 203 */ + { { 0x156a9949f151db06L,0x22b6b0fd06a70b1aL,0x32d582e4ce617d0dL, + 0x84fb33d0992eed75L,0xa1c7f6b2c077d815L,0x3285938f313dbd79L, + 0x8a8fede6076f503eL,0xfdae2afe7c95c93eL,0x0000000000000139L }, + { 0x96f542035812daa7L,0x479ba6d3a2d361b6L,0xa3793b005705ed2eL, + 0x943d444ad5ffe440L,0x474e4864379e007aL,0x9ff8bc4dacd38afbL, + 0x508f54e5a6195a99L,0x3085a04113817842L,0x00000000000001feL } }, + /* 53 << 203 */ + { { 0xfc91e9f6bfd0daa6L,0xaf6b749fa3f661c7L,0x01909cf557e3ccf5L, + 0x35b238be3ee3bca3L,0x4bcd54419bf8184dL,0xfba84a8c70887c36L, + 0xfcbb0d1de463801aL,0x50ccf27f439a5570L,0x0000000000000087L }, + { 
0xc4493206a8126517L,0x7fe519f9f3614d98L,0xd384e085c3b33b80L, + 0x7cb14d0b2f997b13L,0x768db950e87f06d6L,0x970b51fbbfee3e6aL, + 0xa39779e815cd21a0L,0xf677327e5bb60c21L,0x00000000000000ccL } }, + /* 54 << 203 */ + { { 0x1da1477e29decfa5L,0xda072008f02d4900L,0x308e95bba4a4a5efL, + 0x7ea7a7e700684dadL,0x8fdecb5864c70f1aL,0xac1f215de2b6bc4cL, + 0xae5dcfe17aee88e2L,0x755dc3734b87ea15L,0x0000000000000113L }, + { 0x31cdcd38c9abbdf0L,0x002275fbb68c75beL,0xa597136936819c82L, + 0x0d32bd7478d0808aL,0x8c6272fd441d62d9L,0xac06165632ff00f8L, + 0x21b9e0076a1b924fL,0x38af0f3fff5ff630L,0x00000000000000dcL } }, + /* 55 << 203 */ + { { 0x55989a4e1513fe24L,0xd6fabb7a377c6d9dL,0xfb823e5b0578f12cL, + 0x2468f83105377082L,0xd4878755ace05f05L,0xcdb85e0e1e391758L, + 0x45635c6b145f3ebbL,0xd278b81411717273L,0x00000000000001c3L }, + { 0xc72cd647b50c5d7fL,0xf047d529dd74bb7aL,0x9300b986938084c6L, + 0x39476814f0b7388cL,0xc7b74b2fcc7f991dL,0xd684e2f5bed333beL, + 0xc6191b9f5f713216L,0xfacbf180c5995497L,0x0000000000000065L } }, + /* 56 << 203 */ + { { 0xe3cf0d5abdae44fbL,0x71ecef1ef6f5cfd1L,0x7fa5ff37a11bc661L, + 0x2e0d9032ab6a8007L,0x8e465ff4b99ed2b3L,0x63ee69531a62a4cdL, + 0x31ffa26bad2e514aL,0x0c5e085f6ff7bb4aL,0x0000000000000128L }, + { 0xe1b74ab9d20c8193L,0x0f67229ad63050feL,0xeeaf4ec6ce73ff43L, + 0x10094ecf114d6d87L,0xf515978540801d95L,0xa09334933f23d6dbL, + 0x0ac122515f242117L,0xa1a4a37f5643d48aL,0x00000000000001d7L } }, + /* 57 << 203 */ + { { 0x08bb5ac7384a5d01L,0x4b476ae6141e85b7L,0x00590a5e1d16b1baL, + 0x722dce02a2654e49L,0x380d3ac2483bd792L,0x247a5eb2ce16ad4eL, + 0x5ba4f9402eeba293L,0xec77ec84a7507cc4L,0x0000000000000137L }, + { 0x1abf1b23c775da9bL,0x51926f26193fd227L,0xd4a448fe01380d03L, + 0x681fe1925ea66a42L,0x794acc9567e1d118L,0xe712708229692e86L, + 0xbf4924323e527353L,0xd9aa506903fc2288L,0x00000000000001adL } }, + /* 58 << 203 */ + { { 0x8f6c6de220075145L,0x2005c1b150c10e08L,0xcb6f289fe8304e93L, + 0x8772c2f895b33af8L,0xfc9ac3cfb78f1556L,0xa4a401e2be444c8bL, + 
0x140354191c3ecbe0L,0xa65d09c100ac6e64L,0x000000000000015fL }, + { 0xa6d278eaa028bb56L,0xf8fd2af7f7809e33L,0x8ae6c10268f7c2dfL, + 0x6c70558658cdaa11L,0x2bfbbef7e3c31f4cL,0xf8b8a7cb9e618b41L, + 0xb521782dd7782d09L,0xd779f775dff8910eL,0x000000000000013bL } }, + /* 59 << 203 */ + { { 0x5320eefa9777d185L,0x68beb1e5eb1e2074L,0x78dee05ec1e19dd5L, + 0x5f860f6599bd6062L,0xdee3934194257635L,0x342770ce8635d6c5L, + 0x8a06a3512e8be17fL,0x9d59963df78ab6fcL,0x0000000000000153L }, + { 0x39d400fbb2b8dd24L,0xf133abbb1acfdb33L,0xffd9d7a4d0506911L, + 0x134d6fedff9d17c2L,0x64622ea9d5149d82L,0x8ff0c386bab0505aL, + 0x7998c2288977e003L,0xc41aea628ad09185L,0x000000000000001aL } }, + /* 60 << 203 */ + { { 0xe8f81e67f0dbc26bL,0x67a84c46f8124049L,0x7c12baed629b2402L, + 0xd39ab731384afbbdL,0x7abb971abe6168eaL,0x3ec476fb5dc96bddL, + 0xf568fab44fa5f270L,0x739374abac51818fL,0x00000000000000bdL }, + { 0x83d36009af42bb0fL,0xc6958d2c42dbd415L,0x39f20878bfd8cb62L, + 0xb2528156e23525ecL,0xdf6b3537ea725e7aL,0x6ddafd2d6c2dc45dL, + 0xb696f0b8b8682ed4L,0xdd22cc010b611962L,0x0000000000000123L } }, + /* 61 << 203 */ + { { 0x10a08a1d429ade6cL,0xcfebf0de40932626L,0x2a55e9c5e357b14fL, + 0xd551713e4ac50b0aL,0xeb32aa3fa3be7addL,0x6c10ef40612e530aL, + 0x887ba5fed318f9dbL,0x9328d459bedf5ba0L,0x000000000000013aL }, + { 0x91af92d7c4ce63daL,0x2500a7b7aa099030L,0x5cea55f7f0e69a79L, + 0x28ca6b389e2136ffL,0xa0a71734e6472420L,0xfb6dfe6f191baf68L, + 0x80d32b3c985c5e32L,0x29d49cdaf82c5f47L,0x00000000000001a2L } }, + /* 62 << 203 */ + { { 0xb55c2c748f50537fL,0xdd9c6df320a3297dL,0xb0b5477afce6109eL, + 0xdde894cfd5a0ccedL,0x4499db297480d3a2L,0x22dfd2bb556180eeL, + 0xde7299830c83a1cdL,0xf4dc6b95a709c0a5L,0x00000000000000f0L }, + { 0x2f295e2db07bd50aL,0xc0a72738bd5546b5L,0xd52477a78ec7811eL, + 0x92e14d8eba3186a7L,0x8eb36ad32467a3e2L,0xebe38e24a07ed270L, + 0xfbebfb87fef24897L,0x6bf271137005debfL,0x0000000000000144L } }, + /* 63 << 203 */ + { { 0x9441cc5908dc7a12L,0x64ac736d8fca53eeL,0x19d3acb882a58c26L, + 
0xc159d488cda04962L,0xfe82b61dba03928cL,0x8a7234e590149eeeL, + 0xa561263424bf8caeL,0xfb65e1ea2ac90b43L,0x00000000000001ecL }, + { 0x646b4ad41ab30bd1L,0x49c490c08bc81256L,0x0e07b398bb57c018L, + 0x8d00bbea5bae6201L,0x1e3cbbaf746d5441L,0x48ea6a235bc6f24eL, + 0xf6828b36cbbbf53aL,0xb54e8f928f84432bL,0x00000000000000ecL } }, + /* 64 << 203 */ + { { 0x8f45148da52d1421L,0xe89923fd09daf522L,0x112ca311c8f15492L, + 0x91b33ccb469e61fbL,0x040de443fb2ab91bL,0xb9d124fd4f9b32e8L, + 0x84da47a9f07573bdL,0x93b7ee6baacdd14fL,0x0000000000000064L }, + { 0xb3af161f4be64122L,0x5946d2ec149c5475L,0xdc04fcd0256a1ecbL, + 0x4fc571b7327e7a8dL,0x5206ea559580edcbL,0x7f41a742f20a94c0L, + 0x1bfddff0f7fe1fbbL,0x3860b232e2f1fddcL,0x00000000000001caL } }, + /* 0 << 210 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 210 */ + { { 0x7eaefe12155c337bL,0x357f27cec7186596L,0x90c26c69654dbbebL, + 0x6bcca278a1b63a6aL,0x776b7a928edd9123L,0xb2bef5076c6da5e9L, + 0x805ed3aa0e3c747bL,0x298ef458659df2efL,0x0000000000000098L }, + { 0xe3dd893935a79f8dL,0x672b109ca1972c6bL,0xad41cbae07953a4dL, + 0xde07527fd206de77L,0x1f55a4c0b61d9811L,0x929d56d57a75360eL, + 0x9f2becff5032efe6L,0x0de654c760f0622cL,0x0000000000000018L } }, + /* 2 << 210 */ + { { 0x50e39fd1ea8dc4aeL,0xc68d8b0f5fd16c88L,0xc95f46e90ced75abL, + 0x845ee3947375e952L,0x5865161d3c14b1c2L,0xe29ae3a6399431acL, + 0x02d1c6eaf0feca0eL,0x76059dad24f35b02L,0x0000000000000081L }, + { 0x29fefca13ac6a96eL,0x260a7f7d5d01c1d8L,0x63fa36033ef9812fL, + 0xab1a91c9f9404b6dL,0x32555cfda9406e88L,0x666eae1c5b63890dL, + 0x22ec92aafe9605e0L,0xde2a85103235e860L,0x00000000000000c3L } }, + /* 3 << 210 */ + { { 0x6587c0b191979247L,0xf06dc068ccf0762eL,0xc3a2b88e277351e8L, + 0x42e9494a2f607af3L,0xd1aeccc782b271dfL,0x3ac627d494648a93L, + 0xeb180b78bb62dfa0L,0x7e34653033232d06L,0x0000000000000183L }, + { 0x084e8c75e5c2357eL,0xa380334756c91ac0L,0x772e24a0015742f3L, + 
0x2610a9b247393c4aL,0xb58ced0b436a641dL,0xd55dbdc01850fb56L, + 0x64c97fc5aecf40afL,0x6de1de5d00330087L,0x000000000000009cL } }, + /* 4 << 210 */ + { { 0x2bad7686d90119aeL,0x8dcd64f0464b230cL,0x1d1eeb94dad19cb1L, + 0x3b47038986f0eb48L,0x42abb37d2c50f380L,0xfabbcebba7a99eb1L, + 0xfeed69dd33fb0b08L,0x8de8b50e31dc2b1eL,0x0000000000000057L }, + { 0xe10dc8a6054ed7a9L,0x7094fc36cf39bec8L,0x487789a825848527L, + 0x8786eaa339974124L,0x47a0d6b0a6e3e5abL,0x54ed0b21e981e517L, + 0x85cf17eae768123bL,0xd9a933b0636e78c6L,0x0000000000000127L } }, + /* 5 << 210 */ + { { 0x773317e5e965abe3L,0xe8862edea879f77eL,0x4601164bc562701aL, + 0xbeb011903d4e07b4L,0x75a41c645c19194dL,0xd86e32ca954ddfb5L, + 0xbe5ca8c991a87274L,0x034c2e1d26aacfeeL,0x00000000000001e7L }, + { 0xc2bc377abea32ea0L,0x9de44c2dbe946f97L,0x5d4e3f655c44e66dL, + 0xbaa8d970ac2805f5L,0x706fe3da518a0081L,0x2bf6f7c79bc1f747L, + 0x1b468d12759da2e6L,0x3f5a410239f61705L,0x00000000000001deL } }, + /* 6 << 210 */ + { { 0xe3acc0e88e647c26L,0xf22855fa2eeccf87L,0xf31f71d569ff841aL, + 0xa9e26b1ad8ef203aL,0xb3fd64db61ef98cbL,0x29af19d73b642e77L, + 0x94474091741591eeL,0x904e7fd077619d47L,0x00000000000001b2L }, + { 0xe4c9e0608f0e5bcdL,0x9a35f72d0905a05eL,0xd0ba9e95fa0cf7dbL, + 0x445ca1a5614f1042L,0xbbd0d5dcaf5a5318L,0x67782d6e0a41d90dL, + 0xb296381bb6a00b72L,0x57fad3940de3382cL,0x000000000000006eL } }, + /* 7 << 210 */ + { { 0x6da8acb7636d8777L,0x17555ae8d341b800L,0xb1568202f44d6da9L, + 0x08a44805f4dc644fL,0xc55f2c18e121a82aL,0x5b5aad0b568d7db6L, + 0x3ee3c48b686c7713L,0xc57cac2c6d44f289L,0x0000000000000128L }, + { 0x0f14e697858b59f0L,0x00d714b14d03cf4bL,0x709f77084de0de30L, + 0x708cd7f0c5ee4559L,0x38dd88083e9a8665L,0x0a5fb9589f83fbcbL, + 0xc224694a5c7dd1b5L,0xeb7258003626ededL,0x00000000000000ddL } }, + /* 8 << 210 */ + { { 0xd388e288cf34553bL,0xd09b7877c6d36700L,0xc3e51b059504c41bL, + 0x0bbd1313d0ef31abL,0x455759080a3e2144L,0x774bf584d4b738b8L, + 0x3f4d85bae9d97733L,0xdfea7f28f0e4a66bL,0x0000000000000172L }, + { 
0xa6b684558118ae5cL,0x7a387b8b8e41c0e2L,0x96193c31f2b2cd4fL, + 0x54d8c8934c5c7414L,0x397ba89f0ab2b85fL,0x2ab9e8f773eebea5L, + 0xf4b5eca1e8d4c927L,0x026a0b0842ac45bdL,0x00000000000000abL } }, + /* 9 << 210 */ + { { 0xc2c87545298feddbL,0x470bb0bd57824145L,0x8535201fe71f2a83L, + 0x15c5c54c117080d6L,0x128d4fe3a05e76beL,0xd4e70c4a4bf91f37L, + 0x54fe9a939826b38eL,0xd67c68802a561594L,0x000000000000011bL }, + { 0xa82826c1a97b6d7dL,0xe2da2126e413c90dL,0x2d897fea71a14c78L, + 0x507ffba7806b4541L,0xc9ce796501b35c08L,0xaced047127638092L, + 0xbcbf0b84ed0ea6ebL,0xb63a061b05d84400L,0x000000000000002bL } }, + /* 10 << 210 */ + { { 0x57e9a69316532d1aL,0xf478a1fbc138250dL,0xf361a41cd2ac2c5bL, + 0xf25c676d2e731c53L,0x279560ad1ebc7490L,0xe8236eddc10384e9L, + 0x7441e5732d5228caL,0xd118d9328feb88d7L,0x00000000000000a0L }, + { 0xf31779e746032b98L,0x05ab63064ee8d408L,0x4e67de56b0bcef53L, + 0x762e211f843fe3a7L,0x22bf07e8774b56d0L,0x3aade5d730c2d994L, + 0xa7f2f71246c80546L,0x3ac087b22883ca5fL,0x0000000000000087L } }, + /* 11 << 210 */ + { { 0x407c5d9aeb5d82e3L,0x30a608780f78a237L,0x245819947c64f8aeL, + 0x731df3f705e01562L,0xa2e62428758cc895L,0xd211176cfb7413daL, + 0x7e6e91ec84b68277L,0xa3f3f7633b3b5b35L,0x000000000000016aL }, + { 0xb6008526ecd1f4b0L,0x713463dc81f30634L,0x6453a4f7bb65e2e0L, + 0x1a1116bc0aa7ed8bL,0x363735d5d3c34ab4L,0xf0333b9b525b97f3L, + 0x284602fc50aa6cbdL,0xa9f283df0cadf6a1L,0x0000000000000145L } }, + /* 12 << 210 */ + { { 0x4b736d32df234e4eL,0x66237d17d14bed77L,0x552baa3e4dde4942L, + 0x947169dc23756ffcL,0xe595f2646749eed0L,0x2ed82e6ee3c8ec5fL, + 0xd52dd4bc495b60a0L,0xccd7a2b340553cfcL,0x00000000000001b7L }, + { 0x9de7e73e889fbda2L,0xeb68c0d9c02a698bL,0xc433363b1510c24bL, + 0x329f6b4ee2af686fL,0x3bf95cb33cd6cbd5L,0x2c9924b339e49b52L, + 0x3a4322bb3e1ae491L,0x3efb3b85504f582eL,0x0000000000000150L } }, + /* 13 << 210 */ + { { 0x013dc2bd0444d98fL,0x4de8b097924c66e2L,0x9ec9f89623d19d49L, + 0xfe687746b87f064fL,0x246382118a91a448L,0xc67b870471e274b7L, + 
0xc4418f6008d4a531L,0x4586c1eb20ce1a9cL,0x0000000000000131L }, + { 0x7ad3cd88751ffcc1L,0x18d68df10ce22160L,0xccfeac397abea4ecL, + 0x29d9fddc2685bf7fL,0x9e6b18ff47a6efeaL,0x3b627d6f57d0e3d5L, + 0x58ce41ec7ff343f2L,0x5158919391a231faL,0x000000000000017cL } }, + /* 14 << 210 */ + { { 0x21e51a0354ff9df9L,0xbc033f12390d78abL,0x7faa458e9c43347fL, + 0x2057222fbf867c8fL,0x6f5b703c3e064f9bL,0x4e36ce8df6330e33L, + 0xfa0198f8267ee695L,0x68eaf31f89f13590L,0x0000000000000153L }, + { 0xb493ba909970b82bL,0x31e3a61197d166daL,0x0b11fb84ae14be07L, + 0x10d94ceb5fe0479eL,0x56c6f6fae10acb34L,0x46f3308f1d2441e1L, + 0x1374e4f3ec9db49bL,0xd948930f13d2533eL,0x0000000000000025L } }, + /* 15 << 210 */ + { { 0xbd79f69a17f3e079L,0x581645e9398eb5c8L,0x1164a83314fbf1e0L, + 0x578256f31c79999dL,0xc3dd57963e64e48aL,0x00207040947387efL, + 0x73a0b57ed406b606L,0xd9d3c48c970a0d17L,0x00000000000001dbL }, + { 0xde458dfe8fa49aa3L,0xe87a7a3cde431fd9L,0xc6f7d8ffcbc16172L, + 0xc105217099a5121bL,0x4a8cdcc81598125eL,0x748703e3e913bc06L, + 0x906112b45fb246a4L,0x784afb8f1a1a56f0L,0x000000000000006cL } }, + /* 16 << 210 */ + { { 0xa01f0a1936b984a3L,0x46ad91732ae5ad96L,0xbd7c413ddf12a570L, + 0x24a748c0fbe510aeL,0x27ff3748434d6671L,0x96ad6f33e1d69baaL, + 0x4b89a84a7daf1390L,0x2e25577782e527cdL,0x0000000000000129L }, + { 0x78b130d2615c0331L,0xf5a9b025fa3cef15L,0x67b99de5fea12b29L, + 0x5f5b07e21f56caceL,0x2af1ee2faeb8b26dL,0x04e82314346279b3L, + 0x0f1d88179ae70e2cL,0x85f03c614c3ea88cL,0x00000000000001fcL } }, + /* 17 << 210 */ + { { 0xbbf913425e25254eL,0xae8a194d65fa3727L,0xe2ed53b88c80987cL, + 0xe90dd4e4332d1115L,0x858c5b438276591cL,0x96af9612cf030af3L, + 0x334408b230f81b63L,0x50a0b93648908e62L,0x0000000000000156L }, + { 0xc07a9b16e44f7fbaL,0x64b736ca33fafed2L,0x701e1cb3ac0f2dabL, + 0x7d4fc722562b85a1L,0x2c86fea7d2b4d291L,0x83ee4a5d9f6970e8L, + 0x6620f63f3d8c97b0L,0xe3bca31b9ad21699L,0x0000000000000123L } }, + /* 18 << 210 */ + { { 0x92101b445ffe5dfaL,0x6b76bbf95527f907L,0x2869cfd96356a819L, + 
0xfd98991f858d1a94L,0x8d5d5864f0fd9b00L,0x625cab968f9c11e6L, + 0x0da53e4909b866b8L,0xca447b81b24cabbeL,0x0000000000000178L }, + { 0xd0ac32d58a4893cfL,0x2edd684e9a4293e3L,0xfbc5f91fc233ef37L, + 0x2516eb7f682025b2L,0xc25529e742c123aaL,0xf5da8e54c9f4f7a0L, + 0x8c3d0de074eaf57dL,0x6794c4bfccd164a2L,0x00000000000000b9L } }, + /* 19 << 210 */ + { { 0xc69f59364255c77cL,0x62a41fef78ceaf18L,0x94bd08105a8dd0a1L, + 0x7afbb99a49892f3fL,0xb9544236faf945a6L,0x28836d015e540f1cL, + 0x244e6a470bc80b13L,0xb283cf827c32d810L,0x000000000000014eL }, + { 0xf47d267a487a9f79L,0x2ad81400b00b749aL,0x7f9b103420d209bdL, + 0xfbf1adbd49d3785fL,0x8d59d9898a036103L,0xc6f4b53a089998a5L, + 0xace1e9ee478e179dL,0xa749c8aad355bf72L,0x0000000000000005L } }, + /* 20 << 210 */ + { { 0x63c835437badaa12L,0x2929fbf993d1298fL,0x22d8a8e5a6d3c271L, + 0x4361a0966cc70feaL,0x4c1698920b060761L,0x43b3a57b7b83b19aL, + 0x8e4e16ed5d1e0382L,0x1c4596c101fb83b1L,0x00000000000000d2L }, + { 0x571e0f7a2d0878bbL,0x28efbf803ba6448bL,0xc49eadd53f53ec10L, + 0x5f67c5799b6b85b6L,0x999676ed0f279676L,0x62ba191b129e0870L, + 0x5f0823b621475e41L,0x8bc381adf2c8b56bL,0x0000000000000164L } }, + /* 21 << 210 */ + { { 0x343c04685b855e62L,0xd05a781d65a9e8a0L,0xad965cc0d76b69d1L, + 0x0a60d928e3a92585L,0x8eb709dc93667a1fL,0x4612c3b7f09f0396L, + 0x76e56cdfe5ba21c3L,0xefd26ea6b6dfce55L,0x0000000000000142L }, + { 0xe8ef0107c3ea51b4L,0xc510a688a6e86ef9L,0xe1382167be6e6ff2L, + 0x6235ea21a22146a1L,0x0e75ef83dec8e173L,0xc3aa17235d276798L, + 0x81b77ebb6f964ef4L,0xb332f534b9a1daf2L,0x0000000000000062L } }, + /* 22 << 210 */ + { { 0xc71a3f48dc690e06L,0x449a246c614381b4L,0xce239cf6f0ea8df3L, + 0x2384abc79f46bb3aL,0x1c93a8ec536f0722L,0x60521fc696bdd0e7L, + 0xd3c1a0736defa22aL,0x5e2ffd99b0905245L,0x00000000000001a0L }, + { 0x865d018d4fa96567L,0x2e861bcde51f532aL,0x0512006f4808fb7dL, + 0x3b0ec5befab3cc28L,0x79df334934155d68L,0x4799a6e956e98b04L, + 0x620b5b8666630df1L,0xdc690f70a3e08672L,0x00000000000000b1L } }, + /* 23 << 210 */ + { { 
0x85d162cd106f9475L,0x44f080894ef3dae9L,0x9cf807e0bd35511bL, + 0x578df8ace663e312L,0xa653190ceeb0b989L,0x9ed6127f1ddf4b8cL, + 0x8b0c20c4f18fed73L,0xf4b71bd731359811L,0x00000000000001adL }, + { 0xc4f7a8d1c65be73aL,0xbe69a441506a8e82L,0x6d5546328fe4f097L, + 0x617fa14727ccad49L,0x1652a47ced10618eL,0x3daa7571df5f43f5L, + 0x2bd307ea683542bcL,0xef379a1097a6ae86L,0x000000000000015cL } }, + /* 24 << 210 */ + { { 0x86b14b1452e9a506L,0x06f9f838ecd178c9L,0x3d605ee53599039bL, + 0xdd3ad5a22e3df8f7L,0xbd51af94fdd6867fL,0x707034f3d45cf3daL, + 0x54edcf4a3f70a45cL,0x7b6164392979902cL,0x00000000000001aeL }, + { 0x7d63ee2728389bcdL,0x8b39bb03b8eba714L,0xfe5a3aaaa6e99d38L, + 0xaf6074ac14a97bafL,0xa15ac327aff2ed81L,0xba728133fe4fd2efL, + 0x9db04ddae93db974L,0xd50f26293a858d00L,0x0000000000000119L } }, + /* 25 << 210 */ + { { 0x5313dae6dfa56156L,0xd94f3ae7ac345c99L,0xe78fb5be35108fffL, + 0xce8566813a5e8c70L,0xa235711d5b1f6104L,0xa71be1748b87dc50L, + 0x896b4d545b8939c1L,0x8a8231c084f3ab7aL,0x00000000000000c9L }, + { 0x817bf1fb1e85d890L,0x184f61cf5d643f88L,0xcf5848918efc7679L, + 0xc9861a5c09913c4cL,0x9957437efca0ed7dL,0x26eaeb36d4e9c4a6L, + 0x62a2b4ce68bef05fL,0x35eee957532fb33dL,0x000000000000012fL } }, + /* 26 << 210 */ + { { 0x0d102216761b3f5fL,0xc20c57002b98c864L,0xf843d83e1aecabe3L, + 0x9f6208ea8ebbe2cdL,0x0a052c4a80658842L,0x10da1ddaf5022171L, + 0x75bb824b9d07cdcdL,0x13e6b38440d6c610L,0x0000000000000047L }, + { 0x9ba0732bd447fb06L,0xfaa3332e7c85db71L,0xa4b5cb19124daefbL, + 0x030a7ebf84434aeaL,0x90bb08622da1bf4aL,0x92e2162ee7273d3fL, + 0x58dad0586f9f95d6L,0x4ce76e55838aa919L,0x0000000000000099L } }, + /* 27 << 210 */ + { { 0x1ba73b76eea26fe5L,0x10311b589f6d8b56L,0x1e7165b4f59dcd15L, + 0xd84021591383a1dcL,0xe0e1eb96c044dfbbL,0xcd512afcb13d4262L, + 0x0770855a201de5aeL,0x426e52a9e44af8a8L,0x0000000000000064L }, + { 0x2b3f973f1b631b61L,0x3facac113c56e76cL,0x96d8fde961b33795L, + 0x3d15ae3c51fe1162L,0x966f56a2d095fad2L,0xe5cec26f7a7ed99bL, + 
0xc86a297d339268ffL,0xac1327d95a7d5aacL,0x000000000000010bL } }, + /* 28 << 210 */ + { { 0xea0d44c4e34e4c6aL,0x6f50243de8b02105L,0xede2b16c796c212cL, + 0x3f7169f242a087c6L,0x33fa8470a4762cdcL,0x05f65db613ffc120L, + 0x9bf29f3f4fb1c4f7L,0xef4ca47345433134L,0x00000000000000f8L }, + { 0xe59da628331968bcL,0x3fbf6bc46946b696L,0xec6ab1d7f82ef2afL, + 0xd0423e4f260bb4a9L,0x956fef511d44cbf6L,0x26f861228e5195c9L, + 0xbe8b410c5daca40aL,0x3fac01f29d08bdadL,0x0000000000000104L } }, + /* 29 << 210 */ + { { 0xd296d03e4da85b59L,0xd20dca5fd6b01747L,0x3f815ec4dc2596abL, + 0x2e8ffb20523225c2L,0xa03e3bec4c45bcbaL,0x8b8a4e375be794ddL, + 0x1e80fe190c2625e8L,0xe38cdda7ecb0a216L,0x0000000000000125L }, + { 0x78939a7c40a4f676L,0x38a302b8686cd8abL,0xf39054024d368792L, + 0x52f9befe123a4394L,0x5386d9fb555bef5bL,0xc8d8eee487e29a25L, + 0xfd789f9f36cb1b91L,0xe75f606b085c477fL,0x000000000000006dL } }, + /* 30 << 210 */ + { { 0x76c885eb91229a3aL,0x2604b6f7f73e4d9aL,0xbebdfaa988fa9a1bL, + 0x442a747cb58bb763L,0xd8fa3f9ca1ef860dL,0x719d6316f2d89cacL, + 0x39eebc32fad21ad8L,0x5c00d02d886de11cL,0x0000000000000148L }, + { 0xab82282a837d7661L,0x638d139d5cbb4510L,0xe6066d3862fc0035L, + 0xce4a49a8185cd0ebL,0x48f9a3ee3d41d802L,0x95d7f1ee067d33eeL, + 0xc5e89c814c069fc4L,0x25e2b6e5d8b300b7L,0x000000000000016dL } }, + /* 31 << 210 */ + { { 0x125cb0656fd3f45eL,0x5d373f9c2da83750L,0x596131d335c71844L, + 0x93ab568a7590cd52L,0x4296d1f04f64934eL,0x2af6028ba38c427fL, + 0x5578235374fe642fL,0x10ae3ac1911c7354L,0x0000000000000064L }, + { 0x24d13d1cdca07c95L,0xa76e8a5d5578ab0fL,0x8a3c17a4af1f99f9L, + 0xa1a2a81e49509859L,0xf7a6e870a17853faL,0xe77c3c235989a9b2L, + 0xacb43c903f2d4d84L,0x0e10eb44d3af2b00L,0x00000000000000b5L } }, + /* 32 << 210 */ + { { 0xbfecf64a8f358002L,0xa9fa4a5e9179b6f0L,0x8c1ecbec5b44cf88L, + 0xf443c158eb342a9cL,0x0eee17b6ada473cdL,0x4eb51b723af99df4L, + 0xa524e6d1e0fe8dfcL,0xedc72c8efe2a4f42L,0x0000000000000073L }, + { 0x34eb386de0cd65afL,0x387c3874cc80c3b0L,0x75ac76b751883b25L, + 
0x0f6ccbfb6b514806L,0xe85c5856867909e7L,0x61a5bc9e5a9b263cL, + 0x08b8491ac6228360L,0x6835d3592850dbc7L,0x000000000000005aL } }, + /* 33 << 210 */ + { { 0xe5a1c0f716762f99L,0x18e150a00e1ffb05L,0xd8d254b5dd30ab2eL, + 0xca9fbad497982447L,0x704cf09c072ceeb3L,0xd983772bb6dc2abfL, + 0x60d8e8589d4a88cfL,0xb7370cc99a762e11L,0x0000000000000042L }, + { 0xdd8c613394f66f49L,0x854a0a8a64840d80L,0x8b36199429e09701L, + 0x720bc46c87681a8bL,0x3e659cf95832499fL,0x639e121a3a965654L, + 0xafe73745a0e877fdL,0xdb5167d3c73ddb0fL,0x0000000000000041L } }, + /* 34 << 210 */ + { { 0x93ca4468eb54bb36L,0xc9f2df65fd1a5e06L,0xfc22174b03097b4bL, + 0x91fd9ae9c45c49f5L,0x6102dd35230002e3L,0xaea4b2e3ff6b1838L, + 0xb762468b28453764L,0xa05639a86875b179L,0x00000000000000b6L }, + { 0x406563adc60f9413L,0x61b6bb5f75616054L,0x1625fc74b653f9e0L, + 0x7ea58a6a21ca1e00L,0x6461c50537e49dedL,0x904294a956620a78L, + 0x4c01cc1dca8aa6d2L,0x3f8e60a4beb7b9c0L,0x00000000000001f5L } }, + /* 35 << 210 */ + { { 0xe65251c39be7d1b0L,0xd8e18151bc857666L,0x80d8d4670142a8f7L, + 0x411203864e50d192L,0x44e9b0d1d0683bb4L,0x6027de4339b0cd7fL, + 0x39cda0ca1570eb84L,0xe0cfbadb6ee54d8fL,0x000000000000007cL }, + { 0x613dc76cfd483908L,0x9f4c35cf97d9650eL,0x1ec7107c69d29e0eL, + 0x11de8c70c58a2a1cL,0xc1d1bd39db5936e3L,0x0645f3a73278328dL, + 0x44a30bf055cc8564L,0x918eccc88531a826L,0x00000000000001a7L } }, + /* 36 << 210 */ + { { 0xfbfa18bfa4b8ecdcL,0xdb161f18fa7c7c3dL,0x1ec53bad24a21f6cL, + 0x3e5343aaf4f6ce31L,0x8ae86fcd1b669758L,0x394b964b7befd9beL, + 0x84b25af45c60a585L,0x02ad7f6ba5888ffaL,0x00000000000000d1L }, + { 0x0b86e101c83fb0aeL,0x6e16e6e6d2b8e139L,0x29425fb60333faf0L, + 0x8d0d9ad20ceb6480L,0x9249e8d0b3e18ad7L,0xae3b935bc94d5491L, + 0x06fe7f0744260b3aL,0x1d8af88339ec64d8L,0x0000000000000158L } }, + /* 37 << 210 */ + { { 0x2e41b2ed1d704acaL,0x03c2bc0817bf101fL,0xce1bfaaa844c18f7L, + 0xb4df6b7a96909ca0L,0xbf63a091ee5e0867L,0xd4388841d246a432L, + 0x2f79892f9ad915f0L,0x32338dc06b41b066L,0x0000000000000118L }, + { 
0x78bf5ce2c53b3850L,0x989487d6540132ceL,0x3b028312158f0507L, + 0xe5505d7a61847bccL,0xbd26640fe9c5f58fL,0xea786834f46d807aL, + 0x12b9c22738c8f52aL,0xbb0394c11e8ce727L,0x000000000000006fL } }, + /* 38 << 210 */ + { { 0x5c442a83c9cff0aeL,0x6e3e75c4d44225eaL,0xf41d8d06757bd27dL, + 0xabb92c7ca16c6a26L,0xdb02bef102370af2L,0xa394000dce87ef1fL, + 0xe4d6eb87c3ac8949L,0xabff55c1682ec032L,0x0000000000000002L }, + { 0xf2439cb836f99a0cL,0x871399611f895491L,0x86e73bb2972293fcL, + 0xffb0f9c0f4587a52L,0x9158a262fcccc9b8L,0xe20f8fdc77325b64L, + 0x9691d90945464152L,0x3026f5898a043f9aL,0x0000000000000147L } }, + /* 39 << 210 */ + { { 0xd75c363aca0bb47fL,0xe06dcec09b0fc2feL,0x1d124ff2ed47c2e6L, + 0x68ae6fbf9c71dd3cL,0xd425be30410658daL,0x88b954dad343a932L, + 0x64648b8bab08b1c5L,0xf891ed517f60614eL,0x0000000000000007L }, + { 0xe06bce61aef1da1eL,0x4d8cbe4802d489fcL,0xa5cce046ba95c57bL, + 0x71410ffba132fa69L,0xa708d710840160e7L,0x7d480584f5c1605bL, + 0x31d61aa873bad916L,0xc34e2a1339c4685aL,0x0000000000000032L } }, + /* 40 << 210 */ + { { 0x97330958b61d5315L,0x9b1fed27813d91e0L,0xfc2aac10d0c63423L, + 0x4a30af61043b9728L,0x0f62b5c1ed8502d8L,0xd9575c7c3b16f0d1L, + 0x03c27db727b8c8d4L,0x3244778e33be7fadL,0x0000000000000106L }, + { 0x8537bd35641d5339L,0x81bf82ddb3c40dc8L,0x20172430409d603dL, + 0xb5da061ce2469826L,0xed42f1f0959d0f5aL,0x00dad493913111f5L, + 0x1cfddf2bf851b239L,0x9851d1e84275e8f9L,0x0000000000000051L } }, + /* 41 << 210 */ + { { 0x4855d1dba965d2c8L,0x5f30449c244e4576L,0xf1f66ddc102836a7L, + 0x8e9131a9ec4310cbL,0x0408e0751723bfb6L,0x328592dc99ea53beL, + 0x0d16c086afb2a7d0L,0x08ea2ef221079ac9L,0x00000000000001adL }, + { 0x5e9c1e1cb8a4268dL,0x22500bb2fdca442eL,0x423f0e3ccdfb9609L, + 0xb07f48f43b66dab4L,0xac3334e53a9a16b3L,0x11f147c037448161L, + 0x486506187014f21dL,0xaeecce6968960de4L,0x00000000000000c8L } }, + /* 42 << 210 */ + { { 0x819fe7e06ed79a0fL,0x784ceef609f8e8a1L,0x57d5b1bfcb22dff7L, + 0xc4f56fc69ae9661eL,0xbdd03823162750f5L,0x92bb9df3f146dabbL, + 
0xee6ff7d7605cee53L,0xcf0c1d6c6350f6ccL,0x00000000000001f9L }, + { 0x1c9c29e313fb9416L,0x60aac35195ce11d9L,0x7f9a4b110ffd00b2L, + 0xc81e3235f6f182b6L,0x398950bd16cf3c7aL,0x5b69da1297a24847L, + 0x642f9f183df06a74L,0xd8879543ae3dc599L,0x0000000000000184L } }, + /* 43 << 210 */ + { { 0xc62e4a9bccf48be5L,0xb6e83e2b8c35dbceL,0x0191a9c69385a75bL, + 0x8db45f037f8a7eb3L,0xc05fa5da716b86b4L,0x85aff148ef42c3eaL, + 0x0bdc01682db38738L,0x93337937d776daebL,0x00000000000000feL }, + { 0xa17b9fd4403afd34L,0x7b09c077646aff94L,0x90597e2d8dc6a08aL, + 0x744099e0183f3e3eL,0x1c5fb306071fb772L,0x92b8d6a4d997fb7aL, + 0x0de13d6454bb10dfL,0xf31dfa573608964fL,0x000000000000015dL } }, + /* 44 << 210 */ + { { 0x7f7941d9888f4bfaL,0xc4f99685476bc929L,0x3f73018d8e853ce0L, + 0x68a86f6134fdf8e9L,0x7c88b0312a6fb200L,0x3bab6755f6911821L, + 0x06b84c1788917fc6L,0xa9f91561f5254039L,0x00000000000000dbL }, + { 0x76c522936dd119d8L,0x6571dd2beb1cb340L,0xc66e8c823a33b42bL, + 0xaf289ebefc43ddfcL,0x203bba75ed2b085bL,0xdb1d86048176f169L, + 0xf762fbe0d91afe68L,0x7dd617af2026cf1cL,0x000000000000005cL } }, + /* 45 << 210 */ + { { 0xf92aaa90b3eec59aL,0xbffbac4cb98fa157L,0x4a6eda99c95c081bL, + 0x31c0fde9dbbc33a6L,0x9ad4e5b9ab1887edL,0x580110e38983bb1eL, + 0xd7156bd5507d9979L,0xe03e75829ecccea1L,0x000000000000002cL }, + { 0x672fb2c3fcff6b71L,0x7d73a95202c0fcacL,0x6217982052b713ffL, + 0xcdaabc9d8923912dL,0xf0d3eb3a651247bdL,0x5484c30ba50c00cdL, + 0xa9f1173c03bba882L,0xbe7465af19a18e5fL,0x00000000000000feL } }, + /* 46 << 210 */ + { { 0x88fcfb06d58a4ee0L,0x00a415e2c9be2df9L,0x84e1ba6b22d03ed3L, + 0x51cd83af2567aa47L,0xef6aac434fda2455L,0x2a32b1e255ea0dd2L, + 0x013e1751228720f4L,0x57afcdce7bdbd78dL,0x00000000000000b5L }, + { 0x094f8441471c7d66L,0xac4896c544539ae3L,0xe82e74eb5776831eL, + 0x21087391291c65cdL,0xd340f5ed3160a6b1L,0x85bbe11244d404aeL, + 0xca35de4823d823a2L,0xc20e39e406df497cL,0x000000000000011dL } }, + /* 47 << 210 */ + { { 0xfc68fe96d1751282L,0x8a1f297e5936ef4cL,0x4fc2f368e07ac5feL, + 
0x5ef7311f109bf107L,0x6fbed28144342ef3L,0xf825e7ea55138f1fL, + 0x2c91032471e64888L,0xac2d0335ae2f2246L,0x00000000000001aeL }, + { 0x9fbad98320933d06L,0xea69d15035c00e2dL,0x559df842a04206ecL, + 0x2a9cc878d54938e9L,0x16e14d674408551eL,0x022ec0d86db7e02aL, + 0x54cacfa9f1bcbfb4L,0x25095d1626646fcaL,0x0000000000000181L } }, + /* 48 << 210 */ + { { 0xf147f1a2e7e50e60L,0x1309c8a5e30cad85L,0x565c5e69621ed64aL, + 0xbe5a64757938c494L,0xe56315e5d7792054L,0x64855cfdd97f771cL, + 0xe95a4b9e599e2f5dL,0xef1c8a857b06d720L,0x0000000000000015L }, + { 0x2331c4282a92c2f1L,0xe155a823f37e82a7L,0xb47cfc2976020919L, + 0x6131a11ffe98e8e0L,0x965adc12c0be6be5L,0xf773d72c153cccbdL, + 0x10d64e59d8ed6127L,0xfa7d60258b5ec467L,0x0000000000000063L } }, + /* 49 << 210 */ + { { 0x51c2bdba271b6102L,0x965c59b90e9a8af4L,0x0a54e4d7610fb124L, + 0xed96a5b2a543ff6eL,0xbab9d37243265ab1L,0xe63366bd24cf6541L, + 0x5fcabfec17413aecL,0x6ba1941437041b8eL,0x000000000000009aL }, + { 0xc746f20dfcc3cfccL,0x2b0937465e1dfaecL,0x2e75151e14285929L, + 0xb4ef90301b96cff8L,0x54cbbef998c89aeeL,0xcc53de423a028272L, + 0x8373e2805f57ba4bL,0x551c46b3b1498773L,0x000000000000008dL } }, + /* 50 << 210 */ + { { 0x1c238ed06300acf7L,0x8eb48c65bcf1125cL,0x65b5c0cb231365bfL, + 0xd9d40c2683d76f5dL,0x7dbe2462ae3872f0L,0x4262f282de107d81L, + 0x48bf1e05b21512ceL,0x27e6ff2817212e1bL,0x0000000000000184L }, + { 0xb39aedb29b451cb5L,0xe96de26a84a29aeeL,0x1a3154cc9725d43fL, + 0x587e1cf56d8be1fdL,0x840560b633539bd1L,0xd4b6a7c4663d836aL, + 0x4203e22eb3f669b5L,0x02be0e3cf18b437cL,0x0000000000000095L } }, + /* 51 << 210 */ + { { 0xc1f65b0d9dc37e45L,0x89275a9136b7d53bL,0x2949c4982e971c79L, + 0x8c04749a1a538febL,0xcc9b6fa02cc328a8L,0xe22336ea602cfc59L, + 0x47f83e70f41f9ba2L,0xbbfd03212cbd1052L,0x00000000000001cfL }, + { 0x4711443cede128e2L,0xea12ed3f30c82c13L,0x17f83633829d3106L, + 0xf1202f38f5b5bc62L,0x5850efbc562fc338L,0x07fa11815e423b47L, + 0x9a419bae32459cabL,0x8add9587ec4146c5L,0x00000000000001a4L } }, + /* 52 << 210 */ + { { 
0xfe4cd502f1b7c167L,0x679fadc6dab47f91L,0x1bf2bb513037e209L, + 0xd9ea6d92822e8a02L,0x7a2a77d530ccb948L,0x6fde628029a7a46dL, + 0x8b71cb731bd2bdc9L,0xaf8d591ca3c0ca96L,0x000000000000004aL }, + { 0x63fcc569d852c164L,0xca7202764f07d6f0L,0xa7e6e11ee7dc1581L, + 0x325d3921440980ccL,0x098bd53a148a56a4L,0x4eea0c37ebc5e005L, + 0x89434ac1b9773a5cL,0x97bd03a875cae957L,0x000000000000017eL } }, + /* 53 << 210 */ + { { 0x00d9070caf1910dcL,0xc617cd48dcfbd522L,0xb533991595cfa421L, + 0xbed0e54c1c7f6141L,0x5fc82c432e31d64eL,0x4c95d5c841370bb0L, + 0x384401ba1dc077b7L,0xe88940db1b711f2cL,0x000000000000017dL }, + { 0x60e0ccb946cd94eeL,0xfbc8e63f705dad8bL,0xad2f8d2430990961L, + 0xd6724b3aa578ad6eL,0x8b11d8339c176f25L,0x12e79dbbbf8e3ac8L, + 0xe4a943d7fd9940cbL,0xc888a09a909e8fe4L,0x0000000000000122L } }, + /* 54 << 210 */ + { { 0x6109ea2de8fb7694L,0x3ec1e872086093e4L,0x978e96c0f4225f3aL, + 0x2695c6c91952ecdcL,0x728be492d83757cdL,0xc810df7023ac63eaL, + 0x9485d432d730ebadL,0x86e4d8518f91edcaL,0x0000000000000009L }, + { 0x1a3378469ea9e56bL,0xa27a55a28a9debbaL,0xb23af748c0f66a79L, + 0x5d8b134dd11bc92dL,0x00567e5825e02569L,0x4e55a9c941991079L, + 0x4ddebd3f77d6ad31L,0x6933b46e536f3d79L,0x00000000000000ccL } }, + /* 55 << 210 */ + { { 0x812a9b2184f5a0c8L,0x33d0143243a13fc4L,0x9e71afb2efaf3fc7L, + 0x1663885a0884b6c4L,0x97be8fad8c8fcea3L,0x8a3eed05a8e13913L, + 0xb2262ce22e54b510L,0xfe9acf7eba246055L,0x000000000000003cL }, + { 0xa302ad0e8b89f53aL,0xa73ed70070b8d772L,0x9e311983c5c37f58L, + 0x01e6b111978870fdL,0xd584e4e06afd83b2L,0x70707df88e6f511cL, + 0xfe0812356f6fab16L,0xc476c66841571d07L,0x00000000000001d6L } }, + /* 56 << 210 */ + { { 0x442afc6dddf45a12L,0x91e285345c5e6114L,0x29813a938589fa9dL, + 0xeb71194671a4ae9dL,0x5dcd8adc9044306aL,0xaedcb84bf9b52561L, + 0xc21e290a402e75f2L,0x70510bcaf6550a52L,0x00000000000001d3L }, + { 0xde9843e884f2532aL,0xa14af3a6f79d325fL,0xf6906268bb1868bfL, + 0x8ac9b2825fb4a6caL,0x6c5dedcd4676cd53L,0x149996604f339636L, + 
0xde2a17658d7b427dL,0xe8c56a66710567a7L,0x00000000000001f1L } }, + /* 57 << 210 */ + { { 0xa8ec1a8d2b835d07L,0x8c8dfa457ecde602L,0xf08edd6c0d0246cfL, + 0xf33a88675d9bc1adL,0x39c5d909b8675ea3L,0x282a2d848a96fc50L, + 0x019ee082d2d27a32L,0xfcf2f31ba5e0dcc9L,0x000000000000003eL }, + { 0x6a0ed3b9c1f6a4afL,0xdb83b0612c7730e0L,0x169a96abfe5cb03eL, + 0xad5ae2df0f055afeL,0x50fa3820d27ed7d3L,0x050484f10e2b1595L, + 0xd07a515e63ebf720L,0x406f5d0cd68d846fL,0x00000000000001bbL } }, + /* 58 << 210 */ + { { 0x04d02219db404261L,0x85b9c71ed3c9e7d3L,0x52acbf57505dee08L, + 0x9e35ee60c4e1ad8fL,0xd4c0ce77194c1493L,0xfadfd42351103eeaL, + 0x11403891926541b3L,0x25c4c172eb4f082cL,0x00000000000000cfL }, + { 0x1f7c31b8ac3bd930L,0x88a8cccb65d3a485L,0xe081d0a295e69145L, + 0xc513ae2e9804d14eL,0xf7b6aa084dcb993aL,0x47de6f4d191aee26L, + 0x42b005f56011eb61L,0x05f03ae3f7e525beL,0x000000000000015cL } }, + /* 59 << 210 */ + { { 0x0f37890e7c825579L,0x4fd5d272b3c0d6d7L,0x9a299db6ddb4405aL, + 0x0730a9825deb2939L,0x28d8bac57c904d7eL,0xa71c627fe6a7919dL, + 0x52f1b334b49c2af2L,0xef7414134b512f97L,0x000000000000000eL }, + { 0xfedb9bfd92b96ec9L,0x613a1e5742d22f35L,0xdf9ee8d6b9175ebbL, + 0x085d38923a0ce558L,0xb7207bf1f3f089c9L,0x31660368ded15db0L, + 0x4ea4ebce0beecacdL,0xcaab38cc88eaeb88L,0x0000000000000159L } }, + /* 60 << 210 */ + { { 0xa10cda065dc2aa4aL,0x89d874e605ad6cbeL,0x7d51bfd5d061da12L, + 0x457d656c376141d4L,0x36ced50a1a715c73L,0x252b0b021d27223aL, + 0x053aef6fe2d6e01eL,0x8096c5e332716027L,0x000000000000013fL }, + { 0x1864a030cd3635deL,0x5aa25da659cc5701L,0x8af63026ff2bcdd9L, + 0xebdb7f1717c277ffL,0xf94d8299758b7979L,0x38025e0009bd3eb9L, + 0xf7c4c29e5bd5c30aL,0x202201c752f90b3bL,0x00000000000001d5L } }, + /* 61 << 210 */ + { { 0x1af17d6b9a9a68f2L,0x1922895850d8202eL,0x5f26f6141d379566L, + 0x8842f3f0ba041c62L,0x856e05ae211c23dbL,0x7fd5b4c8db979091L, + 0x61f3b3740064021dL,0xee2403c789416b44L,0x00000000000000a6L }, + { 0xf52f661156a4b455L,0xc3da69834dd9ba22L,0x58cee2db445eb941L, + 
0x20f0bd4632a7bbb7L,0x212b03f7ea3e0325L,0x810b632b68653bd0L, + 0xf7a4c209c6cb33d9L,0x2f5c990c6c3facffL,0x000000000000001aL } }, + /* 62 << 210 */ + { { 0x6b3cf500b77b2655L,0xc203e28b60b8b5ffL,0xf19e8ec597cbcd7eL, + 0xb1cfa6bb6369ebc0L,0x4bcee60766936b2fL,0x49c8783749743037L, + 0xfebdbbae374df175L,0x6db093b69f79968eL,0x00000000000001daL }, + { 0x6dd1ddbd88f766e4L,0x91fd01c5e8ee248bL,0x5837f9b63c727a7fL, + 0x336706e3393e3990L,0xe1e10fe46d43c4f7L,0x20ca5f27d6266f87L, + 0x7f2db9d9c2c42304L,0x3f3536bedaad6d40L,0x00000000000000e7L } }, + /* 63 << 210 */ + { { 0x1c8bb48681b82b0bL,0xddb9f1979b95474dL,0xa53b69cef11441e1L, + 0xcc7971f5218104ffL,0x40e07e762fd8166dL,0xd7f1d93ac9d44c2cL, + 0x3322abe434b2fcadL,0x7d0aab3a9bcb07b0L,0x00000000000001c7L }, + { 0x60db8e39682100afL,0x747095769a4ff812L,0x93df55e1c3eef35fL, + 0x6ce66fa8b82acf43L,0x3b3b298818e25e0aL,0xc288d46523740df4L, + 0x824c5d4cecf599f6L,0xae5bb7388de7b001L,0x000000000000001cL } }, + /* 64 << 210 */ + { { 0xec9a8956f6d20ecaL,0xeda0a24d684e0169L,0xa55ae258e583c40aL, + 0xbe8c6286455d3ce1L,0x05bf63132096ae62L,0xf68e024920b48bdeL, + 0x1b6595bc4d99cd14L,0x0e740830774958beL,0x0000000000000030L }, + { 0x3241b0df9fd73274L,0x7b6532398fe5a5ddL,0x2d4dd56e38412a8dL, + 0xe2925f33cc58a62aL,0xb859ea32f5cee12bL,0x5af7c4027c25d1d2L, + 0xe172cec16f196e55L,0x34815491de00641cL,0x000000000000010bL } }, + /* 0 << 217 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 217 */ + { { 0x0869ed35fa226aeaL,0xcbc7bd56645f4485L,0x611101b13e6c3d97L, + 0xa1550183f7a56f83L,0xbe209b3c80a6fcd4L,0x0e2c9da9ea3beaffL, + 0x0fc7a16f14d9f7ceL,0xeb8a5e147a1e6db1L,0x0000000000000047L }, + { 0x55a78c253b0fb32cL,0x449a7852a430d4c9L,0x1383a88a412509f0L, + 0xe895d858329af78dL,0x14afc01cf8d65a07L,0x89303bc5193d67e3L, + 0x42b20c9716fbcdf5L,0x79fe69660ee012aaL,0x00000000000000d9L } }, + /* 2 << 217 */ + { { 0x7480912f507ce79eL,0xf2251da23a1ea21fL,0xb925aadbfbc957f9L, + 
0x79a66979f4548e12L,0x9200714b6de9a9feL,0x779792bbee62d344L, + 0xeed3d08357501cccL,0x51ecec8b0ef8c948L,0x000000000000014cL }, + { 0x734a634b05271e42L,0x70e28c86643cdb88L,0x849fc7dd0c24a7eeL, + 0xcec60426b20506a4L,0x6ee05fce35655c14L,0x79ca2be8c072d950L, + 0x6c473203105c09efL,0xd9698b133dbf27b2L,0x0000000000000010L } }, + /* 3 << 217 */ + { { 0x15079b9d3a7de86eL,0x97e07c04354faf62L,0x045f48f34280e300L, + 0x9a5fd235bd9ea013L,0x3f75a31f8760c3d8L,0x18aec5ffd05d488fL, + 0xccfb484b617daf6eL,0xf33084debffab914L,0x000000000000015eL }, + { 0x1c034f37dd5bc9b0L,0x8e2bedaf882c69a5L,0x6789f307a61afc24L, + 0x8540d05dc57b651cL,0x7214914319f83ea8L,0x57af134d962c6ed0L, + 0xcd059751d98b1089L,0x08ad5d1d42e7077aL,0x00000000000000dbL } }, + /* 4 << 217 */ + { { 0x163cd52464d8c387L,0x0cfe9416cb029390L,0xce6b5dab9e9ec846L, + 0x96a78639e9ba4592L,0xa78e42968f174287L,0xe5d09a7637763d66L, + 0x9f47acf6b54f368dL,0x8e63481b6a0fa894L,0x0000000000000026L }, + { 0xe1663ca805c8bab5L,0x99bf02aaca0af033L,0xfb1c0c7faa1d7c42L, + 0x05ac75b011bdcc7dL,0xbfa1dc7399d86019L,0x8f11d0a4937cfa73L, + 0xf4795bd7d24e356dL,0x7c1f3dc478b4e4ffL,0x000000000000014cL } }, + /* 5 << 217 */ + { { 0x3dfeef486fd25746L,0x5940add47d758bd3L,0x9f08c453d26de538L, + 0xbc7dd28b040d20e4L,0x92cebd97aab4420fL,0x79f4bb203337466eL, + 0xa72fae2ecb132a17L,0xb394a9c29a59e4c7L,0x00000000000001f8L }, + { 0x6146396eb48c979cL,0x24b453db99a310e3L,0x7e3917a871967c56L, + 0x3be40e190e539ed0L,0xc357bac87206739fL,0x8f8c1d8229b17828L, + 0x06f0ea1f949db344L,0x922daa8467ea4ce3L,0x000000000000013bL } }, + /* 6 << 217 */ + { { 0xe2473fe401d47863L,0xdf01581ba1940ffcL,0xa35005ef71f1311bL, + 0x647d568ca5eb4e66L,0xf1dfe4475da1c32bL,0x05d5c8b5ba555b7bL, + 0x2584842719302166L,0x5bae9fdf71dcfa1aL,0x0000000000000184L }, + { 0x531661b422bc00eaL,0x3db39e0ce55a5757L,0x1cdac297fca4fbfcL, + 0x1dd0012bb1d24a8fL,0x52ae5759714bc921L,0x016aea1fe8481e89L, + 0xf34842d1254d5d7eL,0xeb75ac8af21fe811L,0x00000000000000f0L } }, + /* 7 << 217 */ + { { 
0x27a66dd1e4c74b72L,0x9942ada0c51b39c7L,0xaa53c8461a3166bdL, + 0xc987efb7b9d48025L,0x991f01bd361c98d0L,0x5bfb025e3b09f9e1L, + 0xab478eb54514704bL,0x784f1a8de0c6dd62L,0x0000000000000031L }, + { 0x42757f2cc630dfe5L,0xead6fb443f3e6723L,0xe8128bbe1a1c8b83L, + 0xe35a9d2fab681466L,0xbf9be668356c2595L,0x0e3cf40f99e2b833L, + 0xfee57f088ea10a4bL,0x207da69ed45302c9L,0x000000000000017bL } }, + /* 8 << 217 */ + { { 0x36cdc3e624acb1faL,0xf593d32d6c6d43adL,0x766c2fcd373c8e18L, + 0x0a1518363d12b7f3L,0x924427f4b4234a24L,0xaab877852335ff44L, + 0x9e5811897c362011L,0xd86f8fba5ee724dbL,0x0000000000000184L }, + { 0x25f04ed9c9558569L,0x0a315324ef9562c5L,0x65a02a289ccefb92L, + 0xb2429dd488115608L,0xc1714c93641d646bL,0xba4d369148bb144aL, + 0x48ec2d9d9efa3a6fL,0x5ff6a4fbf1a93ae3L,0x00000000000001edL } }, + /* 9 << 217 */ + { { 0x6a3f7799955cae01L,0x8fedb6b1e4f38ce0L,0xdb6107f34e85864eL, + 0x03ade49547410da3L,0xddcbf5190038d029L,0xf0f29d5fe5d4d46aL, + 0xae17f5923b27f562L,0xf29afa56e7c3f779L,0x000000000000001aL }, + { 0xd5026e725d8adb02L,0xa06db93aec16acd0L,0x83acfde18042236aL, + 0xc38abf4033ae9809L,0x22fe6dfbdca5d7eaL,0x383a9fa5a2c8db99L, + 0x6d9ccce5d334a39dL,0x7861d1e9359da075L,0x0000000000000120L } }, + /* 10 << 217 */ + { { 0x8fec394bb678283aL,0x24675c5b80934192L,0xe910c009495ca6ccL, + 0xe27707e02e53b765L,0x804e24f1a4ea631dL,0x3f919eb5058db7caL, + 0xa5010f37cbbf29eaL,0x758bd2cc225ee400L,0x0000000000000019L }, + { 0x52fd1ce51535d519L,0x2b1decdc28e1db07L,0xe076dbeeb4b9815aL, + 0x670c75e2f9f5ad5dL,0xc9fc07876a78d26dL,0x52951cf34ae31294L, + 0xe318cad8e793634fL,0x0e9242886e01fcb2L,0x000000000000015aL } }, + /* 11 << 217 */ + { { 0x892b3b400cfb1d04L,0x3bea12122b96b4b2L,0x70f73b359c1f649cL, + 0xcae6ffb47e679dacL,0x96b8d2aec55695fcL,0x0050271f424e84bfL, + 0x8650dd5119533f6bL,0xc383e0b7571d0e67L,0x0000000000000042L }, + { 0x3b0bfdab1f7cfd38L,0xea8f9b107b7ec7d6L,0x43139c4e0919083aL, + 0xd69f6d790fe910eaL,0x094221d3e7ba5fb2L,0x4f374df8f556a667L, + 
0x328a7762ed30e6dbL,0xfc57fcee326a62bdL,0x000000000000008dL } }, + /* 12 << 217 */ + { { 0x4c7be70cc22a178bL,0x396b7df601209f9bL,0x2816070b60b48d09L, + 0x0995b11e9d11e63dL,0xde65c7d35ac5b1b2L,0x29c06ce1dda415b0L, + 0xf95189d9abdb408fL,0xb6f3dde38bbf2454L,0x00000000000000cbL }, + { 0xbd38febf8dc44542L,0x2c9d18903f962af0L,0x34cea4d76e93a735L, + 0xf63e364df81f25ebL,0xad8d7b89df0cdfdcL,0x126ac26d730e8a2fL, + 0x434700f896d59c8dL,0x1aa5875eb3da4bcbL,0x0000000000000139L } }, + /* 13 << 217 */ + { { 0x2598aa4de58417d7L,0x73315ae4ba28fc1dL,0x5517268c46243119L, + 0xc4974c53b719bdfcL,0x6100c44e189538d8L,0x9db41e3800cd674aL, + 0x18d5dd5009fe8bf3L,0x4e55b10393ed2b3aL,0x00000000000001f2L }, + { 0x6bdabaafa06be83fL,0x7521de13204a1caeL,0x1a330aaabbfcf7aaL, + 0x7d263940f4260f0dL,0x9f8f411dec87d8bfL,0x7a6f66562f9d628eL, + 0x0ffef527076cad32L,0x1702ce89176beb62L,0x00000000000000a9L } }, + /* 14 << 217 */ + { { 0x52965a07e1c091b3L,0xc19999534000c8f5L,0xb46cb0961b5972d1L, + 0x4a308aa918f8f6f4L,0x3cee84ce606ccca8L,0x486fcae6a169fc9eL, + 0x5b19c4bb5b7288ccL,0xa20b1b3b9004638dL,0x000000000000005aL }, + { 0xc63c9bbd9e2f0f41L,0x23cb2e54e409e31cL,0x7da5e7c8549c6dbaL, + 0xa5c2d1b958a734a6L,0x710b2953b6fb8021L,0x17c27048b5adc912L, + 0xc775ab78900f7c18L,0xc77a1a632b014e3aL,0x00000000000000e2L } }, + /* 15 << 217 */ + { { 0xe77fd781f0aa0c3dL,0xfab5faadce9dd050L,0xdada6c655657f21fL, + 0x783b1101e768bcbfL,0x6589af44e3340018L,0xcfb592b9da65a6a7L, + 0x7bd5567297e1f3a0L,0x88785334ffb1fd45L,0x0000000000000119L }, + { 0xa63a0a55b5cce8f0L,0x6f3064b4eac3c4bbL,0x268dfc16c3931cf1L, + 0x2e70af6b4661d0dfL,0x1d3463d47cf9a2d9L,0x06221386427a35c3L, + 0x0fcc93fc18f7dd40L,0xe1df83f19e78e3a1L,0x00000000000001deL } }, + /* 16 << 217 */ + { { 0x8661728eb584af4eL,0x954bb8753a25e264L,0xc839860915be119dL, + 0x06a54d7f989fb866L,0x27b590ede0375bc8L,0xc394a6b7afbf60ddL, + 0x32cb71643a892981L,0x61b46241cae4336aL,0x00000000000000d3L }, + { 0xa2e80eed970af48dL,0x087c2e6320f95e36L,0x82956a6eee080071L, + 
0x933a2568f9fc2967L,0xaa2b21a31b5430b1L,0xb1efdf09b6502fa8L, + 0x56b2b8b50a897789L,0x3a6f9fda663dc2eaL,0x00000000000001e4L } }, + /* 17 << 217 */ + { { 0x6a53c0b0f4e4f6bdL,0xbed51fa0b3339216L,0x9e1c31446ae0a294L, + 0xe8f9136d0186061aL,0x6f0f313f188527fbL,0xc8b712bfe45aa9f5L, + 0x6a4edc9711290895L,0x8f5a9d9328245576L,0x00000000000001a7L }, + { 0x77e9f20e207e93acL,0xf2aac110e8d7f6ecL,0x6d098ccf619f7771L, + 0x7cad915e8ce10c43L,0xfd880f1671cec05eL,0x1c2925678cf46b99L, + 0xfc5d2f1c5226011bL,0x157e69596cc74eeaL,0x00000000000000f3L } }, + /* 18 << 217 */ + { { 0xbc6a400038abf795L,0xee50c23a9287efd7L,0x2e49103a9a5fe5c8L, + 0x932f55e7ec47bc0dL,0xfb9d8ba5a1e42656L,0x14bc1a30bfec520dL, + 0x0e22436e2c51ebdfL,0xb45f409c227f0203L,0x00000000000001dbL }, + { 0xdb059428e549200dL,0x944be2239a2f5fa0L,0xb50e8d34ddb8f268L, + 0xcf19eea782193454L,0x89965b1de0c72633L,0x6d5dd72287cb4786L, + 0x87dfc6f877b57b33L,0xa3814dd5d3b6507bL,0x00000000000001b0L } }, + /* 19 << 217 */ + { { 0xd3422f5b02e8723fL,0x77b65a704b483835L,0xfb8db8f5e6c4fa44L, + 0x178119d69bca2cbaL,0xd3011b68fb7c2516L,0xb51eb8ed7b2422d2L, + 0x638d850b4fbdfe7aL,0xef478553a4a2cc4eL,0x000000000000006dL }, + { 0xcc1b4cbab66d461aL,0x85209539ab0614a7L,0x9284a459d7b45e90L, + 0x6261939b3d6b31c4L,0xd440f1eca261a369L,0x8b9f30274347957bL, + 0xeb40b1e9593ca13cL,0xe4bc3937f76f36a2L,0x00000000000000f8L } }, + /* 20 << 217 */ + { { 0x8979880e76ef73cdL,0x6b6723b19811ce62L,0x76b79fd3ebf26fb6L, + 0x485a962d3a38e353L,0x55df05bbf650782aL,0x2708cce29010a481L, + 0x55a55950611af69cL,0xdcb0ff444d195963L,0x0000000000000012L }, + { 0x02313a03b53e1297L,0x0496acc4d53e4561L,0x71dc5bb477fff348L, + 0xb3a90b9a3497b65dL,0x1cc2d09ab0eb0550L,0x6d92c629525a2a0dL, + 0xacf38f9941e776c6L,0xdde54e8b76ce2676L,0x0000000000000040L } }, + /* 21 << 217 */ + { { 0x75be2409fd243c58L,0xfb1f463bdac51e3fL,0x509862194d0a4586L, + 0x36bd9a9e93c8ee2fL,0xeef97f6992bc64f0L,0xa78103bf1086dfc5L, + 0x88aa88434e424ed1L,0x0a5b655e99ba7ee6L,0x000000000000009cL }, + { 
0x249c1f37a7bff9c2L,0x524a308b3e8c734aL,0xbe9e7a1c4cca4101L, + 0xbfe5d60006384eb3L,0xb7d6b88ba7870afeL,0x9298e013c329ae1bL, + 0xf209c57749fa728aL,0xf266f26d14fa6743L,0x00000000000001f7L } }, + /* 22 << 217 */ + { { 0xecaa972f97a6c375L,0xaa67099f3c5aa416L,0x5e4734dc5f9037b4L, + 0x5a71f4d8feaac9beL,0xb66122ce9fd22e94L,0xd09f8f79796500a9L, + 0xb6e9a33d09e807a9L,0x44a1b6f515b33f25L,0x00000000000000feL }, + { 0xd1c9abc3752c8f9eL,0x9b55269a8dfff422L,0xedfdfd976e33a411L, + 0x624051be5fc39babL,0x09b5446a79090558L,0x59df039c097b7956L, + 0xbc79ccb2f4c7668cL,0x04b2b4066bda888cL,0x0000000000000157L } }, + /* 23 << 217 */ + { { 0x01657adf2b32a82cL,0xa562f4cf4f956c2fL,0x84ad34232910648fL, + 0x9ea4e24d2bbdbd02L,0xdead6b9a4da91c2aL,0x36d5efe14c04f787L, + 0x7fba5358fe4dddb1L,0xa6479901f366a73fL,0x000000000000018dL }, + { 0x3feba9211c557b81L,0x8a7d55ab07441493L,0x2494fea073a3361bL, + 0xe0b259a20c4b1040L,0xb916142e58b9d4d9L,0xbc680954f8a4db27L, + 0x5cd9b153e5640dffL,0xf4a05f27ae9b5981L,0x000000000000009bL } }, + /* 24 << 217 */ + { { 0x8b2f31a02c825d50L,0x5fa4a38e0b3dd67aL,0xcc07eafedd220e0fL, + 0x3a8496f853666cd5L,0xf1e8b23e116da23eL,0xf48de9f41631080dL, + 0xcc3274454139c8dbL,0x3270ce0a1dbe7c3dL,0x00000000000001c6L }, + { 0x210895785054f6b3L,0x8295ec2dd94bf609L,0x1922bdba854941f4L, + 0xa95eb4de357020e9L,0xd15fe663df0d7747L,0x89ca6db1ff86b9ceL, + 0xc3bbaadc4f1a6d6dL,0xdeab3f658a0c471eL,0x00000000000001bcL } }, + /* 25 << 217 */ + { { 0x52c4c94bafa6c8a4L,0xcccdaac638d082d5L,0x6756e588603919fcL, + 0x46e53d034f729c3eL,0x79a9a3ba3642d064L,0xf7f9370b6e781f51L, + 0x871f38423fc72274L,0xa2be2d7f631fa996L,0x0000000000000169L }, + { 0x33d1dbbdca0f5b6dL,0xfa5e0a4020137338L,0xe03314dea4fd1455L, + 0x15c0dcb0e68a4ad0L,0x5c99e48ccf09b744L,0x80be1812142f6b65L, + 0x13aa8e51ca2e9df6L,0x7a1772606a44f01dL,0x000000000000002cL } }, + /* 26 << 217 */ + { { 0x600b48b92806f212L,0x24f49b0b9ce93a73L,0xa61905f11fa33a53L, + 0xd8112bb3e9c9f4ffL,0x668461e5cd923562L,0x6a04eb9c7c8fd814L, + 
0xcfebf8b2aa0b7176L,0xf5aeff5e9fdacbcbL,0x0000000000000089L }, + { 0x6a4e78ccb73a9977L,0xff5464997a7fe914L,0x9a3851b2e4e9e8acL, + 0xece7462ca3e78784L,0xa3a59ea640b86efbL,0x7247b5df42bac9ceL, + 0x0807058cc4751049L,0x0f02c3bc79e24b3cL,0x0000000000000120L } }, + /* 27 << 217 */ + { { 0xaaa07644734ec473L,0xd85e99103b702814L,0x21a498758173c202L, + 0x2c890717cb3f4b5bL,0x33c149506922f4aeL,0x124c5e49ad484e7dL, + 0x611d7a967d9e2082L,0x7df895594d15f0b6L,0x0000000000000098L }, + { 0x2e345ed3a5039457L,0x34947b7cce38e17cL,0x8fd52a5e30f99d29L, + 0x4f8f3001c0b186f0L,0x2581c68bc7b4641aL,0x5006916be7dbbd07L, + 0x2f2495f23e744e9bL,0x449cc2d31db7cbc1L,0x000000000000013bL } }, + /* 28 << 217 */ + { { 0xb30ff36d7eac7e6cL,0x388bbe38ee0df2a8L,0xf2398b6858bf4ee6L, + 0xc7af5233c8a3f768L,0xac4dd8d6f880b20eL,0xc09ce5090735320eL, + 0x50ad570da58d9c39L,0xadc538325bc36a0fL,0x0000000000000021L }, + { 0x177653737fa3650eL,0xd8e95e4f742687e5L,0x3209d3007af39d87L, + 0x67bc05f8a5910ecbL,0xc3af4f238d00bc76L,0xd314a1fac6edf5f9L, + 0x41cf7b7ee3a65fdfL,0xe40089715a11eaacL,0x00000000000001c1L } }, + /* 29 << 217 */ + { { 0x0c838f9887c8edecL,0xe5dd51181e7c478fL,0x2ff66c0512dc028cL, + 0xac5ad1a801931b29L,0x329df33f2a15b905L,0xf092020568a1a962L, + 0xebc983869ed7766fL,0x6f88971bad265df9L,0x00000000000001b6L }, + { 0xa06e9c10c9c14848L,0xcafd6d9fe8e7e01dL,0x77ea7583859a3229L, + 0x934d04af8688d9faL,0x35e7ddcd558f96b2L,0x6961dfc8332509caL, + 0xceeb4359f0cac525L,0xbb84703578878919L,0x00000000000001e1L } }, + /* 30 << 217 */ + { { 0x4348c0d62208ea16L,0xa6b7c867828ecfa3L,0x5e6e24f8bf2263f3L, + 0x44a63dcbd3a6da5bL,0xe19a8335346f0280L,0x92f45f0a44192c6fL, + 0x2683e6fff5c5e5e3L,0x687acfae77d82944L,0x000000000000011aL }, + { 0x64dadbd27318bba1L,0x06775603e713f894L,0x5a86db2702699287L, + 0x0eb2657c9dc65560L,0xad0ecf6ae85b2f8aL,0x5213fbe63ce623b6L, + 0xe728f470f79b4791L,0x2f462b02e21f708bL,0x0000000000000185L } }, + /* 31 << 217 */ + { { 0x316de848faeaedfeL,0x78a110cb74883e78L,0xc02acbf279b5bfb9L, + 
0xf2a4e3d18302fa76L,0x82098a687e585e68L,0xab0f26c8b64373beL, + 0x17a96c041f6667b4L,0xa5804b6a8049357dL,0x00000000000001f3L }, + { 0xc4cddde35fe9e210L,0x52214a65e888afb2L,0xd1e861acda8a2060L, + 0x228f7b6e8c3a37f0L,0x81cb013a4631a725L,0x1500ad208bfd51f0L, + 0x17504e9c496fa6f0L,0xcb0a2b4070e311bfL,0x00000000000001fdL } }, + /* 32 << 217 */ + { { 0x1874803fd759f021L,0xe16e4f43f7ccdc14L,0x33b15c686ca96e4aL, + 0x16647507c0c8f6a5L,0x0a69a27b0ddc1a2bL,0xefa1d7d28c4496ebL, + 0x8bd873d347064396L,0x13b07fa653dd5370L,0x0000000000000008L }, + { 0x97373827dbf8a088L,0x0f48a5dfdcd37018L,0xf3bc5022172e6d9cL, + 0x96a0d7020867bc4eL,0x26130628d1a46e33L,0xba6c1f6a8c2bdda5L, + 0x273f3f4d4e5e6112L,0xc9f3341ecf810455L,0x0000000000000084L } }, + /* 33 << 217 */ + { { 0xba5684306a70e5e5L,0x2dd3cf84f677fdb2L,0x4639444c2093d4d4L, + 0x48145ac0cfa9d695L,0x1947f72dcbcdffdcL,0x8ee3d63223a695efL, + 0x3f7cef682c0c00e6L,0x50a326aa9f8878a3L,0x00000000000001eeL }, + { 0xf2ef9b77f3659baeL,0xb02047c9bb5d2078L,0x9342227bf5473552L, + 0x6af1262c972198b3L,0xf06c8495545c59d6L,0x5b76b44d7d2254a6L, + 0x0edf239adae2db07L,0x808bcb01b071a927L,0x00000000000001e0L } }, + /* 34 << 217 */ + { { 0x751ecd118ea4d1a3L,0xa416549385584530L,0x3c998d2f48ca5366L, + 0x5d2a945994867756L,0x40eb22d0ed2ce938L,0xf2384049eb6fa53cL, + 0x1dfae9f4452bfcd0L,0xfde76aeb67aaa0a6L,0x0000000000000070L }, + { 0x3971401bcff41b83L,0xe60901c2652b56a8L,0x4e58a099f4f9d6bcL, + 0xb822ec2f3cb0761bL,0x2199ca7ff5a4cea9L,0x52146c0e87d35cffL, + 0x507233f309f44c81L,0x24a9f28e1ba0c4d6L,0x00000000000001a5L } }, + /* 35 << 217 */ + { { 0xadddf2ffd9c057f1L,0x6634c99638431f5cL,0xdeb1e0e10e0ccc49L, + 0x5b08f82a9eda3938L,0x5ac5ba95b7be5ac3L,0x12e7c0a6d405b331L, + 0xa7e8f80e1ce1a099L,0x09192de2086e69e2L,0x00000000000000b2L }, + { 0xfd1c75f964436336L,0x02a5a6cdd3750eebL,0xfcb0759338ceb24fL, + 0xe613ce9408e8d3d1L,0x84bd977dbbc20a44L,0xc301f3f18b41ad98L, + 0x75dc2519a7586874L,0x2c179de3bb4cc82cL,0x000000000000001dL } }, + /* 36 << 217 */ + { { 
0x783751f1ace85e33L,0xe9bdcbb4d36f8988L,0x51d219a6e7361da6L, + 0x5f374eab9fd26dfbL,0xd62ecb9f12301759L,0x77f528f2f3af5667L, + 0x38de7fed85a91b36L,0x52a085d7d022c0bbL,0x0000000000000045L }, + { 0xf7f423fb3a234e71L,0x69e75f34635eed5eL,0xb2196ded1d3590a4L, + 0x46a9094b52c84b99L,0x039260d5c36fa19cL,0x755dd132b5b09dd1L, + 0x85d599e7784fe05eL,0x71187dd1167916d1L,0x00000000000000fdL } }, + /* 37 << 217 */ + { { 0xc2003f2f71056506L,0xbd1af63cbedf3b7dL,0xb8bfebef0a027704L, + 0xcf2b3463da5747a7L,0xd7620b0138eb7d5aL,0x04ba1fe62aec3578L, + 0x99b988baa26a76faL,0x059d1e5079ac26dbL,0x0000000000000182L }, + { 0xca94d1c96ef3129bL,0x361e2c88bdcca8b9L,0x446fbd76cc7b11afL, + 0xbc12cc4345c1ff9eL,0xf61d708a1b41f633L,0xddb2bf042db37562L, + 0xa470ca1eff6e0b9fL,0xbf4c9c9e1ce7e20fL,0x0000000000000026L } }, + /* 38 << 217 */ + { { 0xdd3f12d68dbdc1c2L,0x01e34587f4c4a308L,0x29de44b9243d76ecL, + 0x5d17233cc2f30f10L,0xa09262e4bbc6db0bL,0xd17c9a64d629b3ceL, + 0x57d7afc416e00ed2L,0x7968b7727b3d962bL,0x0000000000000011L }, + { 0x07c08b240363a6f6L,0x37ca731f096b5148L,0x3bfea8f7cac7db7eL, + 0xda2e44f5fd9625cdL,0x5b4cdad91660ca28L,0x085b48054e493323L, + 0x7c6994fd2f3ba77bL,0x0a0387299decf7b4L,0x0000000000000071L } }, + /* 39 << 217 */ + { { 0xfffbe77ba9ae3378L,0x9beb958edc28dbdaL,0x7fec7dbe060ba465L, + 0xeee11a73a6098a07L,0xc83f4667ba7234f3L,0xdb034e18753fb743L, + 0xe274fb5a5f53a77dL,0xd2a161d640d9d73fL,0x00000000000000abL }, + { 0x9c9880a8ff15ef46L,0x97bf6f4576ed1f9aL,0xec02ab9ec55d9ee9L, + 0x8a3e57efb5ad0d12L,0xe9db08c164d665e7L,0x0e7074369e1b30c6L, + 0xe953299b83aecc46L,0x9ec97a400bdbf832L,0x00000000000001f3L } }, + /* 40 << 217 */ + { { 0x677450828f3756c2L,0xabf1e75d0a7bfcc9L,0x047931dbb8bad3afL, + 0xa380c9855e392278L,0x5195e47d0237d6bcL,0x8c7ef452fee7427cL, + 0x4f078dd2585b6365L,0x4e82eb4b1e10d24eL,0x00000000000000d8L }, + { 0xda54dc52610b349bL,0x96746e43904633f2L,0xd2a58f332c50aa46L, + 0xffea4d06987f9fc3L,0x5505265acc521553L,0xaf2b7da0c316df83L, + 
0xa74f09b1ea362ecbL,0xcf9f0ac3a2e1f4f5L,0x0000000000000078L } }, + /* 41 << 217 */ + { { 0x59af0c27a1a02f35L,0xe0ac14ea079fffb3L,0x7eb918803ba6efeaL, + 0xf4f7ece1c20a4df2L,0x38d3a92237d80bd8L,0x54c4a59d6a6ea3f3L, + 0xb2a32bcebdcfa4d5L,0x700e28b56b9040fdL,0x0000000000000170L }, + { 0x56e3b014ab113b65L,0x0ec2175c548c1c84L,0x35fef7ad0213402aL, + 0x4d91c747b2ce0bedL,0x9c699db9ebfc8d58L,0xfa3d5a081329db85L, + 0x74b910498a16ccdfL,0xbb98d8300887072cL,0x0000000000000131L } }, + /* 42 << 217 */ + { { 0x96c26386e4cbd521L,0x0b1a38abd272b855L,0x30a03997f9875ff5L, + 0xe00ab13a61dbb1c1L,0xb31c80251bf9a773L,0x4d09ff2938888083L, + 0x356496085a48bffdL,0x8734455b7c0ceff5L,0x0000000000000185L }, + { 0x4e3f5b3369306de7L,0x54c1bf0426328640L,0x7405d971c8b247deL, + 0x0193c644218439ccL,0x91d5c51bc5bd4db2L,0x083af5e8d97ea0c8L, + 0xc29d34ebba9533ecL,0xa61393defdc6e9e3L,0x000000000000006bL } }, + /* 43 << 217 */ + { { 0x4cbc18fd8f743330L,0x794313312b0a18ccL,0x1a8f8cfe4b46dd3bL, + 0xea6a1b0a78faffa6L,0x71b836b8520dcc57L,0x5efddbd89b6e74bbL, + 0x3d6b6151dc6b7c9dL,0x95f2cabff235d63eL,0x00000000000000f4L }, + { 0x15b2e9802245f71cL,0x557c9c3eb1ea762cL,0x7121b8e7a57e9103L, + 0x31c6a917b2bff1cdL,0xe6883a84ff413e00L,0x33f8655f4a67ca7bL, + 0xa548bf42c27e7e2cL,0xe32f78ad352d4fb4L,0x00000000000001afL } }, + /* 44 << 217 */ + { { 0x75ec3f26656e2672L,0xcf0d9739ddf3748bL,0x5765797c7991dabdL, + 0xaf30965268cee722L,0x5bcf9603ed47b3c5L,0xb739ce4e493a140eL, + 0xb209fb5dcbfbd96fL,0x10de84841ecedf68L,0x000000000000007fL }, + { 0x297535a097eeb1fbL,0x40bc00d9479587cbL,0xd20d467736f110f6L, + 0x029ce5ca422c636cL,0x9c8133c2172916e3L,0xf6e1a538e3a60dc9L, + 0x17809428fbfdb3abL,0x399fdabc973f2523L,0x0000000000000155L } }, + /* 45 << 217 */ + { { 0x103d1964126508d7L,0x9374fc63953fe4a0L,0xa3e5fef2bf65b5c3L, + 0xcf1e9f434dea09f3L,0x73cf4433db5dd9c6L,0xe0cd97b415af4b48L, + 0x6ff6d7f6ebecabe5L,0x67ab5f01e7947376L,0x000000000000006eL }, + { 0xe36cafbd0ef747caL,0x69cff962339f8b52L,0x0aaf54ce45c3da2bL, + 
0x15c960ce248fc6dfL,0x0464d7df61eff9c6L,0x1f783e7b3a1eb474L, + 0x37d81b791791de17L,0x356bda6296b6e9c5L,0x0000000000000188L } }, + /* 46 << 217 */ + { { 0xb0afc4e65c223c96L,0x88f2e5f35aaede44L,0xc3183f1cb71a856eL, + 0xa41a1487b9c00b69L,0x28b5aaaa69d2bc7eL,0xf5c5dc2172fcc598L, + 0x6c70b857400234dfL,0x861eedb2c2d21c53L,0x0000000000000060L }, + { 0xc9d7f77696cb55a0L,0x0497d6176a92197cL,0xa3e0f3a608b23cbcL, + 0x2110a78eda86341aL,0xdde07944da306990L,0xbf6cfc6ab988aed8L, + 0xe2add66fd14d708fL,0xa44d983157dbfd52L,0x00000000000001d0L } }, + /* 47 << 217 */ + { { 0xc56bbdf675812dabL,0xd9e2cea8a82d54d0L,0xc78535044a6379c2L, + 0x2c974afd849f9130L,0xb35b4d7bb75ea82eL,0x7fa6b09c58bd2e96L, + 0xe1416607f1d13d0fL,0xb684652fd79eb307L,0x00000000000001ccL }, + { 0x987764f187c31932L,0x99c1ba6ffb1d4bbcL,0x2ed38e948015c51fL, + 0xf424922c93ca5a9eL,0x201c868c6510cf1cL,0xfcaacc9a10c4ed5dL, + 0xe622fa46dc5316eaL,0x6f9a6df2518b7d2aL,0x00000000000000a9L } }, + /* 48 << 217 */ + { { 0xce7ae0b6075676dfL,0x24daa3aa29be05beL,0x795b32d17b2f292fL, + 0xf71dbf4f77ae64feL,0xe5f8b83bdcca4f00L,0x111a06db71e2e120L, + 0x6e7937f5a861e714L,0x7b14d15817bf3be6L,0x00000000000000e6L }, + { 0xa8748d319d04daacL,0x28e3fdac9325722bL,0x4384ac259e26fc1cL, + 0xcbce8c3320f8d023L,0xa21e8abb9fed4b2fL,0x5f1d2dfca075f969L, + 0x43d3c57f5e8e705bL,0xcac9948fdeb0bca3L,0x00000000000000a4L } }, + /* 49 << 217 */ + { { 0x66a65e83dad7d797L,0xdbd972bc359e9564L,0xe35acc8a5eccb5ffL, + 0x35e3ffac0ad3ef9bL,0x851de6e98dbecf69L,0x90c0b3bc520a3d86L, + 0xdca6e3dd40607f03L,0xba8a7ea766dc9c2eL,0x00000000000001b0L }, + { 0xe2a78b19c0187be3L,0xf0bfde9ebd554b70L,0x6a61dbf7e7357ee1L, + 0x7189a9916f16a7a9L,0x1027a7495028b3bdL,0xab2f2df6a104d723L, + 0x3f214e95834a7f51L,0x5033df8cf6019533L,0x00000000000001f3L } }, + /* 50 << 217 */ + { { 0x49d49732774494b6L,0x3c9506bd04770a4aL,0xbb8415e4c248a4fcL, + 0x60ed58b8374418ddL,0xc26440d39aeaa332L,0x1601233a7aefa5a0L, + 0x8ba67f98ffcc867fL,0xb93887e4e716de59L,0x0000000000000028L }, + { 
0xf9467d0a3f1ea0e3L,0xd69dc7b3c349e641L,0x21bb7c3d47a1fb70L, + 0xcdce7b0485baf8f8L,0x6a94cd2df74d61bdL,0x9b4aaf70d80c40ccL, + 0x67958b15554675a2L,0x1212272b37de6aa5L,0x00000000000001a1L } }, + /* 51 << 217 */ + { { 0x65b30c4693d202fcL,0x69278e20a91bc534L,0x2159b74620ebfcbaL, + 0x9a039589158a9358L,0x3431bf268fbe3a07L,0x3f052e4d5ac5aff9L, + 0x0c9236c539ee1355L,0x95924b8148fcf947L,0x0000000000000179L }, + { 0xd60db53922fd1648L,0x6fa28ffd2d08381cL,0x48efc7edf1831068L, + 0xbcbc20d73ebf4420L,0xaa29a9d85f3f90fdL,0x79b874cc5a1ca0ebL, + 0xc177fb6d3a75b3faL,0x45005fcc887cd30eL,0x00000000000000e5L } }, + /* 52 << 217 */ + { { 0x22c042b9ae0ff839L,0x532ee1b07b82dbadL,0x05e95394dd5b19d7L, + 0xc6702ff7d56b7f6cL,0x55d30e5d0e9b86feL,0x709ba115f1732c78L, + 0x74a6cd649ac1f9aeL,0xc692aec65f088cabL,0x0000000000000067L }, + { 0xa276f7466deff8f2L,0x36b98e844ee21c9cL,0x862c26e1b552c998L, + 0xd5a2a5bbbc4fbe00L,0x4919ca8c18fdb4ddL,0x66d9bb81c77b7776L, + 0x5aacc969d0db6420L,0xd14d5266a5ac63abL,0x000000000000000aL } }, + /* 53 << 217 */ + { { 0xef65a4d01f4c8edeL,0xaa46b120dcb08a38L,0x40f468df7069f2d5L, + 0xcb4829542bc58171L,0x8be42227c99b25f5L,0xb0f18b7307eb98b7L, + 0xa1fae0b29b9ec47fL,0x8be5738cb5a371b2L,0x00000000000000bdL }, + { 0x4c571a9fdc6a7103L,0x34ce343edce86822L,0x8d732bf9cc528a1fL, + 0x4203e1f92a313cdeL,0x3ae79ca7a5626baaL,0x0b552456eaeef625L, + 0xfa5b3a161b406f85L,0x25b35d73eb95605bL,0x00000000000001d8L } }, + /* 54 << 217 */ + { { 0xa9dfea9c3c7a06c3L,0x529d3a3644d3e30cL,0x7f8d907f76897d3fL, + 0x309a2182581c26b6L,0x4d29dc6f0c0581fdL,0xcdbc510a7bf06072L, + 0x9d0cbdf3f55f687cL,0x5a5443e55b0b307dL,0x000000000000019aL }, + { 0x5738cbb543970892L,0x3219a82ae52498eaL,0x00e3f1db9a0b043fL, + 0x87e704902e29f37dL,0x3bb34b3bd2d0b401L,0x8ca921c189b818faL, + 0xf122b4d3fa6c6e6dL,0x70098c5f3ea74c79L,0x0000000000000097L } }, + /* 55 << 217 */ + { { 0x4faebb5ed4d176abL,0x793cb9749742e217L,0x42c33d81e013378bL, + 0x4ac4b7ac800a3d3cL,0x929871ea86ae5230L,0x02e2b5fbd923d457L, + 
0xba95bf17b797df41L,0x4f2509a33651e982L,0x0000000000000117L }, + { 0x1a4e60c5a0afd2c7L,0xe96f14e4d2685919L,0xdf1961fdab7a498dL, + 0x8692057a65b97a84L,0x4ceaacb1ed5ec892L,0x806aa214f34b0220L, + 0x285dd9b1501fc893L,0x401b959d0115c368L,0x000000000000002cL } }, + /* 56 << 217 */ + { { 0x17100a613bb93eafL,0x7ccb5bf3d80ac200L,0x585d17b6eab320c3L, + 0x8d53eff1e7258f8dL,0x9d671ee13f21f78aL,0x76e7ab703dc4ad54L, + 0x9ef2272419b3ffc7L,0x67b54b1fafd5a3caL,0x0000000000000122L }, + { 0x629dfafb33a86edfL,0x237e33d97e4ae084L,0xe1d102344ca49a1aL, + 0x768d5901b6e2e180L,0xfebbd9dad2ec4ad3L,0x90ab63eba9e332dcL, + 0x5e2636c78da3786bL,0xcbabb3fad3f6ff9fL,0x0000000000000190L } }, + /* 57 << 217 */ + { { 0x96c2b30c8b4503bcL,0xef8079b76ea400dcL,0xa8693f0228e1ce2dL, + 0x3f191b4d2d51f267L,0x32e177ca3361ae9dL,0xbd7d0a66ddfc80a5L, + 0xdd761eaf25fb8d88L,0xa2e52fd503d4a584L,0x00000000000000f1L }, + { 0x023d3607fda8c4d2L,0x51a0c87a59aac242L,0xcf51d25c4f8a9c42L, + 0x3a8a96c6aaac8b07L,0xa635ca96feae776bL,0x09f4b77ce5b272d7L, + 0x1edda904f7a74fccL,0x3baf043648f0578aL,0x0000000000000015L } }, + /* 58 << 217 */ + { { 0xaaa50d58f000e356L,0x0b3193c7016dd924L,0x8fb754d8a0ea14b9L, + 0xd91c9f3fa50b4c23L,0x10e6d8a82bb95226L,0xca4a37214dd94671L, + 0xadafc0d9ce05e753L,0x1ed81caf97e6c298L,0x00000000000000e8L }, + { 0x230fcce8d8b0bb47L,0x04b5df73c0f13872L,0x844ee3ee24286c1bL, + 0xd25336e8292c55b7L,0x49b25059b9fe1d90L,0x88302407130bf270L, + 0x164ea993ee3efa36L,0xa2967cc81606a7aeL,0x0000000000000063L } }, + /* 59 << 217 */ + { { 0xf3e4e4d3c3c8fb65L,0x51ce545919cbf83fL,0x11a1dc28f47ff417L, + 0xc47cc26a9408c5fdL,0x8e59575bc25546eaL,0x462f9375aea3d032L, + 0x9c0a03996f33ed40L,0xedb8b2b6d15ac9d5L,0x00000000000000a6L }, + { 0x50c5c11ffcbc7ad2L,0x3cdaedf0e5d53272L,0x7e5df4def7d3737eL, + 0xfd4570fe5964b614L,0xf29db31b4d1d0bb0L,0x1927ae724617ec1bL, + 0x7f3dd228e3f040bbL,0x4b4fa22b6617a2b1L,0x000000000000012fL } }, + /* 60 << 217 */ + { { 0xa71ecbf6370b5ec7L,0x2ed3a5916ae50f0bL,0x5197ab87111470faL, + 
0xaea260dbe30df740L,0xcd7e1f9d02e37578L,0xa3569bfd8bae68d9L, + 0xea8e96e7c2a89697L,0xb9d1f162f9fcc566L,0x00000000000000f8L }, + { 0x32cd11647ef36ad4L,0x1e58205fe4b08a4fL,0x6e7d004a3dc37cbbL, + 0xd639b5b527ebbda0L,0xe92db32e2cb36fdbL,0x25ea06757e17bfe7L, + 0x5f49bd54c8e8cd0cL,0x793d24cfc3e25af2L,0x0000000000000033L } }, + /* 61 << 217 */ + { { 0xe5c1b03d6c5f70c5L,0xc634f61e0bda5d66L,0xf7fa1fb6c3782a1aL, + 0x9da5e4eb94b3a1ddL,0xab10baef997aad2eL,0x9bfff40e40296f45L, + 0xa17714fcaf3ca04eL,0x44315b37424f4d32L,0x0000000000000152L }, + { 0x1898f6677a2bb552L,0xf73bf0419fba5d55L,0xa0505207adb92bfdL, + 0xb5f8949940775fdaL,0x0a4bba547c52106cL,0xb238bff05336edfbL, + 0x7586b6aeb6c0f11bL,0xf3b1a6d93320ed85L,0x000000000000016cL } }, + /* 62 << 217 */ + { { 0x1ed5330496a2c264L,0x25d19d7be58b1775L,0x87234824d1c26e46L, + 0x2eb0c2acba0860f7L,0x1ae2194cc81321d8L,0xd45ec0f1fcdf40faL, + 0xa0953ba97e3d11e6L,0x495a35ac11350bd8L,0x0000000000000184L }, + { 0xbcc2861b80ecf438L,0x859849509750ccb7L,0xbdb997929e1cc8e6L, + 0xf64a9df281f7c4e1L,0xd1860d60ca400f68L,0x9782a95d5e2b500eL, + 0xa3af2257d6320c6bL,0xd0a1666d0488032eL,0x00000000000001f1L } }, + /* 63 << 217 */ + { { 0x69a468fdb7a36758L,0x297d8dd0da785f23L,0x66c8518c215c8620L, + 0x2b420fdb7e84feb7L,0xaef8a39b26a3e7afL,0x81979bfa23484aabL, + 0x5b9c8bf22d0d13d8L,0x1dcbdb5fa6ac2309L,0x0000000000000132L }, + { 0xa9c831fa65bc4bffL,0xbd8b10387d5f077cL,0x361b762378a3206eL, + 0xbeec611b4724ba57L,0x423f56177b690d52L,0xbc6dcd6aeaa09b15L, + 0xf0b60ebf7d9c160bL,0x1c3af39c6918b37eL,0x0000000000000110L } }, + /* 64 << 217 */ + { { 0xcf8e03c512a09898L,0x2090118438287af8L,0x8fd47e85371620a2L, + 0x5a2b2ec87f16350aL,0x45841b1c608c7488L,0x627c632e01739426L, + 0x3cb05aa78f8e8224L,0xbe277fd79b643a85L,0x00000000000001feL }, + { 0x950843e61538cb4dL,0xe88383994f7ae0e6L,0x6b90ac85d2e1d225L, + 0x78c211321e5361b4L,0xce09f24034b950ffL,0x3884016974b71400L, + 0x58ae40a665e2bbafL,0x5d370978c823b954L,0x000000000000017cL } }, + /* 0 << 224 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 224 */ + { { 0x7ddf47051733b3c3L,0x7c7878a3f4a25d85L,0x4d86f57c29ad09a6L, + 0xa97d4ab6b26d5c69L,0x71cc925004b07913L,0x08a84131b4c0b7b2L, + 0x388fc8aa5119f83eL,0x23a3bd0b6bfda0cbL,0x000000000000007eL }, + { 0xdfc7c8448e549373L,0x439dc785b52ad215L,0x9e157e18fb9de6dcL, + 0x9283c3318793d562L,0x79f9ec9033ddc16cL,0xed0dbb7fceff5086L, + 0xfa4668078765145eL,0xba9411a9a3b387b7L,0x0000000000000176L } }, + /* 2 << 224 */ + { { 0x319885d110e10587L,0x2da8979a609853a4L,0xfe907fe389b08ef5L, + 0xa3a338a61a97d9dcL,0xa5ad1a9ad7e3fb92L,0x77a3bfc07a0136e3L, + 0x99634588aa6ed29aL,0x41f148beef4787b6L,0x00000000000000dcL }, + { 0xb8671195ebf698bdL,0x323b6965e2974ee2L,0x1411c67084c20702L, + 0x840b396eba4bc1edL,0xd857a525eecff9a6L,0x0afad4f4d2d6829bL, + 0x84ef6315bf6ce02cL,0xc38fe857ccc6b333L,0x000000000000001bL } }, + /* 3 << 224 */ + { { 0x2d9ba7e36f07eef9L,0x46ef4ce6eb692362L,0xa869ee262fa2cdc9L, + 0x5b7d3a81642a2378L,0xc024410ad4d6bcd4L,0x88cc0f7121ceafefL, + 0xeee011ebe04af152L,0x9cf87d1d7829107fL,0x000000000000002eL }, + { 0x49260571e07147b1L,0x632583460ca0c303L,0xf1cda5ae6602e056L, + 0x418b618681a45861L,0xcbf17fcdd1921fa5L,0x355e5d1ae07a08f1L, + 0x3f4f5bb0a26b6b2fL,0x898ca884ade69b50L,0x000000000000008fL } }, + /* 4 << 224 */ + { { 0x1a00fc111c417fadL,0x8816dd0b3fba7366L,0xb3a5499232c0e5a9L, + 0xb6a634789ec0fa14L,0x880b10b8a8d8615eL,0xe3635bb83abbe5fbL, + 0x62a209f64de9be52L,0xac41cce182b76cf9L,0x00000000000001c5L }, + { 0x287c7bc77dd1e73fL,0x072bb2ac1f8c7d2cL,0x399011fb3af5565cL, + 0x787b94845e2a7e13L,0x5858c3df5761ae9cL,0xf0f831ae6d2875daL, + 0xd5f5bf2dae9c82f1L,0x191b45fbb247a36eL,0x00000000000001ddL } }, + /* 5 << 224 */ + { { 0x1d2d4ab0fe772021L,0xd6e75054041053d5L,0xdb480d2e5705d868L, + 0xf68a09f76aa63e89L,0x0bcf4c31379a4d0cL,0xcbc0c737b681873cL, + 0xdb6931a6b886d407L,0x0fbaa44bf84e033dL,0x0000000000000056L }, + { 0x5339b4f34f90a0fcL,0x2743f6818ebc0b28L,0x37567e5bf7db2715L, 
+ 0xf339c91aa5b1053aL,0xde65f7188125ebfbL,0x32f7c06d6bf4ed15L, + 0x8071d1aa40353dcfL,0xb8fdddd93c994068L,0x0000000000000198L } }, + /* 6 << 224 */ + { { 0x1c081356b6965640L,0x7a01b38990c7a0f0L,0xf72fdba4874007eeL, + 0x6bd85d94dcddefa2L,0x80fbde7e48db2fa6L,0x60b2be1c4a319dadL, + 0x996217fbee51fb71L,0x85f1b13be0edb983L,0x000000000000012bL }, + { 0xdf53aa14ecfb1d0dL,0xdd77fc37760a217cL,0x659bf31e99b56fbdL, + 0xd0f042ebaca3e106L,0x50c215ba1f907ea8L,0xc6bb892ca4ef5f48L, + 0xc3beb73bfeb86870L,0x04b740bd4123f812L,0x000000000000007cL } }, + /* 7 << 224 */ + { { 0xd50c9fcf6016e8f8L,0x27f12c32b8c12f9aL,0x2e13fd2b08e0e13cL, + 0xe48a1bce89e659efL,0x1bbe7f01f62a49d6L,0x87e0ca4fd417e497L, + 0x1cf55a48364ee9cfL,0xf4fbc7eaf66635a7L,0x00000000000001a6L }, + { 0xbe3e55113eb12a0bL,0xaffcad4b462ebdcdL,0xf6f097ace8f4d182L, + 0xdb1fe0f2404a92feL,0x9e2a51594fc084ebL,0x6acd3a8e7af882b2L, + 0x17654a579602fc6bL,0x1307d12271de39cfL,0x00000000000000deL } }, + /* 8 << 224 */ + { { 0x470bfdd42540e5c8L,0xc2c849a1057592ecL,0xd8f4b30d2f886637L, + 0xce56d15025ba43e0L,0xcaca4a05f200d2fbL,0x38fdfb3595d80545L, + 0xe274649f22418951L,0x155badb2cb7437beL,0x00000000000000a4L }, + { 0x09195792c828a666L,0x3ceaf45353f59336L,0x3982e19643850a1dL, + 0x02f8077df2d2afbdL,0x83197e1e6a6946faL,0xf158b0b4c4ffad53L, + 0x12559573853b51daL,0x8838409e8d35c9deL,0x000000000000019cL } }, + /* 9 << 224 */ + { { 0xa844a48f37315ba1L,0x2db10df96f401d40L,0xb092031359508b56L, + 0x4ccd40a51df7356bL,0xf6e6de667cba0994L,0x8d052d22aff1e407L, + 0x6579f0aee1ad5185L,0x02a2654c2fe56ff9L,0x0000000000000107L }, + { 0xfbe7245fc52e3a9aL,0x5cf1f315767802eaL,0xd1bc98865cfa064bL, + 0x6fe2e272218c2d74L,0xfbc0f239e105d3c2L,0xe93040f86c2d5d8dL, + 0x3564207844f0c1ddL,0x526cafb534a20e65L,0x000000000000007bL } }, + /* 10 << 224 */ + { { 0xa403b2cd3e0bf039L,0x1f331cd769ae1607L,0x5808812d9341e113L, + 0x3bdc37f4acaef9c4L,0xeef79000a056ccf7L,0x6e36e89079f26a8eL, + 0x225a7216a5a165ffL,0xb0f49eea4a8d4e72L,0x00000000000000c7L }, + { 
0x7df03cd678eaf6ebL,0x4e96fcfc7a3ef5e3L,0xb4578857fd3bfa43L, + 0xaf079521906c3961L,0xa8e06dea1227fc18L,0xf0f06e6808be24bbL, + 0x0af15713b84b3fa1L,0x6bb9ba45d30f7aacL,0x0000000000000111L } }, + /* 11 << 224 */ + { { 0x7055735e2734bc7cL,0xdbbb0d7f2015daffL,0xde7f735ad048c6f7L, + 0x1401c4fe3d025035L,0x3017088af09b1518L,0x53beaa128c919403L, + 0xe623ff8d6e6f4894L,0x1622621fa6c45b25L,0x00000000000001a6L }, + { 0x3aacbd8120e1fd32L,0x21963a49f0c52d67L,0xb521f29dda79f988L, + 0x5ca6538ab632a340L,0xfeccb403a1c38793L,0x800a81e46c1993a7L, + 0xbfe7989e08775f1fL,0xa7c0bb935f6056cbL,0x000000000000007bL } }, + /* 12 << 224 */ + { { 0xba1f3d04e155a234L,0x83285cb7a2f5d3afL,0x120231908196da46L, + 0xc05f48389399a9b8L,0xe1604051a097e0c1L,0x3c48963b8f3cb46aL, + 0x7ee7debebc20857aL,0x74c375ed8ef449ddL,0x0000000000000148L }, + { 0x6086f7e549a22d3bL,0xcf65faa71ff513f7L,0xe5f24ae161c6db58L, + 0x085cdc7708664d9aL,0x8e2385ab1df20e4cL,0x58910a818006cd4aL, + 0x1858bc67c143e34cL,0xf88a9ca118f7d8ebL,0x000000000000000bL } }, + /* 13 << 224 */ + { { 0xac6ed780d8dc7694L,0x1517db0f7f0ce3c9L,0x14e574efe2724276L, + 0x06bf1c0ad420a1b9L,0x092fcdc0ecb5bb09L,0x00b1952db3c22e3fL, + 0x8486050d66dfef5cL,0x4162baf5b4aa2363L,0x0000000000000035L }, + { 0x5f188ef1ccd62e93L,0x357224c0a78fc426L,0x6a38a6a0f311d595L, + 0x632d3b1d9363d024L,0xda7edb5aefd29098L,0x1ae7e00046198910L, + 0xd299ea6c8637f141L,0xa33a21d96dce1f27L,0x0000000000000191L } }, + /* 14 << 224 */ + { { 0x33b557a9edcbf5e2L,0x8d42c77f4fb31e95L,0x3f006dce258e5a7eL, + 0x07ab56329dc0f10dL,0x6a79f1d388012825L,0xbbc9f71d997f67e9L, + 0x733aee4fb0484d0cL,0x0d7d26af06b23588L,0x000000000000007eL }, + { 0xae584763447d2b4bL,0xe3c380900abe0533L,0x19dc9a0946a66debL, + 0xc5e21596f7351889L,0x67efb3732e9062b0L,0x38c539278def3d27L, + 0x8f2afe546ec1f561L,0xa80f0c62159002baL,0x000000000000007aL } }, + /* 15 << 224 */ + { { 0x0574b1c11fe4b65fL,0x564bc4eb99fc3470L,0x040cf7e88ad913eaL, + 0x72f64fde21e6dc7aL,0xf061a2f945e43f1eL,0xb30d7ace4cf73cd7L, + 
0x4e67338fed5303f8L,0xd1ca92f827fb18b4L,0x0000000000000123L }, + { 0xec92ff11b2ff61b9L,0xa1196425dd3bd645L,0xeab1a0b3ffc4a720L, + 0xbb01e20dcde12819L,0x222fca782f6f9062L,0x5343239fbf735e40L, + 0x8e4c78ee7a1efa11L,0xbb68361cacee5111L,0x000000000000009bL } }, + /* 16 << 224 */ + { { 0xbce0f1074175e7e0L,0x51ac63d71965464dL,0x429f6b9f52aaa9ebL, + 0x6ed3c53b2a1846d7L,0xfa87d5876dcda98aL,0x9858c801c72d6d0aL, + 0x94a5bcf367681451L,0xb7c46ecdd3de0285L,0x0000000000000183L }, + { 0xfd301b76d2dfd89cL,0xac9c2b0d0f38e6c1L,0xcf9d3acf2da8e6d4L, + 0x99db13dceb9024c5L,0x4ebfc5ba9ee659a9L,0xbded80949b2fa547L, + 0xfcf035f9c08d98c5L,0xb50dd13656b8b217L,0x000000000000014dL } }, + /* 17 << 224 */ + { { 0xbd6785f67bfbd788L,0xf5b0998e64b74e2aL,0xb9807c2339968512L, + 0x7de81d72cbb6513fL,0xca27798033ef0d45L,0x09ea26e95b9b19d8L, + 0xbf8414db4390e170L,0xef1287bc2e6a7e43L,0x00000000000000c3L }, + { 0xd10112737f0c5ef6L,0x47393f003fc452dcL,0xb193a4a17f51c291L, + 0x7d0ffe318cbc8483L,0x330d79be90e56adaL,0xb913a2e9c8c6c717L, + 0x3c5e0d0c3f227508L,0xf57d2ea48a43f045L,0x00000000000001f7L } }, + /* 18 << 224 */ + { { 0x00b8eebbbd959bd9L,0xc5406f8e65f420cdL,0x6a60db3f063154c5L, + 0x5d36e596311dbdeaL,0xa2bd49a842a485e7L,0x661fe033dd24a2ecL, + 0x00d78bfb695ba009L,0xfcc1198129afe784L,0x00000000000001afL }, + { 0xf30ad5348384e611L,0x46f55e5266df249dL,0x5cb69969ee339313L, + 0x266a39971707af6dL,0x58d83aaf3a65d217L,0x46289bda29bc352bL, + 0x13a780b3a7375993L,0xc694ec0434a72490L,0x00000000000001d0L } }, + /* 19 << 224 */ + { { 0xf54fce2fa396f979L,0x69dd3e5e44540d2dL,0x4bdf033a2929586cL, + 0x055090d894086923L,0xda62e5374c8595c6L,0x8a9e1def4d489859L, + 0x84a3f9502ac811faL,0xe6753ec71733f749L,0x000000000000011bL }, + { 0x1d3b67d036f97445L,0x6f154f990f2533b4L,0x69493d6eb0e8573aL, + 0x4ca3dad5b8f6d7acL,0xd3ec695236a717e5L,0xd8c719b7e78bfc25L, + 0xf4746d3e069fc4b0L,0x8d967a3a6486c44dL,0x00000000000000e9L } }, + /* 20 << 224 */ + { { 0x7d8f48632cfeec8dL,0x9974152a814a01dbL,0xddc0bc6cc92694faL, + 
0x4e7bff595e2b5689L,0xbaa96e1a812f16fcL,0x800462f4f6585b6aL, + 0xbd8c7d984cf44364L,0xe49ad28a84fa6863L,0x00000000000000d3L }, + { 0x417df8bca87cb33dL,0x4fccdc1bdcf62845L,0x8784400a576b1235L, + 0x708f13ce2676a8a3L,0xa7534d0dfdc376c9L,0xbdcc20da643f1159L, + 0xc0f21e2e86b8fbe7L,0x632c15d52f0326c9L,0x00000000000000a6L } }, + /* 21 << 224 */ + { { 0x68fdb156f0af9d3bL,0x5f53c821be3429a8L,0xf37ba120798adc16L, + 0x13793b18980624a3L,0x5f6297ab421c8a0aL,0xd429fe05b9a9bd9fL, + 0x80b7d860ff3a84abL,0x10ecc26a3e1769e6L,0x0000000000000020L }, + { 0x0a1afce2c04cbbd5L,0x9b6b5b9cdb6c081eL,0x5759236d62cada78L, + 0xce57613aedc49bd7L,0xf66a9105f4df2865L,0x738e3e09768d556eL, + 0x4ea024b2487b98d6L,0x69f02ea84e9bb27dL,0x0000000000000078L } }, + /* 22 << 224 */ + { { 0x8499723aa10fbd04L,0x638fdf9d1d392e78L,0x7befbecbdf12c367L, + 0x522a5d26db60f06dL,0xce23f73fe6ce2fc3L,0x42f4ea36b20e9b7bL, + 0xcaf5a320e0401d85L,0xdf4bf7a79938d740L,0x0000000000000131L }, + { 0xfc8a14c7b9493be8L,0x2e89cef551148e47L,0xea3375758b804aadL, + 0xce58b786362d9aaaL,0x0f3cdaec13bc5120L,0x1e773c0d49f1afd7L, + 0xe9127a8b4ef25b4aL,0xe6d30caf29f670bdL,0x00000000000001c6L } }, + /* 23 << 224 */ + { { 0xbd401bcae131ef57L,0xbbe597a154c52236L,0x6d1c702efcd7d43bL, + 0x0eb0316d8a359106L,0x143b98b6ac6374afL,0xb2578bfac4a2b559L, + 0xa210d0c1bd53053eL,0x5fcb9211c1779f63L,0x00000000000000f6L }, + { 0x5f83788c000e35a2L,0xd00c38172eeb99d2L,0xa565d239c63ff60eL, + 0x147f29dcd0d4caa8L,0xf8b63389115dbd52L,0x2dcb407f6856405dL, + 0xb1bd3bb6469671d9L,0x4468b9a148cac2d5L,0x0000000000000013L } }, + /* 24 << 224 */ + { { 0x2db0707f8144a351L,0x0885661375ca2dd2L,0x7a5052e1db5369b9L, + 0x0ee205ccd81351b7L,0x17352e8de1a0c288L,0xbb36a1cf89c9cc19L, + 0x5797a9b2768d2d2dL,0x2736a45cd28ca1e5L,0x0000000000000118L }, + { 0x0c9e5469931503b9L,0x2c0487af3d942235L,0xb71d495e95ae408dL, + 0xb5c2e5d1a853a205L,0x09747539585803b3L,0xc703600a44cf93eeL, + 0xa0a16aed1904461dL,0xfb5c6e6ab0333ff2L,0x0000000000000119L } }, + /* 25 << 224 */ + { { 
0xc4c7b8c62b36290bL,0xe2fb417d81e58d27L,0x086f3d384750dfe7L, + 0x8359ab7548c6d58eL,0x7b78687a094386f2L,0xcb1d4021a140ec3bL, + 0x52a8d9962d7dbaa3L,0xc7552a9b8facccf9L,0x0000000000000182L }, + { 0xfc3ce1ff88706b52L,0x5cfb2c61ee0aa036L,0x3c52b2c9b87e28faL, + 0x01ceb01f22eb006dL,0xb0fd7d823174e206L,0x190d8131ade8d5d2L, + 0x1d195d6ac3f7f087L,0xdbfae9d2eb09decdL,0x0000000000000134L } }, + /* 26 << 224 */ + { { 0x9b21b0eb2daa2fb7L,0x62506bd10665aaf4L,0xfbdb475b82e9704eL, + 0x4088093f50b1377eL,0x5397eea199707c61L,0xd3356b6e75fd8b3dL, + 0x561c380705fe8914L,0xd0304825f77342d0L,0x00000000000001b8L }, + { 0x7daa92433fdaa949L,0x12f461af51ea7441L,0x56c37d995a63573fL, + 0x5d6c77c84a7025e9L,0xd2f2be05817b31a0L,0xc3b01b281f472be6L, + 0xa9bf6a9f861524ccL,0x1dc74be29f0ca5b6L,0x000000000000002fL } }, + /* 27 << 224 */ + { { 0xdf2687a38ee78e5bL,0x5ca01f45dd1c0e1cL,0xf2bb5db54161d203L, + 0xabe1d9781258d82dL,0x9ed61dc1371f1fc3L,0xb5882d2e8ebcd9bdL, + 0x62d2a79ea86df9f4L,0x925a80af6c50a563L,0x00000000000001b8L }, + { 0x472e8479a891d8f3L,0x47741910543b0678L,0xa1df2d293d53f8d9L, + 0xde28fb0e30acb4b1L,0x9683ff8d5d4e1f2aL,0xeb74a8044ae8a13eL, + 0x727434e967e736d4L,0x514af3dc0db30c54L,0x000000000000010dL } }, + /* 28 << 224 */ + { { 0x8b883f22fd5b9cfeL,0xebc175f592656667L,0xbe995418d9c86f02L, + 0x9dee3ec51ed7a5adL,0xe9c7b177a10117b1L,0xae7ead0547724e37L, + 0x4baba8316fefa7e4L,0xb496a2d1d7727641L,0x000000000000013eL }, + { 0xed31fca226014cc5L,0x3b36d6148a223091L,0x32ac1f7b521e4fcdL, + 0xcc7348015ad644d7L,0xf60d0ae0931450aeL,0x1db3acecd993d7c7L, + 0x0f3e95427a9cbf22L,0x7c688fba58fede8aL,0x000000000000005bL } }, + /* 29 << 224 */ + { { 0x53259e5950ef71baL,0x1dfc6bec47c25dcdL,0xfbd8d5c47087cf41L, + 0x5a5ecfd10249e9e4L,0x2c6af4a91ea582fdL,0xd01b1f80cc54d09bL, + 0x9cfa9960500567ffL,0x7c744e9348b2e9d5L,0x000000000000004cL }, + { 0x5c596099783f1792L,0x7e2b1d15d0f14bddL,0xcc195fedecad2e07L, + 0x324953104117835cL,0x8eb7128ca206aabcL,0x0b9d40cd4873c84cL, + 
0x4ffb71b40648225eL,0xe7ef4f33a4139697L,0x0000000000000081L } }, + /* 30 << 224 */ + { { 0xf6171f9bdc8d6167L,0x1bef4c1bdcdaca1eL,0x069ea1943224f5b9L, + 0x16e603adae98eedaL,0x1c2551a03527f99bL,0x677bfceef3324cf7L, + 0x89739e2afaa65d81L,0x7c63f321ee01a2a8L,0x0000000000000083L }, + { 0xfca4b775684bf04eL,0x12b2accc10aacf28L,0x60eb59796f5b70daL, + 0x0973753341ad9c9aL,0xcbfd801c2698b98dL,0x6915f2608eece898L, + 0x932c7de6e88b02bbL,0xb183858db0f499f9L,0x00000000000001daL } }, + /* 31 << 224 */ + { { 0x9a4db1714ada6df2L,0x113d6ca8a6503065L,0xfe1ce2a393f30e86L, + 0x0ec4df0f5b883187L,0x2ba9f9377b6de31bL,0x6319ca2907d3cc32L, + 0xadf2d42c0de3d0c8L,0x1cf47cbc8b60ebcbL,0x00000000000000f3L }, + { 0x9737b3155a5c472bL,0x405097d38f44ac04L,0x11d7b5e098060980L, + 0x1031d3a49c649638L,0x851dd24036cc36a2L,0x72174467106c19e7L, + 0x0689ee3ef1ab03b9L,0xab05c26f83c47bfbL,0x000000000000002eL } }, + /* 32 << 224 */ + { { 0xa198d04b1b9ed16aL,0x2244536251340438L,0x77078e91251deca8L, + 0x1d7b7be1be67e98eL,0x106648be2cc726d4L,0x79dd8d7bf5877e13L, + 0x42bbd30e37993843L,0xe768f54dca7796c9L,0x0000000000000078L }, + { 0x91c5e19d1acdf2fdL,0x0a2eff89e93c20d4L,0x6a84e609ce41ac1aL, + 0x186ab6e8df08373cL,0x44c2aef6f6f9abc1L,0x9c7070b0dd735102L, + 0x0e58b51807643871L,0xbc9f2e800b15d805L,0x00000000000001ecL } }, + /* 33 << 224 */ + { { 0x2cd086126ac3eebeL,0xe2163c355db014f3L,0xcc0834dc214e34deL, + 0x67d2cc1775ee3864L,0x804612d5c4b02021L,0xa11ef6c76c805b05L, + 0x898fc69d371dfecfL,0x8e977d1ba62b7df8L,0x00000000000000d9L }, + { 0x037816ccb8e7e3c7L,0x2bc7a1eb52cd66b8L,0xf12d63ac37ef823eL, + 0x42f8b36b29442388L,0x5b1d0dd6caa4d841L,0x21e213cd81986c48L, + 0x492df17ff19ef6e3L,0x8a08cec5a5bac400L,0x0000000000000033L } }, + /* 34 << 224 */ + { { 0x49d545a21757c756L,0x8610764d3c78c7fdL,0x888363b0285ce4b1L, + 0x6afe14c4d52e0e81L,0x612a58397177ffe0L,0x05a4bea49679b41dL, + 0x121bb1c14c519b94L,0x844a95ddf521675cL,0x00000000000000d7L }, + { 0xc6045c5c7288cf7bL,0x79319df2da1016c6L,0x67ef5845055e3ea0L, + 
0x91e7ead7bee71541L,0x285c08d78843f3afL,0xa90f3a94c2089d1fL, + 0x3d0e6d1caa37b132L,0x2f18d93b62fb149bL,0x0000000000000013L } }, + /* 35 << 224 */ + { { 0x9287f41090921a21L,0x0cb3476c2f63ade5L,0x87eea82b66e3e879L, + 0xb3c50d698eaa88f3L,0xe5b64f705624741fL,0x6fcac6de5e6a215eL, + 0xe786b1dd66acd6caL,0x9e478411a5244800L,0x000000000000009cL }, + { 0xf784aa4fa84c388dL,0x801593a26cdc8009L,0xadc07673ff1109a6L, + 0x708a579d42678ae7L,0x2d246455e0d1dc79L,0x42f5c10a232c0027L, + 0xd7adecb1785590d3L,0xeb58cecf139cf371L,0x0000000000000061L } }, + /* 36 << 224 */ + { { 0x5f0ef75a63dbd14fL,0x70883cf607b91b63L,0x2a1a32252f16a270L, + 0x4d2ea66709740ebdL,0x50d5f3687e12f70dL,0x94442fbf06eda6c6L, + 0xff1df80648f80934L,0xb5b52dadc7599220L,0x00000000000000e1L }, + { 0x307f5ffafef02d74L,0x980fc52d49e703feL,0xa1006542c0eb1a94L, + 0x2748d7af25476aa3L,0xcf9eda78370f6f57L,0x56c8f00f6ec2b826L, + 0xc61b847a72375d88L,0xb0833b255e744eb8L,0x00000000000001c2L } }, + /* 37 << 224 */ + { { 0x78124c4f967e4b4eL,0x7ed9ab8d15a66ffdL,0x26af569efc3a3f52L, + 0x0286ff20425d38d8L,0xfec9b67f51e9e240L,0x6d8a01c657256933L, + 0xfc8f705ad2ca6b4fL,0xac3d7833da28a9d3L,0x000000000000008aL }, + { 0xc4371b8ea0924274L,0xe543f30a55a72435L,0x03405ef6afa7c869L, + 0xa44ce2098226a4b6L,0xc0aac941a0f9fc88L,0x3fd69258a497b61bL, + 0xd431b511f80688b3L,0x664b77780a42eda0L,0x000000000000009dL } }, + /* 38 << 224 */ + { { 0xb9d84fc9d0cf69e7L,0x98f1940ea8e34a97L,0xe4bffa94dfc118ddL, + 0x1849a3edfc66b9fcL,0x623db3947287a7dcL,0xde0d7d087019e9ecL, + 0x2331aabf80aa347bL,0x44a365b11b999a3aL,0x000000000000010fL }, + { 0x198f73ac92ae7427L,0xf62dc73002e04aecL,0x0506c93d3088101fL, + 0x945ac732184edd51L,0x57f825fdaf7c6201L,0xeda00ab31c31f295L, + 0xeda9d910f425b2e6L,0xa360259c7cdd8528L,0x0000000000000040L } }, + /* 39 << 224 */ + { { 0x7f0b99ba527dce55L,0x34b0ebfe5507d70eL,0x17cf8c47887de484L, + 0x24a47965c1e5db40L,0x7d00b4a73f930257L,0xb294ecc224daec20L, + 0xe8b20a6d4dcf26d0L,0x3214127cf13fd50cL,0x000000000000015eL }, + { 
0x8cc788b35eb3732fL,0xfd1ae000133d4b75L,0xf9bce1bc69223ceaL, + 0xdb5a3d114511bb55L,0x39a956cd174c7629L,0x8b986a5b97c29420L, + 0xa2a1eb4b38c80915L,0xa61c0e9f0cbb3c1eL,0x000000000000009dL } }, + /* 40 << 224 */ + { { 0x225c059ed015590aL,0xe49bada67483540fL,0x2318feb81b31b7a6L, + 0x2f21ceebe675c20eL,0x3120aaf01c4da767L,0x96ab6834faf8f96aL, + 0x3d4658ec30ea971bL,0xd1ea8938bf6a4e03L,0x00000000000001acL }, + { 0xfd222beee78e187fL,0x56be63dab3c6322bL,0x8d1df732b9be02d7L, + 0xa3df259fc2738beaL,0xafba1c5a5132fe56L,0x3fa503e9e7b27139L, + 0xa27ca7549faf45eeL,0xbfd0a46e69c674caL,0x00000000000001fdL } }, + /* 41 << 224 */ + { { 0xe4addb8a7c626af5L,0x95410e92784c28c3L,0x46cf5d1e41cc58ecL, + 0x84ef22e1ed080326L,0xd2a89dd536622acdL,0xbbce8e6ab278d4ffL, + 0x7e82c968ef05bffcL,0x529507651613c4e7L,0x0000000000000191L }, + { 0x66b1666ebbbb1311L,0x6e7c4d8967c415b7L,0x349f95b4f99b5985L, + 0x1570e3124410e7e1L,0xd580b98a20a06e99L,0x6d57fcf02719178cL, + 0x33adb81f803d3d62L,0x00289e3d44e383ffL,0x000000000000014cL } }, + /* 42 << 224 */ + { { 0x4c6047e27f3de4d2L,0x52a7f1291f69a829L,0xad9795a543bd0bc3L, + 0x5c71a352ab36a628L,0xb208e87f6fe6bd16L,0x8b2cd1443b7ddcaeL, + 0xec7432bb74041659L,0xfb284cccef17cb28L,0x0000000000000032L }, + { 0x046dc83e96a2607fL,0x8134edca02e02d8dL,0xacdfc4551b88ca47L, + 0xd0074a612695bb9aL,0x11987bd8c3074bc3L,0x5438070061264550L, + 0x00b82a987a57f0b8L,0x66c456c5454b48d6L,0x0000000000000168L } }, + /* 43 << 224 */ + { { 0x900944e57151aea8L,0xdbad74a8f5bff8a2L,0x1eafcf55286f204bL, + 0x82458315f3dc6626L,0xd4007ce5a7714adcL,0xf73b313fb5ba570bL, + 0x044331b1151fb782L,0x45880fd4a6e31445L,0x0000000000000167L }, + { 0x42f4af3c14b5e10eL,0x8b5d030c99a6683cL,0x79e437bc5e5633aeL, + 0x2d81f1998b1a40ccL,0xcddd50cbdacea56aL,0x8211a78b39d40f43L, + 0x5191c58618becac1L,0x749d45b75e116bf3L,0x000000000000006aL } }, + /* 44 << 224 */ + { { 0x6b40cb65a890ca97L,0xd7c6159338fc3078L,0xc78089f208c33a15L, + 0x43e1b721021432bcL,0xc865bd2ccbef3f81L,0xbbcbee060108433eL, + 
0x336daf037cbe3b8eL,0xa10111577168018cL,0x000000000000007dL }, + { 0x267f0241c0b153a9L,0xfc22a8b8984c2ad8L,0x6906a223572e597aL, + 0xd964050675f9a034L,0xc9187173533e0c25L,0xd8b9f9f87c94582dL, + 0x66fbb9c58980b28bL,0xf8446492c9daecc8L,0x0000000000000098L } }, + /* 45 << 224 */ + { { 0x2d9ea0622878a241L,0x46de4cda5368642aL,0x4fa9ad358a5243abL, + 0x5ca45b8ff67260e4L,0x50a702ff0b4f3d28L,0x0f7f2095dd1fce0eL, + 0xe417061dba620770L,0x86cae35273b13911L,0x000000000000002dL }, + { 0x8ab3ae4b76e1fc5aL,0x06f704572033cefaL,0xa5bff96c08db98caL, + 0xff04cbaf958d6e43L,0x43eefc8fe3c74fa8L,0x76185b498db92137L, + 0x920ee668bd3c9f38L,0xe1f4f3f2758c856bL,0x00000000000001f8L } }, + /* 46 << 224 */ + { { 0x0f97a466381dc2a6L,0x3113353553f904e9L,0x69dfb20035d8fd47L, + 0x6cfcbaffbdee4042L,0x54a55759ff2c161aL,0xadb79909f50226fcL, + 0xf13ec08be24595a7L,0x39d14bf39ab2b842L,0x0000000000000127L }, + { 0x87045804e123ba29L,0x795dc65abe8c6f6fL,0xbc7dda3badade7d8L, + 0x9f5dae17f9cb4dcdL,0xf1289fa8b261a79cL,0x9e5958fc2330157cL, + 0x20d13331d798dd7cL,0xadd88400a218259dL,0x0000000000000065L } }, + /* 47 << 224 */ + { { 0xd7059bcbd7a1a42cL,0xcb29b62ddd63d4acL,0x2ddfe66885e05419L, + 0x78011be6b23f63caL,0x40eb7f025dfc1c3aL,0xea4802d856a4fd4cL, + 0xb4ec884c80924d5cL,0xd095d9a909fbfe59L,0x0000000000000115L }, + { 0x4b5bb25c529993bdL,0x2be02abeb9b7521cL,0x8c0176b7641b7babL, + 0xf479474a9c6a564aL,0x6dfc0b64f26968abL,0xab5dfefcd5ea697dL, + 0x6f077f0cc6b8cfc0L,0x67e7275df6bfcd48L,0x00000000000001acL } }, + /* 48 << 224 */ + { { 0xe83dfbfb6987a9bcL,0x311372dfe03b29b7L,0x1b88ad50f4176bdcL, + 0x5771946f37733cf5L,0x4a020165d5c0cd18L,0xb8e35a40c7603086L, + 0x3d06b8d0db55f57eL,0x9cdb00cab17b440aL,0x00000000000000fbL }, + { 0x840ec34134bd8619L,0x40b0c10700ab86c3L,0xf478205885c69c9fL, + 0x22e8d92e10d63734L,0x5b8e637420a53cdfL,0xe80c3d14fec8a052L, + 0xb9c0834eede62895L,0xb2ae0eb3de2ef959L,0x00000000000001aaL } }, + /* 49 << 224 */ + { { 0x61c9a15ff1779177L,0xf9aa8a6cc76f1327L,0x913619829bedde20L, + 
0x2649f43e26525601L,0x411daf56f5d6018bL,0xaa5c02480e72e330L, + 0x5d4c0d86d07c778dL,0xf6219f689575ab53L,0x00000000000000d2L }, + { 0x3b3e3119f9e2f91bL,0x842508feb3dd20abL,0x1715d9be576222d8L, + 0x47805f30e4ac2305L,0x7d5a8bff610df90aL,0x82289362f166a8b8L, + 0xb10cd13ca0b00aafL,0xce3012dc2bd95a52L,0x000000000000008eL } }, + /* 50 << 224 */ + { { 0x7f3557744356b52cL,0xe4df74da343a8082L,0x0ad02ea630fd3ccdL, + 0x41c092de81ee408eL,0x5b7314cc0b7af2cfL,0x4f14ea6e18876155L, + 0xee09e162c5bc9211L,0x33b0203b754d1a04L,0x0000000000000119L }, + { 0xbba2396d5a659158L,0xc8bd42a381dc2121L,0x96ca706eebae2b49L, + 0xa47f6a3b27997ddaL,0xea8114cfcecdd7f9L,0xd2c8ebeae936e2e5L, + 0xd7472cb9fb68d089L,0xa23bd4403d346567L,0x0000000000000024L } }, + /* 51 << 224 */ + { { 0x1cb466c1b9c5c233L,0x644e56e7ca17089fL,0x87689df22f23ec6bL, + 0x7fb193160983cadeL,0x3f174e023542b10fL,0x896aa0aa52204547L, + 0x0fac42698f3a3319L,0x16bfb9186ea6d467L,0x00000000000000b8L }, + { 0x9f2675ddc7905198L,0x2377a69ba0bc6a9aL,0x789bb3609de78affL, + 0x3c9e13121dc56711L,0x7e45f51897a01744L,0x57d6f870229ee331L, + 0xf4e661027c84830dL,0xa8a3ff8f92a41beeL,0x00000000000001e7L } }, + /* 52 << 224 */ + { { 0x47a968bae826a120L,0xc0fb676354042559L,0x97dbf43c0b0b45edL, + 0xd11788e1d819d0baL,0x6209a0c57eda726bL,0x65a8225cbb83ca8bL, + 0x1cabddf0a6e5d980L,0x19cc1e75572ce15bL,0x00000000000001f0L }, + { 0x292a9cf080e60532L,0x93ad928f418cbfdcL,0xeefb9eff36c16b79L, + 0x2b2bb7cb6df5235dL,0x06920ab01a3f0e35L,0xafe3ce52c22c596cL, + 0x7e04284c52a95640L,0x2a270b1a7b4789a1L,0x0000000000000121L } }, + /* 53 << 224 */ + { { 0xc6b314c1f4239b73L,0x4e54432f8debd8afL,0x1e45f0025aec3601L, + 0x4b7090c312f51571L,0xdce4e7a4c9c1cea5L,0xe75161b104928b9bL, + 0xa454b9d35dd8b9f9L,0x50ebb50835b06602L,0x000000000000011dL }, + { 0x07a84718985c8551L,0x2563d00d0e100ba7L,0x3f69f4101b90b25bL, + 0x6cf41c9be0ed8b78L,0xb71319ed7d01acacL,0x978d8b39525d0edaL, + 0x064db8e6734ed194L,0x9ea11ecb8d6d9654L,0x00000000000000cbL } }, + /* 54 << 224 */ + { { 
0x2b8b31c4eed9cdf5L,0xe7e616bfcb262a4cL,0x41dae45e4b7d7dd3L, + 0x982a2408caa8c982L,0xffcbce7a3f2a9e6fL,0x50121acccc21ea38L, + 0x4c2cf642e32f6ca0L,0x22f3a5335b36d5a2L,0x00000000000001c9L }, + { 0x1f2ad6eee92b1fe6L,0x6a3bf10db5ef5acbL,0xae6f23edd3503528L, + 0xc2baee0f5ded8513L,0x1f29e1b304c9a105L,0xadd1d742e6b6e61fL, + 0x72dd27a22e532676L,0x9a53e3a5ac0aaa7aL,0x00000000000000a9L } }, + /* 55 << 224 */ + { { 0xe0c8af94cdd6ae96L,0x7a4ea983e2977012L,0x9e749b02365a4de3L, + 0x2ad28ef487969ffaL,0x93dfae73b6015cd4L,0x6a0350fcd291c2bcL, + 0x8899eb4142cbcfd4L,0xbf324b61613604beL,0x0000000000000112L }, + { 0x2a22b916e110f342L,0xe9b1f40520b9a301L,0x7429d456c530c11bL, + 0xc8c6bb369c275befL,0x65e9dba104319247L,0xd3433e680d8f08b4L, + 0xe6d50c0a297ee342L,0xa35cd9c083615fc4L,0x000000000000016cL } }, + /* 56 << 224 */ + { { 0x8439b14cbaa20680L,0x5f16560db2c77c25L,0xe3c19ca9554f95cfL, + 0x9e494f8ec3e722e3L,0x3d4b917f1908e637L,0xbc08600dfe6f4915L, + 0xd730de4026e8d819L,0x28025a154c864edaL,0x0000000000000106L }, + { 0x0fe4b4805cf7d60eL,0x1d9c195fd22286e8L,0x7c30b704daca5bfaL, + 0xbfddf0e2b1185d19L,0x1ddf3bfebfa68c75L,0x95d6947fa94774a2L, + 0x491b041fc28aaaf6L,0xdd984208014c07d3L,0x00000000000001eeL } }, + /* 57 << 224 */ + { { 0xc006f8261c14b152L,0x607b76d8548caad4L,0xc533d8f6644b2fedL, + 0xb5949a09a9cbaf32L,0x980f4e5ee9b82934L,0xb69c4940e0bc902cL, + 0xc483bd7010ceb0afL,0x85a7296f0da99f98L,0x000000000000015dL }, + { 0xae2476dd7693b27cL,0x4fa6dd3599041a13L,0xb3ce6a53c4e4e838L, + 0x4eaf476a9801412bL,0x615d927d421334a8L,0x663ccce26297dbf3L, + 0xe2e7668e3d03a5e1L,0x4d15aa4099993736L,0x0000000000000188L } }, + /* 58 << 224 */ + { { 0xb7639306e80de633L,0x8206add9ee5e527bL,0x2f5e86f4baa789b6L, + 0xbc772de757d25d54L,0x3ce87740b4b2b380L,0xf61acf7ebc08ab4aL, + 0x1b81b706e4c17950L,0x70e16670f97a672bL,0x0000000000000084L }, + { 0xb3c11d39ee5d0d74L,0x57470243c23309bfL,0xc04f806ad970c46cL, + 0x0a91f05641f15577L,0x0888829070ab35d2L,0xb413e83fe301207eL, + 
0x0b17e3648bba92e8L,0xb1b97d4ed7b430e6L,0x000000000000017cL } }, + /* 59 << 224 */ + { { 0x89447b27bd4b54d0L,0x19cf8afd7f8112f3L,0xf8b4a959bf831e34L, + 0x6dd1726a6b244017L,0x5783a070413e1aedL,0x27438461d5a29d55L, + 0x29a921f789e2fbdeL,0x1abdad7e001a5932L,0x0000000000000106L }, + { 0x9c35f7e9947b1ac9L,0xfe5f4c5b086b5e09L,0x12da650c105c5c10L, + 0xd6e60f998dbaa678L,0xd3f99655ca972737L,0xeed2c3bd48d3ad22L, + 0xfa0d06cf42a51f2eL,0xafe258e8c9d10b1eL,0x000000000000007eL } }, + /* 60 << 224 */ + { { 0x920ef3b8f225d101L,0x14533fb672346f7dL,0x8f391dc232eaa7edL, + 0xca99c1fc9cf81d89L,0xbd298ba671bd3ed7L,0x2ee73abe68942dd3L, + 0x2cb7b2009bca1375L,0xad67f957123137f3L,0x00000000000001b8L }, + { 0x838fad634c6ecb7dL,0xb79af8cbb96744e2L,0x7c4190fcd3bee960L, + 0xdb298cdf1740bbdbL,0x801cfc4a655bc379L,0x35869a8d80ce1b99L, + 0x437c60f11004665fL,0xb904ae8df965f967L,0x0000000000000176L } }, + /* 61 << 224 */ + { { 0x9b6cb18391a51175L,0xdd88aec4fb80c7eaL,0x29cb000bb640c84bL, + 0xb391ad58d58c3da3L,0x2671ce7f83dea34bL,0x3daf6c2d490ede19L, + 0x39e152910ec82d99L,0x8c5df3d73928795fL,0x0000000000000124L }, + { 0x00f19b087e411379L,0xd74a2e9ee6aca0bdL,0xac5723e381207f79L, + 0xe888e1b58224168cL,0x9f54500800c0b0efL,0x14e8160dad9e4155L, + 0x8f04a2a28203bc81L,0xef916792d7fb7e1aL,0x000000000000014aL } }, + /* 62 << 224 */ + { { 0x5057d40664da7708L,0xcff34770c7f0fb54L,0x71779c1c72de7d6eL, + 0xe3245536baafe87eL,0x3547d3cc27ee8200L,0x6fd335b373ca8819L, + 0xfe68353f13487c33L,0x3465439099cdd431L,0x00000000000001d5L }, + { 0xb1b27dd4e9031706L,0xce565fc49d72eed0L,0xb48715abb1962bc7L, + 0x849d8b56bae93f30L,0xa3548c4ddb34ac91L,0xffa60fb4985a3543L, + 0xe43b129448a4c7bcL,0xeca115250e74194dL,0x0000000000000021L } }, + /* 63 << 224 */ + { { 0xf1bac1afb0f94217L,0x93744bd640f2ef25L,0xd920ea38f832f1feL, + 0x95ddcf8c13a22b90L,0xaf759f72e0087be3L,0xdd03642be6baf27aL, + 0x012d7445cd0dec66L,0xef35032ab1bfa5cbL,0x00000000000000bbL }, + { 0x980f0d5290448667L,0xf4cca39fef1eead8L,0xe11005d6d02b1b57L, + 
0xff6253f2cc5182aaL,0x75f3838e96a49c6bL,0x49b5a038a5348a1aL, + 0xe2ae0b06dcd79d60L,0x976e296a0a8fa296L,0x0000000000000196L } }, + /* 64 << 224 */ + { { 0x4b67fb4118d0a514L,0x0a9fdb8e32ae28f3L,0x786b00f8c238ebb0L, + 0xf73d53d11cbb7ea0L,0xe3928456988a4366L,0xa108e52a1d62fcebL, + 0xb923f2471d9484f2L,0x9c8dfc0caf781f08L,0x000000000000012dL }, + { 0x6485c32f15c72d41L,0xf7ef7e581a01f229L,0x2dca15d289c87f68L, + 0x91a22920d86c6968L,0x6015a068fe0747cfL,0x429c4d4dd744a2cbL, + 0xd707e9ea43d3b8a7L,0xa03aa304040ed223L,0x0000000000000107L } }, + /* 0 << 231 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 231 */ + { { 0xa9338e3d41b1cb9eL,0xe9173c96c409be61L,0x2b8ea989ba7ea91eL, + 0x5ad99388870edd75L,0x9d3c5a11c2112dafL,0x5a7c3d52851694a8L, + 0x8ecc1d6dfce31234L,0x2aca74ef790d720bL,0x000000000000002cL }, + { 0x66dc50ee20df1f66L,0xe8785c5b3b05ea85L,0x0e28d55e569d94c6L, + 0xacd3c1651a119f91L,0x51910b8e7226e29dL,0x849b5229dc3eadb4L, + 0xc8c44e85a88f3b08L,0x990504945e42bc66L,0x000000000000019bL } }, + /* 2 << 231 */ + { { 0x27068f87578b7a3fL,0x506446f1423a5346L,0xe9d9c75b58dac5e4L, + 0x8ebc86e561b8f695L,0x272ac14e611c51e3L,0x67863a1d4e24fb32L, + 0x270d4fdc02e5b539L,0x96d7b7a9a4186d8dL,0x00000000000000f4L }, + { 0x1a60ef0526627252L,0x31682a839bdc0f5fL,0x52a055ada112be82L, + 0x9c3a88484ecf7367L,0x5235087249e70680L,0x61b6b478b881b9cfL, + 0xbec16c5a887d3c1eL,0x8a8b2b82d598a671L,0x00000000000000efL } }, + /* 3 << 231 */ + { { 0xc493e06c7cfbc9e3L,0x7238a446e82d3bdbL,0xf388c069940bcf25L, + 0xb82837d2da27a485L,0x693111380941d771L,0x93a43f0de8bede22L, + 0xb3578669b3bafb47L,0x44d6e546bc7d5cc6L,0x000000000000010bL }, + { 0x9f703d04c2f5a320L,0x6cf923ae1a4d8cbeL,0xe24663d06bec90d0L, + 0x94870abbb4276e9aL,0xd690b52fecc55c0dL,0xbed5a84236316797L, + 0x8edde42f1e63e3fcL,0x2f6f5311b2a7c97fL,0x00000000000001b6L } }, + /* 4 << 231 */ + { { 0x3dac1eb44cb7b72bL,0xb027bce2e0c7deeeL,0x69032db2e994001bL, + 
0xde2c253a4d22b49aL,0x957977f9bf66a47bL,0xc118eae521ad303dL, + 0x40c78cb818ba4889L,0x09901c2ce69238cbL,0x0000000000000162L }, + { 0x8099814b08bc1ceaL,0x3e74578a38139921L,0x899412a6d9d15fe2L, + 0x973a21ac29446072L,0xe6c4ce99ee7401cbL,0x03a86c736a4c5223L, + 0x6976124e6f3e93cdL,0x074326df34dd40ecL,0x0000000000000065L } }, + /* 5 << 231 */ + { { 0x02f0014275880224L,0x6ee059da7021973cL,0x931f296db3dfe309L, + 0x9ed8b1e9485c0d9bL,0x6f3fc0cb1e018747L,0x9add7a9b697a142fL, + 0xe2ddd8f3e94cc581L,0x666a9b29b6d8d36cL,0x000000000000015bL }, + { 0x1eed14bb24add8dfL,0xfcf71bd2216b86a8L,0x0f87aa7920faa29cL, + 0x7aba4fdd9fce47e7L,0x48902baf0a014042L,0x93e911622f4b7154L, + 0xa39079f6e5f3dae0L,0x6114f249e871766cL,0x0000000000000071L } }, + /* 6 << 231 */ + { { 0x943dbc45cedc41c5L,0xa3889cfc075fbcddL,0xab0a889dbee1ac11L, + 0x9a5319b1d8721711L,0xc4228d8987ecea8eL,0x55cfce64e8221d39L, + 0xd3e9699d39e3e962L,0x8ed1dd020549f4cfL,0x00000000000000baL }, + { 0x1729967dfb5fec20L,0x16246ab37a6208f5L,0x10904e4b5f5af897L, + 0x68c575bb273b4049L,0x973f128b97335687L,0xb696381ed6cb8cd4L, + 0x3ef2ec73d2fba17bL,0xe4c62fde5233aeb4L,0x00000000000000bbL } }, + /* 7 << 231 */ + { { 0x2f171a11b947f8c2L,0x56959046cff17f3fL,0x824fb57f66097883L, + 0xdd07bd642c2c8782L,0x5b17dd22867ee913L,0x9adb1675796b2aedL, + 0x9cd315516326bf4eL,0x5839ce47d98c3c62L,0x000000000000001eL }, + { 0x03323a3fb463977aL,0x95dccccf667a3e88L,0xe3a04d01dea1a6bbL, + 0x427caf8c2c159eb1L,0xbb2dd277ca73e601L,0x833e194b414b5d11L, + 0x6594999ed51e212eL,0x5423b8373e583089L,0x000000000000017cL } }, + /* 8 << 231 */ + { { 0xb685f627dd308ae0L,0x78d327cb5a08f7caL,0x83e55dd1ee74108bL, + 0x906408358d01ffdcL,0x6d7694e81625f9a7L,0x48b92a43d782c8bcL, + 0x61f7b571bf98ee6bL,0x02b2a743e511df25L,0x000000000000003bL }, + { 0x020b09053aea9b6bL,0xc648d06d989114d9L,0xd7861a73ec1e3d92L, + 0x2c0eb0720a0581e5L,0x5be8c4da3fdd93bdL,0xd45049f2897946c7L, + 0x0fa12b386ebb7fbdL,0x7b5391783aca1751L,0x0000000000000001L } }, + /* 9 << 231 */ + { { 
0xc8f1de47497aeb0eL,0xa1535a60dfd637d3L,0xda8089a50d3b3e38L, + 0x9981de1279280d2bL,0xd0259e6c483a9659L,0xb16ad60f1cadaecaL, + 0x150e8c7e335fcb39L,0xba8a6b8c3224d554L,0x0000000000000028L }, + { 0xcdf6bc521dfd36bfL,0xd89e8950109ff325L,0x2bfbb3aa949c6cf7L, + 0x5d155ffeb638dd02L,0x9f778149256af551L,0x6ad7a0913e7b2743L, + 0x4045ec8341939950L,0x6d36b14a36c29ce4L,0x0000000000000009L } }, + /* 10 << 231 */ + { { 0xf61f56901631f481L,0x31cd4a4460057252L,0x11c6764a7f3683edL, + 0x813e617d855abd6dL,0x07dd6c6674bffc99L,0x4eff77903d28a486L, + 0xda809b5a1236cc16L,0xf3a0a984e1b1fa07L,0x0000000000000015L }, + { 0xc1557c02fdbaa820L,0x22f1b155490ace41L,0xaadde80bbb30fe96L, + 0x3ecb3b1e8005fcbcL,0xbfef06d968a1db30L,0xa8e0a6a1a6b85ed3L, + 0xa602e5b52b82ae83L,0x567eabd6fa699305L,0x0000000000000177L } }, + /* 11 << 231 */ + { { 0x714021c9367bc63eL,0xdfe85b6765d131c1L,0xaba75de76eeda148L, + 0x53a1d3775e567a81L,0x2990d626909efc1aL,0x178755cfa636420cL, + 0x9b168b964750968fL,0xdb2cc57e5af133a1L,0x000000000000008bL }, + { 0x58ce08b0e2ffaab5L,0xfd0ff4ad1865bb21L,0x838c66c9bb65b3fbL, + 0x4a4b95229893cf54L,0xf50f0afb0467f480L,0x1380b003555da012L, + 0xb2a7dda45126e11dL,0xc88ac327dd17c592L,0x000000000000018fL } }, + /* 12 << 231 */ + { { 0x9d86d84feae13cfbL,0x6d235a5f994991dfL,0x0b5b3e99352cbf4aL, + 0x9512101aa087ee0eL,0x2ab9fec1900fd50dL,0x48129fe5c8fd91f5L, + 0x69ed1f4d08c5117bL,0x533e83c6e7007bafL,0x00000000000001e1L }, + { 0x10a0c08d2f0fac2dL,0x35a77d2af3e1daa4L,0x447a8e905f5e78a5L, + 0x0f5835b0d3acde30L,0x5486f6b961b9aad8L,0x9be0a9b8ecbfd6b2L, + 0x95a5bd0409452928L,0x4fc3182dea830996L,0x0000000000000125L } }, + /* 13 << 231 */ + { { 0x91c85cd2fc7a969bL,0xb16347ac0b3e36b1L,0x66a36f694a3e8b27L, + 0xb8ef5e3f91cb5d5eL,0x0aace6f26be296f5L,0x4e6d9d9d548235c6L, + 0x9f24f5c8a7a35b0bL,0x3260930390a55e8cL,0x0000000000000048L }, + { 0x57924d7afc9c2b83L,0xfc60aa68e403b1e3L,0xdb73a35932ffed37L, + 0x68d0af3455642effL,0xef0d8368290a4929L,0x7be7c6143930fca3L, + 
0x22ac1195c9f52bc6L,0x074fde05df055539L,0x0000000000000191L } }, + /* 14 << 231 */ + { { 0x9e71f0bb8bff6d2cL,0xf35c6c1367b26058L,0xb8791d9e9fb2cd17L, + 0x834f77fb655344b7L,0xf672749488c6106bL,0x1d51d49a83bf0b92L, + 0x2888ca81b2598cc2L,0x454c00258377dcf4L,0x000000000000000fL }, + { 0x622ab6ebe9046dc1L,0x3594e53e40d5f30eL,0xaf4fd362632372beL, + 0x08601954aa276034L,0xa2369d15a10a69efL,0x9c9ffcf525bf301bL, + 0x7cd516d5dbaf10e3L,0xc24aacc1f35e5f7cL,0x00000000000001f9L } }, + /* 15 << 231 */ + { { 0x518f40a0ba132499L,0x3773088dea2f1aa0L,0x232cfab18b326435L, + 0xa399e58d73900055L,0x0b416d331a80306bL,0xa34d9a80d9f9b09fL, + 0x8f9f4355cb95746eL,0x73a7c35d1ce94cecL,0x00000000000000a6L }, + { 0xf619fafd8b3a57a5L,0x80e70710632fe43fL,0x287b444e581e2becL, + 0x916afe78030be2a7L,0x6ebe236508f41e02L,0xfd7b7641b70e8c84L, + 0x1a131e7bdda3edbaL,0xb8e1748a0677e206L,0x0000000000000110L } }, + /* 16 << 231 */ + { { 0xb5b80c0e82a8845fL,0x6fdd0a0f42fa43cbL,0xebfa4307803cf184L, + 0x1dd85b5b4935a211L,0x2438607c6beecb33L,0xfbbb723e40ea88b4L, + 0xaf5258a6e84db1ebL,0x2c2b8cd9b7ad5e3bL,0x000000000000013eL }, + { 0x313ce4450a092935L,0x9fc84cec8d0efda6L,0x004929dc197593c0L, + 0xebd50bb3fbef27f1L,0x9e71f61938c08b52L,0x590ef00571196231L, + 0xae3c0d2eafc73d42L,0x768bd9624da4a91cL,0x000000000000010cL } }, + /* 17 << 231 */ + { { 0x47c8d3c8ab1c60b6L,0xaf198413d12a659bL,0x73fcfe24bd4d19d2L, + 0x59c7728758032192L,0x48b1aa1f88887224L,0x01e399cd38e96fbeL, + 0xf1a74ef9ab366e8eL,0x5087c7eb4f2103f1L,0x00000000000000e4L }, + { 0x8861cfef7dda9880L,0x92b60a92d734c648L,0xefd3e299ed7977feL, + 0x658f37a6e874dc55L,0x4b3fb1954e7aa888L,0x1c6111122ff97d89L, + 0x25ad74f9f8345cd3L,0x676666b8ef50e0ffL,0x0000000000000157L } }, + /* 18 << 231 */ + { { 0x716cad1e90c8a719L,0x631d6acf882637e0L,0x42fd6468b140d288L, + 0xe8dfb49488b10f5eL,0x5fd3394471d6849dL,0x51459c14c0026192L, + 0x4c72b94fd25c7d5bL,0x157cf7e3c3a7ed7bL,0x0000000000000123L }, + { 0x046419ab6bd8dbe9L,0x4be7f1c837eee4d3L,0x27f5a81aac98ab3aL, + 
0x830a7200bf7071f0L,0xa4dba25c4ebd5b40L,0xa8af3addf94bab5dL, + 0xc5156c404fd3e972L,0x1fe23347c1a472c5L,0x0000000000000096L } }, + /* 19 << 231 */ + { { 0x314e6990933c502eL,0x3eb198b24230fb72L,0xed31a6ac78b0ebd9L, + 0x98ac708cb7cffa58L,0x41e4880762ee91bbL,0xdb80a5fd6ff7ad3fL, + 0x647ab9160c32c160L,0x258ca51ad736fefaL,0x00000000000001c1L }, + { 0x65c312791e5576c9L,0x8393256a7c73070aL,0xdbcbab2ebfd3ad2cL, + 0xb4ef593498b6146dL,0xf3e6d641f150b4ceL,0xb4ca1b92aa5589d8L, + 0xc19f635f440c31ffL,0xf3d5705f5bb1fe31L,0x000000000000012aL } }, + /* 20 << 231 */ + { { 0x96618bec30be228cL,0xdc83ae086b84d4f4L,0x260b2652348c399dL, + 0x7c10ac42e6ac3a94L,0xe3d44aeb367c5edeL,0xe0c1989639ce735eL, + 0xa2bccd0b708b357dL,0x30d6969b489d30b3L,0x0000000000000039L }, + { 0x3e9d894367d235dbL,0xe6ab2b4427ea16dcL,0xd5b60a504c54afbeL, + 0x48f39402c0c645f2L,0x44fb14e9a03ed92aL,0x8a66c28c8ecf7a8fL, + 0xf5ba4cb58d9f6e44L,0x8dd59588edf7d7d3L,0x00000000000000c8L } }, + /* 21 << 231 */ + { { 0xa9f649382296ca7dL,0xd80c7ae06b898494L,0xe041220c5a93cd3aL, + 0x5d42e026f46c53dcL,0x9574fdec710ec9c7L,0x2f2e24b48dbdece9L, + 0x87afdef250244f09L,0x325f4866235858d8L,0x0000000000000003L }, + { 0x13e624ede637e34cL,0xa841d34a17e610a5L,0xf899a907bf2faa12L, + 0x3d28b531ebb38d02L,0x73d1eae320ec138dL,0x6799644ed687d71fL, + 0x51b93047fff6691cL,0xeccdd0242e44e87aL,0x00000000000000f8L } }, + /* 22 << 231 */ + { { 0x0d8a75e7972b1d30L,0x9556f1ee4f05a10bL,0xb0a90e0afe233210L, + 0x5899ed3fe8c3ab51L,0xb3f56019411d224fL,0x2fc2ad6f9a30b1b3L, + 0x66b1a36860c33436L,0xcb5310cde3490548L,0x000000000000011eL }, + { 0x520fb4e711647babL,0x2c5403554e4c5f55L,0x3b4f3cbd58a8e072L, + 0xaac1cf335d4dbcaaL,0xb5ff4b0426ac1c58L,0x456d3fb2cf35e684L, + 0x6c05aac04b1eebaaL,0x3eb4e1447d0debb1L,0x00000000000001fbL } }, + /* 23 << 231 */ + { { 0x9f9f128f83dccaefL,0x285a6d9d3e4d202cL,0x1dce04e0fe2f32abL, + 0xb63ad64b5c47d746L,0xc620a818ca9e5cbaL,0x5599183b102c367cL, + 0xfc28310f1bb11cf3L,0xa518ef8ae7fe07a0L,0x0000000000000125L }, + { 
0x05d6e328aaca5233L,0x3485f1f7c0d36868L,0x2418102af2b53b08L, + 0x2d7820ab3d138665L,0xcfd8d841fa9a106eL,0x70abdc68a83768a0L, + 0x666ec137bd673c42L,0x0f55e404e77d49adL,0x000000000000002eL } }, + /* 24 << 231 */ + { { 0xbb5261dba4cc0ff1L,0xf63383eb52d5233bL,0x8ab1587033aa18a6L, + 0x1779ee6ed6b62ce9L,0x80ef23be8ef0f8e7L,0x9ec1cfb401c71a76L, + 0xb18a364ac7c793f1L,0xffc4c25b753ab994L,0x000000000000015bL }, + { 0x80645523d4ca91f1L,0x7e8594c5f895f3afL,0x5e0d810510b5a95dL, + 0x9a087f09fb4356b9L,0x7784383794c16770L,0x5f6c04ef5837e54aL, + 0xa0803997f3166db1L,0x797335e56b68a586L,0x0000000000000075L } }, + /* 25 << 231 */ + { { 0x8821f20b25ea7faaL,0xea6cdaf038359721L,0xef36817a60f5c37bL, + 0x79058c6589c3b6f8L,0x5f3e57a4f0e9c92aL,0xea23b73ca0eeddf1L, + 0x3ce00ce36e7ca79eL,0xd7301945b21fe070L,0x0000000000000199L }, + { 0x37f5e88ca60f2418L,0x4eab2df52a6caeecL,0x5a02d837c64cc4b6L, + 0x6316cbb4f8a52989L,0x54985f0512b8c43aL,0xaa0b8a9313cc4fa3L, + 0x19b9999e164acd3cL,0x3e99a0388122f26dL,0x000000000000002dL } }, + /* 26 << 231 */ + { { 0x86bb6a2fcf51acf8L,0x8cfb578447a49e34L,0xa931fc6be3b6d1faL, + 0xd960cde64a9d14a8L,0xe4c4116245b21189L,0xe4aeb21abbbeb75eL, + 0xe45db020d9be6508L,0xa001afb4789ad9e8L,0x000000000000002bL }, + { 0x5f754551c271c0d6L,0x4926b87f0543b435L,0x3157188ab3123bbeL, + 0x2a77d4547b3addfcL,0xb1c6000b0ee40ae4L,0x6de2cc2872f6a072L, + 0x64fd97f7f1dd26b5L,0xd43467c38f229a39L,0x0000000000000041L } }, + /* 27 << 231 */ + { { 0xd00fdf12e6fa0314L,0xd447ca319be251fcL,0x27bc3f9abfd2b9e3L, + 0xda101c4d5b716609L,0x109ee4ee24e9876aL,0xf8887370d3fb25a8L, + 0x0491476e95f97cceL,0x6b7b4ebf8296d338L,0x00000000000001a2L }, + { 0x52f3da4126bc2048L,0x0be18a87f37ed665L,0x83c03d105741e841L, + 0x9477ee3354bbb810L,0x0ae3b6318e10be02L,0x13581e1a7ec3bc35L, + 0x04ed1310294c0925L,0x8b6b71746718a4e9L,0x00000000000001baL } }, + /* 28 << 231 */ + { { 0x5717fd84a98f0156L,0x968337919a823677L,0xebe83d77a05e6d66L, + 0x39e669f95c1d28c5L,0x0f577b1f0bf89425L,0xb3c2f12c9dfb7d73L, + 
0xe9ffe3e1987f449fL,0x14f8a1d9a57e7e4aL,0x000000000000017dL }, + { 0x4e980189dfe5b34cL,0xa5561e4e202cd41bL,0xf6df35fbfb0d13d1L, + 0x7f002e1707e1500cL,0x9324d1d0ca1bf4b4L,0xd3112b6a3e7adfa3L, + 0x65b4c876e04902b5L,0x4ebbc331bb04398fL,0x00000000000000e4L } }, + /* 29 << 231 */ + { { 0x6863329b21c34854L,0x3ecb8aa8e4cc2e73L,0xf0e9fa087d257dbdL, + 0x41ae92e4882c3c29L,0xd637e8e96de51602L,0x60745807093e1831L, + 0x4b79a97b71922332L,0xb6903e0875e68278L,0x0000000000000157L }, + { 0x4c8fab3b704b19d2L,0x550c2a56b34c0d32L,0x67347887bbcc5f6eL, + 0x63946f225a9bebe1L,0xf0c5304d01c39700L,0xea8d8d6872deb3c5L, + 0xd8369ef1c2a03da6L,0xaa3fd01140135bcdL,0x0000000000000025L } }, + /* 30 << 231 */ + { { 0x7b5e2027b631ab3cL,0xa4ab4559f2c42e69L,0xd8a3adc1b1670c20L, + 0xa2240db40abc7c17L,0x104ae5cff054f41fL,0x5c08b0bf128df5f1L, + 0x651f1be1877d5d9eL,0xafb7992265e13f7fL,0x000000000000015aL }, + { 0x140370ee632ac61aL,0x7cc393058a188e1eL,0x21a228d9f6eac7d0L, + 0x8a6f15613fd21c04L,0x64418721c35936a2L,0xf9f06b28f3e01680L, + 0x7efe7361fdeace5bL,0x6a7264a96e30226aL,0x0000000000000017L } }, + /* 31 << 231 */ + { { 0x3f5aafe0bddc0937L,0xd2d0dd4a79f1b07eL,0x56e48aadca9cadcdL, + 0x44f035c701af31d7L,0xf288f9b9baf4a56aL,0x8a486f9d65fc0205L, + 0x0047f8d1941bb761L,0xebdae91b757a98e1L,0x00000000000000c1L }, + { 0x1bb72fdd52fd742aL,0x5073de5a4fd8ea03L,0x47d8be4faa00a9ccL, + 0x9c262879e45ce75eL,0xbd62c1d3f0a9c65aL,0xbb434e5b8c15c467L, + 0x611816bfbd22def7L,0x8aa8cf5383778ac8L,0x000000000000016fL } }, + /* 32 << 231 */ + { { 0xe1f0c1aefcfefdb2L,0xb5bc33d344930c59L,0x253d5f4964d2c6e0L, + 0x25efddede316ba1fL,0xeb3ab21fe30c3a34L,0x797a1ad6cfa52a9dL, + 0x6a3ca2e0a81ba831L,0x27bb664ce112dd07L,0x000000000000007eL }, + { 0x91fa1e121961aa16L,0x381b2d7167dd7513L,0xbae8635144db8a51L, + 0x4cc1915fbc31c1cdL,0xf79f40598ec6e9e1L,0xb142b6eca8929a72L, + 0x4629c6488ed71fb2L,0x9f79bccfbd52faa7L,0x0000000000000029L } }, + /* 33 << 231 */ + { { 0xa550f18a6cc76e30L,0x22627d8f61df412dL,0x1d0dc5d69ca28ee4L, + 
0xababc480beafb2bbL,0xb0a7d6926fbfd3a5L,0xf1a64e38a521f4e5L, + 0x2ee40ff432ada17dL,0xad661a64254597f9L,0x00000000000001a4L }, + { 0xc6d509050b6c9498L,0x0853d7fe81c37fddL,0x41fec397945c0241L, + 0x9d94a286648033d3L,0xa977521cd11a039fL,0x9d70047366f73195L, + 0xf3925c9498f1d8a5L,0xe03e6214f2182e10L,0x00000000000001c4L } }, + /* 34 << 231 */ + { { 0x8a13a69fe9708f78L,0x127338fe9439033cL,0x1a55f4be1c0ae257L, + 0x687f3349da266c1dL,0xe84a7b031264780dL,0xdc796cef48a52f85L, + 0xc0fd19a2aa4f5258L,0x72b52c44c6e60b07L,0x0000000000000069L }, + { 0x754a8552ada70b8bL,0x7384869e2ab69b35L,0x1eb9a71bb23f4955L, + 0x699c11f83f8656e9L,0xacab43310b640cf6L,0xee38be2d21b0d6e8L, + 0x9d57cbd55499e2e9L,0xb90d2b06448f08faL,0x00000000000000d3L } }, + /* 35 << 231 */ + { { 0xbb9af2d1f25aa87fL,0x633f9eaf006ff7eaL,0x32cc47c05d875219L, + 0x9245422d65a7fed1L,0x7b6095ee882e7479L,0xb0921b30038514acL, + 0x599167f588ba84a2L,0xf15c510b4de07284L,0x00000000000000d3L }, + { 0xb152f53babf4a045L,0x019016bf296dd5f3L,0x3eafdc360650071fL, + 0x4843e9fc756a424fL,0x3f90b62472a46c6dL,0xfd2948483854ef62L, + 0x139dc631ee3f68a4L,0xdc88b9b1f8d64497L,0x0000000000000044L } }, + /* 36 << 231 */ + { { 0x9ef8fc68b3f02f00L,0x4eaaefc11b404dfdL,0xb1a6864fa257718eL, + 0xab09391e2d8d0067L,0xc38aa83ce8204af9L,0xe1bfb2b08c822776L, + 0xe2cf738131273df4L,0xb2d517e40cd8fa47L,0x00000000000001f1L }, + { 0xe5e30566969d3c42L,0x14fdf845f7d7eddcL,0x7eb18e1c25c3adf3L, + 0x32abb9b1be777247L,0x3b129a9caeb73f86L,0x112ad378338cedd3L, + 0x885cc5a12602d011L,0x4fc5afd3bc5586fbL,0x0000000000000066L } }, + /* 37 << 231 */ + { { 0xcc800639d6008057L,0x9c266f39bbc8d8f7L,0x95ceb2f0607153c1L, + 0x45f0801babb7937fL,0x3642f3177d002d68L,0x3c72feaeec4c17bbL, + 0x86921b33df45d803L,0x2166557618a187dcL,0x0000000000000116L }, + { 0x16fc57bf455df4c8L,0x6a3f8e24a0a801ddL,0x44f80fcbe3f46c99L, + 0xb6a005d8815493ffL,0x9fb7e1ddb10c1c29L,0xd427ba4473b5524fL, + 0x8b46a23436db93d5L,0xa2c3267a65278f1cL,0x00000000000000b2L } }, + /* 38 << 231 */ + { { 
0xcd0681f765c96952L,0xd11c084db226a120L,0x6961cff3a518daf4L, + 0x72d85fbfaaa2d432L,0xbb0bfa37d8029adfL,0x8b307974f3e263afL, + 0x1e78acfe4357cbf0L,0x1b24c3c478faf796L,0x00000000000000beL }, + { 0x29e359eb15c49c42L,0x2a0e85c6bd5142a0L,0x2ca4919c8f74c2f3L, + 0xc2c0522dab7dcd71L,0x78448d5f2687cfe8L,0xd7fed0780bc1b515L, + 0x21d8acbbc78392e9L,0xc825030ceddd3383L,0x00000000000001b5L } }, + /* 39 << 231 */ + { { 0x6ded17c668888e6fL,0x7d04fbec48d7a438L,0xc95939f56720bd0fL, + 0x32116dca5b339abaL,0x9868395c78b10c61L,0x9a631ebf64f83fe3L, + 0x353adbd78b655864L,0x780ed89288891331L,0x000000000000014fL }, + { 0xa1034f139209f475L,0xc3119769ebdacdb4L,0x5e46e5b81adcc433L, + 0xb13ab568c9ea84d3L,0x65af681d3d982349L,0x495774cdb9fe5358L, + 0xc4f40572e4dea0dfL,0x64ddade9d672b677L,0x00000000000000e2L } }, + /* 40 << 231 */ + { { 0xdfe1e6afd3bc4454L,0x940641a79e92d2f3L,0x9ad218730e1c2fa0L, + 0xb6e06bc5a411d441L,0xb54c27d838bf29f0L,0x7dacafceb14f35d2L, + 0xbfe2b09082b7bc07L,0x868699ab8d3eeed0L,0x0000000000000072L }, + { 0xbb74a259b8f76318L,0x3ed5995df91b3df3L,0xa4a97a19ac6557b1L, + 0x07255a2568cf70d0L,0xb28d48077f106380L,0xef6b891b7375c430L, + 0xf638f42c90c29af5L,0xdf57cc5407c206daL,0x0000000000000088L } }, + /* 41 << 231 */ + { { 0xcce61ebaa95f4034L,0xc03193858d3934cbL,0x48125313045ecf08L, + 0xa5293e9e00a86a56L,0x9685609ce4739621L,0xb4763b6e45a8cdacL, + 0x0f7efc7417e70b13L,0x3fa90ba2b41aaabcL,0x0000000000000088L }, + { 0x38849e404a869a92L,0xbe5d13dcf392f0c4L,0x564182b6a7a77b35L, + 0xad96f8613b51e934L,0xcda67468a8944a6dL,0xc3a4639791afc06eL, + 0x46517e758aa12264L,0x600044e225aa0d28L,0x000000000000019fL } }, + /* 42 << 231 */ + { { 0xac0f2e82bf2a6824L,0xe4a045edfde799c0L,0xf4bedaf812796db6L, + 0x852692d02f95b690L,0x5991cfd95d3f8741L,0x91cffa0d40b8c83dL, + 0x26c9dc52a566930aL,0x3dcf171f8c1fd3deL,0x000000000000001aL }, + { 0x977041071ef70b29L,0x3419dd6b356da0a7L,0x69a6690c3f4259f7L, + 0x643596f23f596c22L,0xdfa2ac5495452620L,0xa98ed767c7c2b5cfL, + 
0x78d44543a1b281f8L,0x2d6f804fb1800c0bL,0x000000000000007cL } }, + /* 43 << 231 */ + { { 0xd820a109f48d85ccL,0xacacc587ae63a21aL,0x64e3da09e0beb46aL, + 0xa5020c60fca415a6L,0xb1ce6feb30ac9de0L,0x796e8d8ea918a87fL, + 0x14d8b838e24fc12dL,0x50543fda5a2748e6L,0x000000000000007cL }, + { 0x998570177a2fdb75L,0xae937494f7002ebcL,0x99906c28e38e1a23L, + 0xc922a7b49285a1c9L,0x3783412560208b56L,0xfddcb5db55aa5343L, + 0x7e1aa3e079405671L,0x717994d9b6c5afaeL,0x00000000000001ccL } }, + /* 44 << 231 */ + { { 0x3a6e35396a057b5dL,0x96bfc6907338fb26L,0x9dd6b30a6ec9a962L, + 0x13980a46d62386f5L,0xe2efd90ec9e88bceL,0x2e952aea56736496L, + 0x0f711da87eeed705L,0x25c0012c4f696d10L,0x00000000000001ccL }, + { 0xd4f64d9daaecfe2aL,0x77d4ff6c04f3c02dL,0x6dfe2746f2d2d351L, + 0xbe133c6bd547c567L,0xe3eefd95ed234f60L,0x5de33b9d0cbf5f2eL, + 0x563fbcfd11ecd741L,0x4dd1ff2449ec76f9L,0x0000000000000166L } }, + /* 45 << 231 */ + { { 0x7f90db705dc505baL,0x0ed287e682dff023L,0xdc1e823736006439L, + 0x45f7189e5d5fbcc5L,0xb0519882a90fa834L,0x88e1b59f135e2379L, + 0x5f22a5ed447a0e94L,0x12230205384b1b44L,0x000000000000006aL }, + { 0x4c6df3168a6a5944L,0xdd99a4c395811e9aL,0xdc18aba483642434L, + 0xb5d321bda71f9f25L,0x338aadb635fc1540L,0xf7d5d16c9c5d076fL, + 0xe9aed76046537780L,0x0b88e00e065f90bcL,0x0000000000000050L } }, + /* 46 << 231 */ + { { 0x5eba88dab51a448cL,0xc59140a1a3bf6be4L,0xfcb188c338515c47L, + 0x5d212b1bf85097c4L,0x0a031432c37af0c8L,0x1434b11f8a69ea4fL, + 0xf196646e16e47ba2L,0x49acc62d4ce5d965L,0x0000000000000062L }, + { 0x63725ac292f56298L,0x18022c719412d3b4L,0x992db0a6db2b50a3L, + 0xc57263c1c99dae3fL,0xa5cf2e9592d1d97aL,0xb5f93d02b730a5aaL, + 0x500efdb45dff5370L,0x30e83357974821d1L,0x0000000000000198L } }, + /* 47 << 231 */ + { { 0x8e008d522c941ea0L,0x59156f3bb3ec2760L,0xf8e5257f72bd1775L, + 0xfeb8a6caab104dd1L,0x0aa7934f1e24e935L,0x082a542f75b78c22L, + 0x08a13d3ac08f5967L,0xafcdee9dbd0f2e83L,0x00000000000000b5L }, + { 0x7d14271c4b5bc3a0L,0x0aef85c6d39586e5L,0xd92fb2fd460c3ea9L, + 
0x17c4554cbee0de36L,0x80e5aba407dd1df4L,0x23e588707cb3ce68L, + 0xae5fe85678e22f39L,0x8cbf32748b9cde6dL,0x00000000000001aeL } }, + /* 48 << 231 */ + { { 0xf4d18a133b6da0e9L,0x10102e98bbaeb6f3L,0xb82cf999e6f414ebL, + 0x5a2d09f201420725L,0x51ef37070d541392L,0x044404a7a4b6cbf7L, + 0x98b51c307f783763L,0x154e7643eaa19df9L,0x000000000000012aL }, + { 0xbba4d0924bea27e5L,0xfba7ca7dfc03da36L,0x4399448fa62fbccbL, + 0xcf7863be93fbb08cL,0x41954ccdf35720c1L,0x6c37e6594ed8dfc6L, + 0xfd3d7cc05aa73a82L,0x616281f077f1e04cL,0x00000000000001e1L } }, + /* 49 << 231 */ + { { 0xb8843a3baecb9c56L,0x471c521f71eb02e7L,0x2646f42f1978dff2L, + 0xd1840b68db9b06ccL,0xf1c565cdd2eaefe7L,0xac478d82e5aac498L, + 0xbb81c5731219236fL,0x9cef590eaf3ec7ebL,0x00000000000000fbL }, + { 0x00fa48fd12839c79L,0xbe6930b2f9ba96f6L,0xca60400a52dbbdc6L, + 0x97cdebbba58fbab3L,0x6cd668db0120fba7L,0x030f7cb1b6282babL, + 0xf6a0f50c8096a500L,0x13f0031822365ca4L,0x0000000000000137L } }, + /* 50 << 231 */ + { { 0xd3e0a88bc7a47d95L,0x9c63dfdd73d59495L,0x02515679ddfa7285L, + 0x719c60b134a9c1aaL,0xf7ed64522d297d47L,0x018ba27bb3864320L, + 0xf0bbeb08c51f9896L,0x55977ff5c2943b8cL,0x0000000000000106L }, + { 0xe29e2e6d9d310344L,0xa73c141821499e9fL,0x37f93b334f2b6177L, + 0x1a953d005b41fd72L,0x079596887fc4c74bL,0x51fa8d6a18b54a16L, + 0x8a2f0b23d73b8d7dL,0x81dcdf27c35d31feL,0x00000000000000bcL } }, + /* 51 << 231 */ + { { 0x86a623829fd8b32aL,0xabdfdda101d41983L,0xa53f4563528a7673L, + 0xab916c12bedacf84L,0x3dc44fc48b14352dL,0x993c3213260d4c30L, + 0xc539686f7e662556L,0xf1138c58b39a1a90L,0x00000000000001a0L }, + { 0xcad7199577e5eb4eL,0xaa31beaacc030f64L,0x0f60b8841f7cfe77L, + 0xa420cb1721b8d4f6L,0xd09feccb7de24d35L,0x63320b49b3473f35L, + 0xb5bcc3e10bfbe930L,0x57a963500fedd838L,0x00000000000001ccL } }, + /* 52 << 231 */ + { { 0x00ed7a23a6fdb2c9L,0x1409a2b34b8ba58aL,0x4851489b0a6a87eaL, + 0x66659b7f33733bf3L,0xda3870049ec4876fL,0x35ad268a1554dc93L, + 0x8bf6bbf30c00d57eL,0x32a2935c06b48eaaL,0x00000000000001e1L }, + { 
0xe212ae20816d8641L,0x8cebe342575da6b9L,0xf2bd08277c7ccd5aL, + 0x7a418ca5b8c2e3c6L,0x2348a14f2994a6fdL,0x5a31412a5490a371L, + 0x73f8d42f36fd59c9L,0x161fdd18a541ed5dL,0x0000000000000038L } }, + /* 53 << 231 */ + { { 0x300696f3057c5ceaL,0xcad8a2c50df8356aL,0xfc4cb7fd7c706e7eL, + 0x60e7d69ebe6a669dL,0x40003669ef923169L,0x5930b76457070b74L, + 0x3ef2e5f99cff0d14L,0x7ade11730db36e1eL,0x00000000000001e5L }, + { 0xa0705ba5db6d6631L,0xb1a50dd1e4903c95L,0xa8b727c765e44762L, + 0x07f6f4c60825fd15L,0x33baf983444de623L,0x655caaaca138402dL, + 0xe0cbfea729652452L,0xa99c262e681e5a6cL,0x00000000000001d6L } }, + /* 54 << 231 */ + { { 0x3132e6ce76fc9e0bL,0xcebc5afbd0dd32bcL,0x2c75ba8753bcc7f9L, + 0x8cc874cd8ab343c3L,0xd2021ec20024924bL,0x2cda2074c5678891L, + 0xd724916957e36dcbL,0x7097b4f7611442f7L,0x000000000000002dL }, + { 0x6e6cea215db4d265L,0x7257f2934bba329dL,0xaa12a1bb9bf3c783L, + 0x0702399538239dc4L,0xb4d22bdc73524369L,0x204186e3b3252770L, + 0xe61c9fd037080fbcL,0x9c9a7d157396cc53L,0x000000000000010dL } }, + /* 55 << 231 */ + { { 0x4d6356cea8b6ae97L,0x55e296caebe17bcaL,0xfb9ed8b353d6b21aL, + 0xedf72d6ada093a8eL,0xfa81761c24490122L,0x97e3e7c62f808d4fL, + 0xad75f995aa0130a7L,0x641a63cfee06c9b0L,0x0000000000000083L }, + { 0x309921eba0884ac4L,0x1cea2e3f6c788b29L,0xb412c4919295fc7aL, + 0x9eb836cabbe43b7cL,0xeffb3ae33c591e1bL,0x825cb6d838f93652L, + 0xcef6276e83301934L,0xcd857684c28e8b6dL,0x0000000000000195L } }, + /* 56 << 231 */ + { { 0xe5755448cd5974dcL,0xdc4336085d6bc190L,0x291521f7431ba450L, + 0x31e71c894d80164bL,0xb42bb11ca6ed9fc5L,0x3a0966fead792be1L, + 0x7f75f881469b8352L,0x38ef2c56448076a9L,0x00000000000000a2L }, + { 0xac95a77d7d743f9fL,0x06026129f3465546L,0xd56bee30e9fc9605L, + 0xfad39345bbcc7494L,0x2b178c653f526356L,0x22418748c661ca67L, + 0xeb53cf749b79aeccL,0xe7c48bdce76a53f4L,0x0000000000000092L } }, + /* 57 << 231 */ + { { 0x781ddc8410eb4bceL,0xfe20bc22689f566dL,0xabf9614b250b11c2L, + 0x2abc1259fc25b7b2L,0xeb9d15476817ae1dL,0x9e614c5fc00482a9L, + 
0x284a7be697aa02e3L,0xd2786becb912f11aL,0x000000000000006bL }, + { 0x3e5b8b80f712bff4L,0x2b276fe8615a7bb8L,0xb7e4a47741667babL, + 0xdbf07505785e403bL,0xdafda3048e5d87d1L,0x651fa13f2485f1e7L, + 0x0e96561c41fa8c33L,0x9a964b6afc9533c6L,0x00000000000001f6L } }, + /* 58 << 231 */ + { { 0x5efb8a35a9a4aefeL,0x69f838d2ce792292L,0x56e06b1041f75d79L, + 0x39c0f9fb6db28a88L,0xdc03a5303f58d36bL,0xc80062fdc6689d55L, + 0xcd52297236e0a293L,0xa0179bb20ffb115aL,0x000000000000000dL }, + { 0xab0bb76d856d0d54L,0x2513146539f307faL,0x4bedb07820813767L, + 0xf9a1263f59928c34L,0xefe5bdc4909d6dd5L,0x5bec7330c4f8aaecL, + 0x434209fc1c5c1187L,0xc6f261bcb3b2946cL,0x000000000000015fL } }, + /* 59 << 231 */ + { { 0xb939ba08271eac87L,0x01b74a5f96572526L,0x0128e1a8d003e6caL, + 0x0acc7456cdde9dc4L,0x645651efcd70a637L,0xe19b922266eff529L, + 0x12e19d5d5e40a325L,0xe0ba91f712d0572cL,0x0000000000000009L }, + { 0x1b2ebb6600213e90L,0x0f796868e8f5a418L,0x460b815804d5336cL, + 0xef5b776a8f3acd2fL,0x2d61e4f233112586L,0x13cb818d4d8859ebL, + 0xae3177cd776f15c5L,0x9ce64970a87d1497L,0x00000000000000b6L } }, + /* 60 << 231 */ + { { 0x9cb870731e4539afL,0x9478d2def6271c13L,0x090c027268d3482eL, + 0x8c977ce0f731a796L,0x13f5710bdc318bd2L,0x2dae989a9a151ed0L, + 0x598f15eed94e7992L,0x72aafc8f4d65daf0L,0x000000000000012bL }, + { 0x4ad64f6d702eb8f6L,0x2151ae34a1ef79d5L,0x98837c0e3ce0c875L, + 0xd63615b6a2e6eb4eL,0x447c6fc3b7d9edb7L,0x67f864f5a86179a6L, + 0x1e980d0ce354cae7L,0x983691a48c4e9674L,0x00000000000000b8L } }, + /* 61 << 231 */ + { { 0x81446ce047b083ddL,0xeb0eb48ded4164dbL,0x92159ee968b817beL, + 0xfb501e7cb43bc0efL,0x7486c8f67ac944b2L,0xd091b2f3d63a9c1cL, + 0xd80fde4c25e0d296L,0xc22410b3ba3d2bb5L,0x0000000000000042L }, + { 0x547f85ccae50848bL,0xaa2af55884859461L,0xdbdd0e1a20f810acL, + 0xe6f7bb90536880ebL,0xcd47da15f32226b1L,0x8a5bf84843885abeL, + 0x0a95beffe5ade276L,0x49f09071102f85efL,0x00000000000001c8L } }, + /* 62 << 231 */ + { { 0xe16cc278d16c8677L,0xf0f1c0291aa4823aL,0xf235fc167c6b2ef4L, + 
0x854561cc58128c62L,0xb9553b64017d2fd0L,0x8f733f2883213fa5L, + 0x52ededb79f93720dL,0xd7f0a1cf64f4fb42L,0x0000000000000070L }, + { 0x23d01627ac7cc128L,0xbe2bb56afa287351L,0x961cade267bd39eaL, + 0x1ab8cc062eaf0e26L,0xb46886315db2972dL,0x6e48cee137868d66L, + 0x37856b801193521cL,0xac3f87662ddd16edL,0x00000000000001b5L } }, + /* 63 << 231 */ + { { 0x90c3310c6550603cL,0x368ddcb260508b36L,0x22724f188d23847fL, + 0xe486e47c49eb4fe7L,0x4fe67da23edc4892L,0x437b4cda2c8767dbL, + 0x61352775146e8e6fL,0xfc0a61384acc18f9L,0x0000000000000043L }, + { 0x2faf71994c5f4459L,0xc82c0ac392e4b40aL,0x0044de2896e9663fL, + 0xbf301ebfb957c578L,0xa25deea70d544928L,0xa78c79201fd91e0aL, + 0x0e8f9087ba045978L,0x33fe0d7b7b01bdf8L,0x0000000000000142L } }, + /* 64 << 231 */ + { { 0x5a6db3a8e35ab524L,0x2e6954315117b9cdL,0xd81f6f6417217cb3L, + 0xb4296467910f3354L,0x7249dcfb5a83e39dL,0x7b6df96f8c87a277L, + 0xbc8ccd9fcfb09d4cL,0xb644e4e35f67508dL,0x0000000000000174L }, + { 0x1a228c95246be9d7L,0xd9b7805b669d44f0L,0x7c240e63f5a7d167L, + 0x863012f56bd7f19bL,0x1cf3d77e3a9321beL,0x407dcfd2782b061fL, + 0xdbccc9df881439f7L,0x4a3a07a3116d1beeL,0x0000000000000198L } }, + /* 0 << 238 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 238 */ + { { 0x9ab5fe51e870201bL,0x181be214428dcc9dL,0xea30361a335ab6fcL, + 0x8f43702a3af87eceL,0xa46b867147df3e87L,0x4b2fdc3fbb02e25bL, + 0x72e3bc8e68e4d43eL,0x854ae7242cf96610L,0x0000000000000075L }, + { 0x56ecf90bfeb21bf5L,0x75d38316e555d52aL,0xfaea840084cbcf08L, + 0xbd4a0672b5126460L,0x33c3dd8fee751f85L,0x4d658970d8fd6a67L, + 0x49e1041a439f0c52L,0x0480366fbd63268fL,0x0000000000000101L } }, + /* 2 << 238 */ + { { 0xcc24e895e1756b7aL,0x3874a79a3c09615bL,0x30e2489d9014ffc4L, + 0xedfa2851e9070c97L,0x69113ac763017e47L,0x2dbd4d59ee9161c5L, + 0xbd93abe740c20e53L,0x2eb660e491ccc0f9L,0x00000000000001f5L }, + { 0x1363f45a719c9f85L,0x32dfb30ea43cd38eL,0x35933e9a0b75d1ebL, + 
0xaf1fc78775aebc26L,0xb444f393a27240c4L,0xdb46c96df7592075L, + 0x59c26ad852510ad2L,0x163890bf029bb7a0L,0x00000000000000a6L } }, + /* 3 << 238 */ + { { 0x35e461c73f298075L,0xdabce92b476fd938L,0x1a928e413e13910bL, + 0xb2e8f656cc7dd6cdL,0x8a8aebfa4d4cd65aL,0x23cc2419d684d87bL, + 0x6e96b40ba93085c3L,0x303b86411f9c3babL,0x00000000000001f6L }, + { 0xe66619d05a55180bL,0xab397f7d3be53831L,0x103c38d48f88fdb7L, + 0x7e6d67dfadf9520eL,0xc93512456e517e33L,0xd8596b94a69929b8L, + 0x9611f83a43dd1ff0L,0x46568571d30d5f6cL,0x000000000000006dL } }, + /* 4 << 238 */ + { { 0x0f8341fe21dd6e7aL,0x5c106f8eea93e5c1L,0x8e2ad8e9ad0114bcL, + 0x277ef823f059d291L,0xed4a28e5dc50fd78L,0xa071965189cdf20dL, + 0xe393b2641fde162cL,0xd383b86d30a5b16dL,0x0000000000000041L }, + { 0x04b8115bad6dbeafL,0x7d7f0bb9a996519bL,0xab54d4c89c397c91L, + 0x1750b97d77a6a3e3L,0x1637909b14554321L,0x7311da687a5aeea3L, + 0x1f91a0fd52130453L,0x4338b3d686753f28L,0x000000000000014cL } }, + /* 5 << 238 */ + { { 0x555a5ed3e41bdc8fL,0xa4c7146e378bf6c2L,0x38ec797a397ff807L, + 0x0a90aceb5054b6b5L,0x51c4fbf35013868dL,0xd02e806b4c84f3efL, + 0xe6b25f2947687bd6L,0x9c80d5336dcd81fcL,0x000000000000003dL }, + { 0xa804c672bf2ef9aeL,0x4a9ee6cf0608b597L,0x2e9648af9b9c23c3L, + 0x4b4c51d97c465908L,0x742376d23f199fb7L,0x2c2466d04892a894L, + 0x65b4a926f76de379L,0xe991d330e599d231L,0x000000000000018dL } }, + /* 6 << 238 */ + { { 0x524e9cb4da163afbL,0x8a0e4a8f228fb3bbL,0xf664c14b45960ee1L, + 0xe6eea91bc51f2265L,0xbc9515293ce6bbedL,0x1dcb0c979f66a923L, + 0x3c1dcaeffa6e34abL,0x8566db771ec37478L,0x00000000000000deL }, + { 0xbabc8a979886e4daL,0x7be9df692fc58670L,0xfecd51cafbbd20c7L, + 0x3d9edb6c856ca68fL,0x842d8464e1a1b32aL,0x7bb8fb391fab27d8L, + 0xc7a7b8614b4c1060L,0x942bd576d69619c6L,0x00000000000000e9L } }, + /* 7 << 238 */ + { { 0x6870abbb49d18a7eL,0xeca5915c9e4dd9d3L,0x394f185c0196125eL, + 0x5b1885351bb56e6bL,0xcdaf07e8e1299083L,0xca4421e89e734760L, + 0x4288c2236376839aL,0x06e9e6aac7f2476dL,0x0000000000000043L }, + { 
0x83c257a2e91a87cbL,0x37d3a58193ba96d1L,0x23c753bf4ea5fd63L, + 0x202f819864a1df10L,0x97a5b3dc5b6d5dc4L,0xb3262185add5915dL, + 0x37a728f3055d11a8L,0x1110f66b8d850652L,0x0000000000000028L } }, + /* 8 << 238 */ + { { 0x99a1daa94cf92b1fL,0xb08a7bdecf013530L,0xcbe6c7bc1576e51bL, + 0x9f88c296e6c13e9fL,0x13fdc9556370791cL,0xadefac0da42b1973L, + 0xe69418a412cb2ffcL,0xaf05ce9762eae4e0L,0x000000000000005bL }, + { 0xadb36e6565e89a12L,0xde7fee5853131bd7L,0x7335c5cbabffa9c1L, + 0xabf94bc392947061L,0x11d2e02b6109bc5bL,0xfc1fc1d28a097cdaL, + 0x4512fbb6f3d5db21L,0xd40e8637336a3d5fL,0x000000000000013bL } }, + /* 9 << 238 */ + { { 0xb927982ec0e8a1abL,0x478a75ff53dc6cd9L,0x45e6200fbe786ed0L, + 0xee2c3a3699401156L,0x042c35b2176e07e7L,0xc09e54deb073382aL, + 0xb23b8397c14ed4c6L,0x296c490bcedd676cL,0x0000000000000181L }, + { 0xab1a53120317d2e2L,0x63ba4b47c5b44b41L,0x2c8a05a1a006a95dL, + 0x4c3b5bebc7f285b8L,0x14371a425d385fdbL,0x465ae20ee294b472L, + 0xd3ec2abb5edaa3f4L,0x72641b9f580e0a46L,0x0000000000000078L } }, + /* 10 << 238 */ + { { 0x005ce3f7d14c0264L,0x8ef018dc6b7d372fL,0xad9b3a6ec20ea32fL, + 0x860938b2c327793dL,0x6c1669174b6867f4L,0x8d96f5fa19062b2eL, + 0x484a2deb22be3819L,0xcad084fb965dec51L,0x000000000000004dL }, + { 0xd3afa07be469f94eL,0x58e526d7762e7cf6L,0xec8641d0a6338b20L, + 0x7694b364330d3bfdL,0xf5d8cf6f4193c78fL,0x7395355f11616b29L, + 0xe29b7a83ab78eb55L,0x71c471aa4eb0dd18L,0x0000000000000079L } }, + /* 11 << 238 */ + { { 0x27a0dcec4640a9fcL,0xe00471295ee9a3a5L,0x94d54b86054af15fL, + 0x21f6ae1dc99e6364L,0x9cb3b7413a5ecd7dL,0xdd128fa1c6840ab5L, + 0xf62bd90375f71498L,0x5651778f8bd73c30L,0x0000000000000148L }, + { 0x0c84978d1977a031L,0xee1868aba79b8071L,0xfcf1485672e09b67L, + 0x0b1982dd174d8166L,0xa43baf2886045a38L,0x089d267e5b6bcf85L, + 0x7bba2d8b97f39f60L,0xacccd7c488b1a299L,0x0000000000000030L } }, + /* 12 << 238 */ + { { 0x3671d520e411c88bL,0xe56d2622af747caaL,0x8c2d939f3f298a36L, + 0xd09f955e38b36307L,0x7aef92fdacfe3a5fL,0xaa1b543a3c3413e3L, + 
0xa68bcf7585e8eea2L,0x1c33b90957271ab1L,0x0000000000000175L }, + { 0xe829b5480993374bL,0x93ace6b43758fa53L,0x378932adfb48d303L, + 0xa0be54cd7b9aca58L,0x6f020d2f07d56bfcL,0x661cc6df332d5724L, + 0xe1c67b4c4702e14eL,0x6d7e1534fe0ce1f4L,0x0000000000000165L } }, + /* 13 << 238 */ + { { 0x542bcf698602b37eL,0xa5abcec8d9ccafc9L,0x9bbef4144d01c669L, + 0x1cbe4eb53f21f94dL,0x73e4bd548d17cca3L,0xc71a14ef0fc467fcL, + 0x965e6d4741791872L,0x53d248008a5d401cL,0x0000000000000045L }, + { 0xc5e9fb2b7bc260c3L,0xaff4fa860768d8edL,0x3d9b189fe9bbb088L, + 0xc2b32e2cb971a9c0L,0xef0b19a9500994faL,0x9a916d6b4ca815aaL, + 0x3808a111df10d2f4L,0xfa259ef2d7d6440aL,0x00000000000000c1L } }, + /* 14 << 238 */ + { { 0x427e8420abc1c27eL,0x21124fb3118cc0c6L,0x3fa9121b7030b6d2L, + 0xda46a2b7cba1c68bL,0xb248a68d3fa27645L,0x55268b8ec3876d05L, + 0xd12a2229ca45c78bL,0x65aabd782de0e319L,0x00000000000000e4L }, + { 0x6e660d9b77d9dd90L,0xe4edced2707fbf26L,0x8c8269de5d53a600L, + 0xa9d2ac7cc1d0cb90L,0x7adf4c79998ce4bcL,0x553294473903cbb1L, + 0x2af44eb383b6a801L,0xf3b473a71f55b558L,0x00000000000001c8L } }, + /* 15 << 238 */ + { { 0x650d6b150b56ca3cL,0x510d05a999013060L,0x8f317f930173d49dL, + 0x1ba9a82c346efba3L,0xf21915c9238c3a66L,0x400b5e7081e3727dL, + 0xb7a6c68ff2e8bfe8L,0x4ff4a4efe83e0afeL,0x000000000000003dL }, + { 0xcdd4dd7cc85ada21L,0xef6508c1604c767bL,0x1155f07962894489L, + 0xe85ce6f65b58e49fL,0xa5be7867bcdbe11eL,0x94251f2e0254ac77L, + 0xb7e1e2fcb2a4c84fL,0x93b3013dad5da02aL,0x000000000000013eL } }, + /* 16 << 238 */ + { { 0x70eae5e2b7c097fdL,0x690c65c6becc7c65L,0xf684ce2cbeb8a529L, + 0xfb5bd6fa772826c5L,0x12025c313b0d8680L,0x8f20fac7cbef00adL, + 0xb810bf456fc66ac3L,0xa960e1b3c7878181L,0x00000000000001a1L }, + { 0x85a5e619f6103b53L,0xcc4cf223fc14a4b5L,0x68985b1f977a8c64L, + 0x410eaf63fe82447cL,0xe20381490453d341L,0xe1d008ad719258deL, + 0x26c1b1773410d895L,0x93cad3da45bae984L,0x000000000000012fL } }, + /* 17 << 238 */ + { { 0x83470dd85087e317L,0xdb7733bae1f29847L,0x54f0d79b32b8ee6cL, + 
0xddc71c2d83573e46L,0xfd1fffb55e2b6321L,0x3daa5b01a31cbcafL, + 0x084026ba3ea75ecaL,0xd4e58127f22d9f33L,0x000000000000018aL }, + { 0x3ea9cefcbd40d455L,0x4350051a09e79ec1L,0x0e7888a7ce0a3585L, + 0x32dab5ea59e3067aL,0x4843818f5953e6d3L,0xde8b4b0fcc703daaL, + 0x386a70895d1c9c6fL,0x68d1bfbc6eac5abdL,0x00000000000000b4L } }, + /* 18 << 238 */ + { { 0xbd7c96176411ed7fL,0x39202b58f9a594c7L,0x4c20a11472ff2f6bL, + 0x8eae9ef7641d9c13L,0x89aa633f48b0cce2L,0x92b63306557288b0L, + 0xb3a581c0ce7380aeL,0x8666b913fb705467L,0x0000000000000095L }, + { 0xf15719ceee9abfefL,0xe780789a4e41aa97L,0x9d21215431e6afb7L, + 0x17d9bfab5cbf72ceL,0x0c48ccacb0237bb2L,0xe0a08569d349a092L, + 0xa409e08ded3cfb4aL,0x79e67928a1c0e87aL,0x0000000000000182L } }, + /* 19 << 238 */ + { { 0x1d3c45e847667107L,0x866ca278c080ca9aL,0x9a41e8792dc42392L, + 0xad3283a25b513845L,0x14d23b50b285d0a7L,0x4aef4370d7364e64L, + 0xc5043319b807cdb7L,0x88c508189673ed5cL,0x000000000000002dL }, + { 0xd56fa0fcf39ca196L,0x97e6dbd9852a5db8L,0xd8361629027563f3L, + 0x93495e12e4c1ef8aL,0xcd5776f11bc10140L,0xb21ef334e3b873c9L, + 0x48f0ece3098c8fa7L,0xd97318a4f2302f9dL,0x000000000000018dL } }, + /* 20 << 238 */ + { { 0xb83fdb669de748b4L,0xc46f81000c41f158L,0xade8932fb8f8f374L, + 0x5728e8aac7ab11feL,0xa3be4365601535d1L,0xf366f0d450c52b71L, + 0x6e00860084db7699L,0x39c5934c32bd9061L,0x0000000000000003L }, + { 0x4c5f3ab49c964a3fL,0x30ee02c369a70ba7L,0xe4f4e7721699b55bL, + 0x957efd09f2104682L,0x7be1bdb0298947d8L,0x128be588a6ce37daL, + 0x10f0abbb5110d219L,0xfe63de4b6c433528L,0x00000000000000a5L } }, + /* 21 << 238 */ + { { 0x0ce1685131ae8a81L,0xb72c77b2d7337ca8L,0x7e74dba79aedcdf8L, + 0x18d4e5e3da3c5c9bL,0x19ccdba8edb516a9L,0xe62b675c446ece23L, + 0x5b4aa29acf1e7f89L,0x37dd4f643940453dL,0x00000000000001b4L }, + { 0xb3c5170a3b3b6d8bL,0x476b1d951f6c85cfL,0x7eba2355afd2f7d8L, + 0xd6496bd907671014L,0x9b1e6ea26f122c61L,0xcf03fff746b200adL, + 0xeac4278017b28fdeL,0x38eab344da84d1a7L,0x00000000000001bbL } }, + /* 22 << 238 */ + { { 
0xb5bc3117e7c0701fL,0xb48e5dc0d6fd7595L,0xd1b5fe1a3b0f2afdL, + 0xf1099d2f9248f207L,0x6cc721d9ca2b2ed7L,0xdb339d46a360ed1fL, + 0xfa226637396de0adL,0x2fba134ae405fd2eL,0x0000000000000110L }, + { 0xb1d2d7ac0b4bf098L,0x79d1b226f5ff126dL,0x92a0d70fbd01aa18L, + 0x67e5828531f22085L,0xf694f833d8b14f5cL,0x13c3cc45a40691daL, + 0xa3405c019388bfb8L,0x6003716261c4da19L,0x00000000000001b2L } }, + /* 23 << 238 */ + { { 0x9a02b009aaccf1f7L,0xb6ffb0541f2fd3a2L,0x16512f3487e4db60L, + 0xf23615007af3e638L,0x3fe5d84a1d4c8fc1L,0x73c21064f2251788L, + 0x54f27fc6fd65b906L,0xc2262a4040e07e0eL,0x0000000000000072L }, + { 0xb8c93aab02b4791eL,0xae54bd0ca0bfd55eL,0xebb909da4ea41398L, + 0x8af3c61555c3e881L,0x61d0d6b9cff9966fL,0x1c28b62eb85433a2L, + 0x58167c244f422e5bL,0xae2dfae6446a4077L,0x0000000000000063L } }, + /* 24 << 238 */ + { { 0xee8b94e0bcbcf019L,0xa3965127952a5a75L,0x5d306ffdbd99b8e6L, + 0x9726acfcf7f77f82L,0x52a8c48f333e69a2L,0x2561b47ee5310f8cL, + 0x5c707992ddba50adL,0xca39fec1d49f9302L,0x0000000000000197L }, + { 0x773374e03330c1deL,0x3c186e5a12186692L,0x7c50d77c2a07fe4fL, + 0xb0387d01ec8cb87fL,0x891a741524af7e0dL,0xacd3fc805dce5454L, + 0x2a92f9cb257238d4L,0x25075cbc4415b2a6L,0x000000000000006fL } }, + /* 25 << 238 */ + { { 0x7884bbb629f1ff9eL,0x2b3706ed9719d7f0L,0x3a418c4929172624L, + 0x1857e4ddd83e00c4L,0x8d5bea69add00324L,0x60ee0dacb3d682c5L, + 0xf34b52b594bf155eL,0x9cbc6954fa986cc0L,0x00000000000000ffL }, + { 0x23d72f25e31ed109L,0xa053addcebe1cbecL,0x375503015674aeefL, + 0x2651c46a6eefe964L,0x600c18699dff26f4L,0xc20ce2878be15274L, + 0x17f6b16a86f62663L,0x919c4e3d4158baeeL,0x00000000000001daL } }, + /* 26 << 238 */ + { { 0xe8d55c1e634e034dL,0x344ec79cd9e13479L,0xf3b583bf4be319f2L, + 0x36bd595ea43a6c72L,0x64c244cb8ee00c73L,0x592642553034b765L, + 0xf98e04c8d6d25e62L,0xfe83950036bfb0f0L,0x000000000000004cL }, + { 0xcc19547c4f773a47L,0x1b974525f727e658L,0x36290dd2b17ab2bbL, + 0xb1e03bc6dd339b6dL,0x3d2fd734d4b510dfL,0x56e14c3136363925L, + 
0xa61a6fd8574c83b9L,0x7340921e4c17fa62L,0x00000000000001afL } }, + /* 27 << 238 */ + { { 0xe6eeba2ac872b54cL,0x3734508057576f82L,0xc82723574939b69eL, + 0x4c9f83c24460d5baL,0x635d558bfd73c262L,0xf58b7a7750cb2d56L, + 0x14bdaf8abd4c14f1L,0x31d03f3b5acf2c6eL,0x0000000000000099L }, + { 0x595629f11a4aae0cL,0x2f72899cf87bedc0L,0xbe2d7dcff6797377L, + 0x49fb22c2ff654225L,0xfa99f364161f0820L,0x6f069ff13b8a9200L, + 0xf07433d474600803L,0x4035ba9921b24016L,0x000000000000003eL } }, + /* 28 << 238 */ + { { 0x64f0ae6630ae5727L,0xeb7fee1cccfe2f46L,0xcb52d65cc17226f5L, + 0x11bfdd3c4bde2ff6L,0x14593dba22f56523L,0x7549c2a8e5a07b06L, + 0x27e857b0a259420cL,0x5f2702cfd795bf2bL,0x00000000000000ceL }, + { 0x50247b7799ef6f42L,0xc5d4d501a9496281L,0xc2898fd0848ee90aL, + 0x5916df473b105ea7L,0xc963d568a3fc2a93L,0x4f5dbd88a71e69ffL, + 0xb1dfb820ed98520aL,0xf91d6e9e5bbb0779L,0x000000000000010eL } }, + /* 29 << 238 */ + { { 0x96ad602860bf7a00L,0xc3426e16c47cdc65L,0xe57e994a1e2760b1L, + 0xeb93b5f7b3fb4b03L,0x31bb9ec24a492c4dL,0x65d3cf03584d5be8L, + 0x601b91ccc97a4958L,0xc969b84d7336cea0L,0x00000000000000e7L }, + { 0x0979646c913be9f6L,0x6320c63456598b42L,0xfb1cb31e59f91ddfL, + 0xa2f17bfc0c88b8d8L,0x894985b268d26956L,0x3e960fcbabfa8376L, + 0x913526f20caa7b2dL,0xaa97458ead4a33e5L,0x0000000000000087L } }, + /* 30 << 238 */ + { { 0xa3931ead385812f2L,0xa5d9d3ab9ff9b9a1L,0x8b98aee83299344fL, + 0xb9e1f38bbfc5c57cL,0x66c8223742e07798L,0xf2bb539d6f4bc8baL, + 0x022615e0eb11077fL,0x41059629845c0f9aL,0x00000000000000d5L }, + { 0x05e13d3b4953c8ceL,0x4892bd1e05ecb056L,0x15c3d629e54ce7b4L, + 0x818460931bc0c8cbL,0x70ae6768e5b06fc6L,0x549cae2800e46659L, + 0x674019953a739df8L,0x3323b1b868fcc1e8L,0x00000000000001c8L } }, + /* 31 << 238 */ + { { 0x1b81beaa3dce6ebbL,0x3232328e2986bcc8L,0x08562a2e7a7ec1b1L, + 0xeed1e9d9c46c24b9L,0x15a05107ef0106c2L,0x54dbe4e566610461L, + 0x4c5fb5c51ec80c7eL,0xa7507d581e15f329L,0x000000000000011dL }, + { 0xc3de850e31675fecL,0x2164d583de66dee1L,0x2346eecfef36f11cL, + 
0xd3037b676d397030L,0xc88b32b46c94a3e5L,0xb9494d80fcb7031cL, + 0x45060066a79a0c8bL,0x2c283d47fd946bacL,0x00000000000001ffL } }, + /* 32 << 238 */ + { { 0x6bce4008db3002ceL,0xe109980c03fe67efL,0x35f077201cd61540L, + 0xb0adfba4723c0ef2L,0x811b100860ca9a5bL,0xedd17f04c0d112c6L, + 0x8cc7cc5f49688782L,0x716f36bbd5f89c31L,0x0000000000000031L }, + { 0x700cb92a1d19d891L,0xb99918262daa7a23L,0x834cd1fdd9ad0c74L, + 0xd74ee37682c919daL,0x6569353497696deeL,0xe2615c6e12e9ab53L, + 0x4be93a40090bbdf3L,0xb811069663437098L,0x0000000000000162L } }, + /* 33 << 238 */ + { { 0x3776e68de439b6e6L,0xc11c5cf2a93fec79L,0x18322c456e8cb760L, + 0x6d23d5923074466cL,0x922635baff1e0ba8L,0xa37b56544de73b12L, + 0x0767e97a82224c9fL,0xae0f9650044cded8L,0x0000000000000050L }, + { 0xa0a9a5b35971b232L,0xa1aef7a02da1a761L,0xdf1cfc2c85b63dfeL, + 0xf91bbb7d5d293c49L,0xcba0ba2a9a832665L,0xe5a1b4c739fd3a27L, + 0xe647ded0140ed954L,0x6ea2c367f0e619ccL,0x00000000000000dfL } }, + /* 34 << 238 */ + { { 0xcdf28f7cb59ac237L,0x2d2fb106ea4e11ccL,0x0c4d36c2120a150bL, + 0x21adb80a382f0b0eL,0x3559de088dde2c46L,0xf419d3758b9d7854L, + 0x925741d95bea2eceL,0xe94a66a4372032e0L,0x0000000000000170L }, + { 0xb34b3fbba5e4a112L,0xf14faedf1303ea36L,0xfd2f1daa22e48527L, + 0xee04e38138ae2616L,0x4f70a2f74b4262fbL,0x6cdf8c988f9eaf7fL, + 0x89287d7a67171cc5L,0xbc4f369fa911d65aL,0x00000000000000b4L } }, + /* 35 << 238 */ + { { 0xe0b1e01ea1fc8413L,0xd1272c9abbb3e2e1L,0xb79eff0eb97e058eL, + 0x3e22d38d68027d9cL,0xb8a10d739d6e9ed5L,0x2987ceb003d3a5b1L, + 0xc039b0fe1d7eb0f8L,0xa40991bc58942c9fL,0x000000000000004bL }, + { 0x0b8602254bd1a64aL,0xa2a6a556eeeaa4f6L,0x2006573bd5fb4f71L, + 0x7e2ec04cfcf5eb9fL,0xdded1c6ad3080364L,0xf0eac914ed587661L, + 0xbbad7d89912701dbL,0x1926f99ed89b48faL,0x00000000000000c1L } }, + /* 36 << 238 */ + { { 0x2640b0474717da49L,0x6b303097f0a90682L,0xe3fbb37ba4756b74L, + 0x084581b1e3f7ea6aL,0x942b168d67a536e8L,0x4cecb7ef8e8e4dbdL, + 0x48f4b7c0b17164d3L,0x8a464b3a93bfca74L,0x00000000000001ffL }, + { 
0x21f4906f7268ec93L,0xd61c24432acbde61L,0xde475d32d6fa2261L, + 0xbaae1526a02f45c5L,0x16b2c6b30212c1fdL,0xd6afdfe9fddea6a2L, + 0x1f8fb94d5daf789dL,0x2f89389b576846a0L,0x00000000000000ffL } }, + /* 37 << 238 */ + { { 0x9382f1039bf6e708L,0x17d1164758ef0269L,0xace45ade38ec27d2L, + 0xcfc559ceb0a356d1L,0xa771069d6edef1e0L,0xc0dc328f753868d8L, + 0x19a2e784b9621a36L,0x2a959514f8607fa1L,0x000000000000007bL }, + { 0x2edb1cad55f7e034L,0xb17fb1b8a475236eL,0x06c1cd91ec88e88fL, + 0x01b3ac7c9b4b4972L,0xb826191aad44bee8L,0x80260416b37c96a9L, + 0x3ca2a771b7132816L,0xc38c82ad652906e9L,0x00000000000001edL } }, + /* 38 << 238 */ + { { 0xec3a61c2762601e9L,0x4b70960158620f6dL,0xa4d1de2b8cb3717dL, + 0x926fc94297198836L,0xa3b5a26b93c6aeffL,0xc6f933c6ee4999ffL, + 0xf2fc0ac93b15ac85L,0x645c919b43ced35dL,0x00000000000001d9L }, + { 0x543749ecccb952e0L,0xa05917794fd35411L,0xaf499a4fecd3e486L, + 0x3a78d1f489b19c12L,0xe110f2333129acbcL,0x13e4941b73ea2d43L, + 0xed5c7b9f61892b8cL,0x9cbe3ebf9fd89b47L,0x0000000000000092L } }, + /* 39 << 238 */ + { { 0x5abb2e1f0648f550L,0x6f964c0a58d46cc3L,0x568ceb9058852020L, + 0xe01f9481e794a871L,0xe5687af6d5c440dcL,0xd88b9baa6ed0bf7dL, + 0x8730befb073a24f1L,0x950d38f59614bd97L,0x00000000000000d5L }, + { 0x49e471eba5517b5bL,0x1fdc6a0f9ba2bb7eL,0x9e3baa4d4a47f403L, + 0x96ba18b57bbc5ccbL,0x570a28b31bbd2780L,0xbf948e37002edc95L, + 0x6a92422b11a4eca3L,0x00a21cb769b7b1c1L,0x00000000000001e8L } }, + /* 40 << 238 */ + { { 0xff9f5e5ed7c958a0L,0x99585b2f434a426dL,0x07b475a96db1ed0eL, + 0x1ea7f5d7a973929cL,0xb4006a0b7bbc180bL,0xa86f75d4925bfaa9L, + 0xf986de1075c8a7e3L,0x43e74f08f25ae706L,0x0000000000000088L }, + { 0x4ffd5c105068acf5L,0x6ffd7eb6063d53ccL,0xe899d17938343cb5L, + 0x9451a0150a3df7beL,0x8a581525a24b1528L,0xe5dfdb1b67a03545L, + 0x715d2b87caaa5064L,0xae4594ce0ea125baL,0x00000000000000c5L } }, + /* 41 << 238 */ + { { 0x3d8b52ea7a44f134L,0x9831076a6478318dL,0xc343c2e93314ea42L, + 0xce35d8739724b677L,0x4f9c688b8f14677dL,0xc7c080a58d1bc77fL, + 
0x512359ab2b8053a1L,0x5bcae66250c3b651L,0x00000000000000aaL }, + { 0xc82f3a8769afbb4aL,0xb073c1da2d601213L,0x68d6cf6518ce0d09L, + 0x620f23a591c7fb05L,0x938c96dc6d56e8edL,0x966e1f705007a705L, + 0x3e4a940a2db66d9bL,0xac446031d466e3ccL,0x0000000000000026L } }, + /* 42 << 238 */ + { { 0x87aa2f8da0cc66f1L,0xcf4b9975e899fbf0L,0x45cc21ef33edf673L, + 0x7005319b091438f1L,0xc1e62e05fd50eaacL,0xe49ef8a210d2041dL, + 0xf49778b07531821dL,0x8390c7378c684a3dL,0x000000000000012bL }, + { 0x435518173d6cf7afL,0xc011d17b97f2560fL,0xd222f97d5c29d8b3L, + 0x5a9ffa8ccce3a4e6L,0x9271c90c8587b8ccL,0x73277e5099870509L, + 0xe004a07eeca76a93L,0xff9a8582086c9530L,0x0000000000000115L } }, + /* 43 << 238 */ + { { 0xb04a4b529d47f4d6L,0xa1c4d096aee9c324L,0x98e9d57517076ad9L, + 0xea9ab91c22dea26cL,0x58e4d80a64a5ea57L,0x2ec9a6e0dcb2a76fL, + 0xd05f78756a1249a1L,0x0cb424f2ba719843L,0x00000000000001c8L }, + { 0x20832c3e5f3b404eL,0x033963e7451b36cbL,0x74046f923cc7d7f7L, + 0xdfe2ad55c47171c1L,0xe7fd701821fb7d83L,0x7bb4722bf8319c2fL, + 0x1b7cb3224986e88cL,0x7ab64a64a9ab0760L,0x00000000000000bcL } }, + /* 44 << 238 */ + { { 0xaa0898403516e4d9L,0xdfbb159afba094abL,0xdcb895e26bb49ebfL, + 0x7a7b572756cf3c64L,0x52861b3c38c7bc1dL,0x7c34176c27352c7bL, + 0x5b3cc2177d33ae38L,0x4a2083698c494a63L,0x000000000000002eL }, + { 0xd32926625f514fadL,0x5dee99706020b420L,0x8aff8c84fdae8486L, + 0xfdc0ee6551e60c42L,0x564287bd236681daL,0x6da3e12ecaa82af6L, + 0x6d17c67ce1efbe58L,0xa0334020ccc6c519L,0x000000000000006aL } }, + /* 45 << 238 */ + { { 0xea94c88e4fe1d344L,0x0a08e3ce5bb17e77L,0xfa1bfe20b9b51a79L, + 0x8d455cf8fb324d32L,0xe58f0de564d6dc3fL,0x01e80692f396b39aL, + 0x86a1250628441338L,0xa81733dd8e134355L,0x00000000000001e3L }, + { 0xb5952292228e3d82L,0x3582cb17e1eb802bL,0x84129ab6accfd9feL, + 0xd3cbf4eca3db67b7L,0x5dfcb4576325750dL,0x638916ed9030a81fL, + 0xa26d196d5d39a3b3L,0x26d30fec1b8864fdL,0x0000000000000124L } }, + /* 46 << 238 */ + { { 0x6c72d4108c03da42L,0x3b05e597d8d0e71eL,0x947e55e37b5f1e9dL, + 
0x3c899ff708629865L,0xffbb711ddaaa3d57L,0xb888cf7bedecbe75L, + 0xd7b6bd46961d1f1eL,0xda9a7125dc451900L,0x0000000000000121L }, + { 0x683d251ae65785ccL,0xd00439685a2422c2L,0x6d4668e5d36c1417L, + 0xe8df49820c580ec5L,0x80f79552b28680aeL,0x4e777d7eab114520L, + 0x67995b1ae5b2f6b7L,0xddedce535855291cL,0x0000000000000064L } }, + /* 47 << 238 */ + { { 0x219101904aeb5bb1L,0x3e9a204ae5de24ffL,0xe8ec4721ae3b23c7L, + 0x1e87147a6dc89b75L,0x13ca53950d19680dL,0xf1352672920c68b0L, + 0xc6cc77db4ad11314L,0x78d984df75daeeeaL,0x0000000000000186L }, + { 0x6cad576e442cc089L,0x74bd0f59409ad43fL,0xfb0ec10fcd1f82dfL, + 0x6b46a052bf822010L,0x90658aa13d6c9741L,0xa56000a1d91bb102L, + 0x7c8e2b47f0edb777L,0xc0607199447fdf10L,0x00000000000000faL } }, + /* 48 << 238 */ + { { 0x0609a97b58033238L,0x5b9678b1d40689e9L,0x4ecd26c3986c40f7L, + 0x7a6083dc2002e8ffL,0x73fd1d708b769797L,0x0fa7df7924e06e55L, + 0xfa7254f8d49b9efdL,0x742ad69df862350fL,0x00000000000000b1L }, + { 0x78e2366a314a07f3L,0xd0ecaebdaa487012L,0xfb13a82507fd4b1bL, + 0x6ee4483aec0c854aL,0x0e7d617e72fd006eL,0x0cdfec5b291b4018L, + 0x2b9fa205f66cca32L,0x261f67f6149d20deL,0x0000000000000075L } }, + /* 49 << 238 */ + { { 0x664c27d0a20d557aL,0x896b2675f0744080L,0x8beaf01afc5af408L, + 0x40ffc5cd38622d77L,0xc43885d4a2528371L,0x81e1f6745b02e290L, + 0x4916d18799d1631bL,0xb223123e8dd86517L,0x000000000000009dL }, + { 0x40d9d05f2fb7c4aaL,0xe40e4753a50abfbfL,0xf850526214078e53L, + 0x5e24344182804fa8L,0x7915bb4726da0a8fL,0xb2c6b557779d3504L, + 0x04e9ded5050e4e95L,0x9e99a2bb5f476a78L,0x000000000000006bL } }, + /* 50 << 238 */ + { { 0x6c34a56a6ea0b1cdL,0xc71325505a787710L,0x7b88ee97e9a5ea07L, + 0xe74726638239f9a0L,0x4d37a6dde17258d3L,0x15bae9319708c6bbL, + 0xc174958fbbc04aa0L,0x7625733b86528ebdL,0x00000000000001efL }, + { 0xafb5465248095b06L,0xc1c05fafd067b459L,0xa6ad20025bc1a76aL, + 0xaa096c10cd725a94L,0xd37e975f282785d6L,0x2ab89b9ff67d3a9fL, + 0x5d6f3ed7b7b6e4c2L,0x04ff41ca29502853L,0x000000000000005dL } }, + /* 51 << 238 */ + { { 
0xc888d1d441ece6c0L,0x26df9e1bbfeb1062L,0x0b4ddd821bb94aa4L, + 0x71efd8d119ffddd7L,0x9b3998224e47d498L,0x59872b31aa653dcaL, + 0x759a84fb2712e2c9L,0x398af98399e74778L,0x000000000000007aL }, + { 0x35ebaf9392c7b6c2L,0x8bacd47278141bd8L,0xd60364f09c53d1e9L, + 0x790e9dcba27a7a57L,0xb5e554f8991e40b3L,0xc7f3ce27da7a13f5L, + 0xfb65dc3926398b00L,0x2c6cb9b8229dca96L,0x0000000000000036L } }, + /* 52 << 238 */ + { { 0x46d6266d92756c0eL,0x3164f6a8305e1611L,0x65dfea3d613b00c0L, + 0x75ea9ab9571828a5L,0x5184dc138be872a9L,0x8c59c14192347d22L, + 0x96b331914baa4678L,0x0d34cc5a2ae17a1bL,0x000000000000000dL }, + { 0xc05c3c516fcb342dL,0xb36a7efb677d6ea3L,0x016811b2a8067b2bL, + 0xffff97f94822fa85L,0xfc646b5145883bb0L,0x56c40d49c28d61dbL, + 0x91be8847eadd1593L,0xecf0a8f5128d8ad9L,0x000000000000002bL } }, + /* 53 << 238 */ + { { 0xe276b588d7262b3fL,0xa64b1324064fc6ddL,0xf82cb69a9cd02e25L, + 0xf399f6f385ba9d89L,0x3903c34dc52f03e2L,0x91d41630888c7aa6L, + 0x0402b6f8cc39ea02L,0xfda829d4f8c6dce9L,0x000000000000015eL }, + { 0xba3c550fb2f9160cL,0xc4ef6745e2ad5099L,0x528d3a72cba7c269L, + 0x38e13dec67106a10L,0xff878c2e05687e58L,0xa6f53fec246e5459L, + 0x4f30e0c2f126c843L,0x4d3dedcbbdfc0320L,0x0000000000000131L } }, + /* 54 << 238 */ + { { 0x897294e8dbf874eeL,0x84ec352267d9abddL,0xacb0e94b830bb2fdL, + 0xe4e81bd945db3a07L,0xfb029eaa52e22815L,0x920edf272170ad75L, + 0x4ebb3c5001e13530L,0x01db6df29b62da53L,0x0000000000000096L }, + { 0x555a5a6823815d09L,0xc3c7463a59449b21L,0xebfe5fcc964b5b8bL, + 0x1683e495c2076735L,0x361c14526e18bdb3L,0x14496fa3aa0ef3fdL, + 0xdd45b105a48eee14L,0x91cc21d205ac61dbL,0x00000000000001d6L } }, + /* 55 << 238 */ + { { 0xb539df1f919748f9L,0x942df28d9ab585efL,0x22d555d147e56084L, + 0xdee5bf1a34baa8ddL,0xe9eef1adca2d8eb5L,0x78505f54b404690aL, + 0xe4d77116b87f8bd2L,0xae4949dbdcee6b7cL,0x0000000000000177L }, + { 0xb6861e9ac81fd641L,0x113403993596b4c9L,0x588dcca1444a7b1cL, + 0x573e6b83df0cc0d5L,0x429e5ae8d6610ceeL,0x060d9bb429bd62d5L, + 
0x1337eefec513318aL,0x5c3cda2664cc2019L,0x0000000000000081L } }, + /* 56 << 238 */ + { { 0x436121db4291d3c6L,0xdb32e56791d00413L,0x17e048dfd571849fL, + 0xcadf80d09d174b7aL,0x504fcd8cf3a16f6dL,0x0bca040b55a79e74L, + 0xae5e562a9dd8e72cL,0xa2ad35266d83c58fL,0x000000000000001cL }, + { 0x68cc0e2f8be48c08L,0xa4cf89f3a957cc18L,0x7ee580a6720e8f3aL, + 0xd45347c5ce9222edL,0x2043ba4b964685a9L,0x8a59c2fd1ad8acb9L, + 0xb2ab5a3fa3a1fb58L,0x6f0d28c3e7a7befeL,0x0000000000000060L } }, + /* 57 << 238 */ + { { 0x02c32597e24f4558L,0x6b8cd177f3b8d3e7L,0x51fac672365fb517L, + 0x5553c4053855aaf3L,0xfa6c278a2250753aL,0x5c1b7cbccd07fcbbL, + 0x84300435520e2f45L,0x4913a52b00bf3f6aL,0x00000000000001a0L }, + { 0x438a1803fb3388e2L,0xa15ebc4ec2b44780L,0xa830255c9087838aL, + 0xb7e6abf7884847d1L,0xf31fc86eaba507f4L,0x72009f406675e50fL, + 0xadacd3a574ec178aL,0xd6d36127afba1b1cL,0x00000000000001d1L } }, + /* 58 << 238 */ + { { 0x768d163a3a14efdaL,0xa080b14468d84579L,0xd477061fdb39ae5fL, + 0xbfea91ebc229bda7L,0xe28c79fa07f1e026L,0x0a0fb62d143ffd3eL, + 0x41b2fbc3a2ac222bL,0x184753613ba365f9L,0x00000000000000f6L }, + { 0x70329a2a27d288b5L,0x02abc5a928617e54L,0x14f646bfa0ba713dL, + 0xf5b3652fb5d28656L,0xe0841164ce97deccL,0x0aa84aa74b63dcf6L, + 0x59613768715e4e05L,0x23d572c0c884e71cL,0x00000000000000adL } }, + /* 59 << 238 */ + { { 0xbff79e46c5aae205L,0x4c77219362eb2de7L,0xa6d723f0b383be53L, + 0xc2f26458e641839bL,0xfcf6980d5442a5ecL,0xc755fd48bfc3bddbL, + 0x7a1a100b9504bc93L,0x8864ef9fedf3f45aL,0x00000000000001dcL }, + { 0x58dd0f0e5a20814fL,0x52770e9bee9ed846L,0x64c23fef02e82a1aL, + 0x4fbca3bc96d16619L,0x547101a099489c36L,0x92869dbbaffda8d9L, + 0x28fe8e2924c28bf1L,0xa859941e8b9885bbL,0x00000000000001e7L } }, + /* 60 << 238 */ + { { 0x33ccf08de10db8f5L,0xe481ea75d2b4cba5L,0x8db73d5bee9cdb6fL, + 0x5d11d992b5c523baL,0xe9d79978d11b927bL,0x61be8fc17a355c88L, + 0x5c31c7a65a062e74L,0x2aff5a1117a4a844L,0x0000000000000061L }, + { 0x9f5386773c9f31b4L,0x7f7681c7766dfcb0L,0x1d759dd9cb41b746L, + 
0x1842ce5a3df8b332L,0xd6650a1d46cf5438L,0x3afad154d8e57a31L, + 0x72df5ed3c779ec95L,0xa9f04f62733f1f9aL,0x0000000000000072L } }, + /* 61 << 238 */ + { { 0xa4c9bad76cd25119L,0x0893c15c677a2ff2L,0xd4c74a285fa5309cL, + 0x811ace597343a331L,0xb4a1a3738b5d30f1L,0x2d2b14a4650fba78L, + 0xf38fc6d263c2365fL,0xc7ab7a5135ada1beL,0x0000000000000021L }, + { 0x24c8de56b90ba651L,0x47e5107fc3a16ea7L,0x48d7396e8d859e56L, + 0xd9eef922d1c2b3d0L,0xf7d7c0fa24267a36L,0xc1b416e77f92ade4L, + 0x88ae119ef0634ab6L,0xe2c4fb17dc882f42L,0x00000000000000f7L } }, + /* 62 << 238 */ + { { 0x5678bea8848f8152L,0x09edf78a2db8f5f7L,0x577354a80e7a5101L, + 0xde84b145d55f3d58L,0x6ce8a6e52f8a88c3L,0xab0c3fd1f9ac5318L, + 0x163316926681788dL,0x4e1eba4bb9aac85eL,0x00000000000000afL }, + { 0xaf37fba2668c4bf9L,0x5916958ee908dc73L,0x3b15e5e0d1351d09L, + 0xcd0e3ea56388db27L,0x37a3364b5ddebef5L,0xf881e32c12e18700L, + 0xd52db48980d36881L,0x67630057f5db73f9L,0x0000000000000094L } }, + /* 63 << 238 */ + { { 0x1574e5fe8e01c90eL,0x9a04e874681401adL,0x29186da48992dd9dL, + 0x2625bf89f6a91151L,0x7292c99664fb81e6L,0xdb4e7747f161c777L, + 0x21977978208c9469L,0xa25997d421d3699eL,0x00000000000000ecL }, + { 0x3546a978ddd52ddbL,0x9242e4d42d7cf82fL,0x1080e50bb80f359aL, + 0x70bb06fc98035b62L,0xf1ea6ee2098bbe60L,0x0e5cc73d8ba58df8L, + 0x383ef670e8608307L,0xfdeaac2ef5e05b50L,0x0000000000000064L } }, + /* 64 << 238 */ + { { 0xdd9166282d3d7641L,0x99029e9b39c02ceeL,0xce03fac9b6de7881L, + 0xc2cd0f784f66ebbcL,0xe391948364c83e37L,0xfc853cb8fd7bb155L, + 0xa5a99b24432314ccL,0xef0b7ed0133b2709L,0x00000000000001d0L }, + { 0xe4c4d8a054de3ecbL,0xe2f88ac25f8cb5ceL,0x17a4e60d68eb11a2L, + 0x6d0e05fb3b79619aL,0xa54874b7d5da75efL,0x18c2b1b3c77f6ac1L, + 0x5a2a6fdd829736c3L,0x9ce62f22cd44843bL,0x0000000000000059L } }, + /* 0 << 245 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 245 */ + { { 0xc3e525d2afd7c4f7L,0xc11fd457ae2b9475L,0x6cf55e765d48ead5L, + 
0xf9546fe48f93eb95L,0x3e3712a75e124ba8L,0xcb71caafcf7a3e81L, + 0x512060172f3d629dL,0xe226fe2dbab1761cL,0x0000000000000079L }, + { 0xbb618c7bf963a03aL,0x2fd9211cd55e0ab2L,0x25be6c8a20dfe924L, + 0x40f5ff09135b7156L,0x7770ba4af706b788L,0x5a2146e0dc114357L, + 0x48f25eda54b6fae3L,0x5adc7aa519ead5c4L,0x00000000000001e3L } }, + /* 2 << 245 */ + { { 0x21e292cb67355505L,0x7cdf9c24e192a20dL,0x07228775f6753eecL, + 0xcd83121b93217c7fL,0xf6e99141ad41ce70L,0x2214296a41a8ec10L, + 0xcdf7237b0200ddb1L,0xe2d943f5c7ca562dL,0x0000000000000056L }, + { 0x9f743d7c9627a12eL,0x33ff2e7e31099d8aL,0x3c9e84d12a07248aL, + 0x9cbc405a6136b3f7L,0xb131c000970be1e5L,0xea9c160b84162e6bL, + 0x1c54fe695ba3f7a9L,0xd5a98fdcd51b962fL,0x000000000000015dL } }, + /* 3 << 245 */ + { { 0xd01bfb1f891d513fL,0xacf22e961375d0d4L,0x6f5c76f9d43c24f1L, + 0xa220f50707a66e1eL,0xa0b91c2af227b93eL,0xefe049868fc8f377L, + 0x58482184fbf6c7cfL,0xb224b543cab58946L,0x0000000000000094L }, + { 0xe07e7d7914855341L,0x68881fb60a675863L,0x4083d236b9736e0fL, + 0xec6ce43102dac8a4L,0xb0477cf188891340L,0x67c0cc0512cd1f4fL, + 0xf3101c296f7b5340L,0x60f49d6d1e980837L,0x0000000000000116L } }, + /* 4 << 245 */ + { { 0x3cd54b988ffa8932L,0x73a8c09947855c60L,0x5272d16b3ecc6b44L, + 0x7cf96927b4e83ce5L,0x08af41c8a3908edeL,0x98de920a73495d2bL, + 0x6ddc94e1be247419L,0x0936e34f5e3e7c96L,0x00000000000000acL }, + { 0xf3b1a1aa9d269d4bL,0xb20aff071103e749L,0x9a43c7a4632098aeL, + 0x221c12cbf450a9eeL,0xf04a8e7c7f773a12L,0x427a8cccb2ad0ee7L, + 0x93f7ee9dc0b2a786L,0x6592bb0089bbc18dL,0x00000000000000fdL } }, + /* 5 << 245 */ + { { 0x8f1c77f7a9e8cde3L,0xeb057a9562fd5799L,0x8428a885dbee1bc1L, + 0xaf8c03c38acd2865L,0xd5c2c0039061a6feL,0x4daad9cad2016bd3L, + 0xf698f86cd10ce994L,0x5da25e28b620fd65L,0x000000000000007cL }, + { 0xe3162c8fff2198a7L,0x60b05d1f9084af55L,0x64ecff3c74ccc32dL, + 0xadde221a29f6e28aL,0xd6d98db85f2a1653L,0xb661bb0b597a14a9L, + 0xbaa4d0b80bf62ea4L,0x60b8da55e7feeacfL,0x0000000000000118L } }, + /* 6 << 245 */ + { { 
0xf760f828ebf01d55L,0xd9e13bb2143c38d6L,0xdd69a7832620bb25L, + 0x6d16106459a570bfL,0xda4ed76759a7c1a7L,0x5b07d2a0a22ce561L, + 0x2d1614b0616ba6a2L,0xc2c99b092013b7b5L,0x00000000000000f6L }, + { 0x532f835b6606eb53L,0x11e4eb8cd0fca568L,0x3d12bcb83abd1c3bL, + 0x293651b7eeebaabbL,0xe49cf58535da243eL,0xf0377e4414e175c4L, + 0x12a1f99f39a28cfcL,0x5b3c4c81c02ecdf6L,0x00000000000000bbL } }, + /* 7 << 245 */ + { { 0x6670d91e241fce4cL,0xeff7977950cec570L,0xcbdafc5ecfa2eafaL, + 0xa40c09e53265b4a2L,0xc4fb20c0fb68c280L,0xdd0b12e9bb2e6910L, + 0x31437bff0f2bc13aL,0xa92209d7493eae11L,0x0000000000000124L }, + { 0x0dfa947c893d23e1L,0xed7f8f9c103e6b81L,0x2dd62ed73002e4c1L, + 0x6ddda0871f9973e2L,0xb79ed2773b014d60L,0x12fbda2fe51b686aL, + 0xac9c34b6911b0e3bL,0x6fa28f425f117d64L,0x0000000000000106L } }, + /* 8 << 245 */ + { { 0x1ebc5309b6bf5ec4L,0x29ca5ba73a92ba16L,0xbdb1bce470839c14L, + 0x963fd369de1f966cL,0x208fdc754faba5b7L,0x2f1ea65a678fc163L, + 0x792fa255a773bbc5L,0x752a08fab22a653bL,0x000000000000008fL }, + { 0x096aff662e58f405L,0xee92b17dd6658910L,0x65792cb3d33b5b5aL, + 0xfa503f215678f269L,0x4e2757c8efdffc67L,0xbe46a6f1665347f5L, + 0xd049a1586d9773e7L,0x9555b002b0a28ac5L,0x0000000000000145L } }, + /* 9 << 245 */ + { { 0xa7c17d0350b289b1L,0xe3a9a8a3443f567bL,0x9c99bd6ac3e63db0L, + 0xcf6a4c889362bf69L,0xbba95ac8927e459cL,0xb80f52a8dd937105L, + 0xc518fc9b74c7d215L,0x8fed53990bae5104L,0x00000000000001ccL }, + { 0x2389f46b189ab2bdL,0xf139d7af7d235907L,0x9aca655892901705L, + 0xbdcdd09046ee4c1bL,0x0a0a655db42f41a5L,0x284d525ace537427L, + 0x18dd8dea58dd7840L,0x379387c05ca95c31L,0x0000000000000040L } }, + /* 10 << 245 */ + { { 0x239fc64e9727b936L,0x1de0020395de5e1aL,0x219a07de1a43157dL, + 0x2f427b59cdcec250L,0x557502cb4c36c8b8L,0x537492cd37e0f19fL, + 0xe3d2dfb34d57d9a8L,0x48ef0bf3324ed81eL,0x00000000000000dcL }, + { 0x9315dbc85a1538b2L,0x0591ee6e61aa41f9L,0x0c0c5fc2151dbb76L, + 0x651f06aff4c47091L,0x89a2c2611a1f91b5L,0x6fe23d1b6d668c27L, + 
0xf8dc9ee1aec2fdebL,0xe253b8d8cc0d69afL,0x00000000000000bdL } }, + /* 11 << 245 */ + { { 0x9eccc0619669ebc7L,0x7bf661a6126d010aL,0xe72acef27390cab5L, + 0xe4a7f348dffd3e7aL,0x3e37551fb1e12c74L,0x53e9e142887a122aL, + 0xb0209d65daf5f134L,0xcff03a74033bfc15L,0x000000000000011dL }, + { 0x3a133010d1f95cedL,0xdec6cfb744c85df0L,0x31308eafa9e486feL, + 0x454ed725f49d5ed4L,0x4a3e969ce6dd4580L,0x6feab60dd7bb11baL, + 0x9cd05773f075d873L,0x010563d04c9c43e6L,0x00000000000001f2L } }, + /* 12 << 245 */ + { { 0xfba8a8a05b07b574L,0x936590094fae646eL,0xdf8077f4446c60c1L, + 0xb7680afc92fd0bc0L,0x1306288acacfb628L,0x674a6a5a74ed5a73L, + 0x75a261a6d68d0dd6L,0xa583ee269868f3d7L,0x00000000000000ceL }, + { 0x62c16e2068186114L,0x2403de3e993c15efL,0x1a539413fcfda8e5L, + 0x97f7f8afd65a7fc5L,0x72fcd1fd33fff67cL,0x83b84ac442b4cdceL, + 0x8b56062c4f14b831L,0xe29d467a09559420L,0x000000000000015dL } }, + /* 13 << 245 */ + { { 0xcfd36b8773920568L,0x199605c3bd6b45d1L,0x3d17e798231be8deL, + 0x93901889a9b26b26L,0x922f8d5d101ebf1aL,0xdcb7b7b5e9748c52L, + 0x87e18bc1250b67efL,0x1e23453034bdfc23L,0x000000000000016eL }, + { 0x604df42a18efccdeL,0xfa49291cf9a7eb84L,0x1685a7f7495c62acL, + 0x58a272e26b469c70L,0x7fea89e721cc23beL,0xa979783e76435dfaL, + 0xe993c6cff9404884L,0xb653fe56934876d1L,0x0000000000000079L } }, + /* 14 << 245 */ + { { 0xe586141f43c118d1L,0x0d76256cbe65c263L,0x8f0413fd8fb5ce5aL, + 0x451d3b65c1f76b2bL,0xf9a2ed094e7dd6faL,0x538af492d1e10a14L, + 0xb8e78de206963767L,0x59887d285b003a25L,0x00000000000001b5L }, + { 0xc9b05b2112b423aeL,0x6df1631e64549910L,0x1eea65ded96c323bL, + 0x6d740f2bfd188fb9L,0x73b45366ed093cd7L,0x987a12abdcd33ed0L, + 0x5807600262ef953cL,0x58db0e16370b300dL,0x000000000000012dL } }, + /* 15 << 245 */ + { { 0x9fbc14aadc38ead1L,0xb9f7df7f1a362565L,0x0745197f6a333aefL, + 0x73dccd67666cc110L,0x8f3f2dff9fd92535L,0x7e7180ccf014a6eaL, + 0x59028fc434220f3cL,0x92c45cc3fcf7431aL,0x00000000000000e3L }, + { 0x8b17b3e2eb8c17e2L,0x2b02cf29251a6593L,0x415193432efcf7e8L, + 
0x359e00a1d098ec6eL,0x25f8934e771d7b65L,0xb51d511b14852dbeL, + 0x36c3fe357bd243eeL,0x36a7b1f3424f4d55L,0x000000000000012eL } }, + /* 16 << 245 */ + { { 0xb09a3e8b160e37a4L,0x8ed9ac0627db36b6L,0xecf1838a99cbfcbeL, + 0xb12c94da28b4ee00L,0xd0841818c5f1fdc4L,0xc43e50b2bee0adf4L, + 0x19f3f05b122324c6L,0x796bac65988338bbL,0x00000000000001e3L }, + { 0xf211af1ab50f7626L,0xdecf7989ca2f6779L,0xcbba6b74e1ecbba1L, + 0x73f60e835e5fb261L,0x3eebd6890629dd5dL,0x8906a06e05fba7eeL, + 0x7a23d83a58152375L,0x0f4b9b2de9f1d23dL,0x0000000000000009L } }, + /* 17 << 245 */ + { { 0x6ce126e4d85698daL,0xf003860ccf075ef6L,0x94424096d5716919L, + 0xb0087ff76be87caaL,0xbc01f71788808904L,0xcdcb3e3812c3e361L, + 0x66f47afef1ea9907L,0x2ff5843cd4fd5d8cL,0x000000000000005dL }, + { 0x75de692e3ba79e08L,0xc49213b6aa146821L,0x61e443642707efa7L, + 0x2fd64e12f5923cdeL,0xe51ec2ec37fea533L,0x03d72d4d2b36407eL, + 0x6b84a2abe430e4e0L,0x1a1435c7bb4cafc2L,0x0000000000000028L } }, + /* 18 << 245 */ + { { 0x109514806348bb81L,0x58ae664bb1393e25L,0x206c4681f3ace22dL, + 0x5984dabba050e142L,0xe1505d6e42ded28eL,0xfe2d93213e9a8afbL, + 0x93770eed1a368d4eL,0xd3341598faa53c5eL,0x0000000000000136L }, + { 0xf6818bc6a2785e49L,0x4cead604627405ccL,0xfcf4a9e7f9aa0a7cL, + 0x8edff908798642c5L,0x34760c98e4554131L,0xb0849ed68b48863bL, + 0xe5ed18d214caa88fL,0x556aff0a0cf7a0e4L,0x000000000000002eL } }, + /* 19 << 245 */ + { { 0x8cfd5c7326c01d7eL,0xce0dd0049cc28137L,0x7bdb6939b057fe81L, + 0xf5655f1d1a601839L,0x700651ddc4d38ed8L,0x4715da6ad0c0338cL, + 0x7c55a32fbe60326dL,0x30b13a3e98b89aa3L,0x0000000000000038L }, + { 0x557d7dd83a5be149L,0x102ac658be542331L,0x66ff48f819017e3cL, + 0x27a63afe85e0ba4aL,0x432a65afa1825871L,0xd6468f214e9d0950L, + 0x096b0aeda4e2a365L,0xdb22f9adc561a95cL,0x0000000000000046L } }, + /* 20 << 245 */ + { { 0xa85ed91b7adf4067L,0x5b954f9965cacc6bL,0xb7da20a12f4aa0c3L, + 0x23a0819e1862e363L,0x6c89e6eef4277b93L,0x49352790f3cbf94eL, + 0x05e6e05515eeb890L,0x36ead153d2477c80L,0x0000000000000152L }, + { 
0x8ff423f9e980c969L,0xc70c24c96aeda7ddL,0xd2de4028b39df2f6L, + 0x39a559b5f10f8f9bL,0x63fd365c5f8db29cL,0xed98f7141e8cd83bL, + 0x1a8436d2a9f2a6f0L,0x089cf3ce58244f99L,0x00000000000000eeL } }, + /* 21 << 245 */ + { { 0xa9fd702dd495b62dL,0x766d1e6148af0c7cL,0x47f352707278e7c5L, + 0x2aa27ecc84042027L,0x52159d31712a50d9L,0x180d507f699ec99eL, + 0xe39fe323c1243d21L,0x7401559b0f3447e1L,0x0000000000000004L }, + { 0xe920ac3e65cb715bL,0xd8cb492cf07c740fL,0x833b8edbf1d5efc0L, + 0x43948f5714286a01L,0x935934879d912791L,0xb5a2432fb3767c2cL, + 0xe21bc65688710c3dL,0x1a221b9a8df3f76aL,0x0000000000000077L } }, + /* 22 << 245 */ + { { 0x387b2584b1906296L,0x8584a570a0a5329bL,0xb0661a62c81806c4L, + 0x5ad2e7599a819368L,0xd0ba7b8c1935cf34L,0x9333ac71b650c085L, + 0x8c44bf98270788e5L,0xa1cab6f9e489817cL,0x00000000000001a9L }, + { 0x44b0c84dda4fcce7L,0xe0dc701408d74a53L,0x29f3c3aaf88d0826L, + 0xc605de2dd59e82c3L,0x5a98c98fc992079dL,0x356000aaded5509cL, + 0x4574e63a9a00a60fL,0x02d48eb1e09e5c5dL,0x000000000000000eL } }, + /* 23 << 245 */ + { { 0x97d1f1180b0ff62fL,0xfa0770e729e1d0f6L,0xca7267da66d367dbL, + 0x5b45034eddb1fdd8L,0xb3c238531ac7f4bdL,0xa4b9b677a12063dbL, + 0xfc210b01c13ec6ecL,0x08dbbba6a89a2e69L,0x000000000000003fL }, + { 0xe4ea8bbb6db96407L,0xfbbdda3e4fc2c9b8L,0xd18a140e6d2779d3L, + 0x61cd8ce8845b983cL,0x6761497b8625b6adL,0xd2cb01c2f6ed8b35L, + 0xddb4236ebbd0de0bL,0x5b9f0f78c73e54a0L,0x0000000000000097L } }, + /* 24 << 245 */ + { { 0x353ae6fd77b9aba6L,0xbaab19af013f3158L,0x14d0564c7b9b7aaaL, + 0xe591b96a2d9ff473L,0x327e3fb873f46109L,0x61c0191e6be11242L, + 0x9696f1bd6345cf6dL,0x3dd87573684c060bL,0x0000000000000058L }, + { 0xdab49ba0285a6ddeL,0x7d66e2c3075ea1c1L,0x721973502d8ce1f9L, + 0x5e64c366ae730c53L,0x17154d6dd30475e5L,0x14153f7a3341d88bL, + 0x711dfab66235304eL,0x0e010897bd4533fcL,0x0000000000000177L } }, + /* 25 << 245 */ + { { 0x90afab7861c062deL,0x085ac4c2aae92dfaL,0xcdd94ace38f3885aL, + 0x6c65b82d47a3478dL,0x66eee2c9f6eaaf3aL,0xe8a38973dc89840fL, + 
0xd2521b00921f59d0L,0xbda51f4f1181d481L,0x00000000000000e5L }, + { 0xe3827db66a211907L,0xf410b7cc6515551dL,0x09bbd3d40a46f1f2L, + 0xdaad1c9c2e0d139eL,0x54b99d040d9c10adL,0x9f1f110e278404afL, + 0x29de72bad41fbe0aL,0x6f428ceb3c27e332L,0x000000000000002cL } }, + /* 26 << 245 */ + { { 0x76f0baf9f438a35fL,0xfa3966c30e5c85e7L,0x665a4870128d42bcL, + 0x8d58fb6289898e12L,0x9626f184d06176ebL,0xe88973ea03d85e8dL, + 0xbb142568436dda00L,0x712753b8eda9d061L,0x00000000000000baL }, + { 0x8b191872ecfb2bb2L,0xe7642ce7114331a6L,0xd94e89fb7b3f2f87L, + 0xdb4f769e3ed434edL,0x8c523d17a03c029eL,0x3d8653b866bb80baL, + 0xed8721cc07b5bee5L,0xd64141c9844de9a9L,0x0000000000000140L } }, + /* 27 << 245 */ + { { 0x1181f23436682974L,0x63798602dccc4ea9L,0x80305e1d99b2ec36L, + 0x274a84d12b53add6L,0xf45a7bce6bc50022L,0x7d855ff90f7d22c4L, + 0x5ff789e9d60e96d8L,0xc84f80d186a8a1fbL,0x0000000000000087L }, + { 0x49754d1d801c8558L,0xd27f7b211e49e2c3L,0xd93d0ac5d01e947bL, + 0xf581f071a6785e70L,0xe2049e7954584369L,0x300d73a40f8c465dL, + 0xaa7f684546083b76L,0x9f6345f0a50d63b6L,0x00000000000000c0L } }, + /* 28 << 245 */ + { { 0x6d8d90ccac308a80L,0x862fae36c3f7e859L,0x9bf414eea742e940L, + 0x4ddc964c49029fb4L,0x39750fc64a18bf6eL,0xf8942dee1635f7d1L, + 0x93e9fc2d8da2b6c8L,0xd42be164a18d4d86L,0x00000000000001b9L }, + { 0xc831040e0d6c2213L,0x15f8e86f52d9e6dcL,0xcb79a0d697616828L, + 0x7953d51dae14208cL,0x6a92f51122ec1792L,0x560d3bf6a8891fdbL, + 0x418b3565a7acd242L,0x523a75944f8e5129L,0x00000000000001f8L } }, + /* 29 << 245 */ + { { 0x503e9a04849613c3L,0xdfe7b77ff0d7e56cL,0x6f2697fcdb76d90eL, + 0x65933ee209bd9c92L,0xd5685ecd28343738L,0x710f4176749952faL, + 0x42e023189f5d102fL,0x36d10f77f94440f5L,0x0000000000000159L }, + { 0x0d4263d509266d0aL,0xdd8b8663db5bdbd7L,0xefca1b51b558c145L, + 0xf5ee60e3e70a0974L,0x37dd6d658dc33d8cL,0xfd4a3bdf6814d0a2L, + 0x153b585c1553384aL,0x7d4f0dea08b2ec0eL,0x00000000000000b8L } }, + /* 30 << 245 */ + { { 0x4b3eb15613fdc5c9L,0x449dcca9a182c015L,0xc78094722f93b771L, + 
0xda7036300e7455d0L,0x513554a53b5a9e06L,0xec7fcdbf4fa313a3L, + 0x047528ffe3fb4d67L,0x38bdf53faef7306eL,0x0000000000000118L }, + { 0x24680a5476ccdcebL,0x96e42aa84664792aL,0x02494069a27801bdL, + 0xcb8439da7bf928a0L,0xe60d6eda322f44ffL,0xb0642335e482b462L, + 0x6336c8709f3803ffL,0x92e9919ed725c323L,0x000000000000013bL } }, + /* 31 << 245 */ + { { 0x5e729774cb774815L,0x17da6efe704192afL,0xdb3c15ed0faeee6aL, + 0xd9b8570bec65984eL,0x56c96381470571eaL,0x6962e90b4e573376L, + 0xa7e5c4ff990ffae0L,0x4c90d86ef841ae38L,0x000000000000013cL }, + { 0x27cca1fe0c9cf81eL,0x4e5348da7fef5618L,0xa59515bce0a75316L, + 0xd66ee09d1c82ccd1L,0x79be306784c1bea5L,0x13fa7967e65dc45cL, + 0xf47f47eda8987a90L,0xf329d0e19d0d80bdL,0x000000000000014fL } }, + /* 32 << 245 */ + { { 0x75b620639659f9e9L,0xa5568027c4b6dc18L,0x1c5dbfff1f499e7bL, + 0x35b20ffdffa60eebL,0x4368ac975e2237e6L,0x502c0e789ed425d7L, + 0xf8877b29f5aeaa27L,0x137e015dcd2e9bcfL,0x00000000000000fcL }, + { 0x69be3c6d3fffeb71L,0x5739d6c9e33b4ca6L,0x94672002643a1eb8L, + 0xd1359e0570fd238bL,0x3b7ef87493916a8aL,0x4ef08127efdaf809L, + 0xde4174e74143ab1aL,0x3bb964e042e7aee4L,0x0000000000000151L } }, + /* 33 << 245 */ + { { 0x3693de23e707614bL,0xf7ec069499298960L,0x556f28cf89f11c00L, + 0x7931968eb75a3c61L,0xd6c72278485683d2L,0x734c3512d4369fa8L, + 0x08671bfcf17d7c5bL,0x1f06b63a5d69bd69L,0x000000000000011cL }, + { 0x64d8055d7c23e2a8L,0x96251f5e1d95c234L,0x5fce87cea0f43bc1L, + 0x67071f622f15f523L,0x40eb36b8d3fea005L,0x1d9ee274012556e9L, + 0x10730e770887e50bL,0xa1a356784004f24dL,0x00000000000001c2L } }, + /* 34 << 245 */ + { { 0x8bd447567568da86L,0xcae3a56a8d725c85L,0x2211f108801c5fc6L, + 0xe5de07da4c3c8b44L,0xff2167cbd0fbe37fL,0x7ef2307074994e56L, + 0x150e804810cf15abL,0x052337af2c095024L,0x0000000000000106L }, + { 0x7c6057f0e87d4ca6L,0x77900e45486f4fb9L,0x566981d17ba84399L, + 0x8f863f24198b27dcL,0xa84e0841e7a49f6dL,0xd17dd7370f1060b9L, + 0xe4f4d9b344f7961aL,0xa1b242e5da29c5a3L,0x0000000000000027L } }, + /* 35 << 245 */ + { { 
0x47c2e6ed3a5f7687L,0x2dafe7db3cce6bb8L,0x419ffe4b771db38fL, + 0x8410ddfd2d624c59L,0x0779b3125b900ecfL,0x1b01dc1c0ab57b8eL, + 0xd245703de2fc4c74L,0x37c542c2f9f31707L,0x00000000000001f4L }, + { 0x941e03652f8379e7L,0xf76dc50d288ef711L,0xba0af92876ab8d8eL, + 0xa3e8cdbdc0c73e45L,0x522c45b349565c3cL,0x00f5e95a79c8c078L, + 0x7554d72bef1b71feL,0xa36855168a5118f3L,0x00000000000001d2L } }, + /* 36 << 245 */ + { { 0x8bf63edd8e519803L,0x7d6d968474760503L,0x99afb2fff64f6308L, + 0xc2df0c31c23b3a72L,0xaf8529c6950f14adL,0x9af0832dee96370cL, + 0x46194ea77aef8e49L,0xe92679536da47b01L,0x0000000000000031L }, + { 0x55bde6f749681136L,0x41ab4f3da512f655L,0x6dbf743677c5141eL, + 0xa2c75d2e5507fa84L,0xc8117d02f9b98137L,0xfe2760c706888e33L, + 0xc13bb97d53002110L,0x385619defd63f2e3L,0x0000000000000067L } }, + /* 37 << 245 */ + { { 0xfa1aa2447f27814eL,0xab02051c5105d84cL,0xccef8bcfe9b00b16L, + 0x3783041e1158a067L,0x5b79215725e29b71L,0x401c2417fb1b4107L, + 0x39c42434410a421fL,0xf10e887b5c4f6b31L,0x00000000000000e8L }, + { 0xacea55d402076600L,0x9054fe9a59b7f920L,0xbcb07980e8a4b5e1L, + 0xe33c2ab1dbb2df1aL,0x1651e2d0390680daL,0x68446f35401d8675L, + 0x9f2b69d026985f00L,0x4446113258d0f8faL,0x00000000000000fcL } }, + /* 38 << 245 */ + { { 0x29d634179986b9d7L,0x39e022af0df6e934L,0xaf6cc96ebe1e9fb6L, + 0x0891c88dc858c743L,0x7925f4300375dff4L,0x02fa4b94b52848d5L, + 0x6db74714e2416cc1L,0x7af11e364b95f2caL,0x00000000000000f2L }, + { 0xe003401840add981L,0xccccc6929796397dL,0x387856d06074f7dfL, + 0xe812adcb61907678L,0xef4f33383c753d8cL,0x052c6e49e90021b7L, + 0x384bcf9006bfb694L,0x5c0dec98d371fde8L,0x0000000000000012L } }, + /* 39 << 245 */ + { { 0x8029b1cd33b35443L,0x26829ac6820cd2abL,0xde4f535f48a6b896L, + 0x1b9e10d51639919fL,0x20ebdfe88bdab109L,0x6ed42c3b39b129b6L, + 0x73dc485879c78aa5L,0xc98fc0ea9a71f600L,0x0000000000000184L }, + { 0xacb1ccb55aea360dL,0x7d835bfbddc1cfefL,0x32bfa37ed7c954a0L, + 0xbf2da393f2998fb3L,0xa1d82913b19bfdf4L,0xe16c3b4a32ac5588L, + 
0xb9ac5ec37b25f23eL,0xe15ddc917547c489L,0x0000000000000011L } }, + /* 40 << 245 */ + { { 0xaa4a212a2177c1c1L,0xf08f1f5d4c8a2a83L,0xeeb0a8940fe14c27L, + 0x564a1c8f3f0c1f0bL,0x6b73048c9297b55eL,0x3ddce9077a810e5bL, + 0xd8f99efc3de8797fL,0xd9b3fa2e37379157L,0x00000000000001b0L }, + { 0x65a7dc22fe9275e2L,0xbe8b93fb6960a690L,0xc783df8863f437eaL, + 0x4fbcf1465a22f1efL,0xa1b86ab3cfdcf231L,0x0b1309cf50cb90c7L, + 0xa77f0bdefc3cce3cL,0x86deff3cc30ac697L,0x0000000000000170L } }, + /* 41 << 245 */ + { { 0xbfed1d85a5415e7eL,0xf2cdc9c92a5ee8f8L,0x3a474a3965d153a3L, + 0x1ba4eaf88d3a6303L,0x1710d1c4a46a689bL,0xe2f2cbbc3a54597aL, + 0x3b223c6c76dc2731L,0xaff989c894979d28L,0x0000000000000158L }, + { 0xa41d3a99df88fbd2L,0x75c1ec4cfb23896bL,0x915a2173fcfce02aL, + 0xddc565efe861b7b0L,0xb1d0fa3f1ec84240L,0x93bee4d79ffc81bbL, + 0x236a25221687f61cL,0x7d49c63e462bda79L,0x00000000000001afL } }, + /* 42 << 245 */ + { { 0x692567a5f802386cL,0xa13a56f6a6dfdde3L,0xa50394cb37c39a0dL, + 0x3d16f5aa7040d3eeL,0x3aadd2f5e6edf55bL,0x187fbe5ca51277bbL, + 0x55e700a285daa140L,0x280d135ca930617bL,0x00000000000001bdL }, + { 0xf8d3e2267e0c0d8bL,0xdbb1cca204aa1cf1L,0xbb77b8f06552a12fL, + 0x8e70802ad8ad6b41L,0xa021a4d7010ddbf9L,0x76ea576d780704b9L, + 0x9e3a04148cfb7119L,0x8a6f4f2384e51f1eL,0x0000000000000177L } }, + /* 43 << 245 */ + { { 0x12aa2288f305b6b9L,0x8f748eae3e09e0afL,0x8a8746cf8e3f1a4aL, + 0x51cbe927d1e329d8L,0x1e39a4366f29094aL,0x54d285eabc8f1209L, + 0xe590966b4e8e7975L,0x5dec63afe66ae4aeL,0x0000000000000116L }, + { 0x3b0e71ae7e09821fL,0xab231a0941d1372cL,0x852865276a4390eaL, + 0x4d60e9bbb914c18dL,0xf0ad8d4e2ad21e36L,0x8bb1cc09fa2444eaL, + 0xbdbfb2b40d8c7208L,0xcb2f1f7a32f413d5L,0x00000000000001efL } }, + /* 44 << 245 */ + { { 0x4515599dfdc05854L,0xda8ff3a6782c42a8L,0xdd8e941349490283L, + 0xd85e445578dce0e8L,0xa77329fb7ac7177cL,0xb067a59806fd1d4eL, + 0x6f1ddf79c8cd2f44L,0x1dee178202e39ce4L,0x000000000000007bL }, + { 0x1a0dbd3e38a3a617L,0xa3c67e5f162bbdfbL,0x316f1b4a3173b0f1L, + 
0x1e49c5c3e5224d7eL,0x3f88443f64108b19L,0xae6531a1f6f95472L, + 0x7c046d1e7c89c919L,0xcbc20e84ca70cbb4L,0x00000000000001e6L } }, + /* 45 << 245 */ + { { 0x45ae06b2bdcc3342L,0xd509e8020d55ebf4L,0xd1fe846db7bfe591L, + 0x7e45117b66ea6d0cL,0x07f7ba94b5cb8e33L,0x2365cffa4c1fe4bfL, + 0x59614a7a9e7ed518L,0xdba553f88f9fed0eL,0x00000000000000aaL }, + { 0x99ca2d5a345708b0L,0x835ad98cebd1a22eL,0x66f253ba9c713fbcL, + 0xf1015a40ae272ea1L,0xe582f157d00914a6L,0x9906ea50402ca5c4L, + 0x299870d22428dccaL,0x7cfd6b258095af1aL,0x00000000000000fcL } }, + /* 46 << 245 */ + { { 0xe3be5d0d627f185fL,0x2c3d75b32b3389b9L,0xd0313bacee1e001fL, + 0xe82de4821eb0a534L,0x8321862f2d78d8aeL,0xa2872545e1ffbacaL, + 0xf8d9d239c1719916L,0x15cfd49783909f63L,0x00000000000001daL }, + { 0x1e3e01f01063999eL,0xc5d2f9e1bd7538e6L,0xb25f6bcfbe3c8745L, + 0x20c30f7613cc6a66L,0x77edef8bfee4b1ddL,0xb7b09fa8731e7a9eL, + 0x3c514cc2bc686eedL,0x699ed1d2ae1d335cL,0x0000000000000025L } }, + /* 47 << 245 */ + { { 0x8850e09b2de78492L,0xa4f91fba9d81e3c5L,0xa47dca1e2b1eb8b4L, + 0x2983ad5bc084e7f2L,0xde8c13ab5f4efc52L,0xff2fcf43652354fcL, + 0x81e86497f601c563L,0xc3d6d3864dffcb43L,0x00000000000001a3L }, + { 0xbab2f6d53a79466eL,0xa087360abab23b64L,0x5b37df4aac31247dL, + 0x2b145d97d5de2624L,0x4851c9515cb00cc3L,0x22c4304ef34ad1bdL, + 0xbdb1ce92cfbb9914L,0x07ccef0abe21104eL,0x0000000000000170L } }, + /* 48 << 245 */ + { { 0x9584a837980c0f9bL,0x4c2c21b4f234ff0fL,0x584a18241068eee2L, + 0xb3c08003c5b71c77L,0x98a59e372505132cL,0xfeeb536724513321L, + 0x83f623522c9afa82L,0x5a470aad23d28bbeL,0x0000000000000160L }, + { 0x2aa901fdce4e21a9L,0xc0fb45706c14573aL,0xe0711b5e21c1a7afL, + 0xee3bfbfa2a47a293L,0x12830261ff0de285L,0xd334f3ee68d87d16L, + 0x3fb8d74765c6b8bcL,0x1c40f1aba12a5638L,0x0000000000000070L } }, + /* 49 << 245 */ + { { 0xd8de01e7d09513b0L,0x0e224cf695a57479L,0x66098ecb776d5c68L, + 0x31dc808681d73198L,0xf34ab49771885d5aL,0x245b101068c07bccL, + 0x1b0415956f754ffeL,0x28d98ac39eed986cL,0x00000000000001edL }, + { 
0xa5b7b90083abb8f7L,0xbbccf881305576feL,0x7ecadad7abfff444L, + 0xac85c6b037ee04eaL,0x7fd13117a17cd3d6L,0x21d25a19ddd5bc0aL, + 0x28de095c009073caL,0x3961d13dcb0c209fL,0x00000000000000beL } }, + /* 50 << 245 */ + { { 0x10bb54d9391ec713L,0xf25eb2bfe18ff505L,0xa45400ba6cc76e34L, + 0xb1e654a2e55f9f1dL,0x6a929b7df4ab5d8aL,0x7cbf9f1318655654L, + 0x1221d22fac713cefL,0xcdac9c9be1fc247aL,0x00000000000000b8L }, + { 0xb83d4bf76480f3c8L,0x0cf881acf07b9b03L,0xb1fb5d2137534ebdL, + 0x15bab0952c0e8fe1L,0x7f18cd140fecd0e1L,0xbbbfff81257469e3L, + 0x7067debb1615060aL,0xd2be688fb55abf01L,0x0000000000000016L } }, + /* 51 << 245 */ + { { 0x608780b7e2bc7bb1L,0x561f8901ef5a0e12L,0xf0542f44549f8bd6L, + 0x963cf806da34a9a6L,0x478e5efcfe94c12dL,0x35bba59014a9626cL, + 0xa601a1bbc34e6af0L,0x269dffd4d19c944bL,0x00000000000001f8L }, + { 0x6f2956333e41b8f0L,0xee0766054b4a2e7cL,0x15bdc7de38d4d8e1L, + 0xaca4398008c99e00L,0x34c67680c545ca80L,0xeae77ef56ce03f0cL, + 0x27fa04e27e5bff0cL,0x87b2f2a846a0e91bL,0x000000000000007fL } }, + /* 52 << 245 */ + { { 0xf60ae3ffcb6d2a3fL,0x482de542c33f5aa1L,0x22d7de4039386ccfL, + 0x798f81e53a7d4c56L,0xd4d64bde3c8827d9L,0x1faf02c38b3b5cbaL, + 0xaa438f22b1afda7fL,0xfbfbebdfc3170b92L,0x0000000000000122L }, + { 0x429680862811743aL,0x82b9e326d780ee2bL,0x9105ddae1fe0dcb6L, + 0xee40a9d2d989fbf7L,0xb3bbbe7a4dc53350L,0xb68fe7420a6e7da0L, + 0x3e0083720512a045L,0xb766291ddaec0849L,0x0000000000000088L } }, + /* 53 << 245 */ + { { 0x01a091d8d3c4976dL,0xe976df9f909844adL,0x5a02c8a4247d52fcL, + 0xb1aa080f7be65bb4L,0x7706859fef4a179aL,0x527a3e36b3d4fcd2L, + 0x3984a1c97a4a6782L,0xefe16fbd714ae36bL,0x00000000000001fbL }, + { 0xa8211b1c25cbb098L,0x4859a0eb3bd99868L,0x6c9649c20efd41d2L, + 0x1bb4ba1856a41c6dL,0x0c8121ddb6bddebeL,0x34837d357d24a62cL, + 0x5c3d55bcff33e24fL,0x45030bfd16840912L,0x00000000000000abL } }, + /* 54 << 245 */ + { { 0x5e50374c3a2da57dL,0x5f78556b1119606dL,0xd16ae65786badbf3L, + 0xbcdc9d3ef7ddc184L,0x4272a8b0992238fbL,0x60ed6dffce0899b9L, + 
0x0c3d43145f78be82L,0x250527665a51c6b0L,0x0000000000000069L }, + { 0x0fa9b9ce4a2967c9L,0xde64435bda5253ffL,0x466eacc0338fe90dL, + 0x8ebe67813d7ec117L,0xd32402bfc1104e59L,0x5dba885e90be00a3L, + 0xadfd197ad73138beL,0x8137f0c52ef27a3eL,0x00000000000001c3L } }, + /* 55 << 245 */ + { { 0x7f02f467394ad9efL,0x285df1a69f23b02aL,0x16e5676ecde7acc7L, + 0xe3e9c6238fac90dcL,0x76aef25902c87d17L,0x92e6a16157eb512dL, + 0x37eca5054023938aL,0x6b1963cf77e7fdd6L,0x0000000000000158L }, + { 0x27a4063d61891784L,0x858d6af50fd9cb7dL,0x8af301b292403037L, + 0xf4b2e253c2b52eceL,0x226bffe2eb92c43fL,0x50fbc9f200c974e8L, + 0x7d07d1186b7dbb3fL,0x8fba39c8af045ee6L,0x0000000000000194L } }, + /* 56 << 245 */ + { { 0x1a4d19d6b580e3a2L,0xcee647c220834fccL,0x94066a7c2e9454e0L, + 0x220b4993b1b480e6L,0x0a89c3e330e72308L,0xf22f323e6294e865L, + 0x27dad374645c364dL,0x16b70ed4b47a7a2cL,0x00000000000000cdL }, + { 0x208a7f56cc79e99aL,0xb844ed7d1991087eL,0x6f19c1c07ee9fc8bL, + 0x12a4140bb9208633L,0x8f976b252c1a5947L,0x083544dda1bfc7c3L, + 0xd432604b206df951L,0x0f3c03b57eca93b3L,0x0000000000000115L } }, + /* 57 << 245 */ + { { 0x99d40986753d916dL,0x76e71d0dc6f9ad8aL,0x7c1b28a9131d329eL, + 0xd572e26068eda873L,0x6573c96b3dc992fcL,0x7f37ed7ff8056b40L, + 0x8763b71e7e52cfb2L,0xb95a7916e3cc1875L,0x00000000000000d3L }, + { 0x063f06bfa0d93ec8L,0x8e86a0ae68082206L,0x00d504f407041b08L, + 0x4dd113bf92c96a08L,0x52e9b7eac04ae2b9L,0x84e384d5fdcb7b5fL, + 0x493ca1391d0c6202L,0xcca8ca15fe81207dL,0x00000000000001a4L } }, + /* 58 << 245 */ + { { 0xd5d4d7cff6f13b77L,0xa235e197561ed919L,0xfb2fe413292a81c4L, + 0xc0813a2712fd60fcL,0x5810faf1d63cb00eL,0x9f5f197d3fd6725eL, + 0xf14546b1874b8146L,0xdfba72ae7fee7377L,0x0000000000000194L }, + { 0xeae4ae28cbc9ce37L,0xec3f756dcd373c41L,0xa06e06a8f77cea19L, + 0x94d4d6712105ff62L,0x72cc585c06f6fd6fL,0x663727a62e929fcfL, + 0x8b961949dbf5886dL,0xda00c0f69718340aL,0x0000000000000021L } }, + /* 59 << 245 */ + { { 0x1f690177297f5c08L,0x829e1e491a44476bL,0x546ce72c95b8a526L, + 
0x775796e3c8b75808L,0xc88ded0ba53f8e9cL,0x6c9d25f77d4c14caL, + 0x19c94323081d88efL,0x558a9201bd375822L,0x0000000000000011L }, + { 0x3f9d75a8b8d1b7eaL,0x7922b2448df0af6eL,0xad5e8cc1abc15eedL, + 0xe3b3754bf3b890e1L,0x379f1383a7648fddL,0x9eaff495874c0014L, + 0x67e39f779ab784b9L,0x620e2eb4778ccdbcL,0x0000000000000127L } }, + /* 60 << 245 */ + { { 0x8f73b2234438a469L,0x39a3151a312f3e82L,0xc1b4a65f4fd6149eL, + 0x5cfd0145d0d76e86L,0x391146bb1a77eab4L,0x3e4961491c97071eL, + 0x60e4eddaec115d98L,0x0e58bcd6dcd6b9e3L,0x00000000000001a1L }, + { 0x64049099d8bd20efL,0x2204f491ea12243bL,0xa219aa014de1a0f6L, + 0xf04edad6989ff1f5L,0x5187c5776afebbdeL,0x6b50208e8707b524L, + 0xf92f5bc8911785b9L,0x939c770bffc55f6cL,0x000000000000014aL } }, + /* 61 << 245 */ + { { 0x23bee635f7258c06L,0x167cae1604f1f357L,0x44dd3c13265880aaL, + 0x14d4fcf659e7b653L,0x1cce371ab23c6ca4L,0x1f0d3fd2be665d7aL, + 0xf9a6b67edbd611d0L,0x3914288b373b5d3eL,0x00000000000000b8L }, + { 0x35ece3f280efbc9eL,0xf5ca78c4e90d9d2bL,0x28a4ac1626f91851L, + 0x6c410ee1547cb8abL,0xe034876ad121a20cL,0xef1f5ccc10ad2acbL, + 0x46b897f276faa816L,0xcfc360975d09bc6eL,0x00000000000000d4L } }, + /* 62 << 245 */ + { { 0x4e9b70429fe870d6L,0x6b3a18c95b05c44dL,0x9402371050c1dd23L, + 0xc73dd6f17ec19f72L,0x341988d5d13247c2L,0xa2507b4211b903adL, + 0x317804a96ab937b4L,0x61a65fcea908e4c7L,0x00000000000000f0L }, + { 0x39645ebd823fbc2dL,0xb735849ea4146076L,0x3155c49373ffc246L, + 0x38a8186d9eaef0bdL,0x6dcaacacad300f96L,0x1aa11954454bd19bL, + 0x4742ad6d770ef5f2L,0xb5e674ead3dddd4eL,0x00000000000001e4L } }, + /* 63 << 245 */ + { { 0x2e70fc86d8f34db1L,0x5465c430a63abec9L,0x7ec081a39b336fe3L, + 0x5dce23079972d290L,0xd92f92e87c892943L,0xc47b48bcd6dacbb0L, + 0xace480e295957dcbL,0x762e668932a0e43cL,0x0000000000000187L }, + { 0x3475d0d61e89009dL,0x943cd5f0db2bd19bL,0x534c64936ed81e64L, + 0xbe764b65e36cda5dL,0x5a315225d65e0449L,0x6e045dc36797dfb5L, + 0x50e92a969bf6935aL,0x08bc07d4aa863084L,0x000000000000005cL } }, + /* 64 << 245 */ + { { 
0xc3246fc8bddb33c7L,0x68dc29bb81972f1fL,0x7b5edc9bee7ed0fdL, + 0xee83430ce158deafL,0x8a924b48df7c5708L,0xb421a505f2c65a04L, + 0xb5da215e5c289f82L,0x752d5ff9b9d02b8aL,0x0000000000000165L }, + { 0x55564b4a0d9d7788L,0x70c744916cab3607L,0xf53bc98690091526L, + 0x1e88044d9abc41a6L,0x38e9d3dd2bb6d384L,0x4177105e84bc3dfeL, + 0x2b95a432674df1b0L,0x38196726eb1de0dfL,0x0000000000000137L } }, + /* 0 << 252 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 252 */ + { { 0xa6d6b095dbbd171fL,0xf0d3b95744f06181L,0x46b9786578fab381L, + 0x7da97845ae356e29L,0x4a0c3e00d01b3c09L,0xaed18677e106d5beL, + 0xd7309c761affdb67L,0x9715875425f2b8c2L,0x0000000000000193L }, + { 0x65d3657b24785079L,0xb1d2b5f75e064334L,0x0736bfda2b7b61e1L, + 0xb39c4db1284e4fa6L,0x66cedf43bd934998L,0x8f9f6243324d2de1L, + 0x92f2524e4c193171L,0x75705acd964a8383L,0x0000000000000031L } }, + /* 2 << 252 */ + { { 0xfce13e03d0739036L,0x0af573f2a4161596L,0xf3db4c750bdedd89L, + 0xb594cca3e8eae413L,0xd9ad2edce2e66f2dL,0xe99c048166f7f599L, + 0xd141ff976886e453L,0x087714a153e3221fL,0x00000000000000beL }, + { 0x91eab39e5daebd46L,0x3f9ddea494fb01f2L,0xfaaa200b142ae67cL, + 0x853da6b0ca631c4bL,0x4c9ca396fb3f4229L,0xb00a8de2e3690f95L, + 0xe8000000c2c7704aL,0x65c0ab7393ad0dbcL,0x00000000000001e0L } }, + /* 3 << 252 */ + { { 0xedec6d350f810445L,0x2ee468445fe4215aL,0xdace7f0e969acde4L, + 0x3708f4f4c754dff6L,0xa510303f6c0370abL,0x9d5cac3ab4adc2feL, + 0xf33d4a2b395a765bL,0x437bfdca1856e0c8L,0x00000000000000c5L }, + { 0x287dc7f70812a78cL,0x30cb0cd905dc3fcdL,0x80343b51008ec68bL, + 0xbb3da3a5cc404f65L,0xd5276af593d9f303L,0x85a875d47daa5950L, + 0x5baae52f078e6556L,0xddd9e3efe58a4e97L,0x00000000000001c5L } }, + /* 4 << 252 */ + { { 0xa3ff48a37252eba7L,0xfe771fd391359daeL,0x9dda3a45d05be01fL, + 0x5cfa713de97a9a89L,0x5fd23bd887cb6c64L,0xbe45c0758d9ae4bbL, + 0x90f9438478e27270L,0x3e1b0d18f0dbddd9L,0x0000000000000018L }, + { 
0x6e5283f7f2fb7facL,0x232e9cd13593d0e4L,0x97418d9cee81f08cL, + 0x339540f51f2e8dc0L,0xeaacb20b619b5ee8L,0xefc8079a4ae00660L, + 0xa968fb1c765802b5L,0x60088783f20650faL,0x0000000000000145L } }, + /* 5 << 252 */ + { { 0x322efe142a7cbe50L,0xaf425a7db2f85051L,0xcc7c9d91812b26a2L, + 0xd89fd04968066e44L,0x79ba895fec83b2f1L,0x570c2db410e90a64L, + 0xe00f1ae889c59658L,0xd94f065c78edd742L,0x0000000000000021L }, + { 0x3fa27bd8c5b5b0bdL,0x432599d6a5121de7L,0x528925da2e8294cfL, + 0x28c15142cf0f9464L,0x38498320a6fa7f3cL,0x92cde9b1ba93f4b8L, + 0xefe2455e4bb98cacL,0xe0cc0ef1d1b0e59cL,0x00000000000000d6L } }, + /* 6 << 252 */ + { { 0xc538d35074a00efdL,0x4776ad9697bc3ac7L,0x22041b531867f889L, + 0x2aa3594319cbf055L,0xf00a49d00c38f064L,0xcf40e6177bd0991fL, + 0xa59df69b0155ffafL,0x1f2d50feace4aa07L,0x00000000000001e4L }, + { 0xf1a777ee5cce1b0dL,0xfb8a0ab09c7ad9aaL,0xc0d3b78058a6aff6L, + 0xa9f4b3a3370af1d6L,0x3a0731a53d39d147L,0xf7aa9df732c93fa5L, + 0xd9a0599f9c938c2dL,0x2702735d01b0c0d0L,0x0000000000000152L } }, + /* 7 << 252 */ + { { 0x64f62dcba1d24f4bL,0x134da7c1f5776ef9L,0xb6699e3f534bf835L, + 0xc9b93539ef275259L,0xa9d68c870f7cf548L,0x4ee54ff7fbd2d9a9L, + 0xcaf81404465e7c2cL,0xa02fce30048cad5dL,0x00000000000000d4L }, + { 0x9ce2e24c6e76c091L,0x8353d7c941ec4c75L,0xe7f5bfbb43a019d0L, + 0x74cf2bbe508e92c9L,0x58f2e1b68dfcbacaL,0xbe79df8b98bdc65dL, + 0xceabfa1cc699db7eL,0xe237815e0e3f9421L,0x0000000000000093L } }, + /* 8 << 252 */ + { { 0xcfe4f02e36e4a40dL,0x3af87a7df2b51e5eL,0x3b6baa2683fbf682L, + 0x9a7a9339063e5979L,0x77214ebf41b680c9L,0x1fbdee591ae59f71L, + 0x9a4dd12ee1530c0aL,0xe113839691fe54b0L,0x00000000000000baL }, + { 0xf84d66ebfc229914L,0xfcc47954452da1c3L,0xe662b9c06f9743c5L, + 0xd0db646d087d7194L,0x2abc096b6e1bbf6fL,0x24272840331f3a96L, + 0x0da86ea2a6b9f40dL,0xedb363317dcb4554L,0x00000000000000d5L } }, + /* 9 << 252 */ + { { 0x35ca7b714da6061aL,0xa9bf19f0003e19c8L,0x1298c7034c3c37f1L, + 0xef43fc92921f123dL,0x065e9cb5cf8acdbbL,0xaf1392df0a25165fL, + 
0x2174ceea344d3fc5L,0xebfe81c529d22225L,0x00000000000000d6L }, + { 0x0fd8fdf2bff99ba3L,0x7789d27d28498d5cL,0xc5d731b872535985L, + 0x4dac761544e0ec8dL,0x1997b34953628596L,0xda8f1b453df08420L, + 0x2cf56fdce0df23fdL,0x3c9a7005f2c7864dL,0x000000000000019dL } }, + /* 10 << 252 */ + { { 0x5c1c16872c1e8de6L,0xd70ec472b1c70780L,0xeb11585ec5b43635L, + 0x943929d2fe1004b8L,0xefec44e1735d06beL,0x22380d73c2f367dbL, + 0xcf498e0d550f497bL,0xb9d7707958da0a67L,0x00000000000001cbL }, + { 0x4a86da6a6139e58aL,0x59828182eba6ee4cL,0x117ea761d5b978b9L, + 0x6e5c95fb421c7b72L,0xcb8ab1e8b9da6233L,0x8a99b06f696c2a02L, + 0x128544479166802eL,0x66c2c35cf1aa2cccL,0x0000000000000120L } }, + /* 11 << 252 */ + { { 0xd558d1c2d75dd2d2L,0x835a529183c47104L,0xbe1680cbd6a8d82eL, + 0xf1ed775571b65f2dL,0x375ac4137edd4aa0L,0xca2bf9c9863979e3L, + 0x8b2d261eb6a3f48cL,0xe3fa313d74fd8349L,0x000000000000001cL }, + { 0x0b8664383e70dd66L,0x6a5fb43b15cef5dcL,0x83a0fe8cf8871f0cL, + 0xb879a0e6848f9812L,0xf93ce978f35ca817L,0x74643f68687d2a6dL, + 0xb9e7ba00c6df8a5fL,0x151a645afdd5d515L,0x00000000000000c6L } }, + /* 12 << 252 */ + { { 0xaf39cf7917f8115eL,0x538d81c5b1e2e5c2L,0x6d3a29b22661d6e6L, + 0xbf9ab97716f4f3d1L,0x1ff2e621c84d4f92L,0x985dff6bdb14f203L, + 0x24559c328f4f0e0bL,0x960527201a1e7a96L,0x00000000000000feL }, + { 0x398cc66a79786d47L,0x009bc173b2b9653fL,0x295b412ed23322beL, + 0xaa78b521ffe060a9L,0x5ff5a5feb808d2b8L,0xe84af56c3182e463L, + 0x94972ab444022236L,0xa17d7b55de52f2baL,0x000000000000014fL } }, + /* 13 << 252 */ + { { 0x8c90e71abe54c3a6L,0x449b10eaf71c5359L,0xeea87dfd829fa44eL, + 0xd8353ef02ca80e90L,0xaa71620cd1229447L,0xb0445ef2d4344caeL, + 0x2792ebb0451ce474L,0x3581deedc2907707L,0x0000000000000199L }, + { 0x39ebef0294adefa4L,0xc44fb92c3ac01ca7L,0x4df141e2c456c3caL, + 0x20aed69b471dbf3cL,0xb31db16ce85b7ce0L,0xc239e7166911a4fcL, + 0x3d8c230321f7392eL,0x98f6f7e6e19b03ebL,0x0000000000000142L } }, + /* 14 << 252 */ + { { 0xf3d8f02c9371d8ccL,0x034277222083bc29L,0x1bc1a1a2e0a508d0L, + 
0x75402b4365386b1dL,0x7832c7a2a81ea689L,0x30fdf8d8d3c43e8dL, + 0x11af93da8bdabe83L,0x1fc117f8df22b64fL,0x0000000000000015L }, + { 0x266747ceaaca36f0L,0xa3b4bacfb8eb0f62L,0x60b72b71577519e7L, + 0x689653954bdded3aL,0x0366164aab2ff463L,0x5615191444c3432bL, + 0x2040ed772700d67bL,0xf6b08ac8cff9e250L,0x00000000000001c8L } }, + /* 15 << 252 */ + { { 0xb8e81a2dddb4eb9eL,0x89f9c08f3947b43aL,0x69d84b2bb0e758a3L, + 0x862f559972bc3e5fL,0xe9c567914ac68eabL,0x8d66059f8912bd54L, + 0xa386e77fee35a78cL,0xa434e2e64303b216L,0x0000000000000165L }, + { 0x545f743413593d90L,0x745c910055bbcb3fL,0x45e63824bb344485L, + 0xc26c34dd38c0f16bL,0x55c10ecddf952e71L,0x60c4a2e598fd979aL, + 0x5ff003efa6ff9ef9L,0xdfb5fce95e578209L,0x0000000000000077L } }, + /* 16 << 252 */ + { { 0xa87650d1eaffd713L,0xec781c97a797ad44L,0x1e94accdb4bcac17L, + 0x2284f96fb3981cc9L,0xb8b2ef57091634ebL,0x4accf5e8cad053d0L, + 0x0eca4f9b5d1ef480L,0x1598ef27d6e76957L,0x0000000000000131L }, + { 0x7cbc9061b72d8840L,0xe611924d86a5ba36L,0xe02e9ca8e066d292L, + 0xdb092f52a68a0466L,0xf5d97f5d0c0b18f8L,0xb5e171465c99b95cL, + 0x1bafbe749d3d6acfL,0x606103fef370bd5fL,0x000000000000013bL } }, + /* 17 << 252 */ + { { 0x3fe39e9cda6f368fL,0x4af61b0fabb8e7a1L,0xc4fc73f42b9c9912L, + 0x20ef2e5ecc24e5dcL,0x481cdbd4f1023146L,0xcb417697117bf9ceL, + 0xb8436bbb544036a2L,0xd828ba5c0a8f949aL,0x00000000000001fbL }, + { 0x19515a04ac53c4beL,0x909688614a652fdcL,0xddbc6aacea6eab85L, + 0x15a84e31dd8f09c6L,0x1482d4ed2a71c6baL,0x48887451b38da85cL, + 0x885c36530c77e28cL,0xca5e7a1b99505152L,0x0000000000000004L } }, + /* 18 << 252 */ + { { 0x040a2260aa34bc2dL,0x121519766afe1aeaL,0x6cef13fc4b8bff60L, + 0x648802d6ff7615d9L,0x840563e335e69c6bL,0x39b0d6a55fa0b253L, + 0x88a3c3ba4bb58678L,0x126b85d347abaea3L,0x00000000000001afL }, + { 0xcc56805c23a0415aL,0xc577101dc281dd52L,0xfd3ef16d7ca89e69L, + 0x4a7283f560b671afL,0x852732a794a73553L,0x3cdb84d0f7951035L, + 0x6d9330e01c556d8dL,0xec1e7cd492cf6b85L,0x00000000000000e1L } }, + /* 19 << 252 */ + { { 
0xd51138a7d7841050L,0x5a5253a482ee0b99L,0x724f84f5c6740508L, + 0x80a3e456c2d2de09L,0x19dfa21c187141e2L,0x4d41ef7b42877c25L, + 0x9d6b3326d75209b4L,0x3a21cd023587efacL,0x0000000000000006L }, + { 0xa492f40456ad32a6L,0xfcda204c21031b3dL,0x4cebc3ba8fa9f767L, + 0xf96068d423837f90L,0x99013dab42ea9f03L,0x6e21bf1b0ddd2cb5L, + 0x0c9a452bdf051b9cL,0x5c9d2ed27034ea63L,0x00000000000000efL } }, + /* 20 << 252 */ + { { 0xc3c8d26f643c4cbaL,0x3a5bb59455e63c4eL,0xe0a25b35545e3cb6L, + 0xc6d52abeed177530L,0x2eb287968f4cd3c3L,0x01b5433943c6ceddL, + 0x85930ae934575ceaL,0xcf13a14404294cffL,0x0000000000000172L }, + { 0xe9cf3a7c89e62cecL,0x19f62eba02e58adcL,0x0b98464bd3762108L, + 0xd148d77d942b9f9dL,0xefd8cf2b6ffd03d5L,0x2ce97d571fe049ebL, + 0x684bd7957de2640eL,0xf509af614fadaf12L,0x000000000000000eL } }, + /* 21 << 252 */ + { { 0xdaeeefc8bd7db126L,0x9aa0f6ca3e712793L,0x3219db3fc2424c7bL, + 0x176f35eb708e50ebL,0xecd6fe934b0b3588L,0x897e0be7e25dd575L, + 0x37764d7c511df00dL,0xf2534cd41e410d17L,0x000000000000005fL }, + { 0xe086716404e39c47L,0xfcaa5822faa76d22L,0x325e11738fa74426L, + 0x744a07fe3c45bc8aL,0x554c7032d61136b6L,0x0530bfc9ba0e8c67L, + 0xf25a3a8ed53c15a6L,0x2f2c8715310c9174L,0x0000000000000134L } }, + /* 22 << 252 */ + { { 0xc4edf2abef129aabL,0x669fce1c0d57c727L,0x1d02a96d42e87512L, + 0xe150a353f32d3174L,0x6808c4ab4d52e358L,0x16c366cbb7c74f3fL, + 0x143e3bb816eb7940L,0xc50b78ebcc3ed5a4L,0x00000000000001a8L }, + { 0x9cb287a40ac87da7L,0x35ecfd38b9dede9aL,0xfc155649fad6c214L, + 0xfb97929926af1c4eL,0xb4fd33291b4ba2a3L,0x5027b62cd8079f78L, + 0xcef56123ef96d8aeL,0x5c7c3b2ab1952e0fL,0x0000000000000147L } }, + /* 23 << 252 */ + { { 0x5a3018dfed37f7b8L,0x7a3b0c2a057bed1eL,0xd616a40033284d2aL, + 0x86517965e501d780L,0x2216349cdf6fe150L,0x8af9ad6f9114ac94L, + 0xc1a53ab4a61c04b5L,0x8dcbb53b09eb46c2L,0x000000000000010fL }, + { 0xe672328378dcf9b4L,0x3aeddb5334ae43fbL,0x616116d1e4997ea6L, + 0xd0fa538e3f22345fL,0x25131525fa6fa9fbL,0x7c1a8081705d6d97L, + 
0xc89bbc7389f5d450L,0x542a0882dbd80319L,0x00000000000000ddL } }, + /* 24 << 252 */ + { { 0xd883edca1854dcb7L,0x158f2ad5e4b33262L,0xa89ba702743cfdc9L, + 0x762616ff85efe2cbL,0x01c666119880709aL,0xec6df8890999a8f8L, + 0x11cbe7f35006451cL,0xdb0dc5696a551e2dL,0x0000000000000078L }, + { 0xc8ac5659a91c9a13L,0xd146235491cfcd3fL,0x8031f8ffb83429e2L, + 0x12f36b88c7c807beL,0x9bbe73ada17696d9L,0x631f9dbe13e49ec5L, + 0x6a868794fccaf136L,0xe0341491d4c71b95L,0x000000000000001fL } }, + /* 25 << 252 */ + { { 0x714fded2fb616313L,0xd1f8be5c4138197bL,0xe06020deda814497L, + 0x16e6c5994f00cb7cL,0xb75cfe69712c2c0bL,0x303d77a3db529279L, + 0xaa2d207ddca146f7L,0x587e3f839bbeb98cL,0x0000000000000158L }, + { 0x988d1f50a6b10157L,0x43a1d2fb914103b3L,0x8d3afdfca224786eL, + 0x52bfdecb99a99cfbL,0xa6f20f93dc38e50cL,0xe71ad506ab79cb34L, + 0x937405c803ca3b19L,0x12b16d20ade81e6bL,0x000000000000000cL } }, + /* 26 << 252 */ + { { 0xe14ddf295a95e0d7L,0xfb5740d3f0aa2850L,0x5e8c11d18d654360L, + 0x3aaed8340252e7caL,0x1aeba10cf88cdedaL,0x1a57aa22cb63f2a9L, + 0x89ae2ab42123c4aaL,0xf8665895bb8d9ec1L,0x00000000000000a9L }, + { 0x73de3b4627fe0dafL,0x1267a3e741ca4fd7L,0x26684605cca84b06L, + 0x9190d4d64bded61bL,0x50d97803989719ddL,0x605632471d59b025L, + 0xc151df922083bb72L,0x6fdbeb823d3ce909L,0x0000000000000094L } }, + /* 27 << 252 */ + { { 0x0626cb92c7c9761dL,0x20d73ca5c6afa3e7L,0x1e20927bbdd40e51L, + 0x39788dd2d806e2caL,0x143aba83269c8534L,0x127e8992cc4cd1ffL, + 0x29eb0e1d86bb67bbL,0xe6cd55afdef639abL,0x000000000000005dL }, + { 0x42f52e70509595ecL,0xc41012f3bf39cfe4L,0x6f7fb05ea709badbL, + 0xd80b13e292cf6184L,0x23683493edc7bd6cL,0x277892f1dc43b987L, + 0xc76285574535695eL,0xac2a363a1b8d2e1aL,0x00000000000001aaL } }, + /* 28 << 252 */ + { { 0xe90f84570012e81eL,0x6fa09e16b98fd2b0L,0x0a09399d9805fedcL, + 0x2e315eef2d5c3795L,0x890ace479a886fafL,0xa901e7b5d1690ec4L, + 0x45116659bbd30a26L,0xa07c60ad19302f7cL,0x000000000000016fL }, + { 0xda9350157782fa75L,0xb0253020981b2b85L,0x3a2434f0f76c5389L, + 
0x51b7a3ea04884584L,0x09d81db0d12297ecL,0x46842ca99d202166L, + 0xe9fd1469d502c092L,0x61703bf654ddf5cfL,0x0000000000000176L } }, + /* 29 << 252 */ + { { 0x6fd8079edb2c1400L,0x143a2304cd0e2f67L,0x9987e77144296e69L, + 0x6d27cba4601afb95L,0x8f6d58c41a75b55fL,0xac76bf6fb30a6c2aL, + 0xb6e0fa7760de6c0eL,0x1793832b98b30d3eL,0x000000000000018dL }, + { 0x7d7582eccc35416eL,0xf0472b74398092a7L,0xb122b93c58c6b243L, + 0x100b1fe07e032f1dL,0x62b71644058ca84bL,0x036760890ac11b8bL, + 0xb36d7ad9092839edL,0x092759cf5f77a937L,0x00000000000000c2L } }, + /* 30 << 252 */ + { { 0xaf6d6bee01c05a63L,0x3d7282dbd5a612b2L,0xa4bcb677f9b49c9dL, + 0x1186eeb8b8f5a993L,0x08fede59fdd551f9L,0x6ac2844cb7c047eeL, + 0x972583e24e4c3786L,0xefb8e22533d3b9fcL,0x0000000000000145L }, + { 0x9164f2c84f585b1cL,0x7f49bfd2d53a4a93L,0x4ad4bfca07ec3188L, + 0x0412a46f933b2990L,0xddfcdb113f03779dL,0x1d7078ff17678e3dL, + 0xe3fbc9b2e9b85930L,0x25bbb056ee1174c2L,0x00000000000001e8L } }, + /* 31 << 252 */ + { { 0x6a2da2be4951a177L,0xa7712dade0249695L,0xca9500ddebd45219L, + 0x9512855fe6a249cbL,0xd2fd8a82b7b464f3L,0xc32ea5d18aea2d80L, + 0xeb2a2ca3e13e3278L,0x629f37f5ac6e4e6bL,0x000000000000001eL }, + { 0xfa92ca3d66778869L,0x994101dfad4fa305L,0x00e70ff2866528e5L, + 0xf4e501e9fe725c14L,0x35f8f43e07d0014bL,0x55689c488108dad8L, + 0x998730a593fa8b70L,0x640054d4497da817L,0x00000000000001fcL } }, + /* 32 << 252 */ + { { 0x272ec59646e69dafL,0x348d7acd79b09a47L,0x8731070bd806f931L, + 0xe1191599011d80caL,0xd001ced01b8cd947L,0x31dd3c9878a6654eL, + 0xde8dbd0cc6409599L,0xb8939d44691b6141L,0x000000000000017aL }, + { 0x146fdd629c45660bL,0xeb9a13d3a6283d26L,0xb99e7e66c9e2a86bL, + 0x926ee0abe626a5eaL,0xf01ec218e8a93dedL,0x8562b91a040fb497L, + 0x25fa595f6ddd2f63L,0x9389fffca2612413L,0x00000000000000d2L } }, + /* 33 << 252 */ + { { 0x6c4d9184bf16e29fL,0x70e6d2ab705bb9c2L,0xfcabd209b2118b5cL, + 0x66321abfc9722264L,0x7a36d66f3200176fL,0x674a9b43fdf743c2L, + 0x0b70838fed49945dL,0x274a8631bf04eab2L,0x0000000000000028L }, + { 
0x08650f55a3b6345aL,0xa2cda848cc418bd7L,0xea80cd7d13c42880L, + 0x6895e5e434f7c51aL,0x1c51266d65380432L,0x5a024a2f94c33187L, + 0x5e5fd4988a8c0674L,0x1b9f5d2d250b1cacL,0x00000000000000ccL } }, + /* 34 << 252 */ + { { 0x54a1d996f85100b2L,0xcbc5aa64db80cbefL,0xe91d2b9f59e324dbL, + 0x9a5be55343d54cb3L,0x0ce9b03acb01dac4L,0xba4de3a2c63fd291L, + 0xb1deed8fe51325e2L,0xeb22652f5595c748L,0x00000000000001d1L }, + { 0x1ba3d9749c6ab6f4L,0xea84bc15d4814870L,0xbba9c3a7b8034e24L, + 0x07482120618eb92dL,0xf6e8b897bf7a2f69L,0xbb141cd921f09ce1L, + 0x4737cb5e28f04b32L,0x333210812dbf0affL,0x0000000000000148L } }, + /* 35 << 252 */ + { { 0x3fbd36619f67786dL,0x0301a8134bc980f7L,0x3ccde4731d1abf93L, + 0x7ec69b27aac22970L,0x02e4efca9d9ef464L,0xd3ecb5f7e4b97c90L, + 0x22fc87ce2817aeb6L,0x3703fcc9926bb15eL,0x0000000000000151L }, + { 0xf0906667a44ed08fL,0x685d2d12441de931L,0xfbc0bd4c2d6d7984L, + 0xf76a510d39dbe5d7L,0xd2b30ce9192862c4L,0xbdea2ba3ba2b8335L, + 0x020d4e210976edb4L,0x67d9047cef64f6acL,0x00000000000000f6L } }, + /* 36 << 252 */ + { { 0xcbe3c6553fca4498L,0xfe7348762b198c78L,0xd29325d2244e4325L, + 0xf811d9a114fc0ce9L,0xaefd7d9540622672L,0x9e01aeebb59070b6L, + 0x14811a71f8dae8cfL,0x9c4680d5c01426c9L,0x0000000000000154L }, + { 0xf106ef2abc07c7efL,0x249a2234a39f9edfL,0xc8183c09615d7a51L, + 0xe250cd31f509f19fL,0xf28eefe149134f13L,0xee2b97e95ddaff55L, + 0x28b2f72ad3959547L,0xf40f9d6a47cd3c7bL,0x000000000000009dL } }, + /* 37 << 252 */ + { { 0x319d18853945bc6bL,0xc0569cb414c1c53cL,0x0e23bbe24104fd05L, + 0x22ec69d1af763b66L,0x79816606bb0000e7L,0x6a3a8ae493893673L, + 0x0573de9d2d6abbc4L,0xb28bdaa38fb6032bL,0x00000000000001d6L }, + { 0x50baaea5ac05f0cfL,0xe88864181615f297L,0x9311a1dfa9d25c58L, + 0x3333e14f8d6c9eeaL,0x6a8f3dce13608d91L,0xbb6b6955b736b86fL, + 0x5aeb5a41eb0a22d8L,0xccf9043b974aa869L,0x0000000000000161L } }, + /* 38 << 252 */ + { { 0x9634396daf728c46L,0xa5bc9b7d247224e7L,0xdfbc08af5c081218L, + 0x40daef2f74944bf7L,0x363b2863461700f9L,0xc7b2d7bc173bb294L, + 
0x99080d3eefbfe051L,0xf987b4b797e98cf6L,0x0000000000000023L }, + { 0x3984bf231aeb61d0L,0x4d59b524b46acdc8L,0x5523fe15227d50bfL, + 0x77b1691dcec5964bL,0x6517492432344e0aL,0x0778001be2ac2d32L, + 0x8cea375f288c0542L,0x92aae3acad80ca37L,0x0000000000000047L } }, + /* 39 << 252 */ + { { 0x5f72b28080743791L,0xf6d24353bad4058bL,0x40941ebcfb22ed42L, + 0x46f017e280467bc1L,0x43155ecf5b2eaf8dL,0xe7c3771ea97e0752L, + 0x87b477068a9a2623L,0x8ca0232ae08b1132L,0x0000000000000184L }, + { 0x32ef7ae5683dbb52L,0xa8dcbbe761094cb0L,0x9f2062fc2c48a96fL, + 0x16bab4ceed9269ddL,0xf2b4713d2b9caa66L,0x81ccd94c07417deaL, + 0x71dfb81ae196d3b3L,0x836e59ce53016c55L,0x00000000000000c7L } }, + /* 40 << 252 */ + { { 0x94804b92b95527d4L,0x90dd532b78404dbdL,0xf8e5b1c62c372391L, + 0x7c4b488d68836ee7L,0x9d266ad6f8300a06L,0x16fb86b496340c5aL, + 0x593521324c6fb7a6L,0x20328cf8e55babb5L,0x00000000000001a7L }, + { 0x8cc5cf828c337a9eL,0xed52a678cb12ddf7L,0xa38d7a38af21e702L, + 0xf4ea981de8c19a9eL,0x0e4e6bdc79a44d8dL,0xc76b800a5236cabdL, + 0x4ac864e69c3644f3L,0x418d9fe3162729f4L,0x0000000000000132L } }, + /* 41 << 252 */ + { { 0x632fac9b6c0b884dL,0xe64b31b11fc25e52L,0x96132f079cbd3314L, + 0x0e24dd947d867c6bL,0x895f8df9b24daf39L,0x8436f4630d7b8ad5L, + 0x292df4b9f79f4019L,0xffefe90e02a7ed35L,0x0000000000000007L }, + { 0x4ce7024fd1b1dfcaL,0x8ec5462cbcd0728bL,0xaf6d3ed726fa57c6L, + 0xe6c42b8d3f45f510L,0xf8c2493a062a663eL,0xe988a4782075f60cL, + 0x641270957d167671L,0x3fd8cd1558cf1479L,0x0000000000000152L } }, + /* 42 << 252 */ + { { 0x46159bb21866454fL,0xdb5d23dbf1c1de9dL,0xa5b2b20f73b122e7L, + 0x1a5328d36e867746L,0x82d316f43432f556L,0xe1773f940a06b40bL, + 0x283be2a70cbb906bL,0x3ba5f206a07eb52aL,0x00000000000000f9L }, + { 0xf0fb626d3c7cb6feL,0x823e7d48108f5d54L,0x3e2713ac7b3646d7L, + 0x23042db5afba0565L,0xc8d983df57696cd7L,0xdc2055c0f11edcedL, + 0x3476704b495ce49dL,0xcb1299b72f5349ecL,0x000000000000002eL } }, + /* 43 << 252 */ + { { 0x44b93e40fd864ec5L,0x605efca6e1c25de8L,0xb0a3cd7d5f1178d6L, + 
0x4bff709678414d02L,0xc6c0f44c9e501072L,0x77967c9f92ad4719L, + 0x579a4782a54e73ebL,0x0130dc8919eb16b2L,0x00000000000000dbL }, + { 0xc93dcbfb4e105bbeL,0xe67766b5e0552c72L,0x05677f76faca76d4L, + 0x766985710bce1720L,0x63946321e3270162L,0xa67411fbc2d2c96cL, + 0x1e3dfd40c17bd0d9L,0xac6177b43071540aL,0x00000000000001b4L } }, + /* 44 << 252 */ + { { 0xccdb3eded06656ccL,0xbe5af4fa347da6ffL,0x22c0b0573b487907L, + 0x027b5dcf0b018142L,0x18cbedfa397640bdL,0x296c1adba9ac930fL, + 0x2dc0515599454bb2L,0x5a7c04edc0072ff9L,0x00000000000001ddL }, + { 0x817ec973da3ec7e0L,0xbf7de96bcb692075L,0x5ec2350044bff6eaL, + 0xcefad43c6f9cedb1L,0x808cbeda0e575221L,0x9631a43c7d0ed004L, + 0xbbd48ef5cc055d44L,0x7adced52db3a9305L,0x00000000000000e8L } }, + /* 45 << 252 */ + { { 0x4479d35b7ce3f5eeL,0xea8a93523bdf4c0dL,0xccc3b4b4a0f641a1L, + 0x4f9baeb1f1275498L,0x288f6fbb535c1ad2L,0xaf50e68f7d7e5e34L, + 0x1573c99dc8c366c5L,0xf29ed752cda48eecL,0x000000000000008cL }, + { 0xf61dfe1cce19d1fbL,0x3f0ba2cbf727fb66L,0xbf17e60c5a46a948L, + 0x58bd4583e9e2c738L,0xe2ba0170ca15e3b6L,0xffe816af2fd57fd1L, + 0xd258abc333e06a1eL,0xd48a7702b820bfbfL,0x000000000000012aL } }, + /* 46 << 252 */ + { { 0x1e9da85c3cb7986aL,0x8ca319700a232882L,0x6c1d4813d8db00b0L, + 0xf05b2bb6728803a7L,0x757a5bfee989dbbbL,0xfe3782b857179a1bL, + 0x580279433184f652L,0xed24df1e636b602fL,0x000000000000010eL }, + { 0x23f24bae4b469488L,0xa30177834d56de00L,0xe304cfa2ed103477L, + 0x4a55640cb8c80476L,0x58e7adcfb9b7feacL,0xf4f50f1087583584L, + 0xf38761e72ee6decfL,0x62d87e7311f8bef0L,0x00000000000000d2L } }, + /* 47 << 252 */ + { { 0x64c6e2ee38f640b0L,0xdbff010c230cd78bL,0x7b6805e3711d0b19L, + 0x31953411b2ead66cL,0xd03cd197e3497d2eL,0x7aa2213ee0b7847dL, + 0x68c6e618540fd2a5L,0xee78f1944b427d04L,0x000000000000019dL }, + { 0x110e84797561c23dL,0xcad5537536e5b2c6L,0xcb7d30ac9827ac1bL, + 0xf609071cb3eff64dL,0x5a62d75bde8b2b8eL,0x2232be091a7ce124L, + 0x266ebe5057ad42dcL,0x1b421359c61e775bL,0x0000000000000065L } }, + /* 48 << 252 */ + { { 
0xc65023db6599420cL,0xd5f259932de4f201L,0xdf04eb7f325db6deL, + 0xba7a4c67088c5b7bL,0x4288030314fff349L,0x5d8c5ac1ced29b1cL, + 0xba44893aa9d0ff4fL,0xc94974baed3f3117L,0x000000000000010fL }, + { 0xd804dafcffe235f6L,0x1701e38d67c31efdL,0xd6b499096b9307c1L, + 0xf33c74961fa96ebaL,0xfab4f9f892099d0bL,0x3cc5ed866d5334f5L, + 0x41745249c94e6d74L,0x27ef9399dd4b2b7cL,0x000000000000018fL } }, + /* 49 << 252 */ + { { 0xf099cc75f8a9e112L,0x3c74b4cb76c67a3aL,0xfa20479f8dc52ec8L, + 0x65abe5c0989c6964L,0x3af2709a4f2c71f1L,0x57640513f76588b6L, + 0x6353f91e26a792a6L,0xe89c1bdb21c67c8dL,0x00000000000000bdL }, + { 0x0b515fad1e10dca1L,0x90abf43dcdaa45ecL,0x062412e84bc4b73dL, + 0x718bd3c5b7e454bfL,0x39ad2babc0ac0ad2L,0xc873e3cf0fcc426aL, + 0xd62abb4369ef63f2L,0x2586fe964669d4c8L,0x0000000000000034L } }, + /* 50 << 252 */ + { { 0xe39f0511307f1c16L,0x5b10e8b6bbb6c973L,0x0309e94f4d0b7a24L, + 0x260ce1fd6621d244L,0x45fc1a57139898f6L,0x8ea7b91a596fd366L, + 0xde6371e00771a0eeL,0x5482ebb5731efc15L,0x00000000000001dfL }, + { 0x11d6093d8aff3f7fL,0xcda42fb9470d25aaL,0x005b142823539f03L, + 0xb8c213cf33a131c4L,0xc8d4d05bc2d06673L,0xc280e948257b0746L, + 0x382e6cb317e2d479L,0xdb86d2f4febdf3feL,0x0000000000000027L } }, + /* 51 << 252 */ + { { 0x78caffd2c356407bL,0x20edea9e58f1eb89L,0xd16fc85fb2dc193bL, + 0xb1cd53747c8d19ebL,0xe2e606b24e7a43f9L,0x8c6f1b375ae2453cL, + 0xb3e9a3ea3ab64627L,0x043518e52094ca40L,0x0000000000000084L }, + { 0x2c793d0352c34b1dL,0x21222d8dafca2b29L,0x951527fbcef8af6aL, + 0x73984b4bc18db31eL,0x30bd73641bf872baL,0x6c06495be01d557bL, + 0xacb554b9fc5f7d63L,0xc02a11b7f50b0bc4L,0x0000000000000017L } }, + /* 52 << 252 */ + { { 0xdf677374c18bfe67L,0x03daa39249685742L,0x69d2a99736b62aa4L, + 0xb7c3f6dbf4a734a8L,0x6ad1791560bb2339L,0x266dfc1f08f94b78L, + 0x2d3d477dd21b35dfL,0x41aea44988cfec52L,0x0000000000000125L }, + { 0xfd65ad5e5f780039L,0xffe76cbfd96b8044L,0xf7ae0f1428bac68fL, + 0xe84394e6b4333bc8L,0xccce5687ad2c4225L,0x70471c81a6a9c7e4L, + 
0x60c5e87940b576e0L,0x2f4ef8bb92e9f4efL,0x00000000000001bfL } }, + /* 53 << 252 */ + { { 0x3e0a60297892ba11L,0xe21b499382962ba4L,0x3b60b6f50126f7fbL, + 0x981850279dd3e473L,0xb337b151eb77334dL,0x32fdde882bed2f71L, + 0xfb8f226fb6070306L,0x8f2db293b0c2ca16L,0x0000000000000130L }, + { 0xcd05c367cc0de01fL,0x07bdf2d67df2cb4cL,0xb8082bfb2af2a6bfL, + 0xfe5118c41f02c02fL,0x5c991d3da008a47dL,0x7f1fa1d4d8b33356L, + 0x0e9d143a397fafb1L,0xaa7061308868a859L,0x000000000000011cL } }, + /* 54 << 252 */ + { { 0x827702a8005513c4L,0x716246abd9b83e12L,0xd204accf5fb3021dL, + 0xaf843f358a4d8221L,0xbc3afbc0f965adc0L,0x7123eaadba9a8f54L, + 0x21d08b162c8578f6L,0x5e0ab2f832307214L,0x00000000000000baL }, + { 0x9c1fc84883e1c1a8L,0xb9c17d7ec713b134L,0xb1fbc3be4cbb0030L, + 0x933803110d6f6b13L,0x5881c5cf7e61b510L,0xfeb0553c44ff008eL, + 0x2c22a2f1e14b8b02L,0xbdb4134af7f01665L,0x00000000000001f9L } }, + /* 55 << 252 */ + { { 0x1e57a0852a7b7c1fL,0x501cd11a379f8b64L,0x2005ea95719460b2L, + 0xe4cd1ea340555356L,0x6a2088ea43dd7f96L,0xc825ffeab958affdL, + 0xec97ea43d0db0c21L,0x4d5da9be6175e131L,0x0000000000000076L }, + { 0x754f2673e8c6ac0fL,0x822a8787328cfe16L,0xa2b1e03d7390327fL, + 0x71a953aae3470001L,0xd29bef5157384b61L,0x6faae163b4e19442L, + 0xd180a23c76c973c0L,0x356ee274d45e2bd4L,0x0000000000000021L } }, + /* 56 << 252 */ + { { 0xde124a53d9996176L,0x1ec1e39cdc53c313L,0x91917a6c42d879f3L, + 0xa81372c74e70e810L,0x33d61ba95780fa10L,0xb52f521c73805446L, + 0x81bba0727bcbb8d7L,0x91a608a605f22a02L,0x0000000000000164L }, + { 0x6f11a4a870220345L,0xa3513d6d297b6e84L,0x3dc485a1de56cf82L, + 0xf5e5240385610560L,0x72803421fa22b180L,0xb1e2031c7f0dc46cL, + 0x676c5f9e43d4c03dL,0xa891aa30dfa7f707L,0x00000000000001c8L } }, + /* 57 << 252 */ + { { 0x8080622ee89b1d39L,0x34718e3b8c5f94f8L,0xd8aa15ea60bd7116L, + 0x795258304437b505L,0x82c1c2707e5823faL,0xfcc0012b92d8a1baL, + 0xff16cf3c978289d5L,0x01b487fe51fcb704L,0x000000000000007eL }, + { 0x271a09c44bfec059L,0xe3ab9cd3fa138ec8L,0xdc8d866309c5dcc2L, + 
0x4caf43f7e599f66bL,0xbb4dd3b4688cd1edL,0x07eb1d9caa0ebb96L, + 0x80eac6dc610cba68L,0x55e5e866ead2696aL,0x0000000000000085L } }, + /* 58 << 252 */ + { { 0xd27f55b9c46df5c6L,0xd940f368e310c4baL,0xc1697f3a32602d58L, + 0x020061eebd564d22L,0x3042100cfa5d7c97L,0x937a3a303d8a5709L, + 0xc60488f76c8b2008L,0x3dda8ffaab473dbeL,0x00000000000001f2L }, + { 0x6f893aab79642b56L,0xf329524470359a71L,0xd7462e6ed933ef1eL, + 0xccf86cd167e8de14L,0x0e8675605a4acf89L,0xc72ed3fef8e9e4d6L, + 0xc0dc88afd19e4a39L,0xf2e51d088ff2b106L,0x0000000000000161L } }, + /* 59 << 252 */ + { { 0x94c5c2faa76e3067L,0x74c5f7baa9e0ba68L,0x503de3ffc79e67a2L, + 0xe9da75b49903d81fL,0x52f7d9e94da7e1f5L,0xa83731d7b613e973L, + 0x2dae3eefc35a30c5L,0xbfb55d4be5ffe984L,0x00000000000000b8L }, + { 0x174b687d0636d4a9L,0x00f11f55b75b9375L,0x70524e62e10ec42fL, + 0x0ef83d5da15440e5L,0xab7c2c2b7a7046f2L,0xa155482caf88ba29L, + 0x5aa1f5c5edd7b984L,0x5b7323ff9eab391eL,0x0000000000000001L } }, + /* 60 << 252 */ + { { 0x4f18d1477a7d8443L,0x0d76e1b7d1967058L,0xee35bb031486e355L, + 0x2b32b859da1bc577L,0x063f800d38adb65fL,0xd65ee21ff3f59f4cL, + 0x56ccb1068382ccb9L,0x74e330b878c00d95L,0x0000000000000179L }, + { 0xa3aa4a6f8436a8d9L,0x2b1f2c1a7144c8c3L,0x95a3d612fccef510L, + 0x3c20cfce1af2d8c4L,0xddfdbbd52c0b33e3L,0x4236a2daf230e136L, + 0x22b8cad2b533d171L,0xc5398e4f52bd69b3L,0x000000000000001eL } }, + /* 61 << 252 */ + { { 0xf9998925356a789aL,0xb035d4d643e44f5fL,0xa362c1d3c0491bb1L, + 0xf9139080777ccd77L,0x7d0109074fefec2fL,0x77961dd61ea1b160L, + 0x3674c27d2f9de773L,0x6d1ff90f84e79d39L,0x0000000000000065L }, + { 0x54b1da37ba03c500L,0xe9a2696034fdc983L,0xb4ab12ee290f32acL, + 0x6589b027a54917aaL,0xef94b1549a84fd5cL,0x1c598975cea54c74L, + 0xbc50a9b7ee3e0bdfL,0x5d755951d99ab48eL,0x0000000000000004L } }, + /* 62 << 252 */ + { { 0x3607f884f22962e3L,0x7fbb0064f0bfe22bL,0xbad02b9b716b8109L, + 0x2d2f55801bea19f6L,0x119a51eda3da1600L,0xed4bdc13a03e7da1L, + 0x52cf75e0e1594dc2L,0x5cd2a02ad2740de2L,0x00000000000000deL }, + { 
0x1c1cb857aaa37069L,0x69623f57f69b1560L,0x77e24490cebe0023L, + 0xca262a30e6395606L,0x4c68833affe15b74L,0x4daa7aa356fcb2a8L, + 0x4beede7dc40f75e2L,0xa00dd3197a34c150L,0x000000000000002eL } }, + /* 63 << 252 */ + { { 0x15c921c8bf854109L,0x91333f4fff524ec9L,0x739e37ee20bfd5d0L, + 0xe61087f8f8a8bc93L,0xcfc07fcddbe59f26L,0x1f5ac6d2f568fbe9L, + 0x78a1b8654452a4d4L,0x585d2501d6b939b7L,0x00000000000001d0L }, + { 0xdb4c45affd113ba3L,0x0741c4004abbacd3L,0x84f69bd7c86145c4L, + 0x023850e1d597a974L,0x5d3a93d41f667362L,0x24eaefd5d8326769L, + 0x79ebdedb45edadbcL,0xf8833039310dedefL,0x00000000000000cdL } }, + /* 64 << 252 */ + { { 0xd1a48c6847edd0e3L,0xfefb60be253c6bb5L,0x7265ebb75ab3e95dL, + 0xc8d1b679f058192cL,0xcdc478d6fa21c3ebL,0xdf6360dc10b2b221L, + 0x824b28a71101d18eL,0x4bc043f1e04269d1L,0x0000000000000032L }, + { 0xa342984e5d49e112L,0x981ac5544e80cd3bL,0x489ede322c45f14dL, + 0xb97b3ba50671c724L,0x3818c351bae2cd87L,0xf3719c9261521947L, + 0xb9116ed9a8252d72L,0x992761ed9c406f54L,0x0000000000000141L } }, + /* 0 << 259 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 259 */ + { { 0xdfbfa8c0b9f59031L,0x683df70ba5e17023L,0x07958fdc42ce28b7L, + 0xb8965b5bc6d4dc78L,0x78441b9a6e4b4290L,0x3909e659ea4b4aa1L, + 0x9c8f59c629b6cd62L,0xf400a01919a026ccL,0x00000000000000cfL }, + { 0x09844edef77fa34fL,0x9da8cdc495b4acfaL,0x5bb762074d52835bL, + 0xc39505e29c0584e1L,0x4c9a0f296c4f318eL,0xdd1a8f417d87f206L, + 0x423805cd95ec312cL,0x21639f519304343eL,0x0000000000000092L } }, + /* 2 << 259 */ + { { 0x6d9b0c3c9185544dL,0xad21890e8df2765fL,0x47836ee3cbe030a2L, + 0x606b9133f7651aedL,0xb1a3158671c00932L,0x9806a369cfe05f47L, + 0xc2ebc613f57f3700L,0x1022d6d2f065f07cL,0x0000000000000109L }, + { 0xb292c583514c45edL,0x89ac5bf2947e68a1L,0x633c4300af507c14L, + 0x943d7ba57da4020aL,0xd1e90c7ac0ed8274L,0x9634868ce59426e6L, + 0x24a6fff2c26bc9deL,0x1a012168152416cdL,0x000000000000000cL } }, + /* 3 << 259 */ + { { 
0xcc2fec030b4a18c4L,0xfc29fc82165d1776L,0xc8ab611ac317fc9dL, + 0xa8d299571029913fL,0xa225949049f64636L,0xda588063c1db5e74L, + 0xff94c5e6c0d70127L,0x199c1b8e74f89721L,0x0000000000000062L }, + { 0x168ebf642d3b8836L,0xb05a8c0f1c2db301L,0x409e24edc0a052a8L, + 0x6b6bb0163125a8edL,0xea9b956bf64bc66cL,0x4453021083f44d9bL, + 0x0eb41037fe8b6632L,0x555c39b858ef78d4L,0x000000000000002eL } }, + /* 4 << 259 */ + { { 0xc2453d609102b570L,0x70840e5c142c3e23L,0x7294b1f1c30037d9L, + 0xa2500f121dbf5ab6L,0x03eceb67c9c8ee6eL,0xa19c319330820abbL, + 0x7aad59e87da6b820L,0xe97823f8c38d842fL,0x000000000000006dL }, + { 0xce5bb75ea7b6bb9eL,0x9090358d569c9eddL,0xa8b5ec369cd0c065L, + 0xb2b5ac1cf81c82b8L,0x8feb364c3ffceb86L,0x355ed9ecdf4f9f7eL, + 0x6f2efe425ce12ff4L,0xb23168780155070dL,0x0000000000000035L } }, + /* 5 << 259 */ + { { 0x38a82571854f48c1L,0xd1b1b09af520fd6bL,0x7f01089cdf7207c9L, + 0x96a6180fc9393ec5L,0xa736bb517aea3ec3L,0xd25dcafeb56b8f87L, + 0x48933536a43ea781L,0xbbdbedab8e0fd61dL,0x000000000000003cL }, + { 0xe8b599d37f7a088dL,0x85ade4938e5efd0fL,0xe3049a17adbab77fL, + 0x3a24830b430f3f92L,0xe267f23e183b494fL,0x05f42dd6166e606aL, + 0xbee09553f479301aL,0x5adda2a63ee09623L,0x000000000000012eL } }, + /* 6 << 259 */ + { { 0xd8be53eb86660e96L,0x72c2673c8be7763dL,0xa7c5462ec6b55bf0L, + 0x9def337b574a104bL,0xa8fe70ae9e9a1386L,0x6070f210bd010cf1L, + 0x5d3eda3894ae907bL,0xa69d86791bea88cbL,0x0000000000000000L }, + { 0xd1dde97203c8dd6aL,0x562332d1d8950c30L,0x33be213e5ea340f7L, + 0xebb49ed4eb40e91eL,0xefe4aac270b2ca5aL,0xf6ad3cc1900ceeb3L, + 0x67e9a8816cc704d6L,0xdd3f2baf12fe8a0aL,0x00000000000000beL } }, + /* 7 << 259 */ + { { 0x7d8fff086a2f9346L,0xdf0fb4f2d1e2a388L,0x45ba8c01535d98ffL, + 0xce448379f67301f2L,0xbedd58c03767978bL,0x7b1171f555aef5a3L, + 0x66b7c88d5a486411L,0xd35c98709f6c12dfL,0x00000000000000d8L }, + { 0x9dc04b408b746e49L,0xdef5954d4eef5b2bL,0x4bc53645fdc0ba16L, + 0x3b488da37054acf5L,0xc610686016a0c2b8L,0x299f1abb911bc8b8L, + 
0x5422e599ecec19b3L,0x9ec27cb47fd2c939L,0x00000000000000f0L } }, + /* 8 << 259 */ + { { 0x5f56b5a41bc0fa77L,0x6cdd6bb564fd36f5L,0xd0ac68b58a5b7c7fL, + 0x4a92d9bf09919ef9L,0xc305e12b71c3c520L,0x554a9d1cdb699aeeL, + 0x7fde007761f54643L,0x99c13124479115ceL,0x0000000000000039L }, + { 0x25f890e1c271ac2dL,0x1353ccd394b370acL,0xc7b5adf6744d4011L, + 0x9ccd7687be378127L,0xa8489b5c06c4e3cdL,0x1945580a305505f9L, + 0x07190a204ab3b12bL,0x0ff53eb11534ea4dL,0x0000000000000159L } }, + /* 9 << 259 */ + { { 0xa3f06f7e03301a8dL,0xfadeedf1ec05ed54L,0xe26adc226b4601b1L, + 0x1b283cdddadcb386L,0x3acec9e811b4a113L,0x91b8284f1118431dL, + 0xe18d0cc4ac01391dL,0xeb2c1b0a7cc7d837L,0x0000000000000074L }, + { 0xd0662bde8e3877a7L,0xc979f8a15d0641caL,0x28d798bd044f4903L, + 0x296941594e7b2108L,0x84d5ff89c5381df8L,0x299250a7a51d6348L, + 0x02ee4fd3909ecdbcL,0xcfa39db1c84b1fafL,0x0000000000000114L } }, + /* 10 << 259 */ + { { 0xe774eed9b1167003L,0xecde86e1932845b9L,0x0381d17897a8a10dL, + 0x995a4926aeb8bef7L,0x3ee19c812304bae0L,0x541449e3fe977e97L, + 0xd7ef476ba517b358L,0xe55330bce0d847cbL,0x0000000000000012L }, + { 0x1620a1d6f06a91d6L,0x714b3502ac578bfaL,0x2606ebdb6bed112dL, + 0x8b7b271563f8e778L,0x31833913651fd543L,0x14929f1adc9638feL, + 0x2637ec813ee709d3L,0x39faf7e705547eaaL,0x000000000000009cL } }, + /* 11 << 259 */ + { { 0x224aff4aa7b53656L,0x93b4629f1786efa8L,0xdcf8ce96a546dcbcL, + 0x55f8dbafc17cbe15L,0xe0377d7cd48c57adL,0x28143a41d9a9a135L, + 0xc1c07a77d9b71cc6L,0xed6d2a8fb29ae6b2L,0x00000000000001e4L }, + { 0xe5dc2a6836867275L,0x7bb1a0b5349e1d5fL,0x326d1d94c30c763cL, + 0x06389c1a6db2148cL,0x3a572e33ba33a05cL,0x4244f4884ff8334bL, + 0x0a63419a240e3005L,0x8fdde11d479fe00aL,0x0000000000000038L } }, + /* 12 << 259 */ + { { 0x785e09ff6c6a364bL,0xd00a50fe53e3c393L,0xad964c6e0d36fb23L, + 0x353441feebada914L,0x4e16727ea5ba72fbL,0x5114c6e07bdc78a4L, + 0xca3a8e27f38384ceL,0xe48f64cc77307092L,0x0000000000000089L }, + { 0xb3152778351aa439L,0x0bbc970e6a2e6c8cL,0xae03de8312cfdc8dL, + 
0x1622d6db779c99e6L,0x82d6078b59ef9e70L,0x3539ffd07368486aL, + 0xe61a3e2b083f7b1aL,0xb214f0b74b874dc2L,0x0000000000000155L } }, + /* 13 << 259 */ + { { 0x97a1306e9e1d014bL,0x59ede0475517a3ffL,0x327b01f207a6b3f4L, + 0xfc8bd2c2fd4f1a9bL,0x0e1b92c4b1af517fL,0x646bae59e1f96248L, + 0xd14955bc75d5c91cL,0x6f849a406c2dfbfaL,0x0000000000000134L }, + { 0x984d6925802559b7L,0xc51f33f880d5fd4fL,0x6fb6927113165975L, + 0xb5ed83cd5dec0cc7L,0x53314bcd8649d155L,0x716d891fbef7bfa8L, + 0xd7c07da0c3486607L,0xcab5e610669368bcL,0x0000000000000048L } }, + /* 14 << 259 */ + { { 0xe77c03eed7ecb581L,0xb0b4de18cef15beaL,0x318e5b24fa1ea3ffL, + 0xfd1d902e710e95fdL,0x8daeda0655cafa30L,0xcaba29bf2bd11001L, + 0xaec1861bd88085bdL,0xfaa66562e591d19aL,0x00000000000001f6L }, + { 0xeee386f7b68c7429L,0x0a876aec502c0797L,0xcb587df8c6806fa9L, + 0x4f7cb6dca84de772L,0x407a1f0ff746cc93L,0x7862e9bb6b953e5cL, + 0x6c106caa0a4c9357L,0x04213043c0096e5dL,0x00000000000001abL } }, + /* 15 << 259 */ + { { 0x6c0ea19b675e5fddL,0x40b9015015507582L,0xf7237ea726e603f5L, + 0xc4dce1026e0be9ffL,0x5f0d8a2e3c5b733fL,0x26a280f86ed664deL, + 0x5f532bcb33c11487L,0xb0f9d44cacf164f3L,0x000000000000009eL }, + { 0x70fbc712b712571dL,0xa2becc32da63e6aaL,0xaae77cf6c170057eL, + 0x2f1d39620a4897dbL,0x4c1b5709079995e1L,0x5771457fe2de734eL, + 0x7b355bf238fcd883L,0x1acad483a6f023b3L,0x00000000000000bdL } }, + /* 16 << 259 */ + { { 0xc68052c94876bab8L,0x81f704baf84c2e38L,0x28bb9436f2cf9927L, + 0xb89bbfa0d06c972cL,0x00fe1dd8b7a78e76L,0x3adfb05af7f1e1aaL, + 0x45321e42b7278eeeL,0x13d850e0a528ebe4L,0x000000000000006fL }, + { 0x6b724c681934143aL,0x2f944afdb657d809L,0x38e501dfaba2fa08L, + 0x9285ece1be66e568L,0x7af221f7f9df3327L,0x00b8a86fde74aefcL, + 0x19d675189a992072L,0x806d1c88d9ddbbb6L,0x000000000000009bL } }, + /* 17 << 259 */ + { { 0x20ea1405d04d2000L,0xea35dca5c089a6c9L,0x038602bf3bdda2e6L, + 0x477b746aebf1aaa6L,0xd47867e49e7854cfL,0xd8b1935170bc2ee9L, + 0x76412c62a44fee1eL,0x129d55355abfa5d8L,0x000000000000019eL }, + { 
0xa5e36fcf3ab81c33L,0x6245c51eaf823a1eL,0xd62852eb0e836757L, + 0x3b2db4b779c4b928L,0x11dc7ad28063e57aL,0x0316df7eeafc4648L, + 0x44a90cbbfb758030L,0x3db503f2caa01c6eL,0x00000000000001ceL } }, + /* 18 << 259 */ + { { 0x2aaea367bd35d8cbL,0xf2a64bd7ae020e8fL,0xd7f80264d6dc5572L, + 0x890e23621f36c56fL,0xb9daf028a09342ddL,0x6cee1fb190bf076fL, + 0xfae167e70f0412ceL,0x2a79d89a8a2fd483L,0x00000000000000e0L }, + { 0xc222a2450849a624L,0xed4033dd0082dedaL,0x11f78e31df0dc525L, + 0x34240b3ebb3eb85dL,0x135d407dc287296aL,0xc47fbfce741ff1b3L, + 0xe34dd021da643502L,0xf183174d65e3ba7aL,0x0000000000000116L } }, + /* 19 << 259 */ + { { 0x10f9009fe7aa5ec4L,0x125939c308e38826L,0x85a399e720424b3fL, + 0x04eafd33c0ac71f3L,0x79c534b4452782e8L,0x57a8e52137e8307cL, + 0x905909823134ebceL,0x8c0acf7565ff10a6L,0x000000000000015bL }, + { 0x27afc0afe6436fe3L,0x9739bc6f13b24125L,0xe7b31b50902904b5L, + 0xf7ac9e9c0e8dd2deL,0x8b0c28b2295bca58L,0xdc5817376beafa7eL, + 0x1ccce1e33aa35160L,0xe57661cc297eafc9L,0x000000000000000eL } }, + /* 20 << 259 */ + { { 0x83c6b9ded09b1b31L,0xc2325238e3c06d55L,0xee4a352851db1a32L, + 0x69bba3f21edd1449L,0xa445212e855c819aL,0x00a83136b62b4fefL, + 0x1c51eb23e3edd8bbL,0xf5eba0c09a238dcaL,0x00000000000001daL }, + { 0x1223b0b5b2448058L,0xb6e3ad9f699217e3L,0xa7ae6434584dfe78L, + 0xf7d3d93f5b407cbaL,0x68b167ad39035117L,0xb369219cfe239c88L, + 0xf3d9bc4913bba594L,0x6123679d698e3bb5L,0x0000000000000183L } }, + /* 21 << 259 */ + { { 0x95f7b69fd2f71619L,0xf96d943794b12017L,0x6d0cf626a6415ee3L, + 0xd6ca14a945b74621L,0x9cf18c6eccba609cL,0x3d33890bfba5a052L, + 0x38426ba5c7e9a4e6L,0xdd28b78e61840224L,0x0000000000000062L }, + { 0xfff724ab6d9606c1L,0x9b61bc1178750133L,0x2137470f8fd0f355L, + 0x84605f37daf83165L,0x24ee8714edc5f305L,0x77cca00adb7ee62aL, + 0xf45654e1212cc286L,0x638f7126ef2ed80fL,0x00000000000001abL } }, + /* 22 << 259 */ + { { 0xfa37c4d089b3a5a0L,0xa8ed9bd86065deccL,0xc5068acfef8cc1dfL, + 0xea2a71018c98a08eL,0xde77dd771b4baeaeL,0x855a8b672b794f71L, + 
0x1acf5585cfc02477L,0x1535d73cc30f94c2L,0x0000000000000149L }, + { 0xc7f723eba566c87bL,0x88d22c712cd0a69dL,0xd7f9318bf60c47c0L, + 0x8d652ed79c239254L,0x186ab5d6c46760cbL,0xf83bb0d588e7879dL, + 0x2976daac122a83b2L,0xc7bc8602f240460aL,0x000000000000010aL } }, + /* 23 << 259 */ + { { 0x6c4da53b55c8c9fdL,0xcae3b7ba785cfe10L,0xcdb8f1c59c4cd91cL, + 0xbd1c603ab874a86bL,0xfb8654e1043ea4acL,0xad5c88fbca482074L, + 0x89d5aa873262bbd5L,0xdcda775402ad836dL,0x0000000000000076L }, + { 0x4d5190e63441966aL,0x99698619c7ff468fL,0x0a5e1697bb1a77beL, + 0xb5c0dcfd32ea31f9L,0x2121cd321b6f6ba9L,0x53b7874f779a966aL, + 0xadb4e60ea3c6172cL,0x5de2f937754ab9e7L,0x00000000000001fbL } }, + /* 24 << 259 */ + { { 0x4a7aca2f4dde4b24L,0xe56e7b54b4a15f13L,0xecb9c748eb5705b0L, + 0x675ffbc695977aa7L,0xd200cc67501dca3aL,0x82b4152d3c43e1ceL, + 0x64be95945c124198L,0xfc52e520389aa160L,0x000000000000000cL }, + { 0x157e8281f875201eL,0xe98a64c03b179b49L,0x8df77ef0d4bce1c6L, + 0x8b6764909a9fd8e8L,0x6d8879b8bb67721dL,0x558091cc19e81bddL, + 0xebacde3fec6e7394L,0x987a87d51bc77288L,0x000000000000010fL } }, + /* 25 << 259 */ + { { 0xd2fded8759ce0d63L,0xfb2c78fdad3f2b6aL,0x1e47201d30bf7a0cL, + 0xefd45dfcdd7cca88L,0x9d46cdcf392768b2L,0xbddabe42411e4cd1L, + 0x6a2d18754088dddbL,0x56ac6278c731d94cL,0x000000000000011eL }, + { 0x11aec7dcca650d47L,0x0f4e8a9e2d8a5dedL,0x92a664a4c2c1f5b3L, + 0x2cef12ef01e930d9L,0x87e8b0bd01ac4347L,0xdb547402a854a695L, + 0xcc487db9b9d1733eL,0xe8dfdc0472b35b30L,0x00000000000001a5L } }, + /* 26 << 259 */ + { { 0x866ff3c908ea3335L,0xfb58377c88d2ec52L,0x5715d9e014a8829cL, + 0x9376778b0595d929L,0xc1ccd5d59b8ed6b5L,0xd90d82dc00d1367fL, + 0x15bce4d84cd41c48L,0x1144874144a2b97eL,0x000000000000017fL }, + { 0x0e3e35fe60aa94e0L,0xaa1456964891e593L,0x51ec590f402ead1bL, + 0x23c0996d7345d47dL,0xd93e5a1a2ca244e6L,0x80c00f6fdf85d7f8L, + 0x1ca50525867d760eL,0x29cf9fb20984377bL,0x0000000000000196L } }, + /* 27 << 259 */ + { { 0xbd692d023a5647efL,0x964c9bae291eb75eL,0x704d6f07f201fd7aL, + 
0x6fdc417b52db9f6eL,0xbf85909f6291d4a1L,0x75d316a483e42991L, + 0xaeed45ef9cc62549L,0x1f4d10c5995a6c4fL,0x000000000000002bL }, + { 0xe5ff3d1dbc6c43a2L,0x552282bdff596546L,0xc9a7f0f45518beeaL, + 0x29e5b89aa6339aaaL,0x1b7acff5f0d0399dL,0xcec8edaece588c77L, + 0xaeb3a8e88fe82680L,0x96f35f5b9fc80e41L,0x00000000000001daL } }, + /* 28 << 259 */ + { { 0xb95253361ea445c9L,0xa8a0fe24c10de158L,0xaba4da83211f91e9L, + 0x307538caf2390466L,0x1e0313e0153d0062L,0x8f5ee2c89ffb2a03L, + 0x49f2777ff412ebc8L,0x463f67093d268ae8L,0x0000000000000057L }, + { 0x3856bef0a32a201aL,0xd240074b30514b6fL,0x277c70d8a9c71d35L, + 0xe35d68cf7aa435f7L,0x8a3c9a0960681a98L,0x4ef5a19a5d25e946L, + 0xdb60e7fc4316cb53L,0x2693e9a568601ecaL,0x00000000000001a5L } }, + /* 29 << 259 */ + { { 0xad35ecaee98cd7adL,0x906bd768a474bc5dL,0xda3926c0d0ff4afdL, + 0x9b850c9da31111e9L,0x426e3fdc6caf82a7L,0x9644b0d4bfb03a61L, + 0xc524fe4534531e4bL,0x0bfc8bcd7c3802beL,0x00000000000000c6L }, + { 0x9342cee294463876L,0x83cc9e39f1be35a2L,0xb18ead7df321b335L, + 0x00c2fb7044628d65L,0x7b320756ac074cfbL,0x71f3cd03644e931fL, + 0xf82be78c1e0df54fL,0x397a7d9ed542dddeL,0x00000000000001e0L } }, + /* 30 << 259 */ + { { 0xd8bf062c1e073747L,0x26c96d9fee2e6d05L,0x79d919d7890b4d75L, + 0xb42e2bb900653fbbL,0xf8987798e3ccc877L,0xb47d6eb25033de8dL, + 0x39f94c596c5fd1a3L,0xf3c0c3882fa0ab65L,0x00000000000000c9L }, + { 0x9661d7f82b18b119L,0x6fa045ecc4b5c3d0L,0xc858de883bcf37a2L, + 0xd81d90fb3224d885L,0xf251a9f77a8377e5L,0xfeb83a50a3d8c771L, + 0x853cfabcb5394fe7L,0x950559bf67c9eba6L,0x0000000000000000L } }, + /* 31 << 259 */ + { { 0x08cf7dfbd3c147b9L,0x12ac86d2f10d9656L,0xe02add8249b8805cL, + 0x9456f02a5f74b988L,0x4a631e1087cdba37L,0xb51ee73a96da245fL, + 0x1378e514042f40e4L,0x9ca9b65c1f1d6aa9L,0x000000000000002bL }, + { 0x01757173a246897eL,0x943f9cc04c7b233cL,0xdf545f8e7858a222L, + 0xb8b36cae3d074306L,0xb7c5c74d9a30dc70L,0xe3b361764c35c88cL, + 0xb0a1f4f1f0bac884L,0xe86e2f8874506af4L,0x00000000000000d4L } }, + /* 32 << 259 */ + { { 
0xee31e71a286492adL,0x08f3de4465f86ac4L,0xe89700d4da713cb4L, + 0x7ad0f5e9a86b7104L,0xd9a62e4f2572c161L,0x77d223ef25cc1c99L, + 0xedff69613b962e0cL,0x818d28f381d8b205L,0x000000000000008eL }, + { 0x721231cf8cdf1f60L,0x8b640f2b6717760fL,0xbe726f8ce045a403L, + 0x422285dc0370689fL,0x7196bf8f72ea0dcbL,0xa16f7855c8086623L, + 0xd4e19fc7c326fe48L,0xfdbc856e8f68bf44L,0x000000000000013eL } }, + /* 33 << 259 */ + { { 0x28ae363b4f9810f3L,0x3c8be1a72b4adaa2L,0xb6ccf2e9a106c1dcL, + 0x1eaa8df5dc082342L,0xa9d31a0f093f4db0L,0xc21ccdd96a7d1821L, + 0xb65f98905a9fd515L,0x2277f550f370da1fL,0x00000000000001c6L }, + { 0xe9c244cb48277947L,0x1c4bd7fbd7f2e795L,0x0f131239511d2132L, + 0xa8414dbb86c7eecdL,0x191644a8486dca5aL,0x1ca965ad6c84a2bdL, + 0x56022cf1985eb7e9L,0x25b227393c581accL,0x0000000000000118L } }, + /* 34 << 259 */ + { { 0xb1b6d3a65bbb33a1L,0x3075d6caf5c56c88L,0xe6dfb0e533d4db68L, + 0x7f2ef47003fc741dL,0x2bd5d92bec7c0497L,0xaa499ca8edca6a5fL, + 0xb16771e43fd78f37L,0xc110eac51e8f7acaL,0x00000000000001a6L }, + { 0x1faf956a555073f9L,0x2e665871977f2a0cL,0x2fe68703a4ed455aL, + 0xcc2bd95fa496e153L,0x8233f9bfafc21f60L,0x402fea60f2144bfbL, + 0x680a736ca2ffc242L,0x69634dcc94d4f0c9L,0x000000000000013cL } }, + /* 35 << 259 */ + { { 0x230cadee4136c79aL,0x53a2dce673945040L,0x94f8a859e6429d9cL, + 0x52383e90b1ed3b5dL,0xf2a0901f5fa55cd0L,0x2026ccf13e2f8760L, + 0xcdbaa7cc91a22dacL,0x5b2163adbd3c3a35L,0x000000000000017fL }, + { 0xdd444ce700d782bcL,0x100dcc2abe1fe73fL,0x640c492a0ab9b972L, + 0xa487fe3863528daeL,0xfc1e9dac220fe227L,0x299b97cdc6c10c53L, + 0x598cccf628179b7eL,0xe4991c506af23688L,0x000000000000017fL } }, + /* 36 << 259 */ + { { 0x1f021ef05478138aL,0x53076dc4067de90bL,0xb325a033c6cbb9c7L, + 0xd41768278793b908L,0x3cb14074a18b1a70L,0xf50ef63a236f4953L, + 0xc59bf449c8bc84feL,0x2952abb6f0b868b8L,0x000000000000005cL }, + { 0x2a0e56c1652bb7bfL,0xf2186af3b9a9a9f7L,0xbfb054232c2d7ef8L, + 0x9aceb677cabbdd86L,0x40314bd8270a74efL,0x71e9c648d933146eL, + 
0x9a19956d14b0eac3L,0x9797358c0c75f494L,0x0000000000000121L } }, + /* 37 << 259 */ + { { 0x80bc51e0c4a4f043L,0xf18f17f98fe0c29cL,0xdd6b0e054c815822L, + 0x63bffee5b3d55da6L,0x84b0917351b59231L,0x71689568a7d603b1L, + 0xd579f92e227fbaa1L,0x962feaddbb1f1d79L,0x0000000000000079L }, + { 0x52e249efd48c66eeL,0x400c89e816b9737bL,0x39cc526b4abe856cL, + 0xf38ccc7eca5d4e44L,0xb215a07bacb8ef4aL,0x0712bcf081443047L, + 0x494d622ee238c55bL,0xcf6a1baa50e72dfdL,0x0000000000000117L } }, + /* 38 << 259 */ + { { 0xd8eb671dd0e16c37L,0x426b4fe04fac827eL,0xa5ca693c33cbc22bL, + 0x54de649e6a5c797dL,0xd9e900bf9e0e10b6L,0x531cffe217dfa29fL, + 0x0482aa87674ac4efL,0x210528300059648eL,0x000000000000010aL }, + { 0x056126f75d644042L,0xb2cdfa0f893ad834L,0xdd7935cd9cc62fcbL, + 0x820515a09f640985L,0x6aebdaeeb809328dL,0xdfe17ad7fc12ef5eL, + 0x6b650484501f50e3L,0x3b8eaf3215fd9a0dL,0x0000000000000002L } }, + /* 39 << 259 */ + { { 0x82ed9a8dda6639adL,0xf6024a774e995343L,0xe6a1cdf6070572a1L, + 0x6a15e3512ab3eb5fL,0xcb4f48bb6655ecccL,0x9295854303dcb66dL, + 0x060fd5396523aa99L,0x3004bc44f42a7f0bL,0x0000000000000090L }, + { 0x8e2060ce679910bfL,0x512c4ea8e342d6b3L,0x6d125c9b569f8e2aL, + 0x833d68f3ecacb92cL,0x7ecfa091e570be21L,0x14539cfe17dae806L, + 0x111cb671599c5e7bL,0x7d0361e735cad2bbL,0x000000000000006eL } }, + /* 40 << 259 */ + { { 0x0206454024a7e70dL,0xd7c69d0cb892b167L,0x4b5a7d36f6f94cb8L, + 0x7237ffff807698d1L,0x90ca471730d2a69dL,0x052ba947da584fe9L, + 0xea15b35b970ee0ccL,0x0d7ba4f0be2d7cc9L,0x00000000000001ccL }, + { 0x10d8cd17f953d168L,0xcd255ba241da2817L,0xed38eccb2381ea26L, + 0x53eee2ef55ed1ab5L,0xc94ad92333ac6a32L,0xd000d271a71253bfL, + 0x77c8d5c9547f8b3aL,0xbef4d3aa18ae2ddeL,0x0000000000000159L } }, + /* 41 << 259 */ + { { 0x3eeba73f7dc5de84L,0x61f94421baf4ce8eL,0xebf5e4c01e5e5d3fL, + 0x87b7a9d18b008fdbL,0xc78a071f49b69718L,0x56743f3d87413117L, + 0x28ca8e2f94c872cdL,0x5f36f860fac7f330L,0x0000000000000100L }, + { 0xbce6c975e85d255bL,0x21e0235a4066ae6dL,0x786e6d4ac50be9f7L, + 
0xfdd3a4ee582e9ec5L,0x5d8ee443c1fda7e1L,0xd076758e1bf06a98L, + 0x31508bfc8534eecaL,0x27ab9f1931641184L,0x00000000000000d7L } }, + /* 42 << 259 */ + { { 0x76b363b63c5ce17cL,0x75e8ba323e1f0c00L,0xafe8f47ea3ad49f4L, + 0xdeb90e8dd38a4c8bL,0x59e3777d07743abbL,0x1092a633dd2a10deL, + 0x082446e157e17d7cL,0x0d01da2edce36407L,0x0000000000000019L }, + { 0x73a9f09639a6db94L,0x96bc010721374c72L,0xbceda9caff17e1edL, + 0x32b708692d130febL,0x14a201008f0e1601L,0x34f1cbbbee4a4a76L, + 0x488b76db85c287b8L,0x4e64b547b6a7d1feL,0x00000000000001adL } }, + /* 43 << 259 */ + { { 0xa836842eb6ae189eL,0xf1b19c170056ab75L,0xae3374042b01c3a6L, + 0x89d5cc7a0bb8c2bdL,0x6e02b009fb866289L,0xdcadc0fc5abf3d3bL, + 0xce7b2d6604290e7cL,0x94ee4a4d18364ed0L,0x00000000000000afL }, + { 0x672841bfc58e7b41L,0xf2fbd6fa87db5ad0L,0x2511d4e7dce7195dL, + 0xd21c4d314dffb69cL,0x8231cc4396b5a36fL,0x75c447f63ea477ecL, + 0x2574bee0d6186096L,0x52870fd9c167aad7L,0x00000000000001b3L } }, + /* 44 << 259 */ + { { 0xece40f9775f9275cL,0x686e628af28cddfcL,0x544b842b887b5aa2L, + 0xb33a8517c4ad2061L,0x1525dcd7bdfe3c4fL,0xc771ef08b09c44b5L, + 0xeb02c6bf781ae208L,0x784dbff997fc9c2eL,0x000000000000012eL }, + { 0x10717e71d93974bcL,0xd6a9e82900a10d31L,0x27b00c7f35d5e291L, + 0x451d556e0bed8699L,0xa3ff4fe0f5118f55L,0xfb5d9590f07091d4L, + 0x2dea085ea6378d60L,0x24dac7bb4741b622L,0x00000000000000b6L } }, + /* 45 << 259 */ + { { 0x58f22e521589fc91L,0xd26d923c1572f936L,0x119123075356419cL, + 0xac8ef534d050441dL,0x2095327f1a4e0cf4L,0x39cc6a1ee169d2ffL, + 0x8ebb4d099b67523aL,0xcc3a215be1bf9afcL,0x000000000000001bL }, + { 0x39abe3cfd0c96670L,0x1948c476066b056eL,0x47932f316512b7d0L, + 0xc905d875ca50f67eL,0xe94876bfcd454f38L,0xf15a676e69ed3d3dL, + 0x32320896d595a940L,0x0656340a48ad1e79L,0x000000000000006aL } }, + /* 46 << 259 */ + { { 0x8f6ca036a25c9b50L,0x56356d4785bcc87aL,0xe646d82299817898L, + 0x6f9c87155d43bafeL,0x09e17a9509926705L,0x1eb95e765fc6bd98L, + 0xe5894e0146e44174L,0xf345a2821c1f16f6L,0x000000000000018bL }, + { 
0xe6a6189e502ca51fL,0xea04086caf88b82cL,0x400cf34589a390c7L, + 0xf1da9ba44ed71572L,0x07b61a37913f522cL,0xc69f7f0c0e27f8e4L, + 0x25d6a2dee6073472L,0xb1d97674586573e4L,0x00000000000001e0L } }, + /* 47 << 259 */ + { { 0x3bf9c246be511a2aL,0x528e184cd12e41f6L,0xe473382fff9c33cbL, + 0x7b152e99e6e39137L,0x01d3d113a4a2e625L,0xa7ab2c27eea47137L, + 0x0a69cf3194b87f50L,0x36af1fa902fcf00aL,0x0000000000000177L }, + { 0xc5a45d54b2ff7576L,0x18998a609b2e0046L,0x70720cc450e4fdd5L, + 0x8ab8a8766f9adbf0L,0x0c09fe59285f4e50L,0x41f772e1aa49ce03L, + 0x70675276e7e928b3L,0x9e4c40d95f1dffb6L,0x000000000000002bL } }, + /* 48 << 259 */ + { { 0x3b52947390b32585L,0x57359ccd7b3ba1d5L,0xb6daa5e0f8889d88L, + 0x6ef5de0607674ccbL,0x5c000c596eb2b8c9L,0xdc4f33e5fbfbbf74L, + 0x195a92f58a060ed9L,0x49c845be41beff63L,0x000000000000014eL }, + { 0x7190d74d265e6dedL,0x95373592453d9809L,0xb087112de8f1b76dL, + 0x1341db8a73d34d52L,0x54075f5b4bfa2fc8L,0xef25714741071528L, + 0xfe227bc8c3a0079cL,0x6dff050f64de7e14L,0x00000000000000d2L } }, + /* 49 << 259 */ + { { 0xefa6ae34e7d9c270L,0xb1abcf3dcd99fc38L,0xed1681698ef0d319L, + 0x3648e51e7269773aL,0x0b08bb8036ce881bL,0x9055f280db3f6fc8L, + 0xe95823b16ad5efdeL,0x0a18738401dfee23L,0x00000000000001acL }, + { 0x2a8f55bea8a15ccaL,0x96c9c72dde4c36a0L,0x439e0198306f2426L, + 0x225257a55ad4151dL,0xbd8a9150e4407dbcL,0x93ded257490cbb52L, + 0x70f1e3f2f0fb6dcfL,0x8d424da6ceb42cfaL,0x0000000000000194L } }, + /* 50 << 259 */ + { { 0xd80664eabe1827eaL,0x8444e48081e18779L,0xb6e6246bfc3ae63eL, + 0x48ff2368b6e36f8bL,0xf79455fdd4a8a970L,0xe3403cbb1f46a06bL, + 0xad00139dab2a8c5dL,0x68a9ece4fabd20d8L,0x00000000000000bfL }, + { 0x8240e0275b7b9242L,0xe89bd598dbd8c035L,0x761d0b2eec9d4f1aL, + 0x111fa0e57df88f57L,0x4741f6b016d01143L,0x0f4b1c1ae59c8337L, + 0xa23d571e0fe0544eL,0x84be6651ada2d65aL,0x00000000000001bdL } }, + /* 51 << 259 */ + { { 0x4835246b6aff5b05L,0xb280322a955831bcL,0xf96ece94ea39c9f2L, + 0x3282ca94819d4198L,0xffed1187773856f8L,0xd9f62443678893c0L, + 
0xac106b951224557aL,0x4ab67573ee3a017cL,0x00000000000000d5L }, + { 0x5a4f21a86940798bL,0x25024af6ee0b3a60L,0xeeb9fdf73f8b5370L, + 0xa76a2c14c102bf6fL,0x0c1da7e1f47f6803L,0xd2f244e7ed271ffbL, + 0xbc823e462de8cdc9L,0x07bbcd925979772aL,0x00000000000001aaL } }, + /* 52 << 259 */ + { { 0xe1db6b7c281e407cL,0x2bd41b23f0f0127aL,0x8c62b1db49d40963L, + 0xa0a5378e4aecf129L,0x0ab063f6afb779b9L,0x9fce0a3976caf17dL, + 0x361c82ec51f85f9dL,0xdc45a48c857d1c19L,0x000000000000019cL }, + { 0x01fd89c64251f9f8L,0x1ba0872e3d68ecbaL,0xed3894017d29b968L, + 0x7de4a3b2496be4e3L,0x3e8815a6d28dc1a4L,0xb4a743b262683444L, + 0xc9c4a64ad0be70a4L,0x3e79e5754095638bL,0x0000000000000053L } }, + /* 53 << 259 */ + { { 0x65d9c77be8976523L,0x0ac11fd66fc3be5eL,0xe939a935d9655ecfL, + 0xc9debc12fed311c3L,0x0b460f9bae0088a6L,0x28a82b09d0f0139bL, + 0x15465c4bc746dd3dL,0x4c149c73f132fe91L,0x0000000000000061L }, + { 0x7556718de6868a48L,0x8da2dee8b28b880eL,0x24856a384e5dda6aL, + 0xdc76c7a70ea27973L,0x9fc3053c360bd595L,0x8d41a89c3e1540e4L, + 0x4b7e977841c20751L,0x14907ac0e8d84b0eL,0x0000000000000011L } }, + /* 54 << 259 */ + { { 0x4f736d9a66c5c0c9L,0x2677a79c58a4b4fcL,0xf612bcf8a49ad272L, + 0xf39799188256b986L,0x1957f6626eb7515dL,0xefbd5ff12964c253L, + 0xc33ddfe1e8447ca9L,0x9638690a39b7430dL,0x000000000000015fL }, + { 0xa3552fe58470d9dfL,0x2a0d1fdf093484ccL,0xb81bda1be2eb7fa8L, + 0x3634d9242688d523L,0x11f94f70cc600f24L,0x87021e6c2adf964fL, + 0x35b07acd5ace8426L,0xfa2853e1dc2e6f71L,0x0000000000000142L } }, + /* 55 << 259 */ + { { 0x2575efbd283e89e9L,0xdb769726720a620fL,0x37c9268d59c0da16L, + 0x7f35d8bbd0357166L,0xbe8309c61eb19a85L,0x74eb837d19a077efL, + 0xb1eb237daca65900L,0xcdc312ad0d7c99c7L,0x000000000000002aL }, + { 0xbe10915aed84c76aL,0x7b0f723a85519abbL,0xf538710f42a081a6L, + 0x78265356dcbbd734L,0x0f499a07096e7577L,0xff5daaf70079b100L, + 0xc8bf37a2eb6c1ba4L,0x84ac26592b098eb6L,0x000000000000017fL } }, + /* 56 << 259 */ + { { 0xab4d88d64a5595dfL,0x6c8dcabeef39ae0aL,0x2624df12f37103dbL, + 
0x86be6542141f2376L,0x647ba3bba25fabc3L,0x5332a3fce7456368L, + 0x6d3c328ba44bfa16L,0x34a647fa239e3b29L,0x000000000000009bL }, + { 0xaa7ad625991ffeb1L,0x4e43f778d03d63f2L,0x39cf73f43cab9baaL, + 0x2250a6ab66f0706bL,0x83bbddff0e7828edL,0x8c63c4f79209731eL, + 0xce7951a6b2988761L,0x9d4e402fcac241c2L,0x0000000000000150L } }, + /* 57 << 259 */ + { { 0xa12465c4df67bdd5L,0x9218ff8bbc9eca2dL,0x794924a761d09f06L, + 0x89d11f65e1dbf4b3L,0x60e94678bcbbbecbL,0x34187685a297c4c2L, + 0x2169b8c93c788f5bL,0x61e7a4603bb5d6c8L,0x00000000000000e3L }, + { 0x25af24472719a0edL,0xdc1c88f4edfbfd6dL,0x0eaec9a6d9888a49L, + 0xcf7bc7e4fc0565b0L,0x7d9b1b74cd500f9dL,0xd2664e1ae2b62831L, + 0x18412cb253f95ba4L,0xb652b95b9c4b365eL,0x000000000000012fL } }, + /* 58 << 259 */ + { { 0xcbcce98ffc07fefeL,0xdf3b08fa0bdfc750L,0x4e39fcd4fc05529bL, + 0xf2aaac2e27114979L,0x7131d54541e195cbL,0x933fa5193f41e9fbL, + 0x630c99b3e95fc9dfL,0xbd5163825cbe8a8eL,0x00000000000000c9L }, + { 0x3c6b0a2406057628L,0xff27b4a4ef249b30L,0xe42dc28302e9465dL, + 0x71d7d424dd0bae7bL,0x4d04ba9f602f88c3L,0x6111058a74d2ae5bL, + 0x2c382c9cc3712cc6L,0x80b7d0e14d5b66d2L,0x0000000000000017L } }, + /* 59 << 259 */ + { { 0x019c03604022dbe0L,0x592bb5efcfee0bd0L,0x19be33c02b149c03L, + 0xc89d18c2ba04db0aL,0x5a3f5d450bba9aa2L,0x01319e4ad2d6cb21L, + 0xf6e9f27badc7b155L,0x5bdb9a7844825e84L,0x00000000000000cdL }, + { 0xfd75100dca64ee61L,0xe13ed4adcf163911L,0x56d0e58980d2069dL, + 0xf8064a05c45fd383L,0x64567521ca0632f6L,0xebfe94ed94c7f528L, + 0x80f8b30a5e4debb5L,0xa3d53bbb3bc900f7L,0x00000000000001baL } }, + /* 60 << 259 */ + { { 0x72ce80061672b5e3L,0xe0d15784257ac5bfL,0x2083a445fbb308f1L, + 0x2fc46f2dc8e53f2bL,0xad835795aa2adb58L,0x89b3d1edf35d6ebdL, + 0xcf54ea81724a0efbL,0x3085e94b513eb0ffL,0x0000000000000046L }, + { 0xc1d6ca589b47ffc7L,0x553268ba41a17354L,0x15f50394aaba1ae7L, + 0x9377378dcd023c80L,0xf5556f2a70213614L,0xb56122a4cb30db70L, + 0x03ae2b4c59650a3bL,0xbbfaa947e337a318L,0x0000000000000108L } }, + /* 61 << 259 */ + { { 
0x0c07f65f3e218411L,0xf93336dc34e3653eL,0x915543013151ee8cL, + 0xcdeb170ac7b83bc3L,0xae1673cd6ed8c8afL,0xd6fa1c4b8a778809L, + 0xd98dc5cc95d4e2f8L,0x50a6f916a906d0afL,0x0000000000000048L }, + { 0x9842940d56885519L,0x487e1610eaed31f7L,0xb4336a8018cc4dd5L, + 0x92384fdd9eed314dL,0x39f2647569b8de9dL,0x9fd9be6b397286f7L, + 0x861bc717ce04611aL,0x5eb3a63d5d9aafe8L,0x0000000000000022L } }, + /* 62 << 259 */ + { { 0x577a30fa4b357fb9L,0x36a9492463e6b447L,0xfce5c432731fd6c4L, + 0x48eaf60527d69a4bL,0x0c5fabdef271da03L,0xab9ab8bd570d0974L, + 0x07780c6b9c004fe5L,0xe78c11feecd1146cL,0x000000000000015aL }, + { 0x52c4fa6239153966L,0x7aebe3aa7ba8471bL,0x618c9a3ce81fa8a0L, + 0x7abd26a68a47b9d1L,0x844c1024f06473efL,0x6231f29ab4072d4aL, + 0xe698c315218d976eL,0x39b20c9ebfbe05f4L,0x000000000000001aL } }, + /* 63 << 259 */ + { { 0x407010d297770b95L,0xe2490ae0b2c13f78L,0x7cb994e1b8634a50L, + 0x8ccc0776281300b9L,0x1c7a056d83f81cc6L,0x592034c74d47f984L, + 0xc170bd8f7df09160L,0x07b59c4e6cf44c96L,0x000000000000003aL }, + { 0xdc66941021689ed3L,0x4ddc860456288484L,0x68bfe9ed8f1a853aL, + 0x426531e548e4f90dL,0x574498889bc57ac4L,0x5144cfe92e90e5caL, + 0xfc1d8502d5f8796eL,0xcf50aeba50c5f60fL,0x000000000000018dL } }, + /* 64 << 259 */ + { { 0x225256fdb79711e5L,0xbdcf3433a6a843b7L,0xbd3d4548414a71abL, + 0x2e13fd39e29e7606L,0x194b6e92e07f29c4L,0xd6f5e492b68fd0d2L, + 0x3ac649985de0ae9bL,0x10e5afe94e5ba961L,0x00000000000001f7L }, + { 0x27df3c4b8ec9ccffL,0xdb2913722a7b0a2bL,0x1ca89222f045afdeL, + 0xa666763c69c99247L,0xd8d9f6b199033177L,0x10b696f690eca082L, + 0x41991d660d180f50L,0x7cee7f9b82d6f6bcL,0x00000000000001dfL } }, + /* 0 << 266 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 266 */ + { { 0x853233c2b24565f3L,0x43478b915fbd38c5L,0x962087ffd2a6992bL, + 0x403427f45ec71ec0L,0xc07276099b151a4fL,0xa8bb2dc4dfa1844cL, + 0xa20d339e101140e4L,0x0df52fbda1c70e3bL,0x000000000000002eL }, + { 
0xdc4129870447de06L,0x080b5dd16cd90fa6L,0xcb6a6bc7c4ecddc5L, + 0x9462fbda970fe140L,0x4b0cda7ea7581610L,0x6437af41dcab0c19L, + 0x3ab19e3f4e100705L,0xf8e93a95adc5e7feL,0x00000000000001f7L } }, + /* 2 << 266 */ + { { 0x38ee58f048e075e3L,0xf285797e18d38b67L,0x02e97c63a120348dL, + 0xc64defa717bff13cL,0x0fd5b7237f983d56L,0x70acc4a830d1770fL, + 0x0f0ec2d749151740L,0xe99f0a612968c0dbL,0x0000000000000174L }, + { 0x4a5d683a67aa8ad3L,0xfa3a36cc56c69e37L,0xa392130edf8a093bL, + 0xe80d7527c840b37cL,0xa28eb4926ab73b96L,0xeece0912c7d4304cL, + 0x0b900a1130b9dd79L,0x43d757e3a20c5d55L,0x000000000000018aL } }, + /* 3 << 266 */ + { { 0xdbc8e45f29056168L,0xb1789b8f8aa24063L,0x8e394ec4e8af7801L, + 0x451958953e6dacb8L,0x42e76886d8e18229L,0xe9be1e0238912e71L, + 0xf4cc3bbbc793af2dL,0xa7dc4d1bb6ef26b0L,0x000000000000011eL }, + { 0x0255437c02adbc28L,0xad007fd57f4098d0L,0x5b827c408ecbf87dL, + 0xb9acb85cbf9b34ecL,0x6d45ae094a85c2b6L,0xee94e87b95bd71adL, + 0xa7f0f61b420c1eafL,0x8f39c45c8f1291d2L,0x00000000000000c0L } }, + /* 4 << 266 */ + { { 0x56f74e6985edc827L,0xafe2ef0a6a731640L,0x61f8bcfdd51647d7L, + 0xbeabbb8f41e33fa7L,0x3e4c3aaeec83c547L,0x48a9d1e92cc8dd97L, + 0x7b4d53b5e57d3822L,0x92387ac8a45c0584L,0x00000000000000d5L }, + { 0x802ece3dfacdb1cbL,0x9e4f5d209e537236L,0xd10acc25386a5610L, + 0x53f55d464c6c0247L,0x390fe9808c419fa4L,0x8b0a8aa6bed2bd92L, + 0x90e7f479f854424fL,0xd714bfb59e7f4392L,0x0000000000000074L } }, + /* 5 << 266 */ + { { 0xddbded0597026decL,0x12f1ff098a199bb2L,0x761a746fc1428d3eL, + 0xb9495d7ef012002fL,0x7a7c2c3aa50c6565L,0xc1264e37dc970f6dL, + 0x4f05342c865a7518L,0x9818d13d49d9ae95L,0x0000000000000195L }, + { 0x1bb4a4a08cc45e7fL,0x5fba73b315781343L,0x1e704a47f68560e4L, + 0x90fb65e54eaceb4fL,0x95438866400ac026L,0xbc4b4d817e6f6a48L, + 0x3b2bbaaf6e7d0d0dL,0xddf027615419780fL,0x0000000000000070L } }, + /* 6 << 266 */ + { { 0x2f9cf1972fe33230L,0x17021cdb2ceae391L,0x18b273a28a2ad728L, + 0x53289496e7d3cd0aL,0xfbf1d2dd06775a09L,0x445cf659aa70465eL, + 
0x69ef17ef44d92276L,0x852c4f629a70a820L,0x00000000000000d9L }, + { 0x24819cb9192b4bfcL,0x2a3259fffd8c7b97L,0xb39c9e000e31618aL, + 0xf4dfe5b7f802aa84L,0x57bd5d63fec026deL,0x4cb4de99848b4c62L, + 0x68c65e5354a00cf3L,0x37ed0124161b63a7L,0x00000000000000c8L } }, + /* 7 << 266 */ + { { 0x2a5e0c228610952eL,0xbf871948e20cb271L,0x24fe902d31955404L, + 0x9678b0feab00b79eL,0x0d170ac43fd72c73L,0xc91bb075236c56b0L, + 0x45feeb4f75044149L,0x8cd5418362f8b77aL,0x000000000000015eL }, + { 0xadceca285c84440aL,0x27d8d9ddcc727b54L,0xcaa94f511f1d6f1eL, + 0x2e5e70ce41c288a6L,0x2bc6ff5b6c8116cdL,0xd84f24cf5725753bL, + 0x71d36b45a268b9d2L,0xb10ff02719274e7fL,0x0000000000000065L } }, + /* 8 << 266 */ + { { 0x9abf4318e7c8e1a2L,0x89fd5c8d5ef038d0L,0x20641c7c755be1d6L, + 0xfff489fb38b2c08cL,0x792104e1b3b3a7b1L,0xd8a69114c507e7d7L, + 0x4c07f5bac1ac93f9L,0x0a2e3d13315c3a89L,0x000000000000012cL }, + { 0x016026bed523d79cL,0xd4e3d86f00ee5640L,0x07627a1ca8e8f5cfL, + 0xc07c67f8533afedeL,0x081a516dd3bed1acL,0xddc6f44086e8ff0cL, + 0x4df71bc8c112d075L,0x2b8cc08f7cc49d32L,0x00000000000001d0L } }, + /* 9 << 266 */ + { { 0x8f20e8e5296dd3e6L,0x5b35a6296564254cL,0xee36dfc32969a7f7L, + 0xa32df83a9b8ce874L,0x449cb63355989616L,0x1cfec15081a04ac4L, + 0xd625a5fa5f6ac24bL,0xea46b54e1f95a1ceL,0x0000000000000093L }, + { 0x08e7153e171cc8e0L,0x0b4cc450c58a9d47L,0xcca705da93881d6fL, + 0xbb825112c702b9a2L,0xffa42735ea68fb62L,0x5ccf465db32b28d7L, + 0x6c099defd7fc4547L,0x91a920d33f37b44aL,0x00000000000001cfL } }, + /* 10 << 266 */ + { { 0x3377ff96a34b8405L,0x735fe3b8a3607c6dL,0x05bb89954a680280L, + 0x2a1b844443f751d4L,0x88577381b6131136L,0xf6095111bd4bf7bfL, + 0xdbb63de95babc815L,0xdcbda003457174ffL,0x0000000000000125L }, + { 0x4c0dac17d659699aL,0x0c97f1a4b90797e1L,0x86e3f70037c1752aL, + 0x28770759661a54f7L,0x696ccdc1f0226610L,0x36399bea2424ad25L, + 0x94f98b4bbaa7e4e0L,0x7e2207dd8e149164L,0x00000000000001e3L } }, + /* 11 << 266 */ + { { 0x3e459b94f997f2b7L,0xe2c4670e5a5ab47fL,0x6c42f7bea7277f71L, + 
0xb6f4a867b849ea22L,0x9c261ef533ef93ebL,0x14faef70383434e8L, + 0xcc2581fe68ecf568L,0xbb425d21b0d9b128L,0x0000000000000069L }, + { 0x43dd049b2a74207cL,0xd89e8403d14cc7c9L,0x5e4b96d61aa77222L, + 0x64f3514b16efa356L,0x9336114aa79aa125L,0x86cfa19308e23be5L, + 0x6b0d3088316a61e2L,0x8ba7f7d478b735b8L,0x0000000000000041L } }, + /* 12 << 266 */ + { { 0x74af3f63edfcbedeL,0x6b0830b11356f827L,0x060db15a02536f8aL, + 0x166f9099f7095358L,0xd759efb4e03b9601L,0x05a4e29768484f67L, + 0x469b4832dc631a7aL,0x7c5406150bb2b1b6L,0x00000000000001b2L }, + { 0x590174e012481c04L,0x8d4ef6c7d7180a84L,0xd134b543303674ddL, + 0x60dbf72600c378b0L,0xc60e026a93d8406cL,0x57af4a82082c527cL, + 0xe3d038fe349a607fL,0xa7cb9b475a3582e5L,0x00000000000000c6L } }, + /* 13 << 266 */ + { { 0x113d76470bd80953L,0xe4478391e4fd3d00L,0xb599a4a78a9e2693L, + 0x9b0a26a00c2122b9L,0xa06d75d822f2ab6cL,0xa91bce84d181c4b8L, + 0xdcb896f252c19efbL,0x5c009091838a3552L,0x00000000000000faL }, + { 0xd077530a7a3feb1eL,0xb923c5e923a5b8ecL,0x518a1fde94742d7fL, + 0xf5d7ccd85609a48eL,0x98a271bf88865d38L,0xafea9987a60efbd2L, + 0x145df51ead05f497L,0x776f6dcf9f9e829bL,0x000000000000005eL } }, + /* 14 << 266 */ + { { 0xb4b3726011219674L,0x47c3a79526626299L,0x64c93e90f7b9b98aL, + 0x08eff93aff8e46adL,0xb4204ee85b43c5f6L,0x520b0790611d0ad4L, + 0x29d122dcc1dc2742L,0xcbcefa6e3169b5fbL,0x000000000000000aL }, + { 0xd43ca630a537c7b5L,0xae40e59aa9c0a428L,0xfdf9b81999f2182cL, + 0xca851eccec7e7227L,0x6760158bab6a8d6dL,0x3875e88ce215b7ebL, + 0x569be1e84514f9e0L,0x0a0a202ae14d5c5eL,0x0000000000000124L } }, + /* 15 << 266 */ + { { 0xf6eefade50247dc1L,0x507af32ce36262f1L,0x7e981908f13b6ddcL, + 0x7212c2b68c8c8580L,0xf643133ca54e7f13L,0xa1b10239c4a0069fL, + 0x18ae36e186279c29L,0xe4f7235a61fbeaf2L,0x0000000000000167L }, + { 0x4bc859b15533c37dL,0xdc59cf0624b1408bL,0xf488d1e9100554dcL, + 0x3eaf3d7920b37912L,0xa87f0850ee5acf54L,0xec6d7d40fd3bb7e7L, + 0xd95e630cb6b4eefbL,0x1aa0a4a5b2a5e55dL,0x000000000000005fL } }, + /* 16 << 266 */ + { { 
0x69017ed5f1c86157L,0x8ad717eb3aabcca4L,0xe14dd980e57ac297L, + 0xc2f0da4b931e307bL,0xde7fd5799080b37dL,0x45bf6f6414134de4L, + 0x5cde0727ac019cf3L,0xb4e5c2e7863e739eL,0x00000000000000e6L }, + { 0x094e085628e15f36L,0xf92c44fb46276090L,0x9219998521cb21ccL, + 0xcedcd4f2de53957dL,0x802c6f95b6c417ebL,0x0ea41a2b6075e71dL, + 0x194d6fb1b9eec681L,0x8e7cd41f2e0320d8L,0x00000000000001caL } }, + /* 17 << 266 */ + { { 0xb35b4973fc9a8271L,0x07df6c1baa64d87bL,0xf8a40753c20694b8L, + 0x9e99bcc963c0ee99L,0xd1a9e1547b93b57eL,0xdf820e3853787f84L, + 0x27f53ecd99f332cdL,0x4fb11f5a9232ec79L,0x0000000000000088L }, + { 0x24b553cbee20f924L,0x0b7ef51349d43360L,0x96c9487ce38d79baL, + 0xfdb1c74fb5ed12bcL,0x21685ed84900c152L,0xa357e469e3bf2258L, + 0xbd08ba2c6fb351cdL,0x6bf4eab57c702332L,0x000000000000019bL } }, + /* 18 << 266 */ + { { 0x4d9b809e882e2e8dL,0x9c08905471a4d335L,0xf7d10eab057174cfL, + 0x2362775e835f8b97L,0x02cffff193e325e5L,0x5da2c5e530a19357L, + 0x3682adb3fac8a110L,0x0c80cbc5842d7712L,0x00000000000001d6L }, + { 0xb1f34b5ce63fcecfL,0x1b7ef8b72036dcacL,0x1e77a65bf2918bdfL, + 0xee7953ef56dec2d7L,0x3bb9d2d96e8dce20L,0x690991b765dc8ddcL, + 0x289f7b1c5127b031L,0xcfc012e6b308fb5fL,0x00000000000000dfL } }, + /* 19 << 266 */ + { { 0xe4e588655455e3ddL,0x0a3faf6e37979e4dL,0x0fb1185abc2baf05L, + 0xa7757468231db3c6L,0xdfaa6b1e2dece4e4L,0xe0fd7fbb57b73e1bL, + 0xd0a759633c1319ebL,0xfe723d7944751078L,0x00000000000000f2L }, + { 0xf34d75824e65024aL,0x4f3e335728712abdL,0xd736d0a0dc0619d8L, + 0x700cb63c18df1a77L,0xa741d8188dc6f72fL,0x1fb046b4e3f4ce0dL, + 0xe4c65bcf36daa0d3L,0x7de83bf3e0a1b42aL,0x0000000000000118L } }, + /* 20 << 266 */ + { { 0x5466a9901fe423c0L,0x59f6a48ba57e7875L,0x09a9f84acb7bd46aL, + 0xb5ae0d9469e02df0L,0xaecc3392d9fec11cL,0xc94de920a4bcd130L, + 0xfb5b36f882c78f1bL,0x32ad1cf22d4fc970L,0x00000000000001fbL }, + { 0x8cd894d5df0a2534L,0xcb83665c9735fc04L,0xcbd9157635169f6fL, + 0x2f3b64a72775c9d4L,0x80c9608fdd6410d3L,0x6503badcbcad20d5L, + 
0xbdb308571b2b1640L,0x92338c7878fbc82aL,0x00000000000000d6L } }, + /* 21 << 266 */ + { { 0xac3b7a4a49a3cdceL,0xf58661f0a02ad4fdL,0x1ffcfa7c613c7e53L, + 0xf2524615a312b766L,0xcf475cfb5616df03L,0x4cfa2b326c13fc81L, + 0xbccd8375bc58b87bL,0x8abab16ea321ba9dL,0x000000000000011dL }, + { 0x3e0c8aa36c9c6bdeL,0x52b709235145030fL,0xb3b79def957b71bbL, + 0xc5d4d6d82ae65cb2L,0xe46b42854fe000d6L,0x7f0cbea3df9a4175L, + 0x184a74fff4816509L,0xd4309c3fda9b614fL,0x00000000000000ceL } }, + /* 22 << 266 */ + { { 0xc8015af9e84d1058L,0xfde0b0b44630167fL,0x75a6485be04303a9L, + 0x386d40635dc6a595L,0xb6a7e6ddb5e1fea7L,0x88760f62a98f79ffL, + 0x0fa02a3715ceb5eeL,0x3b5f506d6b7b4aa6L,0x00000000000000f8L }, + { 0x03364c2377688a7bL,0x8efa63b109c6d5c2L,0xb57aa39794f3f323L, + 0x1527ffadfc11fce0L,0xfe42ecd364d6096aL,0xfdc0cc40d56842e6L, + 0x4e82c4c8770bfa36L,0xc43342307717694dL,0x00000000000001d9L } }, + /* 23 << 266 */ + { { 0x4fcdc0c1d5f4e912L,0xfa7cfaa4004a4db8L,0x25aecad367023dcaL, + 0xca002afd5eb57cd6L,0x34541373741405d5L,0x67a0d594d3812ce2L, + 0x7b34bef0cfcf2d6dL,0xd6cf2f835bdd535aL,0x0000000000000110L }, + { 0x454609ac442959beL,0xe9237086b8a51511L,0x6fac92553babcba9L, + 0x78f71e6b27e7884aL,0x92d4c3ede8f4feb6L,0x52c5d2b4420fbfedL, + 0x009792ee9f40b090L,0x82b8ba85df824367L,0x0000000000000179L } }, + /* 24 << 266 */ + { { 0x3f99e9b3ee05cae4L,0x32cc71107fa0e686L,0xfefe300a1ef8a5c1L, + 0x232d446e86846634L,0x250f4e801ce3e75dL,0xca3146f236bae498L, + 0x41b28b529339513fL,0xf05d31b6623cacb0L,0x0000000000000066L }, + { 0x12b4c656682f53c8L,0xc865fa0ef72ed178L,0x7d41ffbc55f68dbbL, + 0xe4aa53b7c0ac64aaL,0x66e397bbb7194380L,0xba629db6834331f9L, + 0x6d14e879ea0c0083L,0xdf2e7eb0039343fcL,0x00000000000000d6L } }, + /* 25 << 266 */ + { { 0x0397ef6f17081314L,0x9546deb646fff7e1L,0x5b4dd93bc94171ecL, + 0x0a8a85527444b12cL,0xb1513889b9c40474L,0x9cf88d84bb2a206fL, + 0xb910e92085c19e6eL,0x45eccb62676a3583L,0x00000000000000feL }, + { 0xae266163ff1f2098L,0xf440e5da0d1ec4a4L,0x3253fc3f92193348L, + 
0xe74818b8e34f82e7L,0xd5a623060676bff7L,0x27b66af8199cf83fL, + 0x3f10e57fcc0d1357L,0x5e957d2472cc938eL,0x000000000000008fL } }, + /* 26 << 266 */ + { { 0x575278cf18bc690fL,0xcacf60ceb54dc657L,0x26bbb292b42fbfb1L, + 0xdc41efa143d3991dL,0xe1b66a31f437cbfdL,0x78f6b7c39a9fb752L, + 0x91bb4eee42b805d5L,0x6be454c8c53eb4c3L,0x0000000000000003L }, + { 0xa33f2a2a709d0dd7L,0x991168d9fb00c9eeL,0x33f59abe2755ae74L, + 0xddbd1abb9f173047L,0xe45fa246f4075d82L,0xbaf7b425bddf38fbL, + 0x6a934744ccd0af06L,0x3f33c9152473c3b8L,0x000000000000017aL } }, + /* 27 << 266 */ + { { 0x354623ea02695dddL,0x311af2095e666b21L,0xf8aaa2aa323d976eL, + 0xff0efef950504afeL,0x87d20f09de7f31dcL,0x6295e64a7c4596b6L, + 0xdc29db81de13042bL,0xa145cbbe42be7ef5L,0x0000000000000111L }, + { 0x0833ee13598cec0bL,0x3915b741a55c1756L,0x56a8ab1191e38ffdL, + 0x8ab41e3bc1b823ebL,0x18fa8910defb1732L,0xe3454a259a628d6dL, + 0x9e4264e494e99e1dL,0x31832bb5637d5ee6L,0x0000000000000182L } }, + /* 28 << 266 */ + { { 0x3ee61d53e8a300ecL,0x65e529c7e0cd8617L,0xb8c03cb0077b303bL, + 0x05eba936d5c88dd2L,0xc1e54497d42bb4afL,0xab477aafb0589057L, + 0x55acddc8e8f70a56L,0x6579b78b4fc80182L,0x0000000000000164L }, + { 0x535e7ff0bc97c72eL,0x65e954c9809dc093L,0x49edc4b8cd182ee3L, + 0x804874f9877d38adL,0x20104ab16df1638dL,0x78459a4b748ee6f2L, + 0xb895205d3504aa35L,0xbf26ac8af8089e32L,0x0000000000000041L } }, + /* 29 << 266 */ + { { 0x414660fad60c46d9L,0x771ea3b5defcc1eaL,0x47e275514478526fL, + 0xe13deda6872bc6bcL,0x6a8af4a708e6829eL,0x0d234a58c0cc0817L, + 0xc209a0b1993176d4L,0x703a36e8e4d7c55bL,0x00000000000001e5L }, + { 0x00e521dbaf301144L,0x8cd4f076373a95e5L,0x1c409a63aacbb198L, + 0x1712c6cf0c0dd3d5L,0xc2bc08609ade8d6fL,0x0f2f51bcce84b335L, + 0x76bf54aa4a193540L,0x0b17b9455c0db4e1L,0x0000000000000040L } }, + /* 30 << 266 */ + { { 0x76251b3e9f2d82b0L,0xaeb86af12e4a0ac3L,0x01448c52ff3f9965L, + 0xa3280f7c61042ff9L,0x1845a38fa1a3a6fdL,0x802be4bc51d4dd8dL, + 0x189cd677d4837b6dL,0x9b052dc25af0fb47L,0x000000000000007eL }, + { 
0x91e76d5f5c765e26L,0xa63a93d821904658L,0x03318a472af96a70L, + 0xe2b55958bc3a294cL,0x4aa9d90d9f7a740bL,0xdbb00ae4baa31d88L, + 0x83df26343920d053L,0x33b1b418ce51fe2dL,0x00000000000000baL } }, + /* 31 << 266 */ + { { 0x6c95ebc8b8c1d76aL,0x70c33cc22d77832aL,0xa73477b6b48e542bL, + 0x6d00020b6eeb8df9L,0x03888e1c3df975fbL,0xae48633704e49a91L, + 0xfbc90f2f0d90bd52L,0x44ead1e3dfec8b7cL,0x000000000000013dL }, + { 0xedd5fc2f83ac4742L,0x88933280ea5af10aL,0x369ae840a19847d7L, + 0x2404ca4e99b4a8f0L,0xade7c798d4e88a47L,0x049832ae08308b57L, + 0xb5fe206808d5ff33L,0x234872b6142fa38dL,0x0000000000000130L } }, + /* 32 << 266 */ + { { 0x3dff858061e5aa9eL,0xa33aef70955100c9L,0x7a52da3956ab4275L, + 0xefe16518102c4123L,0x8faeb0188d2e4d91L,0x3516243081719f70L, + 0x4ef1fb91fa78398bL,0x95eb0d18c351721cL,0x00000000000000dfL }, + { 0x90c4d877e5f04c88L,0xe108471abf176e9fL,0x8efd42750ef8e8fbL, + 0xadb1fc8e3b26ba83L,0x35050fd319bcb6aaL,0x00a7180d1e9a804fL, + 0x23ae97b660b511c8L,0x07d0be62403cef81L,0x00000000000001b3L } }, + /* 33 << 266 */ + { { 0x5c7f1fa9f7983284L,0xad20a27f4cf3733dL,0x848a72d4d256795dL, + 0x11cd1a8f31c2d9bdL,0xd101e6cc7875f7e6L,0xea169cd1013cee4cL, + 0x7c00964f6ba5c6f0L,0xce957f49def21557L,0x000000000000019fL }, + { 0xd87fec0c8fd38646L,0xc44d7c05105da346L,0xbbf95c4d910c4d58L, + 0x6a379cd0ed266251L,0x3201baf27b4b75b5L,0x6835bd3cf593add8L, + 0x09e93440b444ffa3L,0x8dd8d5428a999e34L,0x00000000000001a8L } }, + /* 34 << 266 */ + { { 0xcdcc17966d01191eL,0x76069b30cc266a67L,0xfe9a5cc93738b9e9L, + 0xbf210857b8d72f0eL,0xe71cb6f7c51394e6L,0xad01d21bdaf33303L, + 0x34c92183dc573fc6L,0xa293b60a8177e535L,0x00000000000001e5L }, + { 0xe9e6f1516e74482bL,0xd9b14871a07f28dbL,0x9ec270143e8ba372L, + 0x9050edcf579d622dL,0x2b3d57a3b7a0dec1L,0x5282d3fd7399d805L, + 0x46fbf40518b06ba7L,0xf0d75d34706066edL,0x0000000000000089L } }, + /* 35 << 266 */ + { { 0x88b9a526ca277da1L,0xe990008438a61571L,0x899e1a40fccf02aaL, + 0x5b352690d569929bL,0xc363f9897750040aL,0xad91ef44bd062400L, + 
0x3dc71c7812eaa485L,0x7644bd1efe306e6dL,0x000000000000007dL }, + { 0x7f8ad11ceccd773eL,0x13cc76ea8b4bd7ffL,0xfba893a3a1e0a336L, + 0x3563ef917150a5e3L,0x2676215848bb8dceL,0xd02351235bc0826bL, + 0x284c75afcaf00888L,0x127c932a48b68a1eL,0x000000000000012cL } }, + /* 36 << 266 */ + { { 0xd877404a6fc3e4d4L,0x90f4424143959c60L,0xb11bc0dfadd26aa4L, + 0x45c833fe8773bddeL,0x26e39709ca264f8dL,0xcc87592446314f2aL, + 0xd9def19853ddf49aL,0xcd60de6495742a00L,0x00000000000000bbL }, + { 0x5cb1207487d742a0L,0x41ddaa31655afe45L,0x69765ba5a54ece2cL, + 0x575d21810b4b57b8L,0x8540de4bfdec936bL,0x2d73ccba13c17184L, + 0xdeba26f2a345fe93L,0xf9a8115844fc190cL,0x00000000000000e6L } }, + /* 37 << 266 */ + { { 0x20db3c8c67046c7eL,0x5c5899fcbd8b92cfL,0x707ec19eda3d6021L, + 0x3d4b9927fbc017dbL,0x6d8757ba35097e5dL,0x6daf1a7097e8598aL, + 0x84677a8a65cf83adL,0x99e82cb24b4483a1L,0x00000000000001b6L }, + { 0x85ac6ec39423cf6fL,0x746d4cd7f8ea9921L,0x7829a0ad8b6c7446L, + 0x17833a99799b235aL,0x04cc99281cdb0e56L,0xe119f3213d1957beL, + 0x9d723d47d6f73bb6L,0x478afffc456cd261L,0x00000000000000cfL } }, + /* 38 << 266 */ + { { 0xcac9ee0296bb50a1L,0x344a30a171b75917L,0x7040c29be338ca78L, + 0x7d11b5ab7f1329d3L,0xf545164d5deb103aL,0xc056825de377a217L, + 0xd1b8ce5d1fdc5ba4L,0x4ea7fa876b3f275eL,0x00000000000001f1L }, + { 0xeac8c31323ff0cdeL,0x4317e5a759d122e1L,0x402d1078cf955e30L, + 0xa26f2bb9e2aefe0eL,0xb33602a6779d0928L,0x5ad6f902966256a4L, + 0x9f13eca6064745a5L,0x49432dbade6d235bL,0x000000000000018eL } }, + /* 39 << 266 */ + { { 0xa1dd69bd5714383bL,0x1ecd74467128e274L,0x5c14f4e26a0407deL, + 0xda00128e3ea0e272L,0x3f07ca2c2e4b7815L,0xc886f05e80348645L, + 0x987b488fb9edab2aL,0x56a44c68aa59100cL,0x000000000000000cL }, + { 0x31c490fc20216efbL,0xc7d7cd07a226907eL,0x4f8dc600fbb809ddL, + 0xbe7d49f895a97bb8L,0xc8632c28bc899729L,0xbccde2198bbfdb6fL, + 0x9c20db29a3bffcd9L,0x63ae8bba2a16b4f5L,0x000000000000016dL } }, + /* 40 << 266 */ + { { 0xdd8026ea8d40a41fL,0xb0e2795c8f0752e0L,0x6f120a8d5e6d1f7eL, + 
0x0aa99441abaf2381L,0xa60677641815fcf4L,0xfa43027bfe2668e2L, + 0x74557a4f87d678a7L,0x5c068d13c6096024L,0x0000000000000053L }, + { 0x0ce11532bfd0f624L,0xce62d3a67e476a2aL,0x467ae9f11667b2b7L, + 0xc418d77f4e4ea4bbL,0xb96c33825132ff6dL,0xca75d16c5fdb9483L, + 0x24cf4df2d08f6f87L,0x70388217eff3b645L,0x0000000000000106L } }, + /* 41 << 266 */ + { { 0xce9620b3056271d8L,0x6612d8926cf33e57L,0xcd12436bbb8f54f3L, + 0x98972847878977e9L,0x066c6a3753a12cf9L,0x80af4b986f85ed96L, + 0x939721e3a7d2b688L,0xc07ca3cfe4b6e967L,0x00000000000001dbL }, + { 0x5b3c9f28b32ef369L,0x2e365cec90bec7a7L,0xd2755e2441f7a73dL, + 0x6eb6f889ea6be0adL,0x19a373073dda2212L,0x07bd68ee7ab4b633L, + 0xadab0fac6a787bfcL,0x5cf888d8ce37e38dL,0x00000000000000d8L } }, + /* 42 << 266 */ + { { 0x9818697810a4cecaL,0x6360bc55d3c03045L,0xcebe3bc3f1e322f0L, + 0xfb5d861458625a5fL,0xe91937a64f41216cL,0xd94e229fa392d9f4L, + 0xe9cbfba022aa028bL,0x7c8a3d9b9f26e506L,0x000000000000004cL }, + { 0x8095d4ac44690ed0L,0x8e76d94ab578e759L,0x25a1ead877504659L, + 0xe7ffc4626bd2caf9L,0x18085dd8c7dcce0bL,0x336faefbf5476396L, + 0x5e94e9dde08c63f9L,0x9be80bf65614a1c5L,0x0000000000000051L } }, + /* 43 << 266 */ + { { 0x225252040c05d291L,0x8254fc5ea459cd01L,0x7826a55fb121c3b1L, + 0x0000dd29a49d91aaL,0x49463679dee46523L,0xcb5fab5db2c6c30fL, + 0x70d7ed1edabb3865L,0xceac3e388d1d4152L,0x000000000000012aL }, + { 0xe5ab8203b1dc6339L,0x00871ca7d6a1dae0L,0x3831e96762dc6379L, + 0x6d423be07a6a29caL,0xad645df0e61ffe3fL,0x68567d31e414b0ceL, + 0x48455422119b3661L,0x6860c25d6c3cf9ceL,0x000000000000005dL } }, + /* 44 << 266 */ + { { 0xf04691638d0bc669L,0x62b84488e939f5a9L,0x0d3f495380fe63b5L, + 0xe39a7aa158880c15L,0x74df281ee613f21dL,0x908934dcfa016692L, + 0x5b9cf6cc980904b0L,0x0f3aa0f43a17dd3aL,0x0000000000000105L }, + { 0x346bd219651d59e1L,0x51024297c71b37bfL,0xc634d031f0640f18L, + 0xa657da60ff003294L,0x9cfc79fc15aabd29L,0x56807dd77b4a869fL, + 0xcd97cd8959d9033cL,0x58640103a3233c39L,0x0000000000000090L } }, + /* 45 << 266 */ + { { 
0x1c1d333fe7c0c2e9L,0x5dad349290d5ed65L,0xfd50637bd1fbf910L, + 0x73765e39d92af73eL,0x97100d8c7e12eac1L,0x2dd53ccca486a059L, + 0x0a8949c5fca9464eL,0x86fee09ec1d1e819L,0x00000000000001f3L }, + { 0x14a6857f2f1b392dL,0x55e63e4b61f6f679L,0x096b8806e2721792L, + 0xe0c5fcf597b80c13L,0xbabc39f6d78c2248L,0xd7a963e14b2c3516L, + 0x5d1bf511e8c0ee67L,0x15b6e43183b7041aL,0x00000000000001d8L } }, + /* 46 << 266 */ + { { 0x9fd0467622691895L,0x7bc9efbad1407b18L,0xbeaa1052bef6c449L, + 0x5ec3fde4887ba913L,0xdc330887b93cc69fL,0x2f45e738189d6049L, + 0xa5561ceda7b55419L,0x6c02e536c7c0c880L,0x000000000000013fL }, + { 0xc480b98a11c8b600L,0xbce33f4ca381f348L,0x0993dfc429fe9783L, + 0xde78a2b0e6c1590eL,0xe61ed1285347e99dL,0x51af7410e78af478L, + 0x65be48f114275d9fL,0x3434df3d7cf1aca6L,0x000000000000011fL } }, + /* 47 << 266 */ + { { 0x2eb82ba1bd51b234L,0xc17a47adf106770eL,0x913b0804bcdd7ce4L, + 0x2be5948e905cd641L,0xa2db49622ce3902aL,0x22c48f2c8ad6d3f0L, + 0xab188f07cc119a95L,0xd8177552595fd7a9L,0x000000000000003dL }, + { 0xd4de0f9cdbc3d844L,0xd195cd3257218c66L,0xbf9e5b684e68fd4aL, + 0x6e6f3122bc74f17eL,0x6daa87f5d46c0547L,0xf636ac5282c5a92dL, + 0x897bed5f780d6a32L,0xbcf2b58bde400db6L,0x0000000000000146L } }, + /* 48 << 266 */ + { { 0x79ba87baf1c9c2ecL,0x0e53555266aab703L,0x31d1d9712c990b59L, + 0x1e0b5d0e12c6b29eL,0x014d53ff563bcab0L,0x425986f2f64309d0L, + 0x475fc9d5318b9d6cL,0x02739c50ff59e392L,0x00000000000001b2L }, + { 0x8bf58dfc16f1caccL,0x1c93ba4e6b08ce06L,0x79b5e0399fe18430L, + 0xc9f67bd47de66325L,0x5def2733713d0fefL,0x31bd8b37b9945412L, + 0x8b2b69f3f3b99628L,0xf08a7e5a3118ce62L,0x00000000000000d4L } }, + /* 49 << 266 */ + { { 0x421966e5b03d785fL,0x3dbb06bb2db82562L,0xfef17870e406ab4aL, + 0xa6f04241b0f7f7ddL,0x69b5bcd1cb5270d7L,0xecbf4272e68d39e4L, + 0x722a3a295fb029edL,0x12e1d8236a535ae2L,0x0000000000000080L }, + { 0x57f3541d31af5bf7L,0xa9463bc40db44d31L,0x320dc4d335748c65L, + 0x86e70f72eabc0fddL,0x47911116c02d0191L,0xb66114907b95b2f9L, + 
0x60ad777bb050875cL,0x168967505cfc66b9L,0x0000000000000015L } }, + /* 50 << 266 */ + { { 0x2f89c1663c2c5333L,0xddab9a55df09beceL,0x4bf00d3c6640cf11L, + 0x8366d23df05a2e65L,0xaf81ce01e2c9f1ecL,0xbca9287ae2678ff5L, + 0xab1854ea91ec560bL,0x2c74e77c6381b109L,0x00000000000000c8L }, + { 0x36214f8f0b1c20e1L,0x658bf89c86a631eaL,0x160b2ce485061fe2L, + 0xf7d151ea1d7b78f5L,0xb50a06792bdd0675L,0x8dbbd631d2f1cae3L, + 0x2548deae849ce90bL,0xba74f3b7bb58c6f7L,0x00000000000000ffL } }, + /* 51 << 266 */ + { { 0xdbc33fff01a3ae90L,0xdc1fb1371737a61aL,0x7213e66891def29eL, + 0xd99530b841d7df49L,0x23a50594943c90e3L,0xfefcb98881dd6daeL, + 0x736b8505dbf5986bL,0x6f4c28a987496a5dL,0x00000000000000f4L }, + { 0x92d13be9ca00d61fL,0xebd928d6dce1556dL,0x307e3fecc71ae83dL, + 0x2a4939c2c15ca194L,0x13947cafbcad067dL,0xf4ce58667d41f480L, + 0x1ef59d7047e07cecL,0x5d8068403796b61dL,0x00000000000000b1L } }, + /* 52 << 266 */ + { { 0xfea0d82ec8a4656dL,0x0fa181b503d94a84L,0x3666bfd2f8b5c4f4L, + 0x4e246013619d399cL,0xea46dfc9c11c1ce3L,0xdfb408aa10af3583L, + 0x94fce3f2785d7d61L,0x3244f6181feff091L,0x00000000000001b8L }, + { 0x939d98f4add4f84aL,0x4c2e3d15e9d04356L,0x08529c57c5d1729dL, + 0x4966bf7709597836L,0x03b9958cf4b32760L,0x2e7213b46a9536d0L, + 0x455ceb5984faa490L,0x7d83e2f5146ed682L,0x0000000000000004L } }, + /* 53 << 266 */ + { { 0x3006b4db99b32f7cL,0xcc70659922b7c67dL,0x2ef09c9c5259dc6dL, + 0x732ef8c90faf1eb3L,0xf3c83942a1837500L,0x6633ddde6cc28b75L, + 0xd1c95f71e1ebcaa0L,0xcd6609dc9e548e3bL,0x000000000000017aL }, + { 0xb35f17893b3da933L,0x8d169444379ef7f6L,0xbc00a35a8c84c365L, + 0x3411d83a5ea4534aL,0x209414cbada5b0fdL,0xd0ae1bce1a59ed3cL, + 0xeb7542c292c524f8L,0x0d233104eb75a43eL,0x000000000000003bL } }, + /* 54 << 266 */ + { { 0x9a4dd7b8ac1a7884L,0x2067b936b874749eL,0x52260e05d13b0d4dL, + 0x61476a10a9c3e750L,0x2047d74ca0f97967L,0x8655120d1d34b122L, + 0x60cd407c935034ebL,0x499ba4dd8bbe83b1L,0x00000000000000b1L }, + { 0x6213fd52b2bdce3cL,0xda3f6f864cc05a03L,0x24183f37b78a7247L, + 
0x996bea8201ebedc6L,0x7de1df0aaed2def9L,0xb59c1f5a624d5b0eL, + 0xd9b892084ec7e5ccL,0x230df59c54090935L,0x00000000000000feL } }, + /* 55 << 266 */ + { { 0x0e683a79496c5739L,0xd855e4884737520cL,0xf3b0fb38b1dea15cL, + 0xf5118c492c55746fL,0xd313798ab9dcf49eL,0x33e97020992ef98fL, + 0xc6ae345eabd13522L,0x47fa0669425f9edbL,0x000000000000012eL }, + { 0x697599ad5e3eb523L,0x977313269216d4e8L,0x2bf6d652088d222bL, + 0x852230c5114a21aaL,0x8e713dee3f02aad7L,0x02c85a4ffccdaa39L, + 0xbebd0ff6b7a53b32L,0x103bd947b9b77764L,0x0000000000000199L } }, + /* 56 << 266 */ + { { 0x984d21e6b92a02beL,0xf2bba82c09f08a26L,0x9c4259d1909fba26L, + 0xa36bcd437030f0a5L,0xf6c218e468ae6f05L,0x199a1ef30ed869f9L, + 0x803860ff417e95faL,0xbd6bb9b4f29f7c84L,0x0000000000000130L }, + { 0x5fd4e3b2d4e924d1L,0x388d2e0a9b2814d5L,0x8e4696653e9b1aecL, + 0xe24a5cfe9a8320a8L,0xffd986e2a83ed877L,0x8499105d9c00fbc3L, + 0xc6b2171f73f5deaeL,0xf1432d70f9d058f7L,0x00000000000001e3L } }, + /* 57 << 266 */ + { { 0x030550d7132338dcL,0xd768b93af6e8f267L,0x90ea975362c49edbL, + 0x3d4ff755ca1676faL,0x2a6f71962a52f67cL,0x85f2473be355efcaL, + 0xe1fbd88920c998dcL,0x70bfdc6fd8491c54L,0x00000000000000d1L }, + { 0xcc89bbcf2b71306aL,0x10ea9edd6100aaa5L,0xd1774fbfe1f76710L, + 0xf006f841ec8bb412L,0x7dfaa7290c2fa1dcL,0x45cca48e8fed926cL, + 0x4600d2919e67f94aL,0xe363f267269a4cefL,0x0000000000000132L } }, + /* 58 << 266 */ + { { 0xd167783be279b76aL,0xa10f42afc988d262L,0x41da90eb8a53ddf3L, + 0xa66061125539740aL,0x0575a23cff000f06L,0x150f47e563a5124eL, + 0x8c923bf91a1c07a4L,0xac510583f4752dadL,0x00000000000001d7L }, + { 0x32fa0b887c3a94ffL,0x7625bb5ffb91c0cdL,0x2f2be34111b94d00L, + 0x824f6bfebcf2e24eL,0x95a9120f6f255fc2L,0xbde9336c35e3c721L, + 0x95742cc454f31228L,0x689d8bf3a899e1bbL,0x0000000000000155L } }, + /* 59 << 266 */ + { { 0x04bab74886920088L,0x0cb1d789aa224f96L,0xc93eb318d28e0286L, + 0x523d44902dda423dL,0x5150f518bcd81054L,0xe0443e329498fbc3L, + 0x0089a01e8fd77381L,0x7f5cebd637ce0637L,0x000000000000011bL }, + { 
0x40cbeb26cb481b5dL,0x29d513b817e35565L,0xf884cd0182b961c8L, + 0x2ebd1a191cf01a93L,0x2ab31a0b8f398d6aL,0xa15f3eaec09c3a9dL, + 0x549b4cfc31f19b5dL,0xd42d563bdb0d2199L,0x0000000000000164L } }, + /* 60 << 266 */ + { { 0x8626cba3cd27ff10L,0x5f83c650dc15323dL,0x370d973042cc94e3L, + 0x06ebd6efdb164003L,0xcda6f2d782850e4bL,0x74e500ef17a82fcbL, + 0xddab08dbb228248bL,0x56e403d5870a453eL,0x0000000000000083L }, + { 0x507272fc79dcfe50L,0x25ee735db3d2ba34L,0x92386ed3fc3349eeL, + 0xb0a88de8f9ebdeb6L,0xf1ef010cdd93abf9L,0xead25256a0838914L, + 0xce2acbc0d54b6544L,0xbc78ce9a7c94ce27L,0x00000000000000d9L } }, + /* 61 << 266 */ + { { 0xc6914e748df59decL,0x1fe914fc5fb68222L,0x6927c9ee4c5a99bcL, + 0x6b391ac926d71724L,0xb702eac64ccabd0bL,0x30140f3225ef1fb0L, + 0x2925f1a5368574cbL,0x019a246c817bdcd8L,0x0000000000000148L }, + { 0x8b5bc82843a579fdL,0xa7df3a7e0428a197L,0xdc92a823b299e290L, + 0xb4796ea49b9eaf60L,0x37fdf758efff2b39L,0xb51d61b119a00b8aL, + 0xd0ba025158fec2dfL,0x4481424bdb70c86cL,0x000000000000019dL } }, + /* 62 << 266 */ + { { 0xdaf4f001c3742075L,0x9d21b51ee86d1e9cL,0xfe5359f1041b08a3L, + 0x68890b43565a56b2L,0xa5dd5889cf437ef2L,0xf78e7c4621323d5aL, + 0x0d6b8410646f2ff6L,0x7d08bb8206a40139L,0x00000000000000deL }, + { 0xd9a729497ac5686cL,0xf03be45847486981L,0x7f3c9dc4cd31b81fL, + 0x5c3b01b791730a67L,0x4abfeae42bcdc09cL,0x3e98e2ec1f65b976L, + 0x280fe587b1686b96L,0x7038ea9fbcdb3f69L,0x00000000000001c2L } }, + /* 63 << 266 */ + { { 0x0788729410b5679fL,0x4a660d8e0e9207b5L,0x65f23f96a702eeb6L, + 0x4d2892f71035ebdfL,0xcf6962a6237548bdL,0x599710afb3dc6988L, + 0x0787789e9e8129faL,0xefe5135a7b65d139L,0x00000000000000f5L }, + { 0xc1041db3dc996150L,0x61f30cbfb3b9717cL,0xcc86186dd933fafaL, + 0xa89429239edd4ba5L,0x3b35f50b2d323ad7L,0x8f43b479acc1d0e8L, + 0xda38daa46793e392L,0x564b87346fb6121bL,0x0000000000000004L } }, + /* 64 << 266 */ + { { 0x0e13c1ddcecdf0dcL,0xacd530b6eeb9aa6cL,0xbe3c0fc80af8d3a8L, + 0xee67f380ce44ae8bL,0x9adc56363854b528L,0x527f7f90130f4497L, + 
0xee085713e80c35ceL,0x29bc4b3e190fae23L,0x0000000000000064L }, + { 0x17ea6fb5dbb3838aL,0x192eca92b425af89L,0x5fd3bfcf85a94659L, + 0x8d75b3f5daa4b4ccL,0xbacc18b89bee144fL,0x57591774ff60ec49L, + 0x319cba952363ac43L,0x6c0d079e472ee36cL,0x00000000000000f0L } }, + /* 0 << 273 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 273 */ + { { 0x28cd16ce7333eea3L,0x601ab611845655fcL,0xf18179d957ed4bf8L, + 0xa704b004172167c2L,0xe22ec70382e3fcf3L,0x4cd9bd0de64c4745L, + 0xd1eb58dfec9737e7L,0x540c29e8e6658aacL,0x0000000000000124L }, + { 0x4f35d47d4b18bb58L,0x3ee2a66ad4be21bcL,0x9d3d69a1052b60fdL, + 0x556347ef8f07eb0dL,0x66014006165af89bL,0x808eb84331e62cc0L, + 0xfd119574309553a1L,0x4945b9cd7656234dL,0x0000000000000060L } }, + /* 2 << 273 */ + { { 0x05bfabadb30c227fL,0x163a01129ca0d9f2L,0x448f630b4a448ffaL, + 0x7a304fdcf52d44f0L,0x4ceb9902f50356e7L,0xe7615ab1c43b436bL, + 0x9fd56374d63f192dL,0x38470c8f418bd616L,0x00000000000000d1L }, + { 0x4b42c1459554d529L,0x5556cf8b2bfe7bc9L,0xe28b6f8994eb909cL, + 0xc00748ee637dcaabL,0xf71e0fcffc076bf8L,0x57642f92b580d617L, + 0x755fdf2e1d91f20cL,0x9c3c9c86dd5ab14eL,0x0000000000000187L } }, + /* 3 << 273 */ + { { 0x8dfa447387950112L,0xd032940ae8d5c1aaL,0xeb909e673379771dL, + 0xe780d40da336a4f3L,0x318e1a69ce022106L,0x36b6b1748702e3c8L, + 0x327e8db4f8e60cedL,0x9c1a790b8380c97eL,0x000000000000008fL }, + { 0x8eec3caac6534a9aL,0x650fa0d556cfdcf4L,0xfe45e2ce58c3798cL, + 0x320fac25a60e6872L,0xb399ec98ef983104L,0x133c9f01be57c16eL, + 0x9736e7543b73b0a7L,0x12c0ce90b48f9618L,0x0000000000000155L } }, + /* 4 << 273 */ + { { 0x2ff940bfa7c8fc21L,0x9b3cbeeea6a86c6bL,0x5cd96c7d171573a2L, + 0x49bb08e812cb3013L,0xddba493e34b92d91L,0xa56f49a5ef35e196L, + 0x7a454c1c2cdca6c3L,0xb9c038ae684d25fcL,0x00000000000001b7L }, + { 0x15adc72daf46c7eeL,0x21450db25eaf9891L,0x115517e153a32b0aL, + 0x5bd9d862e26d8171L,0x991737bf3f8e37e8L,0xcbb33580531a836dL, + 
0x96aff0213d976493L,0x9be26568ca69b662L,0x00000000000000beL } }, + /* 5 << 273 */ + { { 0x32d089edecfa97e6L,0x9bf82c96b9f080c2L,0xf1798cd9af6bf9aeL, + 0xa111bd63cc9b8764L,0x9d1aebb4295a4931L,0x37f6c9ac32442dc0L, + 0x3785c17e6d6b2b32L,0xae63aea3ccbaa5adL,0x00000000000000e9L }, + { 0xc2207aa55718e310L,0x5aaede6c9004a15fL,0x07e98d521a4b7f1bL, + 0x351eca4df3f71723L,0x0026164672a30c08L,0x838f24ecf46357d8L, + 0xa8278e1a05348650L,0x402ee68e10ec3795L,0x00000000000001e9L } }, + /* 6 << 273 */ + { { 0x91955973e33296fcL,0x83dc2cea4212671bL,0x59b7959a3ef3d3ddL, + 0xad706c14147b4051L,0xff0e938e46217be6L,0x3550f6ba963236afL, + 0x26a254a435de4d64L,0x829799ebd3fb0643L,0x0000000000000194L }, + { 0x30dcbccb348f4e93L,0x1211c003aaa258acL,0x9d8500d9383698a3L, + 0xc2c4310a530725faL,0x6f60e024d88235feL,0x1bcdbca5715c563aL, + 0x2100b4b2000501c5L,0x9f483a0505c8bf39L,0x00000000000001f5L } }, + /* 7 << 273 */ + { { 0x618d04f0001b7109L,0xc5cbefa8a94e8a9cL,0xd5f0e7b20aed80f2L, + 0x9c876050cd2e2661L,0x18a046bbe3f15e9bL,0x3568d9e165664a6bL, + 0x8ef9c3b4b0b7229eL,0xa779b68d66dde1eeL,0x00000000000000dfL }, + { 0x9997a707b3465741L,0x28192afa90f61124L,0xea06c8b89ace2e5bL, + 0x208f1ad1acb7cf35L,0x1ac49e4ae569efa2L,0x7daffa0f8368ecb8L, + 0xc559483f40e4879bL,0x8de74638753b0b83L,0x00000000000000e5L } }, + /* 8 << 273 */ + { { 0x51db2d9d07207165L,0xc7d56a41c3d4c59cL,0xa47fadda4e278b0eL, + 0x5c3964e204ffc588L,0x52bc9f389719ecfdL,0x2c9292c035a04049L, + 0x885ff9bafae7580eL,0x2319a2dcd5cc866eL,0x000000000000015cL }, + { 0x8b77ddb70ae9af6eL,0x0e21b050dd7fd36cL,0x35d968ecdc8b9c19L, + 0x6f3f0785a624ad4fL,0x97778a681d42c97eL,0x86103bf0a2a4342cL, + 0x0aef36fd3ababc35L,0x46c8fb212fe10364L,0x00000000000001e1L } }, + /* 9 << 273 */ + { { 0x21488bccf8cbba8bL,0x6cd7d298c6cd24b8L,0x0c83f3ee5de8e8e5L, + 0xdf29c9784582fcabL,0x7024c1526806aafcL,0x979cd4ff958067f2L, + 0xc13fbc7c8cde09dbL,0xd468e6f5da3a5090L,0x000000000000000dL }, + { 0x1ba673a47757fef1L,0x6992b5485070e0e7L,0x8b2c42327acfd5ccL, + 
0x27210f84a508dedaL,0x883d6e39c2629e49L,0x1b83cba19e7dd61dL, + 0x455f212a4f2bee2aL,0xed70ce46ee719f5fL,0x0000000000000182L } }, + /* 10 << 273 */ + { { 0x5c3a0ed97adc9d29L,0x37ad64e4796a4dc2L,0x0d5920486ce3c6a8L, + 0x2f5d313d166ba729L,0xda128b7bb5ea827fL,0x97c63821f4a68914L, + 0x35a2d15501c3f074L,0x07fa2fb4b80123e9L,0x000000000000004aL }, + { 0x408d54c34c208ee5L,0x75119aa6b2f910c3L,0x424a684d1453eecdL, + 0x800c87a64e1cc60cL,0x1ca02fe5f835f438L,0x019870df716b0991L, + 0x94e36e623cf7b652L,0x45cc33a25dabec4cL,0x0000000000000019L } }, + /* 11 << 273 */ + { { 0x304d81e391a9d18eL,0x152664f7aa95f5c9L,0x6507a739a51eeedcL, + 0x77d8c41271aadc19L,0xf9908b36a7b6ba22L,0x8b2dd6ce2d5c2f59L, + 0x38f2787cb66f5e0dL,0xccd13ca9b2c40bf0L,0x000000000000012dL }, + { 0x1887e4a728b4694dL,0x417981e3a25eec32L,0xe07f28ff6490e81dL, + 0xd2ba6c79f00abe64L,0xe0af21fccddfeb36L,0x82fbcfeb986898d3L, + 0xae77cb38e0718462L,0x0818cb63fba01b54L,0x0000000000000193L } }, + /* 12 << 273 */ + { { 0x188c77cc75eccbbdL,0x424f7ec52aa03503L,0xfa5819b9c8fde26bL, + 0x70a39f9b566dae00L,0x70ea7dfba22f5199L,0x502f681e60b863e4L, + 0x0fd204ff4581af1cL,0x536f4b875d1aa7e9L,0x0000000000000044L }, + { 0x52d311fb5713fa2eL,0x28d3afee2b0af3a0L,0x82aca5d14dec093bL, + 0x9d6a3856a946f620L,0x91eb4618e32ae798L,0x62580350783bff5cL, + 0x610110e5b76e4c61L,0x76287f2f3c2ae4aeL,0x000000000000002aL } }, + /* 13 << 273 */ + { { 0xdc5bba7b75716569L,0x41487a141dac5c65L,0x36e2a740b5c823e7L, + 0xe40d249065e0b210L,0x77d4708f8839779dL,0x879b9a4ba1d7eab5L, + 0xb3409669311b4f51L,0x6d1899fe423b3b2fL,0x00000000000001e1L }, + { 0x2d351db99db2b624L,0x7143d053d054c63fL,0x3ad8fce9c5ae5097L, + 0x98ac8ae089757358L,0xcbb20672ee232061L,0xf703f203c02c786cL, + 0x9f7f153b2f4907aaL,0x2b2d7f4056eeb499L,0x00000000000000dfL } }, + /* 14 << 273 */ + { { 0x5054d6c0d4daec55L,0xe75aaa4d2d795df2L,0xa72ffe306da506adL, + 0xe019fd58e375432eL,0x9827f72b4fde3a24L,0x54f713e42a3320a5L, + 0x41408596baa1f5f6L,0x9c42b30fc65b3ff9L,0x0000000000000038L }, + { 
0x1b36dca6031dba90L,0x5e80043e0c0c3559L,0x17a387f6a3a6c663L, + 0xc9a8c64f000711d6L,0x304a1f4178156da7L,0xdc5de4a9cbdd6bd9L, + 0xf65f6d1a599ddcc9L,0x591e35e24c1e3e7aL,0x00000000000001afL } }, + /* 15 << 273 */ + { { 0x8a775e63fc349e8dL,0x25de6478e3b39a51L,0x1962bc0927ae51faL, + 0x7d3cc7f79952670cL,0x9a09a697059050f0L,0x392dbf4355234066L, + 0x1cbff05524efc89bL,0x68a50b092f805012L,0x0000000000000078L }, + { 0x1c48213e47bef4e3L,0xae844a2a0d5e95d2L,0x95f0efc014c40369L, + 0x0e1d7c2d59ee29dfL,0x1ed16e33d68d68f3L,0x0c54c8d47babfe13L, + 0x155a3aa83b036552L,0xd9fd099e9efafce5L,0x0000000000000123L } }, + /* 16 << 273 */ + { { 0x2fe08a10462e6862L,0xa86db08f35ca4eceL,0x4af069a05960b9c7L, + 0x9f0086014c7fbbe4L,0xfe91aaab59bcd54fL,0xbb6cd66467a1bc8aL, + 0xf3b3590354ab69bcL,0x2d5b1227ff914c06L,0x00000000000000f9L }, + { 0x22226588f7d5ac27L,0x5bf4d921d14893e4L,0xa0a5c850c384f172L, + 0xc5deff764351441cL,0x9cba07b4539d45baL,0x324345620a80ae0dL, + 0x0e144257bb44f664L,0x1c5c19c163da2750L,0x00000000000001a4L } }, + /* 17 << 273 */ + { { 0x51e56fd2377fe385L,0xd7492e760b164c7fL,0x2f6b60c80ef96709L, + 0x9b0711e5922f73a4L,0x94778ad3f280a8eeL,0x64c426437a4bcefdL, + 0xb0c26afc659c9918L,0x82ee9fab35019434L,0x0000000000000074L }, + { 0x3d35f4ad7e800ff4L,0x238e7f313d01d6ceL,0x172c4169e901ebbaL, + 0x3232f5673a643a41L,0x4c6f397f79701e48L,0xc8b78dae42cb8029L, + 0x456590ca23d30eb2L,0x4822aa96ecef86a1L,0x0000000000000136L } }, + /* 18 << 273 */ + { { 0x2827509940aa836bL,0x4ff61db18e6dfe44L,0x8402c1a4b2ff8beaL, + 0x35306c0483432ff2L,0x038990de7e604bd8L,0x24c5056d94db9d3fL, + 0xe7b9a01fdf1dc845L,0x10d538b1c5918c0aL,0x00000000000000a5L }, + { 0x1d80e8bd54959824L,0xcb6fd66c89a5d7e9L,0xf3af6940860e3fdfL, + 0x421ce5e8563137c7L,0x03e6f677661866ecL,0x71e09e4b5276a885L, + 0x915f8f97aa9aeb74L,0xa95c50572cc76508L,0x000000000000001aL } }, + /* 19 << 273 */ + { { 0xb3809974fb81dcebL,0x04c8b5f63319a12aL,0xfb0f28c8888e81bcL, + 0xacbe4a1d2ad580c5L,0x98364afc0bb15f66L,0x989e8123ad9238a6L, + 
0x1c447ce56b1c3603L,0x5aefd392295aa36aL,0x00000000000001c2L }, + { 0x9710f3e830cc890fL,0x7282afbae4c45659L,0x14ca9438c207839cL, + 0x1ed458d898c2b015L,0x3f2a4c7af1ba2f97L,0xd9a0fd1c44908498L, + 0xde8553a737b341a7L,0x2da1c2272e563011L,0x0000000000000140L } }, + /* 20 << 273 */ + { { 0x73d2ec27a3fa7b6aL,0x97ae7b3a601abb5eL,0xd42c967294f25a78L, + 0x9516e3b715a54045L,0xae7bcc8d6b889d29L,0x1cde169f55b50252L, + 0x52e0bd3767c5274dL,0x8784031d2ef34a50L,0x00000000000001ffL }, + { 0x8993fe21afdcf8acL,0x09332a37f854b7d7L,0x7e59ab356cf0ef3dL, + 0x296081d775691e53L,0xadab7d8697d96038L,0xd517a740df09ba1eL, + 0x0883ca2d330b20e9L,0x930bfd5f43b56a48L,0x00000000000000bcL } }, + /* 21 << 273 */ + { { 0x8f9362b782308867L,0x4c617f3d2b121634L,0x7128d265a0c31abeL, + 0xb149770ec374c526L,0xa30771d74c5ecbefL,0xabdc769627acda27L, + 0xc258797b6450a1feL,0x8f0ac6d683044b48L,0x00000000000000b0L }, + { 0x92e7643b6ce14f0dL,0xbb0cc016cc8cf431L,0x7a9fce4ef1056d87L, + 0x7ebc00349fd21abcL,0x76ad91162c8c09ecL,0x38a51d4d3b161bedL, + 0xe1f7286b9a9b8590L,0xd2bef39669822c45L,0x00000000000001e4L } }, + /* 22 << 273 */ + { { 0xceeadc92e3dba9dbL,0xd218a237a95c8cffL,0x03302b21eaf709daL, + 0xbf3e305a8111468bL,0x72ab2ec07e82860bL,0x1a2df5a3e186f8f4L, + 0x12579936f1a95aecL,0x61e2b1e1c19a4e2eL,0x0000000000000000L }, + { 0xcc6802561edb4b21L,0x73d87649d7bdc78eL,0xe440ad1918073608L, + 0x016ef0c7d90974abL,0xa466d5335c7e0663L,0x2885493eee21a1a9L, + 0xf459216a2e4d05c1L,0x6dbce2c807a94608L,0x00000000000000c1L } }, + /* 23 << 273 */ + { { 0xfaac87ee84d6cef3L,0xd90ce8f916e9316aL,0xb90a157b7c9c7338L, + 0xed9b08d2005e1b8bL,0x2439244636898a59L,0xa6ca07901c2f7350L, + 0x9a93d39ad9447b26L,0x56acad519cb4c8a9L,0x0000000000000051L }, + { 0xf8f0216874848490L,0xa86f9dc8d08c617aL,0x49b48bf8a0731a33L, + 0x78126e62538afcd6L,0x40cde5c8ed4d2ea8L,0x254dee95831e1aebL, + 0x7532ce3731639537L,0x27fe56322d4f834cL,0x000000000000012aL } }, + /* 24 << 273 */ + { { 0x604b730ef1ebe533L,0x336e362e0b4b9191L,0x246e5c1ab711e872L, + 
0x652dc2c1f46e3f97L,0xc3492e151398e328L,0xd85f2669475cd2bfL, + 0xb4b28424a3c972bbL,0xf3d5711649a9ac4eL,0x0000000000000148L }, + { 0x2a5e5f5425e365feL,0x9202ddff6abd415aL,0x314e872acb24f45aL, + 0xced1dcd9b7bcc7d1L,0x0d39cf240377ce8fL,0x75eb96c9d1065414L, + 0x984b4a459fe0ee9aL,0x8237616704703ab1L,0x000000000000011fL } }, + /* 25 << 273 */ + { { 0x2839ff0b73edf179L,0x0c86f7213c27933eL,0x0635f21660380a50L, + 0xe30ac47e08978124L,0x9ef7580ca3c4c0c9L,0xfe842e3b5c323f9dL, + 0x320eb96f2322e2a9L,0x37e518f3ea489b6cL,0x00000000000000e9L }, + { 0x382199260b9d992eL,0xd96ade19441272a1L,0xddfb5d87ff188af1L, + 0x0bb3a1d0afbb820bL,0xc81b98b73d9f94e9L,0x9fd45fcc0f4b8cbbL, + 0x2b2e3a3ac5127190L,0x92c22ecf750fda83L,0x0000000000000047L } }, + /* 26 << 273 */ + { { 0x614ba6ccc44776c1L,0x80fede5815132829L,0xd69d5d613359d3ecL, + 0xf302a63e0d76029fL,0x201938b1bc711f3fL,0x1e7db241764f0544L, + 0x466d273d60c27af9L,0x472d12f244190095L,0x0000000000000027L }, + { 0xb30bbbad213788c2L,0x64d2b58670427068L,0xa0040d46f4c71bcbL, + 0x025d2898a6fb55daL,0x8f6bb191b5d4d6c2L,0xd9899a763ad65acfL, + 0x08acb6e19fea8d7aL,0x75cd5e5c63dd79b2L,0x00000000000000a9L } }, + /* 27 << 273 */ + { { 0x217ede5bae998a64L,0xb49ddfe7f6bdf2a7L,0x27344e0df7b9fd0fL, + 0x6692431a88552d89L,0xf22f33be0cc964fdL,0x56140230df10ab05L, + 0xded604f02c79339eL,0xeb4b1cedf36bd72aL,0x0000000000000079L }, + { 0xba2b0ff9f7844c49L,0xbda1e2396fb21a72L,0xe9ac028a95fb942eL, + 0x33324e2b37bffacaL,0xb5068cc3dcce1f97L,0x3cf1b594dd4cc89aL, + 0x65c4fd79ab81d662L,0xffd08615372b50fdL,0x000000000000018eL } }, + /* 28 << 273 */ + { { 0x06b3e15d5771af61L,0x1235e2b0a0b64d87L,0x5454afd261821972L, + 0x94396699d6c76c37L,0x2aac7459c61d6be6L,0x94893168ca05d3cdL, + 0x8d77047bc2250b78L,0x9adb6b8db1a06efaL,0x00000000000000c4L }, + { 0x8cbe58961a7b33a1L,0xd75a2d7bd83f7033L,0xe81731f314f445d2L, + 0xe905889593c865f8L,0x6caea833605f5264L,0x96df9656e3b6d3dcL, + 0xe7afb0760b224f18L,0x4cb4c72107aa123fL,0x0000000000000090L } }, + /* 29 << 273 */ + { { 
0xaded87ed9d1637d6L,0x6d5d5b3b694a536fL,0x6aba5271494ef964L, + 0x5a9d8207f899f7acL,0xb426737cd77bd7d8L,0xe8de83bc8e7761fcL, + 0x99f09dab881d65f4L,0x145efe113369e1a4L,0x0000000000000084L }, + { 0x385830fac6470ecaL,0x08d8e0664a415836L,0xbac25039b5241a86L, + 0xf02ed14198be6a68L,0x172cbd25fb41f819L,0x8e8c6c3ec0a203c3L, + 0x4a7861303316f5ceL,0x0e5195fb1ecc5915L,0x000000000000012fL } }, + /* 30 << 273 */ + { { 0xb067305d41f4e9eeL,0xe2c3afab7b26ad35L,0x620dc76da158082aL, + 0x13e8afcc03213e9fL,0x64018f55c12946a4L,0x770066dd8c4c89cfL, + 0xa07b753efbe52d37L,0x79cec78b7424b86aL,0x0000000000000072L }, + { 0x5ec514f1a5dc80a7L,0x7e70e414c049c348L,0xc3daf2f2eebcac00L, + 0x0f49f8a703563a24L,0xc36c093204e789e6L,0x6c17a7c02b59bbd2L, + 0x3a84fc9dc6cb7b49L,0x05e1457e0de6085aL,0x0000000000000109L } }, + /* 31 << 273 */ + { { 0xc90eb31fc17d94b2L,0x8ed3a7945e87b6dbL,0x85ebc38150d82b3cL, + 0xc906cf47dfff4d95L,0x122b7a67f28fdcdcL,0x96bd35386bf4a9acL, + 0x67788dc93f96461fL,0x982412df832f92faL,0x000000000000007fL }, + { 0x5eaf69178324dd6fL,0xbd5ae389c8093147L,0x1c19b1917939568aL, + 0x5fc18a88e5600018L,0x5c483792b28ac566L,0xd15eaaac13b67938L, + 0x3ad1d25e8cab3ebbL,0x6e68deb0247e9904L,0x0000000000000031L } }, + /* 32 << 273 */ + { { 0x2347e38373435eb6L,0x75563e7858ea9f89L,0x20ccb854efa62cb9L, + 0xdac97aec1cc86c06L,0xbe99c357f432d1b4L,0xcd89da561c2e03f6L, + 0x7e424afda789af68L,0xc386d1bf401f225dL,0x000000000000009fL }, + { 0x46f9bdbba1a12ca1L,0x489bdf59a6469919L,0x94ad85a73974bf6eL, + 0xa7fe1c85c9fde759L,0xa26931a072b3c209L,0xbd5b56203db247ccL, + 0x9ecb8a741085c481L,0x9134bbd1ea48e281L,0x00000000000001dbL } }, + /* 33 << 273 */ + { { 0x10689653e6397251L,0xd36ab8d90f9f0184L,0xa8c95b34a5da43efL, + 0x1920d31fb6e047b5L,0x0dd12f4ce1469d06L,0x94efeb8c3cdee90dL, + 0xf6b42234490b18f4L,0x4ffd0c3f9d9c97d9L,0x0000000000000165L }, + { 0x04db694b8907f755L,0x1297a489cab5d47fL,0x531adce1572fba2fL, + 0x63960f81014224ecL,0x61175880c4a771a5L,0xbb9116fd2941f7a3L, + 
0x04047b18a59d986aL,0x671b243166eb690cL,0x000000000000000eL } }, + /* 34 << 273 */ + { { 0x9db986ad8855ba41L,0xc106268894b3744eL,0xa92f7d8b15201d6cL, + 0xe1e9769008782567L,0xc9bfc18a482f9905L,0xcc6569b1f54a18e0L, + 0xd623f6e0ad8f5366L,0xb312dc0c5852a673L,0x0000000000000024L }, + { 0x82a24990455c2368L,0x77771a72791c090eL,0xbbcd5223698071e5L, + 0x4877919b2f7dbcb9L,0xb5b54e4e8622b142L,0x6c0f9b68c924f541L, + 0x40f658639de92f6aL,0xd1b03e4e626f28ffL,0x00000000000001aeL } }, + /* 35 << 273 */ + { { 0xe357497c44f788aaL,0xeb886a8f9952cd9fL,0xaabac389c4d3a590L, + 0xaf02fe81b2782010L,0x4cb27146ec5da884L,0x7104f1641074d877L, + 0x3c049a5c4bcda0fbL,0xcf680a9200118890L,0x000000000000005aL }, + { 0x76ea5fa1d2ca967bL,0xc8e6d78fb8d792d3L,0x203e8ca7e4fb3e32L, + 0x80a8f1e34959b68bL,0x3b5a8c7f0165eb1eL,0xbe35a03d5c518971L, + 0x815cc58c3dff7dceL,0xf4684b055909c47bL,0x0000000000000065L } }, + /* 36 << 273 */ + { { 0x7a8173004fabf449L,0x73b582d840f2b2a9L,0x3a3d45a8c970d55dL, + 0x4b929ad041a73083L,0xc39402f969c262bcL,0xdbc16a80f9b3d2cbL, + 0xf9abdf398d4fd1cbL,0xb3b5baa009125d99L,0x00000000000000afL }, + { 0xe8d916331263ea71L,0x34f37faa9afe25feL,0xe00102248d62a061L, + 0xfd4ac0ee89324085L,0x1ad23c7839425188L,0x3e04ce94f9b054feL, + 0xad2630664ba78a71L,0x1be2291a86fdcda7L,0x00000000000000baL } }, + /* 37 << 273 */ + { { 0x6acd94de05e1e1f4L,0x673190a0645a2a41L,0xa35fc575310fafd6L, + 0x3ac9c62230e58773L,0xb82a873cb07d1795L,0xe9946d72a34ccba6L, + 0x46c81998a6222e44L,0x2ff50bbcc8faabe8L,0x00000000000000d4L }, + { 0xa0b601a6786a9817L,0x93b775763e461079L,0x97ef405844808435L, + 0xf5a7b80c1199d980L,0x9e41ce271e84febbL,0x208a021be7f836e4L, + 0x58bf3f9231e33980L,0x00755fc7899562edL,0x0000000000000117L } }, + /* 38 << 273 */ + { { 0xdd5761c511676ffeL,0xb0cce6b697334331L,0x0cce322c7ef96ef9L, + 0x79fc56838c450809L,0xac7ad874bad8c85cL,0x41f7958cc41af040L, + 0xf491765edf1245d8L,0x02e26e6ccfceb02aL,0x0000000000000039L }, + { 0xb1abbef48131e277L,0x7aa601fe63dca0d7L,0xb2c6c9b5d7e372dfL, + 
0x0b7298ad989af18aL,0xc7f3a5e3d5fe8f67L,0x769001fffde1066eL, + 0xe10fded0dda36343L,0xa92f0d1b15d0522fL,0x0000000000000131L } }, + /* 39 << 273 */ + { { 0x6c1a6c256f3eef55L,0xb08580701c18b346L,0xce1c87968b4446ceL, + 0xf17d9053e23c4f17L,0x327e132e89f21512L,0x205508fe284e94ebL, + 0x1e62dda7f24fbc99L,0x344bbe262d39e997L,0x0000000000000060L }, + { 0x770d0acb3035df5fL,0xdcd8b49049761501L,0x632b7d51ee075e78L, + 0x6d945e49b47d9adbL,0x0f8c0b321d0ed89eL,0xc279b05c89cac70aL, + 0xa227cc73353633eeL,0x925d141639a4d0deL,0x0000000000000046L } }, + /* 40 << 273 */ + { { 0xad4f18816bb2c3b8L,0xd45f9a0cd380ca18L,0xfb839dafaca25641L, + 0xb89884f27e519019L,0xde295e5458cd6c25L,0x1bc5070e25a1e8cbL, + 0x66f1635747c21e93L,0x379e8f32a4bddfa8L,0x0000000000000108L }, + { 0x9141ea4e87cc4a52L,0x285cbaf972cff2ebL,0x27898f4df492be77L, + 0xd49d035f223c8859L,0x763866c2726065cbL,0x0886e8d5b2eb9e8aL, + 0x7deb06b31f70bf7bL,0x8ce6bcb7889d3e33L,0x0000000000000060L } }, + /* 41 << 273 */ + { { 0x271a04f88ce4f4b0L,0x2414550726b4b7f8L,0x159a895bbef3547bL, + 0x3c71622e5db5a5f9L,0x7ea8523192ece243L,0x65c89b75bc972333L, + 0x936ef797fc711efdL,0x05b810c7af98632fL,0x00000000000001b8L }, + { 0x233474cfdbe53feeL,0x13f02ed581983f5cL,0x26f0244eb5c5b790L, + 0xc39151490fd4b89aL,0x5d5731029a9ff308L,0xaa9ca0826b5e3103L, + 0xcae619b710f4837cL,0xcd1617e2cf88ff6eL,0x00000000000000c5L } }, + /* 42 << 273 */ + { { 0x1b012cc6fcc7c3d5L,0x02b9f445dde71f72L,0x5f2cfd665a2000beL, + 0xfd676315c8780139L,0xa594ecd3ae9b27dbL,0x9b06895afd3cedc4L, + 0x0d532a0d1a268fffL,0xe06bfde60dfb1337L,0x0000000000000103L }, + { 0xaa0a8cfdfbe7f2e7L,0xb2eddfb7adf3624cL,0x452e0b4fddeeeaa1L, + 0x4bc994baa46e664cL,0x2bd9645ea0eac15eL,0x4d34b0e75e550962L, + 0x6295f4a1697c8f01L,0x778a20923c08aab6L,0x00000000000000a5L } }, + /* 43 << 273 */ + { { 0xc4b1b87656477894L,0x9f8b9485bc8ff924L,0x55fa6ad5e675d0aaL, + 0x27cc06eb926549deL,0xa53fe770d61fa7adL,0xb407e08f8fd62473L, + 0x4c2c03f2a92b75d5L,0x810deda5009ca888L,0x000000000000016fL }, + { 
0x0f8e637dcca00512L,0xb195a15968dd81afL,0x44a70f986b904e07L, + 0xb264193f52c9eb87L,0x5e838f6fe75cd8fdL,0xd25d4157a7123b3dL, + 0xc6a13879cc52e032L,0xdda5d51ccffcbb8bL,0x00000000000001c7L } }, + /* 44 << 273 */ + { { 0xc7879147afb7d394L,0x8ec1b7fd4fb2b482L,0x955ea97dbd72c391L, + 0x9c5c1ec6cdb4e57cL,0xeda7ae96723ade30L,0xc27b17b48054db82L, + 0x0d1e386b91170eb1L,0xc6366e519aa3db3dL,0x000000000000002fL }, + { 0x710fde5f6111f40cL,0xd231d1b3fd9de6f7L,0x9cbca1b1a15652beL, + 0x1d1b5b4d0a93c0b0L,0x29f6c799dc4c3234L,0xc414ea5fcda6aad7L, + 0x7c44805f3ead0e82L,0x1eea7f63e86c4877L,0x0000000000000036L } }, + /* 45 << 273 */ + { { 0xea03e06e6909b7b5L,0x1eb59e94dd60ebe2L,0x34dbf67688d4c7b9L, + 0x45027f88f72d9b9dL,0x31b1f57995e61759L,0x61dfe2f6d4650f5eL, + 0xbd0d212d6d0b74f4L,0x3f9948eb48083b0aL,0x0000000000000013L }, + { 0x2bc49a766742cc59L,0xf83f88b2df5d4832L,0x3334aa91a688f8eeL, + 0x8a225b1d50913867L,0xe3d0925b9f5830baL,0xdef272bc88056d19L, + 0x22310572a90551e7L,0x5be1f7e622653c85L,0x0000000000000164L } }, + /* 46 << 273 */ + { { 0xc39065a8339a9d0aL,0x003e9b1e5ed50927L,0x1bddb64990510e1cL, + 0xc9cd7f8305bf885aL,0x1f6f6b26c677e374L,0x22c8e14499d0ce42L, + 0xe73b3c75760954c0L,0x8f1169532abfdac1L,0x00000000000000a7L }, + { 0xcb29557273c46650L,0x9eadcd045832208cL,0x81a07d4b6a6f83b0L, + 0xddf5c6863ab3dd60L,0x8f79004b68e2062aL,0x4dcc7bc7d2487a89L, + 0x23a734b65d1fc56bL,0x8bd92ff31445da6cL,0x00000000000000beL } }, + /* 47 << 273 */ + { { 0x2484e9b86f9d25d5L,0x15a3eef11bb1562fL,0x2877237285a8fdc1L, + 0xbdbe7ba434f5a652L,0xddef4b0751555a0cL,0x9ce61078fb946931L, + 0x3f96d51e4a63b946L,0x870c89e539f2af27L,0x00000000000001a6L }, + { 0x502642ad8155bbecL,0xf6405faf03db10e1L,0xe8a6560fa29bb9c3L, + 0xe50d29554c3a2c35L,0xcde535401a0decc4L,0xbba9da77090bedf9L, + 0x14ae1323a72b5b8fL,0x21a90f4992c11268L,0x0000000000000167L } }, + /* 48 << 273 */ + { { 0x0a65feba78bb30f6L,0x0235d91a50eabf36L,0x0b8ec4a4cd2d934dL, + 0xa122f10b8dfcce34L,0xc3c86b639b2e6536L,0xb8002fac84962539L, + 
0xa6893cfbf6d36ba4L,0x280e9d60dccb7f1fL,0x0000000000000072L }, + { 0xcbb4609e279551b6L,0xc6e893884b03a718L,0xdf62f063744dc7e2L, + 0x1799ea3e4146ea10L,0x73d8cef29ecd77ceL,0x2b87c10eb2229d1fL, + 0xe9d08175cabfdf74L,0xed1d16a83f0d60c4L,0x00000000000000ecL } }, + /* 49 << 273 */ + { { 0x2631e8bd95df30c9L,0x247fec887ec7359eL,0x37b1e3913bc00041L, + 0xcbe855f3b87062faL,0xd338fd26481816c3L,0x0d8dd09ad0f19ebfL, + 0x2741bf86ddff6006L,0xaef4f2e5495cc8cfL,0x00000000000001aeL }, + { 0xeb8c8ac5a59d77bfL,0x63e588b77c3fc77aL,0xbeea43ed5d00f92dL, + 0x8dff885a80169336L,0x5c0cef670a35b365L,0x4c61a43ba705b9e9L, + 0xe95772c54a8c46f8L,0x0d81ff84829ff66bL,0x000000000000005dL } }, + /* 50 << 273 */ + { { 0x3dc4ad30a16aec5bL,0x9cf3022a8eb3ec7bL,0xd940cce316b6219fL, + 0x6324514378a15c73L,0xa70b4f69107ecdeaL,0x3ac7eb803b2bfeddL, + 0xee677c763137afceL,0x9686744c986c1392L,0x0000000000000115L }, + { 0x9db83670d591110eL,0xfdc8ae98d29a560cL,0x0e27d3c0f840aed9L, + 0x98bce3eb7e127e30L,0x7c1bb6b9184ba73eL,0x410c3f4f986eb84aL, + 0x8c28448dac4caa8bL,0x2e3c82365a3618ccL,0x0000000000000101L } }, + /* 51 << 273 */ + { { 0xbf1dd991bb37b704L,0xb36ed5ae0cd09760L,0xd9750a8697aa7d3eL, + 0xd1a27c2cc8a95b0fL,0x399937693893c07cL,0xb85a62c28ba9319aL, + 0xc233630451b89005L,0x1c942e9fef7a20a2L,0x00000000000001c2L }, + { 0x291509822572cb48L,0xe1ca81031d0b9df4L,0xfb70c750ba52c264L, + 0x4cf3fb966bd2e72dL,0x04b65b4daf9a2bcfL,0x56eaf0f7db2205e2L, + 0x5551b3333466cfadL,0x886b3275c55a1b10L,0x0000000000000139L } }, + /* 52 << 273 */ + { { 0xa976f5a2f8701332L,0x203dc2057771c835L,0xb1c541bde425fcc5L, + 0xbe9921205c2cb7eeL,0x300e2b0279278283L,0x24b80fbfc88d605bL, + 0xfd23544c3006aa2dL,0x1a212c5fd593c6d4L,0x0000000000000103L }, + { 0xd9fe69f1ca88f548L,0xf2e065d1a37c6e22L,0x5804ff61e43ecc0bL, + 0x34abcb423becc85eL,0x7d223605fd8e1751L,0xa2a0ea503b76b819L, + 0x6c7b9dfe147f6beeL,0x8db65fbd3be2fcbdL,0x0000000000000178L } }, + /* 53 << 273 */ + { { 0x633d962c0c9fe7cdL,0x8fb7af4e05e74840L,0xb9cfda5ce5228fe1L, + 
0xcacff0b65199eb36L,0xb9ac48c7b6155dc2L,0xd147c6d3300bab8cL, + 0x4f58e235a5034c42L,0x097078470291dcc9L,0x00000000000000edL }, + { 0xacfde54d059836b0L,0xf755e402b6d8e2f6L,0x282c2d5581436e4dL, + 0x0a8eff54b3ba7338L,0x910d9a5dc0551151L,0xcb2a43a629e3a972L, + 0x49974730b85e35ecL,0xf79b7d5ccd5a58fbL,0x000000000000017dL } }, + /* 54 << 273 */ + { { 0x65d17a41539696a0L,0x57f8e852def0e88dL,0x8ef4f324c7739797L, + 0x532ceb30ab421668L,0x691ae9641db40e22L,0x19a80aabc518454eL, + 0x520e94d1338ad96fL,0x172bc7b6d28f8389L,0x00000000000001c1L }, + { 0xbb52038d92e07ed9L,0x7ebce2feb5272282L,0x2cd12a6bfbf12724L, + 0xd1c0d0f017e36d0bL,0x46b0fc6ca1709284L,0x41f37ca9d0015640L, + 0xcc5dea20be174e7aL,0x2831510fbcca4571L,0x00000000000000eaL } }, + /* 55 << 273 */ + { { 0x488370b5e902f19eL,0x1fc4025b68b7a415L,0x27c316dbc935ac89L, + 0x2fb89fb00293079cL,0xe4c123f52ad9ad17L,0x7031724aeb3910deL, + 0x38223c12db0a05aaL,0x199b557bdce2bb89L,0x00000000000001efL }, + { 0x49462b3bf467f0c4L,0x53d52ebfda88978cL,0xfe52b9ef3b5363f5L, + 0x64ed965fa8acbd66L,0x92e5b025750c6e88L,0x0f4841bfe5b28b0aL, + 0xd2a3d837e73509c1L,0xdf28934e0db0512bL,0x0000000000000078L } }, + /* 56 << 273 */ + { { 0xc466fd7b7514e752L,0xc0c9a0af8e1fd662L,0xd4b0ae397f3083fcL, + 0x9af1c21f24cb5771L,0x8c20329d469dcd1eL,0xb115fdd26579990dL, + 0xccd93d2a3160b749L,0x4aa17bb996f8fe36L,0x000000000000004eL }, + { 0xea95c80a4118ebf1L,0x267ce3a6e78b533bL,0x54167eb7152ee4f3L, + 0xda581b52d6d5b4c9L,0x285ddaa44f9408c6L,0xa1e4d6dc1ff6c988L, + 0x9d1464077540a223L,0x127967a3f46c92d9L,0x00000000000000ffL } }, + /* 57 << 273 */ + { { 0xb39e35c824e3fbfdL,0xafef277235e6a7afL,0x25b3068748cd5a9bL, + 0x305cd07226cd12d2L,0xbe0af44b191d2eb4L,0xe0c88872b14a8482L, + 0xffa8362a895930d2L,0xe2da964817b31851L,0x00000000000001ebL }, + { 0xc073c946625426d8L,0xd397726f62c66394L,0x3d1398374528c5faL, + 0xd8da5a25b3221944L,0xf50efb6b2635e15dL,0xf68306be42115851L, + 0xfc4919fa16837a4bL,0x5d4ab1f6b111cecdL,0x000000000000015cL } }, + /* 58 << 273 */ + { { 
0x1fd61fd0b5286dd8L,0x237ed76f1b4f3393L,0x3e86a9a9d71eceffL, + 0xc8ab89b8e74180c8L,0x2402e229e2014494L,0x41893effc276af0bL, + 0x4b0c1a54eb97a632L,0x8d403e5aabdd0d62L,0x00000000000001d1L }, + { 0x033c88e0e7df1bb5L,0xc4ff0974917b53cbL,0x6834e17dff5540f3L, + 0x333daed32024b7efL,0xd76d2d74e7919887L,0x16ad783eccf4d0d6L, + 0xe57b0eb14cd5587bL,0x9e52405b4c7e37b4L,0x00000000000001a3L } }, + /* 59 << 273 */ + { { 0x2233ca7fb47270f2L,0xae765c245a51e38aL,0x685a629043be58fdL, + 0x070f79e18346ee5fL,0x09dfa96060cc36f1L,0xba1b744a516ab0bbL, + 0xb99d4a5d2b2125afL,0xa88ef3ca45541791L,0x00000000000001d8L }, + { 0xaa2d4980e27c4c50L,0x7bb1302b2a828c2eL,0x2b7f34c6e586f990L, + 0x182b69e2b22de340L,0x084ae47141494741L,0xff811bebdda7aa90L, + 0xb62bc87c0b45b4acL,0xc3971692251fa3a0L,0x00000000000001bcL } }, + /* 60 << 273 */ + { { 0x08c553f113562036L,0x11b803b8e4d93e33L,0xe7b3f20692fb7e89L, + 0xd8352288cd0cc8c2L,0x2bf9cd2faaea0945L,0x828bf1c71d16f78aL, + 0x9228c534afc79192L,0x1d7fc7f1c87becfeL,0x000000000000008bL }, + { 0xbc9bc44354ed41f5L,0xfd759f9579a650b9L,0x951687f8bbe6ecb1L, + 0x396d94a79e55762cL,0x4e489097b787cfabL,0x522cc6c3111e72bfL, + 0xf494094c7a4d01d8L,0x2f0b8ed600666f1dL,0x00000000000000ddL } }, + /* 61 << 273 */ + { { 0xdaa376d3bf886d75L,0xda77b4e0c5fa164dL,0xc5fa99d7e140d0afL, + 0xc1298a7f239e4b91L,0x656278ea77b95e2aL,0xd4615db928c2dbf7L, + 0x679e343ab4fffdf4L,0xf454c81fb109106fL,0x0000000000000020L }, + { 0xebb250e9d09e1320L,0xc1bacc99bc92b5b8L,0xd2bc9ce8c6fa578aL, + 0x4b5480d06d5411c2L,0x4a667abdbc01eef9L,0x2293ff502b5b5ae1L, + 0x064d795250578246L,0x4999b1244736e18dL,0x0000000000000056L } }, + /* 62 << 273 */ + { { 0xacabe0d649acab59L,0xf5c52f50022e503bL,0xc388decf5bc47f3eL, + 0x8d0cdcf14f1b89a5L,0xae32536823906d64L,0xc3ce9106967e20ebL, + 0xa820be6f821b2939L,0xa323b31a73d7b74cL,0x00000000000001eaL }, + { 0x4a67d5f8ea7184dfL,0x34355eead1834b43L,0xdeb639e821ecfdc7L, + 0xfa16656adbc01369L,0xb8791490db78b8d2L,0x8433fb9c29d46e0dL, + 
0x7327033b802bbe56L,0xcb2f72c7be24f129L,0x0000000000000091L } }, + /* 63 << 273 */ + { { 0x6d4e39d3ac1f4630L,0x1fa0065075a61ac6L,0xddc652c51817a6e0L, + 0x42b1966823e50264L,0x3e980ad593896122L,0x8a47a5191081f154L, + 0x6b4199134c2ffd1aL,0xcfb1914f16b21385L,0x000000000000004eL }, + { 0xce79f49b2fc5c303L,0x9efee352e1d19a11L,0x7d084d1833b27ec6L, + 0xd078a4c3479ee85bL,0xc88a27f1983e02c4L,0xf7493d4c411e9106L, + 0xd13cca1d20cfca34L,0x80a39dc9c4f5a885L,0x00000000000000fcL } }, + /* 64 << 273 */ + { { 0xcca33db42a22b97fL,0x808d2e981942f8f7L,0xd972d1a737133c3cL, + 0xfe4b5f5f5ff8d7aaL,0x8bd08c189175512eL,0x67bad971bb95ce67L, + 0x885de1d49cf69b1bL,0x00d0ebbe54ba8e61L,0x0000000000000111L }, + { 0x50aeec386c90b677L,0xc765c4753df368c1L,0x3865a7395d423382L, + 0x5f843adf7e55f0d3L,0xa8c20b65e6159793L,0xbc3d7a4f05c2f2b9L, + 0xb6663d297a052d8bL,0x50cdb83a65a7ea81L,0x000000000000017bL } }, + /* 0 << 280 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 280 */ + { { 0x4c740ae10ac4cb66L,0xc8a66d9759c635ffL,0x328b65af2569dac8L, + 0xfdb6599b8838dbc9L,0x702473f87fd83c9fL,0x0d1f2ad8128683f6L, + 0x00ae5b505030b76bL,0xb1375d706d15b5edL,0x0000000000000159L }, + { 0x7a772a901f1e1f7fL,0xa790dcb7c669aa61L,0xdad905aec5e4f185L, + 0x3bd91c29116ad6ceL,0xf87c96eac329d8a7L,0x399c929cb5b14581L, + 0x6fd2983c3a8253c0L,0x616a8b61da988b39L,0x0000000000000181L } }, + /* 2 << 280 */ + { { 0x0d6e6eedae0e245dL,0x5c37e80f26f68799L,0x575626142aeeb588L, + 0x9261cb14de82abc1L,0xa4b446f5ebeeb914L,0xe8b749efffb71dc9L, + 0x3e1382391fcf5e8eL,0xcbdc4c5315c8cfb4L,0x0000000000000185L }, + { 0xe8acd37da80326b1L,0x4aa2d80f6ffc9ae9L,0x46da631fdb069e01L, + 0x1463768de7abe06bL,0x932e528dc3be779fL,0x756bc3de494fd10dL, + 0x4a181a650b0c9870L,0x0ceae429ae34b70bL,0x00000000000000aeL } }, + /* 3 << 280 */ + { { 0x3a070e58524bc705L,0x86404f01fbd0358fL,0x88456d7874567144L, + 0x91aabc494cb2bf46L,0x829df4fdb1e29372L,0xf177ce20230cf9f5L, + 
0x404eca037a3d29caL,0x6d9b609a231007dfL,0x0000000000000159L }, + { 0x7ff3369eb1bf4593L,0x94ff624f5e4049a7L,0x5b7ef74134194bc1L, + 0xf4bac202131b06f0L,0x70a46357b7549633L,0x0b293380761ae1bbL, + 0xb2d9c64d974c1977L,0xe27e23b400e7751aL,0x00000000000001dcL } }, + /* 4 << 280 */ + { { 0xe15ef16cfaadef15L,0x0f2927a1babe010bL,0x252539dbcefdeda3L, + 0x1e8ff8c4a98613a8L,0xec7026d9f76d425fL,0x507c1214ac386095L, + 0xfba6c762c8024ee0L,0x1088460c3f4dd18eL,0x00000000000001b0L }, + { 0x6ccf1452202080cbL,0x33b934d5b645d8e0L,0xebd4d772c0fd54b2L, + 0x62a8045e207e1dfeL,0x9e5d3b88601b9f43L,0xc8f7e47d77e357fbL, + 0x1c8764778ec21437L,0xc08c5a27069dd1c9L,0x000000000000005cL } }, + /* 5 << 280 */ + { { 0x8b7bad84aa505f62L,0x8052b3cbecbd5f9eL,0x0e985e1ab3660a2bL, + 0xc17e62d3a69bdfaaL,0x9bb1033189b5e1e8L,0xc572485a6b55d39aL, + 0xc322f39adf1f6747L,0x88881e7fc81cb6fdL,0x00000000000000d9L }, + { 0x31d3f8493310a660L,0xdd188984fdc9db36L,0x89a0af2c3c61b203L, + 0x508fae9c8e454871L,0x2ebd3d7a69879c4fL,0x5ac4c93d83e6e3f8L, + 0x71744332c59f4c51L,0x0322f749ad204b19L,0x0000000000000057L } }, + /* 6 << 280 */ + { { 0xd445e8962dac6a90L,0x6904dd238a6d208cL,0x83adfbbf4e016973L, + 0x91630953a384d374L,0xcd5e353435cac47eL,0xcec42d1088bc5e3aL, + 0x13d0e8aa5141de2dL,0x3bbd85d1be1d414aL,0x0000000000000105L }, + { 0x2b200480b911ac9aL,0x0c681a4332bd4596L,0x68e2718368edeb24L, + 0xf8a34ce78950ff7bL,0xa58265b03afb03e3L,0xd0d866606bc58ad1L, + 0xf9542bba3303c185L,0x13afe747cc393e4bL,0x000000000000005bL } }, + /* 7 << 280 */ + { { 0x6871a6e155b6f895L,0xb935913e9d7e2730L,0xfe1eebb1a9ac21a4L, + 0xfdab879429c16938L,0xeb9afe9adeeb21b9L,0x967a2fb2797f546dL, + 0x2fe2023bb9cf34cbL,0x9272977ee049697bL,0x00000000000000caL }, + { 0x348928ef50b4a316L,0x115e58ece32c9a4fL,0x6d858be4e3a235ffL, + 0x7464d70472b943b7L,0x6e785131e862030bL,0x4060a8c7b8a27819L, + 0x831b7288d3e4f0d6L,0x81c21f75b36c3f8bL,0x0000000000000004L } }, + /* 8 << 280 */ + { { 0x640c489c80744c7bL,0x76c2615a09c3bd56L,0x0a0a64544b2e41b9L, + 
0x1f67ea463635acddL,0x7a93c39af36dc54cL,0x4efd6c2f386cdcccL, + 0x789ffbcd8dd41a02L,0xb718d8187068e710L,0x0000000000000002L }, + { 0x463b2b5d8e3ed0fcL,0xef348838c77bcb34L,0xf3cd1bbd8e9bf495L, + 0xc275c5432a4ec1faL,0xe606d611cc394e16L,0x45c1e7c61a56a93cL, + 0x828c42db1f6707b2L,0x59ff294843754051L,0x0000000000000100L } }, + /* 9 << 280 */ + { { 0x23ac84cd9e107222L,0x011f6752130abb36L,0x83d1979f5931a184L, + 0x87880be04349bee2L,0x785209566b06ebe2L,0xa12c6e2f5c02eecfL, + 0xcfb3b87d2c027b98L,0xe399b69f59d49343L,0x00000000000000dfL }, + { 0x73f60492f763472aL,0xd0630d7aa32c8e02L,0x64fda2740b7eb763L, + 0xd2ebdd95c0295433L,0xd3bf0c58f8650b05L,0xc0c0a4abdf491d11L, + 0xfdd83464bec2efd6L,0x42dbbf3363fec348L,0x00000000000001e4L } }, + /* 10 << 280 */ + { { 0xeaefe760e7307475L,0x660015223de4536aL,0xc0c1396d45928311L, + 0xb972d84029ea65fcL,0xcbf018ad83c0faa0L,0x0ce90359f5116096L, + 0x33a6e9c82c9e2cdbL,0x1e61a90c5f1aa2abL,0x000000000000001eL }, + { 0xcddf71f961709e18L,0xae43057240f72033L,0xde53f951b1b7d4a2L, + 0x409f0a90cc853277L,0x341fa0c03789a7e2L,0x9cb4a809daf6b61dL, + 0x3b42585df7bd34f3L,0x7a624ac415d77b4cL,0x0000000000000078L } }, + /* 11 << 280 */ + { { 0x02e9c325f196b6a1L,0xcfc01de1b0374324L,0x289b25be0121a620L, + 0xec188af9f09b541fL,0xac954bc5fcb86a52L,0xd883b21f87a757b9L, + 0x1194e3a12741cd24L,0x3d1914b61e3ef5eaL,0x00000000000001fbL }, + { 0xb13ea4836bfd085dL,0x6e20b8d504a5dcd1L,0xcf5fba257f7af4feL, + 0x58625751914d9c51L,0x1005bcc9eb304193L,0x7b54ef23579943f8L, + 0x50ead06cc7d1c9baL,0xcd9877673664cf97L,0x000000000000010aL } }, + /* 12 << 280 */ + { { 0xe78a2eefd978be2cL,0x1958503c388b0164L,0x63a5d6fb3d35eaf6L, + 0xff6e50d5dbfb3304L,0x4b9c89e3931e2555L,0x93d6c6e9e2a419d2L, + 0xb2b26a9296f11b80L,0x1ba661c44c413fb3L,0x00000000000001feL }, + { 0xea30821624fc3599L,0x83000d9ae1823a76L,0xb614483695d2aac8L, + 0x661640c0996c6b75L,0x5212a79fa7b4f33eL,0x40c6bfde5a2fbee7L, + 0x8ce6e88e86ce3a36L,0x4a9b0c5eccb757a9L,0x000000000000010bL } }, + /* 13 << 280 */ + { { 
0xc8b31578e40e993dL,0xdd419459041d243fL,0x0c47e4492fbaf7c6L, + 0x10b731c47d71b295L,0x41dca681f36a08c0L,0xec3582403d924e38L, + 0x891991a88593f600L,0x8d684b374a984c9eL,0x0000000000000109L }, + { 0x44787399a8a706dcL,0x307e57cb53caa9baL,0xea0ad37576a3276eL, + 0xb1b79db12e025195L,0x96703f51aa828a3fL,0x174f1c27536e5266L, + 0xc2d7da86fba650e5L,0xe69208051bed53a0L,0x000000000000010fL } }, + /* 14 << 280 */ + { { 0x6076eba5d73f6d54L,0x2dc200db4826039bL,0x683c59a00761fbffL, + 0x525681dffaeb001eL,0x84175769d29ce601L,0xb5301cfc04112e5eL, + 0xa242021d3aed1274L,0xc2f154ceb3cf3089L,0x000000000000005fL }, + { 0xc754a9e65799204cL,0x186ec103d6251a86L,0xaa3bc7933f06cc52L, + 0x2512fb5391c10e49L,0xe63f08c7df01b044L,0x4138d8974a7a5bc2L, + 0xd23a1be79258b0e3L,0xddb136202bb7743bL,0x00000000000000c5L } }, + /* 15 << 280 */ + { { 0x409beff853938400L,0x3593e4f9c460d38bL,0x6d0b5d57f0673911L, + 0x22c8dcaec72c6491L,0xe6e263f173e0ea62L,0x14c1e16512ece129L, + 0x45b34f069a474ff0L,0x36c07c4fb96851d9L,0x00000000000001fcL }, + { 0x250b8e258259bf92L,0xfb6826a6866e22feL,0x1d0fb6d935566f47L, + 0x6381018504537faaL,0xf8f10b30f1d6bcdcL,0x6ce068c4abbf20c8L, + 0x3ceaa8c944fcf678L,0x44c0ee4daa121e6bL,0x000000000000012cL } }, + /* 16 << 280 */ + { { 0x3878c70adf5ee186L,0x62f4f54a6fffef9fL,0x0a7664c380400fa0L, + 0x99be58c2d0ea21b9L,0xfa70b9ade311aa6dL,0xafadd452abf43a4dL, + 0xe6291c610f7b5de5L,0x388a421578d61e62L,0x0000000000000024L }, + { 0xf188f980ee1bcdafL,0x9ccc1a6bc991f497L,0x6f733d1221ec6f5dL, + 0x59ce56b9da9f5bdbL,0xe03a9a1f40e88d50L,0xa5e60328c0190a2aL, + 0x382bdbe69ff9d482L,0x26895085bb792de6L,0x0000000000000013L } }, + /* 17 << 280 */ + { { 0xb62405a5676a3e6fL,0x5ef780579ac1ab5eL,0x4aba44936c1169c3L, + 0x0e3dce11827d4ae9L,0x2d8bffba23687976L,0x9fc0f8a871502c8fL, + 0x7bddcff7be7d4d2cL,0x00784e850b6a00acL,0x0000000000000163L }, + { 0x54fa77ad66a2abbaL,0x14fc08c88a96575dL,0xf1b50adef2dd1b8eL, + 0xeecd2a0f774ea727L,0x6cc9ccbcf14625cdL,0x8800d8a0ba597acfL, + 
0x52ee2b93f9dec864L,0x5da5fb84e642a27dL,0x0000000000000056L } }, + /* 18 << 280 */ + { { 0xcd861ffbe4649d6fL,0x2ed9b50d8dd76900L,0x5e5cff5dcc6dd1f1L, + 0x2f649d9fe440c31aL,0x1b6cab30e1b50625L,0x0280ffb2fedf3d27L, + 0x41fea755cdd53867L,0xda1c31ff4b6b3458L,0x000000000000009aL }, + { 0x504a5a4a1c5bfa1eL,0x3b16eff4cb6c1131L,0x42ae4011ae721056L, + 0xadcb0a5c2f4db3ccL,0xe9f89fbe43d4383fL,0x243f76cc28188da6L, + 0x836268eb0f53be46L,0x2fa1ca1b386264f4L,0x00000000000000e8L } }, + /* 19 << 280 */ + { { 0xdf5b66aab830b085L,0x2f6cdf43b7a797cbL,0x0101d0724241a303L, + 0x0a16ad64a6623b9eL,0xcdbd54b3a9f4c582L,0xc97a95567ee209feL, + 0xac166a4f72e05927L,0x944328443d9fa567L,0x0000000000000073L }, + { 0xa051a3107a366e15L,0xfd0e5a53c6bf5c08L,0xa240cf6d978ada91L, + 0xe8ad78c9349ea3b3L,0x9494acb015573256L,0x2bdfd51e391180d4L, + 0x911ad26d30e11852L,0x066206b1c0d32653L,0x0000000000000173L } }, + /* 20 << 280 */ + { { 0x682b6f1d6f37fe71L,0x3754800ccadcbcd8L,0x12367b4b70b949f5L, + 0xa470950748bd0160L,0xf46eb3dba321a86bL,0x9ea717597b65a082L, + 0x49607c639e80e35eL,0xcae1103f0ed3f9afL,0x0000000000000110L }, + { 0xfecfb034c97c9712L,0x46794ae3487685c8L,0xd801e8d1d123a90bL, + 0x6ee2529e5b0b89caL,0x2b3b7f4cf650661aL,0x23257437dbf31d1eL, + 0x6d2e599d2f61e4b7L,0x4853ca0956a975a9L,0x000000000000013eL } }, + /* 21 << 280 */ + { { 0x4641c185951032bdL,0xb4a82d1ed654967bL,0x4f60abfaebe0534aL, + 0xd8d59a71563969dfL,0x69a8365d897b93d0L,0x34dcbce7a3ddcad6L, + 0xb4798d7484801108L,0x1ef48f2b9509858bL,0x0000000000000186L }, + { 0x8f01ec3308d02285L,0x8f361eccfa7f5f30L,0x03d8d9aa173946f2L, + 0x2ee8b5750e55fbc9L,0xf3d5ab9b804c4aa0L,0x928acfc13fb93feeL, + 0xb60fb73a1f387d73L,0x4f0a74e1fabfa19dL,0x00000000000001aaL } }, + /* 22 << 280 */ + { { 0xdd033085ffc38940L,0xbdf00b79e0cbd312L,0xe027364751bfe81bL, + 0xc9dd4f9d4889fd94L,0x307b0e6e06260888L,0x1cc21a4c57fc3aa6L, + 0x80015db80375ec4fL,0xd8cd58e27ec6ce20L,0x000000000000017cL }, + { 0x46ba9532bf2925beL,0x948247d693eaa62aL,0x7dd8c538b71fae69L, + 
0xc90496865561d353L,0xdcbb708b427cf50aL,0xa367d55f861bc80eL, + 0xf46ec4634a8f28ddL,0x3481474dcbce8110L,0x00000000000001acL } }, + /* 23 << 280 */ + { { 0x254c69ed55155f2cL,0xe14b1c5db7d2603eL,0x2dffb214ba8bd917L, + 0xcddb7eb590dff8acL,0x66e23246426e1109L,0xcaa97ba1cdf4a95bL, + 0x56522b0177be0ef6L,0xa81706f55d3f77d4L,0x0000000000000011L }, + { 0xd4b76f04639ff0cdL,0x15091830d19f7652L,0x64d3e1589351196cL, + 0x63b04349370e92f9L,0xf247c96d68ca3652L,0xfda88c1295b0a39cL, + 0x09ec4902691b35f7L,0xf1e93e7656a345a6L,0x0000000000000195L } }, + /* 24 << 280 */ + { { 0x1c1293a1f9640d38L,0xc24e8f93b6709da3L,0x5d9b87cb8cbfeeaaL, + 0xab49ee81b0fd1d5fL,0x09cba9064c63b1b5L,0x379febb06f867688L, + 0x2061b6eec99ba636L,0x966f2eaf51159b58L,0x00000000000000b0L }, + { 0x4a130277873b42abL,0xacd92f6c1c8ab9b2L,0x117dcc6ba66d3168L, + 0x70362ec85dae335fL,0x1f13ef5eea5f8570L,0x8268cf14a075c01dL, + 0x2f8431fad2b71b7fL,0xfff9c7d1382b4939L,0x0000000000000046L } }, + /* 25 << 280 */ + { { 0xed53320a5467b695L,0x95777379d81bc3c6L,0x1f53f221165800f8L, + 0x5a3a36ac90c1143eL,0xf319be6f7a9ab721L,0xb6d3e1158c4ed728L, + 0x6a9e9dccfaceb5d7L,0xa8abd38370e27532L,0x0000000000000080L }, + { 0x28f0cede226f9ad0L,0x8a96ae65e9efd27fL,0x4fe7eb7fef2564a2L, + 0xdea5e9aaedd96a1aL,0x818a8f76469bfc52L,0x0e8eab80c7f687f6L, + 0xebe81c5d4198b48fL,0xdd7e25c68d9d05f8L,0x0000000000000144L } }, + /* 26 << 280 */ + { { 0x5cfa7cbf1f54bc21L,0xd37a2401c5bf8ac1L,0xd6e4aa9fe1b4948cL, + 0xc0af78bb87876571L,0xd5f38c4a9435c756L,0xdeaf4d5d196d5a67L, + 0x847999d3665b16adL,0x55e422ab944e41cdL,0x000000000000011aL }, + { 0x1c73a7be1f893849L,0x98773cab9b84e408L,0x5e6a4e145ed1c89dL, + 0xe78d1b90716a5325L,0x1513fe4a577ed006L,0x7b8cfbaa90824caaL, + 0x663d3fa73c2a2886L,0xc9d7e9608eb5b6b7L,0x0000000000000167L } }, + /* 27 << 280 */ + { { 0x7804ab0cd5dd017cL,0x2ee33f77c259ae7cL,0xbc379b5e1987fb05L, + 0x472603f311e07d17L,0xfde157912168f043L,0x474bbd4c5d357133L, + 0x278fdbb181d0f190L,0x44d8a1465f7a199cL,0x0000000000000090L }, + { 
0x7cf6a404bbd57b48L,0x19a5cb69db6852eeL,0x6f08b1da1b26de4bL, + 0x04262f2a58360fe7L,0x287dbfd73bf7c760L,0xddcf22856e702e73L, + 0xe4f8a899fe86dc89L,0xee9807e0c06c6e6fL,0x0000000000000118L } }, + /* 28 << 280 */ + { { 0x0a59179b025c0e2fL,0x43088758ddb8e4e4L,0x9ccb5d9baf9e3d51L, + 0x5b220d64f6cc6fe2L,0x43a38530f69d1a66L,0xcf4dace9fdc5b0f2L, + 0xf1bba4e1418ba2bbL,0x70e502e9d82fb27eL,0x00000000000000ccL }, + { 0xb4fb372ba990ec9fL,0xc1c31603c5fa0250L,0x79b46f6cecbe934bL, + 0xad2fc8f7b28f7d50L,0x240f4901b4d4e6d9L,0x11c72be533c59d92L, + 0x16cb9bc5c94ffb67L,0x8517fa2b2e957b97L,0x00000000000001e0L } }, + /* 29 << 280 */ + { { 0x8672f8c178c2fae1L,0xce853dc0b390e692L,0x50cf0558fc7389d6L, + 0x0eb7599014218bbbL,0x8942e82713a837c8L,0x07fb92d0213f6565L, + 0xc3e799a7db1d4bafL,0x93ef2dbe0d7238aeL,0x0000000000000087L }, + { 0x7e1c949a4d6f61edL,0x0951d2409855b4b2L,0xe99da0df0768f131L, + 0x3a9895ca40b9263bL,0xeec3111e988e877cL,0xb887eeb1392e2a50L, + 0xd26c888f3ae6a3edL,0x60db343170b42b56L,0x000000000000001cL } }, + /* 30 << 280 */ + { { 0xd6d025d839e224e4L,0xea671ba973ee2abdL,0xf71e527f0b2458ebL, + 0x97adc0a443f0bb0dL,0x39d5e7b826a9a537L,0x02ee550a7e29adfcL, + 0xd7fea1b5e6433c20L,0x6777e7f94b13721aL,0x0000000000000126L }, + { 0x4682b1468c2c454dL,0xd6e496150d5c34dbL,0xe84ad426a9213f48L, + 0x9143f004be2e5e10L,0xeb48a5a485f087d1L,0x10d141c784d2ceecL, + 0xc96c7d6673ca411dL,0x28aa426c59faea28L,0x0000000000000144L } }, + /* 31 << 280 */ + { { 0xbb86295ab95fe143L,0x7a0778a004aa86f0L,0x72402bb054f3df03L, + 0x5d2146943f81659bL,0x19aaa8e646689638L,0x8de0ef98937688a2L, + 0xda612f96d910473cL,0xb0b8a1120a41c825L,0x00000000000001b9L }, + { 0xd8d116daef118dcfL,0x4e24efd829a90bebL,0x3bc302db0e413b54L, + 0xa0e984fd1a8c9f3eL,0x7b485798206268a1L,0x52a1f4523d769ab3L, + 0x7bcb688e6853067bL,0x6b7f3335e784e934L,0x00000000000001e2L } }, + /* 32 << 280 */ + { { 0xba3a04f75f8fa47eL,0xb9c44aa1833dfb7cL,0xe4f3a4a6af4ea679L, + 0x02b4bc288240ae75L,0x390076ae282412ecL,0x74e5bd6406709592L, + 
0xb87b1a57dbad0025L,0x71e0c6f732123b04L,0x000000000000016cL }, + { 0x266b71d5e4ffc2c8L,0xb061cb88a73f6fdfL,0xbca068cd75b93ff4L, + 0x4552d43b97712a14L,0x1035134c5b941ba7L,0xc14623abf31b6915L, + 0xdfdd91c4aa972a0dL,0x54e8e7e0e17b9ad3L,0x000000000000008dL } }, + /* 33 << 280 */ + { { 0x664d379c66087692L,0xe4cfe5932b4cc0e5L,0xa293eb7c765981c8L, + 0xa366a8be9ac9be69L,0x18a8fabdca5c9348L,0xa3d94fcda2a2c5e9L, + 0xd6d42b142c19d210L,0x90c8d32950ebedf6L,0x0000000000000077L }, + { 0x4b232a133ea6cce2L,0x9709a4f5efe51889L,0x6641e274e233ec8fL, + 0x68744e54abf46598L,0x8c12e8da847eb365L,0x31c4a313be0dc8beL, + 0x88503ca97bce4bd9L,0x98356a4724bf7fdaL,0x0000000000000058L } }, + /* 34 << 280 */ + { { 0xdd5dfc11a0dad11eL,0x6af8128d95bbbc01L,0x2a09533d2ba204d8L, + 0xf656433c14308665L,0xfb9b0752873ddf2aL,0x462baf84bff789b4L, + 0x4119391c36f8d363L,0x24a0d61bc83d0643L,0x0000000000000128L }, + { 0xa811558a81d32679L,0x73ebefc16d51d480L,0x52e334d76723ca36L, + 0xe102931c3bc64e40L,0xe0fc619a226f57ebL,0xc6533d88d5a49296L, + 0x8a96610b856dc0b3L,0x7fc05cda95a3bcb8L,0x0000000000000128L } }, + /* 35 << 280 */ + { { 0xbdbe8a3a30b14db2L,0x7e4bc20c9a8247cbL,0xa620bdf811e536d0L, + 0xe972971a67a49b87L,0xa10d8379f31c11efL,0x8908fe100b816ab1L, + 0xe44858845c7db701L,0xacb4e91129807371L,0x0000000000000028L }, + { 0xdb757df6497c4dc0L,0xad84afa0fd068916L,0xb98bb9c9c0ed25c9L, + 0x9af8511a42c7c5feL,0x853e412f8549f45aL,0xe5fcb0b400cdd781L, + 0xdda7fef0653d6e1bL,0x3340956d20aa3f18L,0x000000000000018bL } }, + /* 36 << 280 */ + { { 0x3751cc30569b8171L,0x0a5f50721195ceb6L,0x67a550c5aed5f6f7L, + 0x9fa0ca1a467c1996L,0x342155f04c5cc05bL,0xa8fd7a3b1265b643L, + 0xa105382b94eba9ddL,0xd2dc6762655a3ed5L,0x00000000000000c7L }, + { 0x2913e45239c2dcc4L,0x81fa35062758d09cL,0x7df16b9411840bb6L, + 0x0d96f28f04eeade0L,0xf5ab6c8c883daf5aL,0xa53d625e24e9795aL, + 0x14158b332a711c2eL,0xd5ce7e2a97ad3f03L,0x000000000000010cL } }, + /* 37 << 280 */ + { { 0x9871dafee3cd99cbL,0xca11ffb2f9d1422bL,0x0cbec630ffe7633cL, + 
0x095207519701f5fbL,0xe7cdf5f0b714a25bL,0x54b44266e17aa21aL, + 0xc53a84760ef13940L,0x9e8c18f56acbd9dfL,0x0000000000000057L }, + { 0xabd27e097605396aL,0x731f83a012ea5abaL,0xb6a23513f28d07f9L, + 0xbba530fa2ea3b230L,0x38f79ffe89abfb21L,0x2767e58ab7dce472L, + 0x7eb867f9c16bd8b5L,0xeab0414eb76ce0c7L,0x000000000000009bL } }, + /* 38 << 280 */ + { { 0xe100e0eec1c91709L,0xa73bbf8c56238cdfL,0xe8b191e09196fb53L, + 0x0c54c7820203b3d5L,0x914cd8154ad45f0cL,0xf488ed49b504e7ebL, + 0x91983a3b525a0122L,0x7a9db32607a6bc76L,0x00000000000000fdL }, + { 0x9e5874870b0b0ef1L,0x0c5df87d4deca852L,0xa671e125fcfb2b5bL, + 0x4c0dc9d789fb90a9L,0x8bc6b9a1b47b79f0L,0x5ed3c685bcc0d53bL, + 0x107f5ef1ec8aaab6L,0x5f93971f98ffcd41L,0x00000000000001e2L } }, + /* 39 << 280 */ + { { 0x81d5acf81e69cfcfL,0x5786ae3471bda15cL,0xc3e9c790d4ba8896L, + 0x78415f43b320fab2L,0x5407bb1189d23809L,0x6b5d5e41d3007de8L, + 0x1f686d5c39797363L,0x0be555c28d9004c2L,0x00000000000001ebL }, + { 0x8eb5c5bf91addc9dL,0xe8b81cffe200f350L,0xca3b953d44b42146L, + 0x6ba06ea9ecc482e4L,0xa6a70a446e4d1a38L,0x3210ff44eff587e8L, + 0x9ad9ba8e1d8a7cf6L,0x813c4d94c77bf97eL,0x00000000000001f3L } }, + /* 40 << 280 */ + { { 0xe31effa2306c1354L,0xb5e2c13d85925368L,0xb1f2607f35ef339dL, + 0xb06e916138e6170eL,0x4e6644b8f3f3e31bL,0xda16e937e3e59db4L, + 0xc66e7dbf6accfe20L,0x31622dae364dc59aL,0x0000000000000187L }, + { 0xc9d619cbc2b0bd83L,0xfe780349cf4a8cacL,0x8109dbb48b3d6584L, + 0x55e1a93c943709d9L,0x651c52d321ed38d8L,0x87ca86938ef82c60L, + 0x6ba45f8f4280d960L,0xe903e26b7e505800L,0x00000000000000a4L } }, + /* 41 << 280 */ + { { 0xbeaefb738d027004L,0xb93ae273b643fdf8L,0x171130c78b054ce8L, + 0x03e50d97fa4348b9L,0x1e136c40bcd5a01cL,0x4c3c0b3320e072eaL, + 0x5b07b9f5fb37369aL,0xc306209f6aae7067L,0x0000000000000096L }, + { 0x28f228fdd8133dd9L,0x5e97b819ed9f4989L,0x0511850fa8e2e90eL, + 0xfba9dac8a577a370L,0xd8f8a690e9748a8aL,0xc4061391c30a3f0bL, + 0x22a676e042f585a6L,0x252bd6609c5f7211L,0x00000000000001b0L } }, + /* 42 << 280 */ + { { 
0xd9e89ab1b2261b5aL,0x896f863c0bccf5d5L,0x09ba590aa5b7eb39L, + 0xc8f52dfb0fa4086dL,0x3f236dfd1075176bL,0x0e0cc891296f969aL, + 0xdfc18d50a2313921L,0x8494f9a73ef48a34L,0x0000000000000084L }, + { 0xd57a2f227f5e334cL,0x69d7c5b7ded57c18L,0xcdb18f19b1ccf76cL, + 0xeaec2b7c33021735L,0x41b62e1affce6cf5L,0x12282e773aad7e4cL, + 0xf9c2216e9db6619cL,0x5f1678eaf32e4661L,0x0000000000000079L } }, + /* 43 << 280 */ + { { 0x3806aa79ed2215aaL,0x77b2b474fda96f56L,0x4957726aacc33f60L, + 0x3aeede639dcb6de3L,0x7c5a4b2dd1f46307L,0x03638122d27dc35dL, + 0x5c8c687cad802f35L,0xfb84505dcb2b917fL,0x0000000000000089L }, + { 0x86ad4d10d3241367L,0xa5bbbe2804283e3fL,0xae8a8a156cf1b159L, + 0x8b30bb7f97d37100L,0x302e14e45dc590ceL,0xe3aa0b5dcdd1751aL, + 0xb2d7de843c7c9c0bL,0x9693fc8121ea3e9bL,0x00000000000000baL } }, + /* 44 << 280 */ + { { 0x30e7e4bda557a890L,0xb6a8ecf96b5cdb6fL,0xbec5ed5023b68ea9L, + 0xc359d11f3fe99380L,0x84e9e7d83bd97138L,0xa58c8a9a3a5815eeL, + 0x759839a47c240941L,0xb652633099670993L,0x0000000000000144L }, + { 0xe3f92bef6bfae59dL,0x6eb6a3ef34c6f089L,0xb35ddc7862aa2a94L, + 0x2495301b6c27da9eL,0xea719a48d8d23941L,0xf0be43ad79c61b1bL, + 0xbafb76e1a27c859aL,0x9d2f814c97f29bb0L,0x00000000000001f7L } }, + /* 45 << 280 */ + { { 0xe3d9794863bfeecbL,0x787b7b93c4e33b5dL,0xd193545380e717c7L, + 0x80f09edff51d7ee8L,0x5ece8dec2cb8e492L,0x1c2e49aeb0fbdd75L, + 0x555ffd0c62ca40e8L,0xb087f09cc34239e1L,0x000000000000011fL }, + { 0x058e0296c7e17929L,0xc37bb9507cabdf29L,0x3f760103337ba5a9L, + 0xb04c49872eebc4d3L,0x06ed604d12273246L,0xb2b6a2ac48a00bdfL, + 0x7b9eee4ef1e6f9caL,0x2bd8117abfcb3b3dL,0x000000000000003cL } }, + /* 46 << 280 */ + { { 0x9f03aa3fbadc0444L,0xd4646ae421350843L,0x9d669c31ae89c5cfL, + 0x28f2dcfa88476683L,0x1a27c9a49927c5d4L,0x05f0d014342307c4L, + 0x74a305b664bcee6dL,0xf2d3fa9c6b8102a1L,0x000000000000004cL }, + { 0xb2801ee8e8313032L,0x484d7fc270c2bf31L,0xb1e823146bcb4da5L, + 0x01c6855cf5dbd4bdL,0x59bc8152d21f4295L,0x452be6974095767fL, + 
0xd1557ede45d79429L,0xec32dcc2afb02a09L,0x0000000000000173L } }, + /* 47 << 280 */ + { { 0x196940e45ba5d05dL,0x6576d0f4bbc541ecL,0xe5e984bd3a91a1daL, + 0xbc63c9eaf3498971L,0x89135ed31f03c14dL,0x44fe60e13379f59eL, + 0x28a5a0b752e302bfL,0x14d350309b2c7e38L,0x0000000000000073L }, + { 0x1c34012525832327L,0x88c1ebb85e862352L,0x5e82c2f646d29a75L, + 0x98aa3bf4b5f97690L,0x17a8b7d4fbcc02caL,0x8190772afcc8f81bL, + 0xee79fec1e5a4dc65L,0x62c89b093fe96241L,0x0000000000000166L } }, + /* 48 << 280 */ + { { 0x521cb4e322b7448bL,0xb51bca884c726beaL,0x9391bdea39c312eeL, + 0xf9128ef72c8b9a0aL,0x988f9f78a8e51d04L,0xdd44418f96c8257eL, + 0x3688fb242d7f2f00L,0x2f92e5ff1b1fdd4cL,0x00000000000000e3L }, + { 0xba865459e98c49f5L,0x3545cf759dfe9dd3L,0xeb3ef7ffce823a85L, + 0x39bc813a9b39a2a8L,0xb753463592a11c12L,0xb16a5bd61ee6d123L, + 0x3192e6dd82fef180L,0xebfb81b4287687c4L,0x000000000000008fL } }, + /* 49 << 280 */ + { { 0x41889a308e521628L,0x353bbe7cd54a23f0L,0x6e30dc47b96e2ee6L, + 0x957c31e030bf9a04L,0xe87fac6e880b0a80L,0xe7f5c5c603c6d936L, + 0x2b605a09abd24985L,0x24da8804c0227c2bL,0x000000000000009bL }, + { 0xbe44195151deea0cL,0x1d27a5349a94a2b3L,0xe89bd706906fd00bL, + 0xbe0e3af4b48a3888L,0x462540f9ba0f807fL,0x5698134e4480f7e0L, + 0xefbf0694f9470f38L,0x622aa439f2147a77L,0x00000000000000eeL } }, + /* 50 << 280 */ + { { 0x3d0eff5a9e14a157L,0x17b32d7a1e610aebL,0xd7d7e6da192e4c2cL, + 0x9fe47524bafe7a78L,0xffde6e808f0f59d8L,0xfa5b05067f07595fL, + 0x727527ab0c4ca54aL,0x3125b812f2ff633cL,0x00000000000001c5L }, + { 0x9949eb375ebf3165L,0x6b3b1ca23d969575L,0xbdc906e777691f06L, + 0x5c7fb79048e77747L,0xe14e6e1461b6e449L,0x168745a6ae1ca8c2L, + 0xb1a8c53a10d7485eL,0xafba1519b3f97687L,0x0000000000000012L } }, + /* 51 << 280 */ + { { 0x1954666c0c70fb24L,0x82266d143f951a55L,0x05d351fb7571c7d1L, + 0x18474822a33960cbL,0x9e21b386aff4d604L,0xdc1523b7c7722030L, + 0x97d8d47c3b40f662L,0xe46c6e58c518f8e3L,0x0000000000000197L }, + { 0x33776420fdf51fe5L,0x13c170deea2d40baL,0x9f058330686e0c42L, + 
0x187d65578e9b7d13L,0x60731c312fd50daeL,0xc4a33b8f616a056aL, + 0xde3318374d1e0552L,0x883f0b1c76622fddL,0x0000000000000004L } }, + /* 52 << 280 */ + { { 0x702fec554340d2f2L,0xbfddf7ab5a6c6a38L,0x52d5a7ef0a77c874L, + 0xb31d293155a58f3bL,0x3088d8036274e0d2L,0xb939133876fc6ab3L, + 0xb144eb7c8e936c61L,0x9c6f8e0c31f651c4L,0x00000000000000eeL }, + { 0xc4f7ef971969c94bL,0xd0ae9ee1b1ee6473L,0x9a2310ae3c6ef7b5L, + 0x201667dcd9af7a6dL,0x177ef629e661696bL,0xa295439c27dce2c8L, + 0x917e7ad90654b39dL,0xf4f4160e253e2455L,0x000000000000016fL } }, + /* 53 << 280 */ + { { 0xa5f4d18206539fa7L,0x00e9ca62193bd001L,0x4f82a43b34a98a26L, + 0xf191a2c7acba36e6L,0x24af9551e410e4f3L,0xc497e07f6d3535ddL, + 0x29c932541c60ad3bL,0xc979ef0d599a657aL,0x0000000000000155L }, + { 0x0481964ddc0c026aL,0x02af224ad01e5c0cL,0x82bf913f1c2ef89cL, + 0x86f6d32c471bf1c0L,0xca2ce84ad8022d80L,0x4f145b6799803487L, + 0xebd0ffccda0964d7L,0x19f8d6f7bcef48f7L,0x00000000000000bbL } }, + /* 54 << 280 */ + { { 0x509b71b9ac36d28aL,0x15d32c770811d528L,0xb793235561dc6575L, + 0x05233f01fabb650dL,0x3a16d034167495fdL,0xd567e0bf8c1efb75L, + 0x253895e41fec8e93L,0x6b69015c5648158fL,0x000000000000000eL }, + { 0x879a7df915627f86L,0xab3caf87f08f9464L,0xaebb91e351cc0b72L, + 0xf5f3a2ce80718925L,0xb5740882c3e6f70bL,0xed18d27690f16d5dL, + 0x318733e770b09c3cL,0xc1e1c737e4c59a44L,0x0000000000000095L } }, + /* 55 << 280 */ + { { 0x88537bbe883279acL,0xd66ce961da9ea3d7L,0x5a009e7f6f24804cL, + 0x1aa5de7b572a81f5L,0x0a2d89e029910a84L,0xff6d0adba6a3b709L, + 0xf709ae9d176ecf3bL,0xb55a2f97857e7161L,0x000000000000013eL }, + { 0xd6ef2ad66d5ac444L,0x0cf086d7c738f5bdL,0x36484c34061945a8L, + 0xad1acdb879dd65c5L,0xd58ac17af96e2538L,0xb1bc5d14ee8ca9ecL, + 0x51ee010ef4765340L,0x2285cc3b31335245L,0x0000000000000138L } }, + /* 56 << 280 */ + { { 0x939fe75a6fc48236L,0x23d3887d911d8cbfL,0x258fea93b7d3a0e2L, + 0x9b073bf37f01bba5L,0x7324e1698e6cb543L,0x44843d4aaf26dedfL, + 0xa04a7c60ede00d61L,0x10218bd6cc5a03bdL,0x00000000000000a2L }, + { 
0x33c5563a2b93360bL,0xd50f6be0d4e9d47fL,0x78e7dfa1c7ad7ac7L, + 0xff7c2c5a92c64ebeL,0x932531e7bb22d05aL,0x1c8f30604bc070ddL, + 0xbd9787cb8d8150d1L,0x8ce2223b0cee42b4L,0x00000000000001f6L } }, + /* 57 << 280 */ + { { 0x6731c027e046c0a4L,0xf718dcede5a06138L,0x78ee5eb548b6ba4aL, + 0xc2d5dfcadd81e407L,0x35da8dfe47dcfcacL,0xdc81e42a8eb7a98dL, + 0x35db538102f60af3L,0x137c92f71af4e84cL,0x00000000000001a2L }, + { 0x5fff48bc7fae3e38L,0xed6bb7ba7bfb9009L,0x3ea9829b94d9f8d9L, + 0x1b02d966e57ac9f7L,0x2a2e7b3b9e17a7b5L,0xfd677d10f8b5971cL, + 0xf02c62c4feac2153L,0xb2853d963d277f34L,0x00000000000001a0L } }, + /* 58 << 280 */ + { { 0x34cda6b2e8bcbdccL,0x1ca83d8ac58e01bcL,0x9c38e61d6a6033dcL, + 0xd3df77d1696792c9L,0xdf573ec644eb1ecdL,0xa8a065432d74f38aL, + 0x3cb79ad7491230eeL,0x828962151a271b8eL,0x0000000000000182L }, + { 0xb889ef07d85525c3L,0x39a37a8da74b5e4bL,0x6dd97fe1ba9f0edfL, + 0xfc32a4c55df3ce92L,0xfa0958f820517719L,0xd2570ed03c80801bL, + 0x7b0d90f5df333517L,0x5448e3b21cbee93dL,0x0000000000000005L } }, + /* 59 << 280 */ + { { 0x9dafe225907309ceL,0xb5c7f7796ab4285bL,0x025fe316475406c6L, + 0x6427b6a73703b8aeL,0x077b26865c34c5ecL,0x16bcd88fb1889dd6L, + 0x963169a8eab6ae3aL,0x2a42b76c7535cd94L,0x00000000000001d2L }, + { 0x7a7d5f3e60ea3972L,0x03495625a755ec1dL,0x441847032b183692L, + 0x8303d22fadd33ccdL,0x6932dab9215523e8L,0x26f696dea7164118L, + 0x682a8f9abb3aa620L,0x86aa638d15247803L,0x0000000000000078L } }, + /* 60 << 280 */ + { { 0x51849a5b53fbf94cL,0x2436d1f33647dfb1L,0xbfd7dfe1ac158d60L, + 0x4157581eaba95982L,0x574c1d648b89798cL,0x6d659219754f1eb0L, + 0x0654acce001d0c68L,0x8f1cafc9c86fb259L,0x00000000000001aaL }, + { 0x96cf49426662061dL,0x6928d7c4565613e2L,0xfa3e5e7011afa907L, + 0x2bedac91cd3c3062L,0x7b05951c87adb45bL,0xa2b68ef665e3fffeL, + 0x682b65bedb7b8c54L,0xf8ef1353338a27a4L,0x0000000000000153L } }, + /* 61 << 280 */ + { { 0x3eec74aaf0d73d13L,0xb1200378a199d41eL,0x0501130712fce9ecL, + 0x59e08d4685af38d6L,0x1c3028b097eb7439L,0x4389b2a7c8c2d7f4L, + 
0x41155e4c03df89a8L,0x06a993c37e415914L,0x00000000000001e3L }, + { 0x1a57145402fb35d5L,0x818c71f0107f51ecL,0x009f840d42da6adaL, + 0x67e4374f54a5468dL,0xf2250a74b245d86aL,0x09bc3e44bca120b1L, + 0x6fdb2a6b308416a3L,0x0bd60e1c6a9146c0L,0x000000000000010fL } }, + /* 62 << 280 */ + { { 0x19ba4fdb0b35a9e4L,0xfc93eff181057e1bL,0x852ae0abf5380848L, + 0x910c30d66877b719L,0x3bf62a9503916cb0L,0xd56a881ea3c2d070L, + 0x256c3542a356f757L,0xcef8a8d929d75c65L,0x000000000000007eL }, + { 0xca44a43576ea795bL,0x611ecedea8f27192L,0xfcb3a868de9182bbL, + 0xac04e9d7dfa3eca5L,0x03f47adae89f80a7L,0xde7b801854858348L, + 0xbf27c5a14a0e3befL,0xcd91a04d3461a145L,0x0000000000000117L } }, + /* 63 << 280 */ + { { 0x76dec50fd577c07cL,0x5a1d94bf5a8d2c7aL,0x871183453616e745L, + 0x8b4e25d136e7d7daL,0x0464ac2c0033ea1bL,0x62f6569a2c5caacaL, + 0x1989e0f6d2649cc3L,0xcd5990464b02997cL,0x000000000000006dL }, + { 0x8df4238f7a8f232dL,0x04a5230661f90575L,0x179274765fdb83b7L, + 0xddba74da6f09f3f4L,0xf6cfd200793d48bfL,0xe3ee41a1c980f85dL, + 0xab48c17ae75f5667L,0xb09a7c62d4d971e8L,0x0000000000000009L } }, + /* 64 << 280 */ + { { 0xa99e0fa88b404760L,0x5f6b7eaa28835bbaL,0xebe75466811f0bccL, + 0x3d69da3e9d0fffa4L,0xd815e361185f1ae0L,0x78623276b30ff597L, + 0x3518246ea8466de1L,0x3c8e0d4a516b0e43L,0x000000000000003dL }, + { 0xf4bc4434d1614abaL,0x49d062b7bfedf3abL,0x878c93290fc7e5b4L, + 0x727ea9198a92c516L,0xd159dd269ac43994L,0xcc04bc19c25a36a5L, + 0xcc7ffb96172789e1L,0xc136b9c92f2ee22cL,0x000000000000014bL } }, + /* 0 << 287 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 287 */ + { { 0x758be5677ed6ead3L,0x5aab37984ea30f41L,0x8a66701d724bded8L, + 0x3e1ad9a070544b4aL,0xd0286777ee940423L,0x3335377b329a995eL, + 0x244d471a4eee58a7L,0x32d4dfa067275e49L,0x0000000000000105L }, + { 0x7ef7dd10f91eeef2L,0x51c22e8bba0c7d57L,0x45c515d485233341L, + 0xd9d7cefcd353231aL,0xbef9d961da5bd561L,0xa5110c6f5531a41dL, + 
0x0f1f7a194328f21cL,0x1ad6d24c49ec18e6L,0x00000000000001a9L } }, + /* 2 << 287 */ + { { 0xbf2ffcb76e640a8dL,0x63479501a0b5a07eL,0x55bb30015f36f2e7L, + 0x9890f999058a676aL,0xd9ee341b455c0d27L,0x6c95780c18e08fc8L, + 0xeb63a675442a075dL,0xce46a1a5fb69da00L,0x000000000000006bL }, + { 0x8c618934f3ed6253L,0xe00342446e9433b4L,0x989b99214260b2c5L, + 0x11699804d9df4747L,0x125fe61f763a7f63L,0xa972786dc6ec395aL, + 0x662f9fe7b7b7b8c6L,0xfb644a615456556eL,0x000000000000008aL } }, + /* 3 << 287 */ + { { 0x2014334668cd4338L,0x46b6c25dddec02b3L,0x7ed7b8e00f3bdda3L, + 0x233b739abd4bfa08L,0x5d637074764e5f80L,0x16dca3c8bf2e9eb6L, + 0x833e08beee088b75L,0x2e6d8782d0ef69a0L,0x0000000000000067L }, + { 0xdd6bac439054fc97L,0x20f0fe3f00b0046aL,0x1f53f565aa8b318aL, + 0xf74826286eba16cfL,0x899f86ddb055944fL,0xdecc577c4b26279cL, + 0xefd021ba3dd92904L,0xa8be3c70f9aa1d87L,0x0000000000000029L } }, + /* 4 << 287 */ + { { 0xed29c2ffb0bb44d8L,0x0874c2a5fa00ddabL,0x9398e8a2065a4a5eL, + 0xa6e1952691aab917L,0x20a816ac667898deL,0xda4b97934b94a66cL, + 0x6fee1473b00e36e2L,0xcd9958d793a46ce6L,0x000000000000000fL }, + { 0xeab9fa64a362872fL,0x87cc55b095457fb6L,0xe3e3f13d3c702019L, + 0xe025b3c8ff6208c0L,0x8fcd5b2fd8b8f87cL,0xcf9ded31789bc9edL, + 0x7474244818d40f49L,0xbf14e1f046c33140L,0x000000000000016dL } }, + /* 5 << 287 */ + { { 0x509b4ebd2a3f7a56L,0x284be6724315de88L,0xd9bc95a7708c6237L, + 0xb614a4d485dfe317L,0xe35de3d30c1c5737L,0x9e0b17191bd97337L, + 0xc3b084f841c2b929L,0xce03487b0953b7ecL,0x000000000000019bL }, + { 0x35d5de3da22978fbL,0x7207c915b2212285L,0x109219a2328771e5L, + 0xb262f06c8716c153L,0xc47aadea45f2bdfdL,0x7032cbeb4a1497f4L, + 0x1603a5f69eabdf95L,0x56c6eb2bf53bb028L,0x0000000000000033L } }, + /* 6 << 287 */ + { { 0x0aa08334a3a5a2a2L,0x7b1e2f91dcfc5939L,0xe1c64158673e221aL, + 0xa9ce2061cb3319d0L,0xb359c34b977a9b41L,0xc127af82b7f9fc83L, + 0x18ada73481166c12L,0x1c4c48fb092d64d4L,0x00000000000000eeL }, + { 0xfed1a8da4c5015c4L,0xefd54478526e1897L,0x0ac4f64075bbdc91L, + 
0xc0774cce910661ecL,0x699bb035c30ccc07L,0x836b7b1780952ccfL, + 0x03b8bfef9788bdc1L,0xb24604c5fd47a69dL,0x00000000000000d6L } }, + /* 7 << 287 */ + { { 0x8f58d77c3904520aL,0xeaf3076af6db1f47L,0x249c1fdb6a139301L, + 0x26211ebaa843505dL,0x4d3b5e4366b80789L,0x9adf20c48887bf76L, + 0x411b47eb328f97c2L,0x3d00025636d82c8aL,0x0000000000000022L }, + { 0xf0826b0c86b4639cL,0xcc429385bf2141c6L,0x72d72d948f036210L, + 0x4aa9b7afc64f7650L,0xdfeb64b0b237cb8bL,0x7f901ff757a7461fL, + 0x273abe41dd134f03L,0x026d1740005c1f46L,0x0000000000000033L } }, + /* 8 << 287 */ + { { 0x35e7af927e198aecL,0x2937b845ad58220eL,0x9c8c65cc625b0972L, + 0x91991d8a1a9911c8L,0xc9f134d90a4ef588L,0x1cbe5c250e01d57fL, + 0x4c31016e85251d47L,0xc587777652dc527cL,0x000000000000018eL }, + { 0xa9232336d429c5f4L,0x60454705bfabb412L,0xf5f84846794cc5c2L, + 0x1bd49ea0bfc8532dL,0xd4b45b2c3042cd43L,0x49590033374ab02bL, + 0x78770266ee464c45L,0x0a33044eda262362L,0x0000000000000144L } }, + /* 9 << 287 */ + { { 0xcc4bae5e0e57b63fL,0x3d11ca0f42641e4eL,0x3bbfa96aefeece66L, + 0x4857057f6abcde9cL,0xe796df9dc90f2d13L,0x28e49c241a945e44L, + 0xb57f90935abaacacL,0x49a910222d46547cL,0x0000000000000058L }, + { 0x868bc6345a6f0bbfL,0x466dce120f7ccec2L,0x23df20ee964331dfL, + 0x281867e6b92cecd6L,0xd198e769f76f2ce7L,0x2461e009b02bb0f6L, + 0x1a2009db2911f73cL,0x112c2f8e0897b7efL,0x00000000000000e8L } }, + /* 10 << 287 */ + { { 0xc17c5414720aebb3L,0xeaa1a7b97089e6bfL,0x9ee50ba136de9f22L, + 0x4beb00685b3bf8ddL,0x330e8c045b3f6992L,0xf477636d6e7a978cL, + 0x5799678a9543861aL,0xceb7d074810077e6L,0x0000000000000019L }, + { 0x0d20db6dcafef71cL,0xb1c02cba35fa4e9fL,0xeb6628684630ee0eL, + 0xda4bbe8c88265202L,0x71b53dc3e1aa127cL,0xf4e2e52f0abcaa62L, + 0xe4c4cc96f8f6c926L,0xf9cf5ba414aaf997L,0x00000000000001e0L } }, + /* 11 << 287 */ + { { 0x34d27e320a34e7b4L,0x7fa34a4218845536L,0x54f38c415010e48fL, + 0x1d19749a05382614L,0xcd7a8eea88443b45L,0x12534e16ab25cc4eL, + 0x609939ae83f50958L,0xcd6795401a366d0aL,0x0000000000000027L }, + { 
0x877cd4c17f225eb8L,0xb0125faa2fd2154bL,0x4ee9df665f1aa132L, + 0xb14bc327752041d4L,0x606da47f0db62194L,0xf4f0a39330e3f6b3L, + 0x7f0d0e0743435381L,0x4078be8ab43b6346L,0x000000000000013fL } }, + /* 12 << 287 */ + { { 0xf946ab5188098f73L,0xe98c6e73ffd8bc23L,0x30d30a3c98758f69L, + 0x302d5f8fbc8e359cL,0xadaa2ff6fcd2f1fcL,0x694de81c4adec57aL, + 0x69735a5a97698a60L,0x15bbae8c2d55531dL,0x0000000000000164L }, + { 0x2398abf08c6b2d9bL,0x258bdc0e9ec5ee96L,0x80aca75ca9bf887eL, + 0x3e3adc8cd1495ad2L,0x1a30f94862d2f571L,0xebdbf063989dda10L, + 0x86bddede74d3efaeL,0xa42598aac749c868L,0x00000000000001f4L } }, + /* 13 << 287 */ + { { 0xb8f4898284e24afbL,0x5df2eabb25b308adL,0xce0372215016261dL, + 0xcf815e15125443d4L,0x0e41691c69ba86afL,0xb766cbd5d8f262a8L, + 0x4ed7451e7fb6ecf2L,0x5df5ae6ef7f4659eL,0x000000000000005eL }, + { 0x93994f1bcb7e522dL,0xc69bb9db8eb5bd8cL,0xc5a23729da4ece9fL, + 0x33f2b7fbf2b293e1L,0xef4bcb035cb85fd4L,0x6c325c5ad8976753L, + 0x5d3acef4ea95ccb4L,0x7ec98c6cc5de15c3L,0x0000000000000053L } }, + /* 14 << 287 */ + { { 0x6af0b935c936a7e5L,0xde94d724d4a248abL,0x0491c733594e53b7L, + 0x238ef5341ccd7088L,0xaf0e791e4f5052c3L,0x70cbea8110c3411dL, + 0x6b7c8574577d38bcL,0x00e147ae4ea6a924L,0x0000000000000085L }, + { 0xcab57e2a497b793dL,0x3f642966edf47059L,0xa276326d2eb66b98L, + 0xeba64255a76e1221L,0x698fd276d23677b9L,0xcbb4637445a0625dL, + 0x5d92dddc50b672ebL,0xbef8ccd00839333cL,0x0000000000000091L } }, + /* 15 << 287 */ + { { 0x262fcdfa594f0099L,0xafc1ab5cbe87ba17L,0xfcb0b4ee7101073dL, + 0x015c42afebd8503dL,0xee71a1a41b5a7741L,0xd77265524bdea91fL, + 0x1bfe39a18879ccceL,0xb0ba1bfd516f30feL,0x00000000000001f8L }, + { 0xcc05e22168028efcL,0x248242e61ad5d85cL,0xfa93caa2de771fc2L, + 0x76d50a1a7d5ef758L,0x028a0b8d7b147dbcL,0x00f8e82a37b904dfL, + 0xe105ca38ea612d6aL,0xc985187504292b07L,0x000000000000007fL } }, + /* 16 << 287 */ + { { 0x225420c396139042L,0x90a38d5c5bcf13cbL,0xb99a43ae99d999b0L, + 0x5bbbe4d450a20372L,0xaa92420245a7eda6L,0x49543e058bb2a5b6L, + 
0xbd11556497d32f2dL,0xc57d1af95d680f19L,0x000000000000016fL }, + { 0x93af05115ff37259L,0x89c88bef9b9cc398L,0x85526ff5a91987b5L, + 0x8b7bb52adfbf9615L,0x25767d030839c531L,0x6b15aa2e1cd9033eL, + 0x47490261dfae7d95L,0x4ee02157e7e1e8edL,0x000000000000015dL } }, + /* 17 << 287 */ + { { 0x61b3233b6fc2ac46L,0xf300ca4f577be5eeL,0x20751b6858ff827dL, + 0x84f4dcd94fe2d451L,0xf566f8aebf7191ebL,0x4e0bb7c0ab75d2e1L, + 0x3d4267d7246dbf2eL,0xce9ecb9b3888199fL,0x000000000000004eL }, + { 0xd53b33dacd5cb1e5L,0x7c62f3c8dde86c33L,0x133ba1c89594264fL, + 0x41e6d49f69a3d324L,0x3256265cae79db2cL,0xbf68642bd17a25eaL, + 0xc3828f62f72d6723L,0x4e859cfbd6e41d3dL,0x0000000000000050L } }, + /* 18 << 287 */ + { { 0xfce66781065c3b1cL,0x2f28b538b5195f93L,0x9294fa3ed5f99ba0L, + 0x32ef709c66c624bdL,0x4c555df34f610d8cL,0x90ee123f438e3d99L, + 0x92d19b98919841e9L,0x2e228d8aeae83102L,0x00000000000001e9L }, + { 0xb4566d09009cf7c8L,0x60b0ca8338105c3aL,0x9a4fa08aa8da65beL, + 0x3208ad03b5fde8eeL,0x8462daa4737b4ec8L,0xbc182470a5c1b058L, + 0xb1bf27ce7f0b478aL,0xdede341e67045b89L,0x0000000000000156L } }, + /* 19 << 287 */ + { { 0x1e44dd062149658bL,0xb1e065c68c59cd31L,0x8c96ff88ad2dd1ffL, + 0xa18fb42b4f3c3753L,0xdd93c68fc4f74914L,0xf429553b8a075411L, + 0x12086bb9bee2c2ccL,0xc0662dd733a37788L,0x000000000000002fL }, + { 0x7830ac9733d6e53fL,0xd680aa17f434e81aL,0x2e2c636cbe139240L, + 0x72e62040f21e8aa1L,0xcb4ac7a150aba0b2L,0x73f9f305362c3428L, + 0x8582ca9c2d5d90edL,0x272c8dd4ca5ee047L,0x000000000000005dL } }, + /* 20 << 287 */ + { { 0x8cc3adc33883bee0L,0x306b08f63f15b848L,0xbf4896fc2340f68dL, + 0x19b1de94b541ad34L,0xf343c4602c320a09L,0x1e5fa35ff581b0d7L, + 0x34d600c7b3db161aL,0x54cd8526e9f8b6d9L,0x000000000000015fL }, + { 0x4aa6ccd6ceb32adaL,0x2c2c5e7e7497bc17L,0x531ec78678791b13L, + 0xa7f862c881596a68L,0xeb04236972e7ad06L,0x306db5ce7edad2f1L, + 0x5e7a162ea1cdbb31L,0x06f0b4f3565f914eL,0x000000000000006aL } }, + /* 21 << 287 */ + { { 0x7d799dcfcf519949L,0xf36bf428d2734641L,0x775036b5b77431d3L, + 
0xa5a5c727b6b45979L,0x5aa5863d1c9b3278L,0x9330e8bbbc1f335dL, + 0xfbf7cb34780d9905L,0x2287f0a50c894491L,0x00000000000000a2L }, + { 0x1f922f765522c3eaL,0x52a337049e9a1399L,0xf0a0d965ba537263L, + 0xb18778ccb87027faL,0xf15ece21a7bc1a07L,0x34ff0a5a0ebbdb94L, + 0xf7875a19a18106b2L,0x6e17627f9432c0c3L,0x0000000000000040L } }, + /* 22 << 287 */ + { { 0x5f16528ba968accdL,0xdcfd5dc1261063cfL,0x99c6a790ad45c838L, + 0x4b83b5dc811ff9d4L,0xc3c067f23c3deab8L,0x447ecca83496c2cdL, + 0x212e9948ad2663c3L,0x080d13f5354fdc23L,0x0000000000000074L }, + { 0x4e4a2ffaf202bae2L,0x12ee692f0b704ea5L,0xdf48fe7d66a3e286L, + 0x13228b5095bdf85fL,0x65ab3ff9422dbe99L,0xfd795432b376e1eeL, + 0x7dca34ac0de9e917L,0xf907ad9a8614521dL,0x0000000000000159L } }, + /* 23 << 287 */ + { { 0x6a62326cf2bef374L,0x0148ab35d0229744L,0xfec14cf2e5c28016L, + 0x7f7a20b8f2913691L,0xe68c19f0a7a66511L,0x64181257c831bc8bL, + 0x79373addb8bf5318L,0x3be916c44ab48ca2L,0x0000000000000197L }, + { 0xab6dc2ae0e694594L,0x3e6d6dee6f24abaaL,0xe8f7befa436d78e7L, + 0x40917244dce86463L,0x04837770fb488d63L,0x1645ed1b53cb01f8L, + 0x492f484212320a08L,0xeea413edf21580d9L,0x0000000000000033L } }, + /* 24 << 287 */ + { { 0x1bcb1d73c96d566cL,0x0b51a27a1b6f4d02L,0x773be48b463d8fc1L, + 0x267e606a6f19334fL,0x67a49a30db4a4bb1L,0xe33033bb6f476620L, + 0xa2e4076b92025915L,0xb1845ee448f182afL,0x0000000000000070L }, + { 0xdaf44e82d2f77d9cL,0x753f7f717caff6b8L,0x7fda258ac5d9943fL, + 0x2f47f8379b012134L,0x9cbbdae94d2a9bebL,0xdb39f169221f799cL, + 0xee68daf32048a54bL,0x36939e9d7f5aef38L,0x00000000000000f3L } }, + /* 25 << 287 */ + { { 0x21d565da891c0901L,0xe7258e5a02e8d871L,0xf7683e6e8d22ac60L, + 0x40188b73d38b13aeL,0x09f41af92bf12facL,0x3a928b645a7b3178L, + 0xb7eb9139de59434eL,0x12c96b6ff71e130dL,0x0000000000000003L }, + { 0x0e18e0904423f103L,0x5e5184ba34e17c6bL,0x8fcf43a26fb05bb9L, + 0xe5990252181ce682L,0x7deda4923df077f4L,0x37d276b504c18d50L, + 0xb95438914c706ccdL,0x94d023c4ddbcb26eL,0x000000000000010cL } }, + /* 26 << 287 */ + { { 
0xabf61a9ea3b3f8c3L,0x90237370b62fc9daL,0xc1d3de232c5128dbL, + 0xf67097fb65ae3521L,0x0016debd78d68d97L,0x23ea15ddeeb9df30L, + 0x4a09e240431259f6L,0xeb81173106ffdc4bL,0x000000000000001cL }, + { 0x3c709776a20611b6L,0xd695948645d62433L,0x86c9232cc8344819L, + 0x636675f6a260c3b3L,0xb927e00a2b6adb42L,0x9644f9e9f1c8c3f9L, + 0xa0842190e01260eeL,0x6ea161f8e2a15fa9L,0x0000000000000127L } }, + /* 27 << 287 */ + { { 0xf245e320841600e3L,0xc597f039837d15e7L,0xa427b0c93669ff83L, + 0x1b1fa11c054ce42aL,0xf985c1ab66e1c039L,0x7ee24b84af17c731L, + 0x90d42d66f467a998L,0x670192d6222a6bd6L,0x00000000000000e4L }, + { 0x252106f6b89fb34fL,0x8fcde56c7af89916L,0x9a6897639a7b371dL, + 0xd92b0bdc14b4d750L,0x295cd60df7fe2ea5L,0x4f16e7fa93025bb9L, + 0xb50d8e98acc07895L,0x0b2c2aab85295081L,0x000000000000001fL } }, + /* 28 << 287 */ + { { 0x84450f4a2d3a1514L,0x1ca1912da211fa6dL,0x18a4e99f487df3dcL, + 0xae9dd13e6535495fL,0x0ec3771fba83ee44L,0x3ef3b94359ed9f53L, + 0x0bae7d1fa163406cL,0x387121a0b815b7afL,0x000000000000001cL }, + { 0x2dec1270f94fd988L,0x21680d0e3d3b53e6L,0xcc653cca28c69753L, + 0xc03803d715d406f8L,0xae4e927ee939bf4fL,0x01dbf41d381933f7L, + 0x602169e07ef8dbc9L,0xe782930581026dd0L,0x00000000000000aeL } }, + /* 29 << 287 */ + { { 0x793030dcfec9973dL,0x5e9bc45d259f2cefL,0xf691294dfb8710e9L, + 0x371e0880556e5077L,0xe396fa2c3875f837L,0xc8062d5a607aa969L, + 0xd17b9ac343675d61L,0xbe71883cde013cf2L,0x00000000000001eeL }, + { 0x4495699ba7344978L,0xf546f5591c109544L,0xda2f84f37dd1fa4dL, + 0xce4166a343595270L,0x1341e861a7df4f65L,0xf6ee6a0739292f29L, + 0xe61f4613e5f02532L,0x3b36f58881f967c6L,0x00000000000001f9L } }, + /* 30 << 287 */ + { { 0xccfc7051875e2133L,0xfc3c0b55821f76a0L,0x544e229fcd8ef672L, + 0x3666479347494f15L,0x22c05e5f109b2ce0L,0xc294314c492bcbb1L, + 0x15beecde16558d91L,0x4518316761ae3008L,0x00000000000000a6L }, + { 0x80bc2b2c26c28852L,0x941f0224510b0086L,0x0fcb9de4abffeb4cL, + 0x0b1e80a966f723b4L,0x186fcc2be5cd0627L,0x8b1df932ca71dcf8L, + 
0x789cb0a1eb01452cL,0xb2e81e3681ffe1eaL,0x000000000000009dL } }, + /* 31 << 287 */ + { { 0x1d0a215efdb151faL,0x3e242b9cb0e023c9L,0x7b69678cc83b6d8bL, + 0x2e8beb24f99e1e2cL,0x93149d54e690affeL,0x2b4e1f769b805cc9L, + 0x68ca06fa9599daa1L,0xeab0311f5ca75c0fL,0x0000000000000170L }, + { 0xa1012deb5102ca3aL,0x72335ccf4312d2e2L,0x1012b2bf4ab55d59L, + 0xd09a5f033368547eL,0xd9b1570985acd78fL,0x6d3b9f42f7e576dcL, + 0xc1de787eea6f8f55L,0xac3238aafb4c948fL,0x00000000000000fcL } }, + /* 32 << 287 */ + { { 0x5ec6427b05487074L,0x0b59fa1f80c426adL,0x0207d510c9946594L, + 0xba1fe985d38f83d0L,0x113aec98c96eb133L,0x3424ad0182515193L, + 0x3eb15aeffb6f9b10L,0xccb719e8f15d8be1L,0x000000000000013cL }, + { 0x63f898834c67518dL,0xb5ca3508f85eb1d9L,0x1ea1c74faf652b25L, + 0x4fbc5476bddedc4dL,0x22daa81af4b33c74L,0x9f7941a03f36ae29L, + 0x90c2b8cebca5ca40L,0x53e736a1b587b3d6L,0x00000000000001b3L } }, + /* 33 << 287 */ + { { 0xb2c4b8f8fdcda3baL,0x5913f72464002d11L,0x978aed4cec7c81e6L, + 0xfb9e4c7819795e08L,0xa63ad972e046e1ddL,0x4c913ef2ca493c76L, + 0xa7b7de5c67d5a177L,0x34ea3faed5d548edL,0x0000000000000076L }, + { 0x263ab24db8e17278L,0xf9db154a9ade657bL,0x66eac09c731d358cL, + 0x40f840edb8d08934L,0x409adfa60c35b4adL,0x96e6e42913ed3e4dL, + 0x4f8bc420d468368cL,0xbe6e5b6a5f85e95bL,0x00000000000001a3L } }, + /* 34 << 287 */ + { { 0x739e7cd7ebab693dL,0xcd08edef192f5597L,0x34ce91e9b4ab362eL, + 0xbf0869027d8e9063L,0x9f85b3292893d739L,0x7b7713eb12c08939L, + 0x81aef3b177d34b27L,0xfa873780f155d573L,0x00000000000001f5L }, + { 0x1ec60390a76da4e5L,0x14047fa7fc79130aL,0xc4e484be8c4dd66cL, + 0x347d3c918b008f74L,0x7a9fc84aca0ddaedL,0xde23768b59c02ff5L, + 0x353ba3ed8983d294L,0x70bfbadad7535907L,0x000000000000016bL } }, + /* 35 << 287 */ + { { 0x7dd4835098829cd0L,0x6839f375c56e3a5eL,0x860921cb36c4f91eL, + 0x0ca5e0b8159903ddL,0xb123ae8b23724973L,0xaa807279fb25d155L, + 0x2b58fcf5f2d0840eL,0x33c635c7409f55e2L,0x00000000000000efL }, + { 0xbdba4c387aa5323bL,0x8b3acb9b864fe257L,0xd440031e6ae45ddbL, + 
0x37b9bb525d86d712L,0xcc88786333cdcabdL,0x1f6533527c1daefdL, + 0xc7bcc11e8a84fdccL,0x676e3d84b34be64aL,0x0000000000000004L } }, + /* 36 << 287 */ + { { 0x0b7c799943b4fc88L,0x47ea96a46a55dc86L,0x67fe6b4ba0bec552L, + 0x8164398732aef8ebL,0xcbe3ebf7b23195ffL,0x23a03ed1b70013ccL, + 0x3bfe99e80f6a4762L,0xdee758be7483a709L,0x0000000000000190L }, + { 0x05a52134be5007a9L,0x869f393c68e4cf51L,0xc7b56267c28c1a88L, + 0x2bd98f7e0dc40b0cL,0xfa631a8321f9d503L,0xde940d201fa14801L, + 0x26fdb625c3e152acL,0x7f72d33650d8d6edL,0x0000000000000093L } }, + /* 37 << 287 */ + { { 0x7298111644c81fefL,0xff5c80aa32f7a76bL,0x82f5d039182a39d7L, + 0x2f32365a7960f5f3L,0x0488e8918cb0e827L,0xd8549d180721822bL, + 0x5c80c8d57d9cf4dfL,0xc607bd030ead496bL,0x0000000000000016L }, + { 0xc7ff1e33ab77acf5L,0x1a514f0a044e2661L,0x12911922b906ce12L, + 0x847951f023f86570L,0x83242de153bd17b8L,0xd04a3e96190ad7e7L, + 0x3445dc26604be4d6L,0x616a61a208bac680L,0x00000000000000d9L } }, + /* 38 << 287 */ + { { 0xdc35d8c52cfeea6bL,0x45a9d97d99c6b24bL,0x727b2ff281261d8bL, + 0x87bfa12e9a446a97L,0x44b5cb46d045b1efL,0x4aae3f8a70c29285L, + 0xb911c6a8e7a014a0L,0x0692e0e36e1c9679L,0x0000000000000117L }, + { 0xa3759e511300d87eL,0x461e8bd13ce09839L,0x55a8b8194932e9acL, + 0x0623e3207eeff04fL,0x6f21721881e0ff93L,0x0b726fc0f9ccdbceL, + 0xb94f97d15fb03e1eL,0x9339f75bbbd8417fL,0x000000000000009bL } }, + /* 39 << 287 */ + { { 0x4b7fd5cd70221112L,0xe95de56b71ae6670L,0x2cb7d98df08bbdedL, + 0xabcbcd2fe57331a7L,0x69c3b30710dc3694L,0x5587e28e8e64f72dL, + 0x29d7a61b02846a2eL,0xfd4abcfac88e66c0L,0x00000000000001f5L }, + { 0x80ba4555d4f9d318L,0x7038827722c31c44L,0xeef78af085d0468aL, + 0x846341582221ca01L,0xc8ec963f726e877cL,0x3aba70060d2be74bL, + 0x71274ecbb2a71a81L,0x4a7c7680a6648fddL,0x0000000000000016L } }, + /* 40 << 287 */ + { { 0xa5d6c61faae6fe14L,0xff4fd50037b0da63L,0x087ca2202386bb0bL, + 0x80a0d03deb70f71aL,0x1b5ed1d861b7b396L,0xc49483c906218384L, + 0x3a3b534cb715959dL,0x4109d89408511c23L,0x000000000000018eL }, + { 
0x0524d716504b62e2L,0x514f91699a0104bbL,0xc41c74c8e9c613c3L, + 0xce7a9561b4df2024L,0x17116cae95be7a5aL,0x372d47642172e2c5L, + 0x9aadf8d65aae7d3aL,0x77bd6304d35903dbL,0x00000000000001baL } }, + /* 41 << 287 */ + { { 0xdc15770ef840ae67L,0xa3aefed136f17978L,0x38cf706b58806eafL, + 0x7b5317312b8ca8e1L,0x45c9f9de1eb7b6cdL,0x8410e75a44a70febL, + 0xd362903a942c4e05L,0x43e5ec0b482b04fdL,0x00000000000001a0L }, + { 0x176aecfcf569465bL,0x8c6de88ca5f800a2L,0xb4efde01873fdbb3L, + 0x1d4f10aa523beda9L,0x7299f81d77f8dd80L,0xae24679ffafe5f4aL, + 0xff5eedaf142ab79fL,0x4f0aa1d58274cfaaL,0x00000000000001b3L } }, + /* 42 << 287 */ + { { 0xd9e66f808bbcda48L,0x6bf81e10ab2f71ffL,0xc0410a0051ca5f9cL, + 0xd4c92b1195d2aff7L,0x83f38269af7927b1L,0xab3223cedeb329c8L, + 0x1efd631048360a2cL,0x8aa5d85e4ed0a930L,0x000000000000018fL }, + { 0x595e653509c61585L,0x00203ea9f8489891L,0x4a048a9784a2d1acL, + 0xc3ccb4d21c6df789L,0xe7d740b3ab880e8cL,0x6345e96775fd1a87L, + 0xf5636d795359993fL,0x100b98e94907e135L,0x00000000000001d9L } }, + /* 43 << 287 */ + { { 0x8f91e1fd96799962L,0x55b13fe2a1ccf057L,0xded45a5794d8177bL, + 0xb9dceec579709266L,0xe3be73222eb10923L,0x8b06ffc8fc82aeefL, + 0x46d6485a742741aeL,0xe1e0f1bc0e82d8e4L,0x000000000000013eL }, + { 0xced2707e39ee8a27L,0x5c2664879dd5a0ceL,0x1d4115d44e369f35L, + 0x92c350e24ba9cf2dL,0x6533ce6ab1d46440L,0x633115437de4fb1eL, + 0xdc75f32cc0b8f159L,0xc04579e13b5b946fL,0x00000000000000f5L } }, + /* 44 << 287 */ + { { 0x980752e1b618c3fbL,0x1c15864b25161e86L,0x490bec9c83024300L, + 0x20d33fca89ce7925L,0x34c7ab08582837dfL,0x207a275e0bfd6076L, + 0xf09157704f9e58d0L,0xa8525b2f1b53bc52L,0x00000000000001c4L }, + { 0x1a6f37d694130fe7L,0xbc72fdaaa18d2529L,0x6e51aec7bb557067L, + 0x44860fc5147b7b40L,0x64f81f1cd254589cL,0xb4ba31930406123eL, + 0x9dd412efe8b8be32L,0x8dbcb40b15702711L,0x000000000000013fL } }, + /* 45 << 287 */ + { { 0x9eca299b06a27b5eL,0x79f6fdc3bc3f5221L,0xc5b3305b7346f300L, + 0x570a19511022cff2L,0x58740fccc5207454L,0x5215d8164004ea39L, + 
0x522224c739c8ddd9L,0x174c95e1039a27c5L,0x0000000000000001L }, + { 0x4f69c604296bcce0L,0x51755ce45119beb1L,0x3d12e7d8a35cc907L, + 0x360935b0ab351798L,0x75eba43bc11b521aL,0xaf991a2cd31467daL, + 0xfaaf67ac17e63972L,0x44abfc569f13868dL,0x000000000000003dL } }, + /* 46 << 287 */ + { { 0xec7238d7c2d59d30L,0xaa4ef2e4aaa203fcL,0x566f58f4dcf9fc1eL, + 0xc8474f1a0869c0e1L,0x2d77f6726ad3ccd6L,0x9325fbc3dbfe2d82L, + 0x2860c924182a952fL,0x07b221d085b5769aL,0x000000000000000dL }, + { 0x259039edbb854ca5L,0x70342bbcfc89ff2fL,0xc9a179010e634818L, + 0x6a06796dc81f0292L,0x3f625bdd35063f58L,0x5d78a549f498f7f7L, + 0x5fb6be9d911b1e7aL,0x9a3fb5bc3ba9e1daL,0x000000000000001aL } }, + /* 47 << 287 */ + { { 0x5731b8f3d98e6e5dL,0x6778b260d5a03ad1L,0xf279515b649b1ebcL, + 0x48610ff53312d0c8L,0xd389681f46354ff0L,0x80e063c9ef3c2717L, + 0x10ce407a83f0dd58L,0xbee78d46158e8ac2L,0x00000000000001b0L }, + { 0xb5676bdb1a5531b2L,0x3ce62d7f2fb26867L,0x76e1b2152b0af296L, + 0x76875d567a42d01bL,0xb59fbfcb56991384L,0x7faea2f32de3a3a4L, + 0x06298c2772f7718aL,0x79264b65651d7515L,0x00000000000000acL } }, + /* 48 << 287 */ + { { 0x4de60e1e16d44fc0L,0x86143bbbd5546f49L,0x41fa8fd595ad6b38L, + 0xf2f21b350251812cL,0x2f5ade77eb2a57e4L,0xced255c0ac290bf7L, + 0xd1e7969058b23e1fL,0xba7a7514f406ffc4L,0x00000000000001fdL }, + { 0xe2a9e7bc7b8ce9b6L,0xcc6107e8803955b7L,0x8a59ec8c432fa865L, + 0x6886d08c481bbb72L,0x0de2fe38db7fcc20L,0x654419299976d353L, + 0x0527047ec4efae8cL,0xfeec201ee21b2e66L,0x0000000000000086L } }, + /* 49 << 287 */ + { { 0x3f121bc676bffba9L,0xed8f7b175c3251faL,0x4755cd3f2c99c00eL, + 0xe79768df8e766134L,0x58662ef2b2a09ffaL,0x6f8d645a6a29c866L, + 0x5973d32426f990c9L,0xae9beecf5ae23357L,0x00000000000001baL }, + { 0x64c20c91b3f0d194L,0x36b4d96ce153d1b9L,0xf294b342c9707d50L, + 0xb269c9b80599d63fL,0x2aa166e48a672b71L,0xdd8cf8ad8c69d106L, + 0xe446c91269bb8492L,0x3a5068e1e0afc64aL,0x000000000000006bL } }, + /* 50 << 287 */ + { { 0x7748105e4dfefb66L,0x952fc9823de55ef3L,0xc945662fec01a7dbL, + 
0x57dfc326136c9246L,0x5864f8aab3c0dd2aL,0xdd11387181126d21L, + 0x096e017bc2f6ee68L,0x2bd384b529f22a80L,0x0000000000000171L }, + { 0x7acc273262443e2eL,0x21f545cefa227e81L,0xbc0f387d716b1121L, + 0x8756ea78a8d3df9cL,0x91294298cf6c071bL,0xa109d544d37e7a16L, + 0x2c27c61c4a979e5dL,0xad4662bf6bb98eebL,0x00000000000000c7L } }, + /* 51 << 287 */ + { { 0x8f24735f7ef57d61L,0xfe764b217a74fb7cL,0x6b90cddaa7913c97L, + 0x89001bfd3d0fed42L,0x874d170a776c73c0L,0x7a7095c6e6fd64e8L, + 0x5f787a9dc5b6b31aL,0x550f1c531ff8424aL,0x0000000000000048L }, + { 0x384c067743b13b54L,0x65d8884e2e5b67d4L,0x7106b3682b2b005eL, + 0xc22c663d2cb1f1a1L,0xc3bb1b8294e03ce8L,0x04a562df2d97ddaeL, + 0x7886ec4ba917202bL,0xa8eb4ac88bb2ae23L,0x00000000000000efL } }, + /* 52 << 287 */ + { { 0xf9a8bc79c269cd29L,0xbe6982a835a97cb7L,0xa7740e027200f43eL, + 0x72be0da1165175c6L,0xf7c5f902e8aea997L,0x079dd00fe6d7de98L, + 0x82940a7e0e5cd7c2L,0x615687af6e74f7ebL,0x00000000000001dcL }, + { 0x84a93d46969bec3aL,0x673f8a1a81283fd1L,0x59ae6b7d1a24c4c1L, + 0x1506e799db5385d2L,0x43f89c2dfaf7c9e5L,0xcd4a58e507df349dL, + 0x87d1239226eff985L,0x9c7b514286548735L,0x000000000000000bL } }, + /* 53 << 287 */ + { { 0x8bbe703a952e81fdL,0xdf43b74182f9b684L,0x9c6ac3560ffbeeaaL, + 0x78aacf6dc17fcb23L,0x2698f3bec9ee6c25L,0x707d8033a6fd9701L, + 0xe844b5ae82b097acL,0x02cbc3fa57be01a4L,0x00000000000001bdL }, + { 0x14ebe4367cedc9bbL,0x1d64fe901c921970L,0xe55c69b4daa0458aL, + 0x0f68f561da8951b9L,0x606ce9ddeb7f4390L,0x8b249877c8204883L, + 0xdae70a0a533eea70L,0x8367156bebbfc8d6L,0x0000000000000050L } }, + /* 54 << 287 */ + { { 0x4f085c6b70d71433L,0x500e36aeffe3039bL,0xbbb595eeecdbb49bL, + 0x49b4a39e84d8418bL,0x657af86547150026L,0xa1b6888f3495dd49L, + 0x3db61acb5f0cddd8L,0x86208eb95b9b0493L,0x000000000000017eL }, + { 0x378f7c79a3a8e19bL,0xfe9d20c80225c551L,0x7192347f8a7d2706L, + 0xaf66bd88bd5a1a12L,0x184528325eb963bdL,0x0eddc645fe3cfb9dL, + 0xc50b1fa21f6d4faaL,0x582fc52f738cdaacL,0x000000000000015bL } }, + /* 55 << 287 */ + { { 
0xcd8bb30ccff1f7f5L,0x2e476a3d52375afdL,0x84b205ede21acdfbL, + 0x0c07485624b6524dL,0x081539bb079c9199L,0x0ff9151612dbacdbL, + 0x0e2a291d778ea0efL,0x2c6bb624b6ae1b88L,0x0000000000000115L }, + { 0x559603e05013ff58L,0x91ab545e04d6892cL,0xe58c8f9c9aabb577L, + 0x460c88fc3f50fb7fL,0x1d36f37d2c29abefL,0xc32f5683e698ffa8L, + 0x4311a22b14f77974L,0x061f7526c6512ae6L,0x00000000000000c9L } }, + /* 56 << 287 */ + { { 0xd0fc63b0ea5c4656L,0x87e24742b04936cbL,0x242865fef6223111L, + 0x0a1c9f7c88c3fdb2L,0xa1ce393deb785b41L,0xb57b01e50f014ec6L, + 0x84cd63f91eae978dL,0x28a02cf7615a8787L,0x00000000000001c6L }, + { 0x000d32fb64317827L,0xeec5181bd8ebcb3aL,0x4f559972be5be8f9L, + 0xaaae45d1ff34fa5fL,0x34330820ec25372cL,0x3d3d88af5f583d42L, + 0x16e14e2dac9463faL,0xbfe401edd02c6401L,0x00000000000000fdL } }, + /* 57 << 287 */ + { { 0x303f7c2191026338L,0x1044db852d912c48L,0xfc2085c2470306fdL, + 0xf1ae74d7d6261c9eL,0x9383596cca88358bL,0x40e3fc61130fed49L, + 0x5b31503e7fb7caddL,0x74ae8b314ede0bbcL,0x000000000000019fL }, + { 0x762dd492f4cfe3f8L,0x73ff4852c626dc61L,0xd71c7548dde18f9cL, + 0xb0f6e288549faaaaL,0x1e4864e9596d3c8dL,0x9d6ef29060783b2dL, + 0x4d4887031ec18d75L,0x18bb5d8f44f1de93L,0x000000000000005cL } }, + /* 58 << 287 */ + { { 0xd773ed1b16219cdbL,0xa9bc2d4bfd8c1c68L,0xb43ef1e5878e384fL, + 0x1b070439a150dd75L,0x8d5c9984e8a4281fL,0x7b4371effe5192aeL, + 0x4ca528f0fceb6294L,0x3657f78ac01f35a7L,0x000000000000008fL }, + { 0x00b1077093170e8aL,0xab16fb4e1d2e8bcaL,0x9f313a1a4f2f8602L, + 0x88521cae6b9b2573L,0x6f6bf5ddee331076L,0xcf05b7228e09934cL, + 0xf8b445c395762a35L,0x2e1579120c892f04L,0x00000000000001a1L } }, + /* 59 << 287 */ + { { 0x3766a4d7858bc9caL,0x295bb5db99714cdaL,0xac54938ab5298fadL, + 0x2ba0547a429ecfa1L,0xc49c9daef173653aL,0x1de672b7e9d21243L, + 0x24960d342b2352bdL,0x46067324b31663a5L,0x00000000000001cdL }, + { 0x13eae0f3789923d1L,0xb1849a53eac934d6L,0x354d92ffec456259L, + 0x88a4b03fe558f44aL,0x8e9d23e101d5bb7aL,0xf5734a3494fdf6baL, + 
0x79e3d56e16e73899L,0x1ed057ee0e63d42eL,0x0000000000000099L } }, + /* 60 << 287 */ + { { 0x287d78f48c86cf27L,0x957ebaca51081682L,0xc5e2c0757ae4db49L, + 0x4fd137c952000f40L,0x614a4ab213285161L,0x71a6928ddae9e341L, + 0x9758874ba666dafdL,0x4bc8b6693697e3c7L,0x0000000000000161L }, + { 0x5c31b76a89aac81cL,0x365bcf13bf1fcb91L,0xc64df528441b324dL, + 0x8b57f07fbe9a9cffL,0x7ae23c2186fb45d6L,0xc3bf3c3b136ebdd0L, + 0x6b5dc92e6dbd0b61L,0x330886f1d519ef41L,0x0000000000000019L } }, + /* 61 << 287 */ + { { 0x6d2e03fe6d8655c7L,0x0487d0369a5d4136L,0x86b1ba539c89b36cL, + 0x168a8617c25c8477L,0x818ec8e4ce0b81d6L,0x5f7e1f5aa79096adL, + 0x3ed99ae7396d84d2L,0x2798c459eef4e012L,0x00000000000001d7L }, + { 0xe6564f4f531b9dc3L,0xb6627fe65b38bbe7L,0xd9ad26a6bea83504L, + 0x1b9d887b0e3d69e1L,0xb67ab97090f81c05L,0x16c1fc9ffcdbb086L, + 0xd7732e23d19f14b4L,0x047368461ec62389L,0x0000000000000100L } }, + /* 62 << 287 */ + { { 0x1dbc0bb16ddfa714L,0xda0302b7eec67fc7L,0x6daa1cba5a9dd848L, + 0x640d3b5ed9e894e2L,0x461653b3f87be8b5L,0x5a9dcbfef36a148eL, + 0x2e4784fca907eee0L,0x234c8b37b34d04b2L,0x00000000000001c4L }, + { 0x2e70d5abec9c6abaL,0x6e9c28cbf32925b5L,0x77fd60422d0e1d25L, + 0x216e3254980aa40bL,0x75e1075516878b0cL,0xcc80010706a2ba55L, + 0x5f1f53368bfe962aL,0xc043f7eae6e46f09L,0x00000000000001e1L } }, + /* 63 << 287 */ + { { 0x2cbaa9d79a7d2353L,0xc8ddfcbfe0405dddL,0xd8b33d0c694fd5abL, + 0xe1c2a3f3c2c6dfbaL,0xb26f832191fe9c8bL,0xd30b644aa794437fL, + 0x2a94f86d74a29d06L,0xc249a27839b16ed8L,0x00000000000000b4L }, + { 0xa2e31c29ea3096fdL,0x622d7af518e9785bL,0xf9a9c952c1cf5446L, + 0xe5c66c9d44c9a592L,0x128613e545227c47L,0xc57169f2f5ed7a49L, + 0xf5fe83dd15211638L,0x7cfbfa9beeaff7d0L,0x0000000000000140L } }, + /* 64 << 287 */ + { { 0x8645efeb9545bb61L,0xf696af86207181bbL,0x94d3014ca2b2d411L, + 0xb8f1515665c54db3L,0x6574f1cb011dd592L,0x7d9c69ebf3247ab4L, + 0x1708b24d5a391a77L,0xaaa19d08312e131fL,0x0000000000000167L }, + { 0x1d3f4510ebaea62dL,0x2186c9c26c118b52L,0x81c0b0afcaaadd5fL, + 
0xa0256b3c665bb598L,0xec461656eced1f38L,0x77bfe65265aa7914L, + 0xd2600229333f7aaeL,0xd916fbbde1f081ddL,0x0000000000000010L } }, + /* 0 << 294 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 294 */ + { { 0xb64361a93f7cae04L,0x23cfcf76af64efbcL,0xb4539c3cd4965ae5L, + 0xf16e3e884df1182dL,0x2bf7eed4dd75fed1L,0x831a5956a215b733L, + 0xc23e3f1a9732d4ddL,0x3644961904677879L,0x0000000000000068L }, + { 0x14675e68e07af31cL,0x82c3d1baabcd16d6L,0xab1b388e6fe13962L, + 0x3490b5218016a6aeL,0x13b584a11ce7f93bL,0x44265da4a5cbeb76L, + 0x17e4726763df9697L,0x10144c87cf2b278bL,0x0000000000000013L } }, + /* 2 << 294 */ + { { 0x4e75aeb08523aab6L,0xd32a993258144cddL,0xe7a2ea2d3c055e41L, + 0xc25dc24e31c1949aL,0x719e3bdaa124dbc2L,0x331bf1820c1b238cL, + 0x6fb91096fb86656bL,0x6a12ae8db484c49dL,0x00000000000001e0L }, + { 0x4e4b8553e97e59afL,0x6a2b1f0d8e98d66eL,0x9001ace6d49bc96bL, + 0x70b21c8a3e8d2774L,0xba4c7da4473972a2L,0xf159ea8b7b898adeL, + 0x7a0e87a0cceda981L,0xb886ab7d2f280717L,0x000000000000003aL } }, + /* 3 << 294 */ + { { 0xdda29d3be583a2c5L,0xbc0632201b0c039aL,0x31c58c6f602b6077L, + 0xc8ddbb1bbb639a64L,0xdea19aefea3cf3d1L,0xbca9f001e9a2053aL, + 0xf0ec77121930345dL,0x4f2cddd43b9b8eccL,0x000000000000015dL }, + { 0xeb7851a8abf98eaaL,0x12d75a839dc39602L,0x252365a4ebb5f975L, + 0x1b9301bcb8efb6a9L,0x0b489a405e8ddd2aL,0xd3629e5fce3b1dacL, + 0x2064a36707a223ffL,0xdc7e94d10080ee45L,0x000000000000014aL } }, + /* 4 << 294 */ + { { 0xe0828a5b448996e2L,0xdd3f4d9b516e440dL,0x223b4f87ab10e109L, + 0x766bde384ce7241cL,0x0b1cc320dcaf88a7L,0xb0470e7dd75440d5L, + 0xe3894e41ba76b7e8L,0xa0341305a762bb9eL,0x000000000000013fL }, + { 0x4f5581d30eadf722L,0x6e4e31e050d8999bL,0xc14bb14802f03368L, + 0x41f1964326a263b5L,0xc0642e4c3b5f480eL,0x0a3280df63191881L, + 0x4ee1959a78fa9069L,0x7376078a557a8dc0L,0x0000000000000181L } }, + /* 5 << 294 */ + { { 0xbe2d598bff25ee79L,0x2a7c3b68c82ea4ecL,0x2914f1c160cc7b7bL, + 
0xd9676e92e29a111dL,0xa5c2d63ba8ee3c8cL,0xa5c3ce60331676c4L, + 0x865ae6a4e189bc61L,0x73e0bc606cc0c210L,0x000000000000004fL }, + { 0xb36fdaebc2480585L,0x9e32b2fb0aa27761L,0xc1bc360f749efcb7L, + 0x7bb61cd4a1c021cbL,0x78d559a81295d34cL,0xdb3da27d9ed2b89aL, + 0x2cd8e29bdd5ef272L,0xc8ef3c63de181b01L,0x0000000000000092L } }, + /* 6 << 294 */ + { { 0x1d99435dc6064b04L,0x7fa5fd072b95a7ceL,0x40ef3a291dacc40aL, + 0x95af1d5af4c68a6aL,0x67a2b605532c2d93L,0xfcb49b4b78620565L, + 0xf814de99833ae939L,0x5770eba80f620cb3L,0x0000000000000190L }, + { 0xbf93ee90ff475874L,0x334debc1a2a5b169L,0xc67fbf1e68aa4bedL, + 0xfb1410ed74d07240L,0xb0a00657a9eb3f94L,0x59c6aa0a0f1be110L, + 0x5d615203b40b739fL,0x8724fc0b4ad94ffaL,0x000000000000006eL } }, + /* 7 << 294 */ + { { 0x30c955f422b25fbeL,0x0469be6e38702ec5L,0x24dc7ed77b4c3dddL, + 0xbcd2c1e51999ec26L,0x63d42519eb5918a4L,0x5b92b4a9b9272f31L, + 0xd02102ce79dedbcbL,0x09bae7da4777b662L,0x000000000000017fL }, + { 0x96d4a0c4897b1f72L,0xd481098d85bb152eL,0x226203a263df30bbL, + 0xca5e74089bb6051aL,0xc89f1c4be0dead4eL,0xf9d8f9b5503a124bL, + 0x9e73ea84a08dd221L,0x5a898d3d7b5a3e30L,0x0000000000000173L } }, + /* 8 << 294 */ + { { 0x5c6dcfe8fcb68ca5L,0x499866e3902e03cdL,0x28638635cff1adc7L, + 0xd224bf5d6538cf65L,0x59fac5c2d7c55f70L,0xc710492699cda00aL, + 0x043001c39ae92020L,0xbaf959dbc325cc56L,0x00000000000000c5L }, + { 0x76aec243e69664a7L,0x0885dbb386d69554L,0xe4a288833647325cL, + 0xdb1adc9a6e8e47a4L,0x3e2230dab14e1019L,0x49a16a19ac7da236L, + 0x12eac3e98df54dafL,0x78d48ea77964ab39L,0x000000000000008eL } }, + /* 9 << 294 */ + { { 0x6033de1bbaf7f635L,0xc253533472bc0c5aL,0x8192188210a5ae12L, + 0xa15accef6c3b8435L,0x6d0c30b8371eff0cL,0x877daecc0e7e663fL, + 0xae63f47ab0fdba65L,0x9b91f14bbb19f291L,0x00000000000000fbL }, + { 0xf98cbf19c9ed84d4L,0x26265242c73f3fb1L,0x62fba52fa237aff5L, + 0x561e0ed381f85bc9L,0x18825d251d817972L,0x53de722c9ed3f84bL, + 0x8a2817fa3854812bL,0xd03138cf3055d672L,0x00000000000000c6L } }, + /* 10 << 294 */ + { { 
0xca64371d1b6ba219L,0xaee6dd0f5d49fd5dL,0x3bbf0a2ce222ca70L, + 0xcdf5981fdfd4aa18L,0xd59333e9703352b0L,0x63c14fbce8c9d667L, + 0x59c6249ee1c82654L,0xf74f29f7a2f2b42aL,0x00000000000000cfL }, + { 0x78a98f744fb7397fL,0x3b72aacb7d62ee2aL,0x5a2b827142b78618L, + 0xe73a88377efdba25L,0xe029ba19bb860133L,0x47b0f0fd48745802L, + 0xb416fb7d8b425a5aL,0x1716aca7838eaabdL,0x000000000000006eL } }, + /* 11 << 294 */ + { { 0x7b5e9555e7377511L,0x764963ddf57ac8a7L,0xc9d4d1202bc0c55cL, + 0xc89debb21325db7aL,0x768b37c0a42ad0a5L,0x6c47d86ab2c32964L, + 0x592ce7ff10712241L,0x5fc45fc1874d5848L,0x00000000000001c2L }, + { 0xfaa50c97de74b8a0L,0x3552df05a90e0a0eL,0xd9fbb1a5d216a436L, + 0x08a1940cd78657f8L,0x2d91c2d64f5a0ba1L,0x0c1b9d61812b8946L, + 0xe048c827ea96bec8L,0x71315883fa6e2770L,0x000000000000016cL } }, + /* 12 << 294 */ + { { 0xcb52bfa5386c4e0cL,0x9ad631ce173c0506L,0x7779fd8d1f481a10L, + 0x0ce5155cadbf3b01L,0x946c3d6c4a66e714L,0x82363b8efc39257aL, + 0x5bff5f529f7ddfa7L,0xf6019194c959f74aL,0x0000000000000195L }, + { 0x97e049ccd1a7e5e3L,0x20b70812ba5aba47L,0xaa1ae913eed6d2feL, + 0x3a742bce668bb4b6L,0x4527c1a564d51f79L,0x70e8a3c89295bfadL, + 0xa30663031cc82fc0L,0x2c7c0291fd3f6160L,0x000000000000006bL } }, + /* 13 << 294 */ + { { 0xe645810f34132c1aL,0x77c28cc35de4c75fL,0xbb2f5483a0de31e3L, + 0xd0fb9a7bdb26a8b2L,0x8886a0245235b15fL,0x526b04be0b4d1737L, + 0xcf6084d9489f9855L,0x971ca8398ed5829fL,0x00000000000000e1L }, + { 0x33b5bd61ca6b951dL,0x6bf54e1fb00b2c5aL,0x3e739a1de67b50a4L, + 0x4a9af7f0ce82ee0bL,0x0fb8696146a2ee3eL,0xdb9363a79c97b9f9L, + 0xabd29abbdb1be929L,0x7d4fb696e812de1aL,0x000000000000014eL } }, + /* 14 << 294 */ + { { 0xca22e1011f4db09aL,0x1a821888be1a96d0L,0xa5b1218d300d31d8L, + 0xb4e11200befdce35L,0x6900c54fa507c579L,0xa343019dcc9fddebL, + 0x1ef6f8e7ce55a800L,0x0b7e6f2eb39b7d09L,0x00000000000000a9L }, + { 0x67aee879d16b84d2L,0x4775d42bc4a9e158L,0xd13945291f790f48L, + 0x2cbd6d019bbcca5fL,0x08205b6360ad2134L,0xe69529826beba839L, + 
0xda24d18c265360e7L,0x070fe85c1afb1319L,0x000000000000002aL } }, + /* 15 << 294 */ + { { 0xb382471bded9c20bL,0x2a710cf6db64da44L,0x92e11ddce8b5040bL, + 0x81d6600416313d98L,0xf9e48b77cfeef40dL,0x8a566a5c1961a274L, + 0x7e288e774935ffb4L,0xf4d9e1a989fb716aL,0x00000000000001b3L }, + { 0x27596e1e6d08166fL,0xc36263f89c92f5d9L,0xff7193ddc1150003L, + 0x928de56fa4c5e839L,0x5709e478ecf9d5d0L,0x78834f6b90cc279bL, + 0x3581d7a4a251ce7fL,0xe66b47cd30d757fbL,0x000000000000013dL } }, + /* 16 << 294 */ + { { 0xeb4b23274247e72eL,0x6660a1e5175cc2ddL,0xb6cb2db4fa58cacfL, + 0x035b7ab3f93c9b5cL,0xd2582c31bbc94809L,0xb80863e54328d05fL, + 0x61f99dd580b782b5L,0xa17fb8fc59e27db6L,0x000000000000004dL }, + { 0x4bdd2a99e6b71bdcL,0x586b3ce9210c0d88L,0x590b0c459295119bL, + 0x1ee8b021a417f7deL,0xce83ae394efecb1aL,0x06298f05eb3e3c84L, + 0x0e13905295e6d8c8L,0x5e8a4031babc6bd8L,0x000000000000011fL } }, + /* 17 << 294 */ + { { 0xa45c4393de825f40L,0xb9ec95008c3cf0b7L,0xebe3990e65d35126L, + 0x2de346a7703debc2L,0xdaaaa35182941b06L,0xb233de5e0a85d32cL, + 0x6f954814ea2ff0fcL,0xf60aaa4dbbd2cf8fL,0x00000000000000a4L }, + { 0x844743f9f912025aL,0xcc428af1e86e6a91L,0xdfe077bdd8b2ecdcL, + 0x20eb2e7a0b8f76e2L,0x6063a73f0c3abbbaL,0xdbdcff05caba37acL, + 0xec2f6cb84aaa8012L,0x1ae4c16d8cad3681L,0x00000000000001c4L } }, + /* 18 << 294 */ + { { 0x57d2461128803c3cL,0x9c6f153e905c766dL,0x2a8d0e65a3df5f03L, + 0x5fb1c0c5ff0dc35dL,0xe3420a10bb4bd9b3L,0x464e2fa04c4a9eb0L, + 0x511a5d07694c91cfL,0xb23f2f1a52c89680L,0x000000000000014aL }, + { 0x3d9d955113088161L,0x3747c9ca64c5a2d3L,0x5d13ffb630032fbfL, + 0x5f162ef0c0855550L,0x68bc14cc7f54d72fL,0x8ede5be68d825c01L, + 0x437cafd1d87e0007L,0x7040f1e0bca3477bL,0x00000000000000dbL } }, + /* 19 << 294 */ + { { 0x28cce0ecb9bebb36L,0xb29c8a152f0621fbL,0xf651326b8d2580f0L, + 0x26df142f35cc66fbL,0xe3750e9e14989eecL,0x55305d0d09feca96L, + 0xe006e3b93188fa22L,0x19b0e9242df6f347L,0x0000000000000093L }, + { 0x53811345b5c4cdb2L,0x57ac7a2a79fcfca5L,0x7814795cae76763eL, + 
0xecc5a5628821cdb4L,0x4cb555779a55aef9L,0x8e7a4b3792ef5935L, + 0x45f20156f18a1791L,0xa785efdbf98c417bL,0x0000000000000038L } }, + /* 20 << 294 */ + { { 0xb9b6ebf9b1351aabL,0x3199f6e349a08f81L,0x3cfdb7175df347a9L, + 0xad627841b15d1ecdL,0x994ac9a2d4918f31L,0x3f3b84a1a482e07eL, + 0x82f47118d01df2a1L,0x1ce7c2530612e165L,0x0000000000000117L }, + { 0xa4d357d1b1a67846L,0x0ed36337a5f6313bL,0xf759acc56224b03dL, + 0x9f79550a43833acaL,0x71238a8d5b80bab0L,0x2f5d78d1f5a7e9d2L, + 0x34844c10af79cf0cL,0x202250a1878e03c7L,0x00000000000001deL } }, + /* 21 << 294 */ + { { 0xcee095846722f2e2L,0xa7e2d5ef2b812e11L,0x021a444781d9a125L, + 0x53b6ac752c3c35a1L,0xd41b7fef6cad23ebL,0x6f10644550687730L, + 0x6c5f184cbda38409L,0x4fcae4e8afaeeeabL,0x0000000000000058L }, + { 0xa6a280db7a9ad8daL,0xbfab051035f6f97eL,0x32ca44d4a6d62fe2L, + 0xc7d3521955b5a2b0L,0x5ecd78fd85d6e1a4L,0x40b504b0f0f12cdfL, + 0x483df9abceb7c1b2L,0xf416371483db0c9aL,0x000000000000000cL } }, + /* 22 << 294 */ + { { 0xd7d6b8dae1e2262fL,0xdd1ce7929442a0a8L,0xb3338e7a23ab20c5L, + 0xc454dd1843e4d64aL,0xc03cf46e65e83085L,0x219b81220c11eb73L, + 0x9e2f67deff2cd3d4L,0x46fff24e3a0d6beaL,0x0000000000000131L }, + { 0x990fc4b50f52b1d3L,0x980926dcfa16bdcaL,0xf70295610e725151L, + 0x009a73258100d906L,0x14beed1973be583dL,0x862ab646ee8ad63bL, + 0x96bf39cdac7c1267L,0xe73e0eae5d9e3cabL,0x0000000000000053L } }, + /* 23 << 294 */ + { { 0x3ca878fc4806181bL,0x405c7b2811146abdL,0x2d16043c0771366eL, + 0xd07c69fd4027bbabL,0xddadf4e4fbcac82fL,0xf5a67b2b7559132aL, + 0x942a4965f1a47aedL,0xfca25633a52e8212L,0x0000000000000022L }, + { 0xaae49d7b10ca9169L,0x97c05152759d11deL,0x655d94ad51815da1L, + 0x57445ad9b5063685L,0xfced3ea4c51e8dfaL,0x2e9e229089e37384L, + 0xe62ae24628f62dccL,0xfd5969067f5a5b72L,0x00000000000000c9L } }, + /* 24 << 294 */ + { { 0xaa91d38ba207739fL,0x8c5e7ae36cb4b052L,0xa785648126932370L, + 0xec5f6683810f6a28L,0x56d9b66fdd4a1a39L,0x936d24f5b4ae3605L, + 0x5b89adb53ccefa36L,0x7d893e3ac6a0096aL,0x000000000000015eL }, + { 
0x98cecb6d99db465cL,0x66db3e132c1113b7L,0x9dfafbca881d1c5cL, + 0x78d6b56821503e69L,0xb0197722f05ac1b1L,0x2523d52bc45e608dL, + 0x8e77c13ceeb9ef93L,0xc5c6e0e756a50594L,0x000000000000010bL } }, + /* 25 << 294 */ + { { 0xbe9639f5607be4ddL,0xb7d415b59e82139eL,0xede6ac04d77f0ac4L, + 0xd8471424ceeb4155L,0x3fde6834bc76aa1bL,0x9020c58cae990b2aL, + 0xbe49e85009e1f067L,0xfe76c73de5218d9dL,0x0000000000000077L }, + { 0x787b9b1a0b3809c0L,0xd095141cf230c780L,0xfd6de04e1550c58cL, + 0x648eef5d108a70f6L,0xbd0974b51fd5d1caL,0xc29be634e9f30f9eL, + 0xb8559f72240c47b3L,0x8787033507c3bcdbL,0x000000000000001eL } }, + /* 26 << 294 */ + { { 0xfcf2897b3170660dL,0x29f6635c142ac9b8L,0x1b6d6f170b575017L, + 0x254f887f443ebbfaL,0x5659e0e267479d77L,0x7f8597a73e6e63f8L, + 0x13d34c6edcbe3f6cL,0x061b32786dbd9c9bL,0x00000000000000b0L }, + { 0x9240fa42f1d121d3L,0x596d9037c65540a3L,0x420820c53103dd1aL, + 0x494911c61334ddb6L,0xe0b43cc55fee1bfaL,0x6336d37087b32edcL, + 0x726ed517f52b8682L,0x7c94653320a217b3L,0x0000000000000031L } }, + /* 27 << 294 */ + { { 0x13f2450b909c1a88L,0x3344c3cdccbda714L,0xe230f4a809da0a01L, + 0x3fed866bfb805ca4L,0x2fe1c1cfca3269efL,0x49f7a32b47965aa7L, + 0xaecab243bd604104L,0x79769ec1a97dc613L,0x0000000000000134L }, + { 0x107daf6c16bcca1cL,0x1593235a005a0645L,0xae1114dcea3b7e40L, + 0xfc3e330ec40033b9L,0x579ceaaf9e3b41e1L,0xee0d6fe7a459911eL, + 0xfc55400245f98d3aL,0x2b995f850e73fc4fL,0x0000000000000102L } }, + /* 28 << 294 */ + { { 0xf7ed6ffdf50a3d85L,0x920b807525ecd014L,0x9074d5d14ebbd57eL, + 0x2c7656f08ce72dfeL,0xd17c1313336df7b8L,0x342593894a6a7112L, + 0xf9112c8171d23283L,0x80ee8f87b19617b0L,0x00000000000000daL }, + { 0xd411f8b2e36790d1L,0xe0352d7ac03b68a7L,0xe2b16d4e55dd2c1fL, + 0x3282a3de3b874068L,0x26fbc96e1b3d4991L,0x42f10b4520d08473L, + 0xf17dce35c84ed8e7L,0x78da06e791f15069L,0x00000000000001daL } }, + /* 29 << 294 */ + { { 0xf9775c57cd2967d5L,0x65123c2cd738174bL,0x4d91a7cdd3820c86L, + 0x3e709d34caf234acL,0x66250e69863a7e8bL,0x73cccdebb1885fbaL, + 
0xca872751809b48d3L,0xeb4ca203f236ea5cL,0x0000000000000115L }, + { 0x7e499db1bb1d64a0L,0xff0b67f3c37a7a2cL,0x443c5f612741f78eL, + 0x0768cb396aa2725fL,0x98c48c0d92dfbd7cL,0xfdada4a36fb67cbfL, + 0x00c916f66b221b57L,0xb25a08a38fa18490L,0x00000000000001bfL } }, + /* 30 << 294 */ + { { 0x32fa6aa0ebf76affL,0xbbb4b69eea27786bL,0x1a61db994113e36cL, + 0xd9ec11e72884fc1dL,0xa196c2e729d7cacdL,0x076ce25757e81101L, + 0x2d9b9f7041062d32L,0xa0f7f96948d96dc8L,0x000000000000016dL }, + { 0x7ae29604caa1564cL,0x7860e39ddbb46de3L,0x099372d4dfc84fb0L, + 0xf784833c650622aeL,0x9711ddf9d66fff47L,0x62b51a5f046095eaL, + 0x50d05cef44618f6cL,0x2b3bc7637a5aefdbL,0x0000000000000024L } }, + /* 31 << 294 */ + { { 0xbd875cc5b89c9889L,0xaf8042931aa82e4cL,0x4ce2924dde1defecL, + 0x01f089d19b41586cL,0x9f2310e7ff9732bfL,0xe4c86bf3b0bb7eb1L, + 0xaa010c968bbc420aL,0xed2c9c7164c47381L,0x00000000000001d8L }, + { 0x4a6b565e045bc12eL,0x36bf29cf77275cdaL,0xb464be38b331e0c4L, + 0xbdaec4d0cd49e3edL,0xa8c0eef1a647036aL,0xc40dd02ed31fa960L, + 0xc7a368ccdc2f5113L,0x3bad9d88e632a505L,0x000000000000006aL } }, + /* 32 << 294 */ + { { 0x709ac1575b6e48aeL,0x7ef27b0f95eb72ebL,0xb1bca839d987e330L, + 0x16d0f9b110b85d29L,0x08d6d9b2a5e559f2L,0xdcb7602d980b6daaL, + 0xb38f08a5f2f9b64cL,0xd9adc282e2cf92b2L,0x0000000000000164L }, + { 0xcc6a51913137f926L,0x9e4ccf8488821c6cL,0x0b3991794c8babe2L, + 0x4269ee3929c119f0L,0x1374f63a3509d23aL,0xb7563c4386b73069L, + 0x2ad51f46077f8bbbL,0xc383bf47d8d3310cL,0x0000000000000079L } }, + /* 33 << 294 */ + { { 0x1d6891d39c0144e6L,0x52ac8f54ccdbe3ccL,0xb8332000c88c51c3L, + 0x033a0df23700556fL,0xf97e93d0ae841be2L,0xd8b2ae2e08523501L, + 0x6eeb8117928f1a3fL,0x81999870a8d353f4L,0x00000000000000b3L }, + { 0xc84500b07b8bb4b0L,0xae616e25d3320838L,0x0bd7d96a43cfdad3L, + 0x8f9f3fbe4f1f18b3L,0xcbb1ebb2592a0c27L,0xfe42cf82368d7ebbL, + 0x6ddf64b8747c2e25L,0x20e39304cbc56bb6L,0x0000000000000152L } }, + /* 34 << 294 */ + { { 0xa1259eb0bb2db655L,0xa89c9e6723209bffL,0xf09647c4a6c417deL, + 
0xf92aefac3904a74bL,0x0704a73616ca9de3L,0x9adca9d445a1c8e8L, + 0x07a94afe87213d9bL,0xf74f1787766b608cL,0x000000000000002cL }, + { 0xa9546f6cb6803298L,0x890b7f942c895a2cL,0xb63ee1f2ac1c77f7L, + 0x1d40ecd67d5d03b8L,0x170a292646dba729L,0x11c4fcbbcd7545ccL, + 0x39e139d129a71c77L,0xfba76833cdb60a56L,0x000000000000019dL } }, + /* 35 << 294 */ + { { 0x8887dfd9515bc9c3L,0x1cae6d1e9a3bf242L,0x37ab9d8905a3a363L, + 0x1d37a7a883c27e10L,0x053e57b62672c074L,0x97d3adbad73a2ac8L, + 0xb633f506bcc69d6dL,0xd4e3c2b08a152835L,0x00000000000000f9L }, + { 0x6ee41d786c369bbcL,0x4ad8ad62941e1248L,0xf4e782a534050f32L, + 0xa8ea4017752206f9L,0x073e2905a6c0904eL,0x536f600f5e5e161cL, + 0xd67e05f5518ceee8L,0xde6a5527c3b67d17L,0x000000000000010bL } }, + /* 36 << 294 */ + { { 0x7bfeb9af12391b95L,0xd79181d83d2855d9L,0xeb0cb5ba84bc3f73L, + 0xc7aaee27d4577568L,0xbebb1d976d62aaf2L,0xdf7f87711c0d2c76L, + 0x8e735dc01390efe5L,0x324a3fef10b0342dL,0x00000000000000ddL }, + { 0x0f074d4be5ef45e7L,0x2af56abee6c99cfbL,0x55f70e37666b03e4L, + 0xb1f0116afed0a21eL,0xf9fe9c995603998dL,0x70507e45a43bfa71L, + 0x68d2f7d4ce0e4a10L,0x819fd53796d5a95fL,0x000000000000006fL } }, + /* 37 << 294 */ + { { 0x0c0b1628c0ce3722L,0xef71fa4e06194427L,0xe9ffc8fd35815428L, + 0x5ba6b0819d519771L,0xc0ff3c5e81293238L,0x5a91a345e4d2fa29L, + 0x6bedab154d0e36d9L,0xa2860c0014f98febL,0x0000000000000145L }, + { 0xd39d5aa9b47fcfd6L,0x7d6097fdb3537bdcL,0xcc18e7c947a1b76aL, + 0x3a4f5d36e2ef31f8L,0xab932eb1656164d1L,0x48ad2dae3bb3c089L, + 0x79240de0dd6942a5L,0x526b6529f9e208c8L,0x0000000000000130L } }, + /* 38 << 294 */ + { { 0xf53da60c28eba272L,0x120745e32d8a9f6bL,0xaecd0eced8dacf70L, + 0xa40838eab208a28dL,0x5a1a7c465d52ddc5L,0x3a57c3b47e8ea970L, + 0x0e1cbbe5ac84baeaL,0x6b65d65510465e38L,0x0000000000000170L }, + { 0x0f1ebb96b489253dL,0x0c973fcbc40f5d44L,0xfa1cc30044a55f05L, + 0x35173c9e07b22802L,0x4ba69f3619062869L,0xd0986139f6602251L, + 0x26fa63d1e46cbd26L,0x5b75686d57991c42L,0x0000000000000082L } }, + /* 39 << 294 */ + { { 
0xc5115f182596f8fdL,0x3eb7ab5627e6366fL,0xb46be9749a07d0ecL, + 0x5a6e73b4dc031cbcL,0xcaaa10655df414f5L,0x42f6367757566770L, + 0x475e0a1036b5a3d3L,0x2fa0375c46291bc7L,0x00000000000001e4L }, + { 0x541780b5c542ce68L,0x8a2dca3d229cf7b2L,0xe792776ef974bae0L, + 0x998503c7f582aed6L,0xc8c05f91dc12e582L,0x8fe4af98095607b5L, + 0xccf8f7d8eb35165fL,0x292d6f084818a888L,0x0000000000000085L } }, + /* 40 << 294 */ + { { 0xf2f55e6187055f7cL,0x96c05bad1c7b6ae2L,0x24d4ba609b81ccc6L, + 0xc4c666e9b704f1f6L,0xc505d03fec354e60L,0x6ee24bea512b62beL, + 0xe86bc686b5ad8726L,0xdea6279ec3fc6e18L,0x0000000000000196L }, + { 0x2bb0ed8b4f92584bL,0x4cdf258aa481affcL,0xf067c8c665362effL, + 0x0341567f73099e69L,0xa42fa640487d6afbL,0x4856eb7e41d7455bL, + 0x7deaf1b1d3730d35L,0x7158dfe5d247b629L,0x0000000000000068L } }, + /* 41 << 294 */ + { { 0xcc0ff75ba1efb92eL,0xba0ae458675c11e2L,0x7e9bb0646ac807cbL, + 0x5050c5000f219badL,0xf8135f19babe7ae8L,0x3e7c7d8e65eda230L, + 0xe7ce162a30069ec8L,0x7c3c5a9262a18f99L,0x000000000000015fL }, + { 0x82e3dd1fddf2de5aL,0x130d220e94ff0565L,0x06f36b112ac2b85eL, + 0x78f3dd866617c47fL,0x1aa3510d1a1d5df9L,0xce8b3b99c16a9a31L, + 0x6b56971575712816L,0x573cac34cb8ca234L,0x000000000000000cL } }, + /* 42 << 294 */ + { { 0xd92f6b9d75b13008L,0x9a504d49a2bb0d27L,0xb38fde71b9ba11f1L, + 0x4901413b2e9f2060L,0x8d13eeeadee6353eL,0xd84abab0485f7dd2L, + 0x620df02792f99ca3L,0xc03f647f5e4d1b12L,0x000000000000019aL }, + { 0x54560d5e25f0441fL,0x4338b22c51d1f048L,0xed6b559a159b7a05L, + 0x476886554fdc8368L,0xf42f1a082eab0196L,0xac60f4d1f3b18394L, + 0xa915b254a5f7a4b2L,0x73b6a490db363c41L,0x00000000000001bdL } }, + /* 43 << 294 */ + { { 0xdcf1a754ea2591e8L,0xfd641959381e2f22L,0xfbf1c118a66001ffL, + 0x4269279d3413d83bL,0xe4ac33ac4b48066aL,0x6e6c559e0dc98b68L, + 0x02a9a9e02faa1329L,0x8648b1ba747c1385L,0x00000000000000d9L }, + { 0xc0aad5a5256c9a1cL,0x73129d17a8e79c07L,0x23145244eca7c812L, + 0xfed6f8e785cac27dL,0x0054b0438269c8f3L,0x5a78b38da889f04aL, + 
0xbb890a92bf9897f6L,0xc7ce53668d484412L,0x00000000000000e6L } }, + /* 44 << 294 */ + { { 0x23095fdf14c98670L,0xa48c421e9dc0f253L,0xbe07424b0e23ffa6L, + 0x2d6b8ac86e8dc32eL,0xe3d6e195ad8e1120L,0x2f1ee8e1ffc71daaL, + 0x603908be648bd635L,0xff134805e912c300L,0x00000000000000ddL }, + { 0x0ffcc425d4322e89L,0xc809f5ed8f09f42aL,0xc1114010ef2508a2L, + 0x5c07dc01c51cb0c1L,0xf3d650a11d946eb4L,0xd907ad9a51e6b2b3L, + 0xd588d588c945f2d0L,0x7dab07bfd1faab39L,0x00000000000001e1L } }, + /* 45 << 294 */ + { { 0x9d5b2544a3d51ab0L,0x084588178813a487L,0x4159e69cb8d92933L, + 0x59494a445c5ee13aL,0xd339bd9cdcad1546L,0x3c021d0f1fae07edL, + 0x07c938bf8be84dd4L,0xe4509d7a3163647eL,0x000000000000011bL }, + { 0x29cf63b6756908ceL,0xa229c250b92741b3L,0x1d4507fd023662a4L, + 0x5ec7929f7f96cee4L,0x6c6abb6dad431e51L,0x99b1af2bb7feede1L, + 0xb08a47e466ceded9L,0x77b5577b2af814d4L,0x000000000000007bL } }, + /* 46 << 294 */ + { { 0x23c220f22f3ee9b9L,0xb3a0e0c9bf727513L,0x3e276e115108bd41L, + 0xf269c6b23fb319d4L,0x28a035b0e40e70f5L,0x9974d2d490400c05L, + 0xf7d62731e4e71f3bL,0x44119e4474b742deL,0x0000000000000000L }, + { 0x502b39745f29b07dL,0x02f9f845c8b1ec1aL,0xa80e587d1ff9b054L, + 0xc0e07aea1fd0e64cL,0x8b7e83622e582f60L,0x3e21f291ad1f1dbaL, + 0x214395c034de6756L,0x5728d60a7d45dc64L,0x00000000000000b7L } }, + /* 47 << 294 */ + { { 0x39922191c05c3ffeL,0x5de7dd5586c48ba9L,0x325c9b5cd38564afL, + 0x6a88a43990fa14b2L,0x87cf5b102362445aL,0xfa0eab8bc88b3f68L, + 0x18f7e5b5341e78b9L,0x31b27ad4116fd87eL,0x00000000000001c8L }, + { 0xf0ff17d4932024cfL,0x205a03fe3a30a631L,0x60b6627b733c9445L, + 0xb091ba56a715d6f1L,0x0c4397ab3b1a6b21L,0xf16a58e63a2bfd3aL, + 0xd217ddb7b6447176L,0x79d9a86cc2536744L,0x00000000000000faL } }, + /* 48 << 294 */ + { { 0x29d8faf43544cc95L,0xcdde285818e4e95eL,0x0a01b08dfe9368caL, + 0x40512dd7c6dbbcdaL,0xcef252a068c1423cL,0x98204e95aae7650dL, + 0xb922928b77e51a04L,0x9a77021a72eb97a3L,0x000000000000018dL }, + { 0x5647c6f296c7c29cL,0x6a17e12a870ba81bL,0x42df0e2e73c0ee75L, + 
0xa214b4098765ee48L,0xc098d30234faf6eaL,0x0f5b59a79aa0e1a1L, + 0xd73224f17c87855fL,0xb78b168cd6d2a061L,0x0000000000000050L } }, + /* 49 << 294 */ + { { 0x070fda90fa5df695L,0x3160656226b01e04L,0xf6e12156de3aaaf3L, + 0xed0f215de7dd01d6L,0x57e283329659a00fL,0xc5969e57935e63c7L, + 0x90e3b2844178ad18L,0xbbcb285453123b93L,0x00000000000001bcL }, + { 0x36b29ac2195f63e8L,0x5771e9f017eb8981L,0xfe74b12b5da40865L, + 0x5f0ff67386d920b2L,0xd327498440426defL,0xdeaef693e06941f1L, + 0x61416da7dd68bf90L,0x8ff9d2bdf1723ba8L,0x0000000000000109L } }, + /* 50 << 294 */ + { { 0xd1da2b970f4ced78L,0x9486496aaff53bddL,0x3b2c09e905787a65L, + 0xd714235e3bf092b0L,0x2c2bd98def495c76L,0x48d93c1256044673L, + 0x897376addff54802L,0x68777721468af1b8L,0x000000000000016cL }, + { 0x5bbb6bed1f304dd6L,0x35dd2294241518a1L,0xb94de945df0e416eL, + 0x016ce62126a1b13bL,0xbd7cca2324a58997L,0xe3cb85775eb8b579L, + 0x8ed1e530dfb46f50L,0xd3e45de5b7bebb2fL,0x00000000000001f4L } }, + /* 51 << 294 */ + { { 0xe3819d9a1c771ea1L,0xbf690720dc59eb84L,0xa541e37699348dcaL, + 0xb7888fc369fbe622L,0x1c8a5762136e6a6fL,0x9ead48dabbfa63abL, + 0x23ea7bafb1ceea24L,0x9e5b105b5f2cdd03L,0x0000000000000179L }, + { 0xa0d739e636b63e7cL,0xdbe55702dbda9abdL,0x73866365556299bcL, + 0x4c48b2c9efe0e38eL,0x06427eeebf017592L,0xf3d389aae05788eaL, + 0x1db17dbb783c8e0dL,0xf1a6d1560552d575L,0x00000000000000baL } }, + /* 52 << 294 */ + { { 0xd185cfc3f5823bcaL,0x94e833382d648a40L,0x719def2c4d22d2deL, + 0xe23be64f266e78a8L,0x8830cb3e752ac9cdL,0x2598bc00d0f2cc62L, + 0x42513e2c62336adaL,0x7343da72b35e9e9dL,0x00000000000001a8L }, + { 0xa38d5d2219b56641L,0xd7e2949e94a2e805L,0x03f06fa6a850f355L, + 0x744779e0dea5393aL,0xf6078b4c9cf8ff02L,0x3a150184d43248faL, + 0xf2b064f88cce580aL,0x3894f51466eaf1aaL,0x0000000000000049L } }, + /* 53 << 294 */ + { { 0x5bf8be23e3841518L,0x390df312c1d8812eL,0x87c36912461bffaaL, + 0x9159a05a2adfaa7cL,0x7c1ebbe962f281cbL,0x9c0a76c9082166d0L, + 0xa929dffb920301aaL,0xc32334dfd7fd6366L,0x00000000000000b5L }, + { 
0x3d292b74f63f5f63L,0x022e7982a5482092L,0x45808135f3179942L, + 0x7256ff64865831a6L,0xa5c9588e5a3e9363L,0xcc7d2ca88c82986eL, + 0x7bec162f05626c9dL,0x89da9ae77b9ff705L,0x0000000000000198L } }, + /* 54 << 294 */ + { { 0x5b038af3f5bb585bL,0xb98ecb5f37bd3ae5L,0xc1b18cb194be2a43L, + 0xa9618ec095e4d777L,0xc40f2dabc748a4bfL,0x21904d556228ac53L, + 0xc660cec8dde83410L,0xff36f88f50979bf2L,0x0000000000000160L }, + { 0x9628fa6551d8881bL,0x73f061b88947cb47L,0x79f5d6c845cf0762L, + 0x49504051376a43d5L,0x70c737b7a3f83f46L,0x59d034dd0cc593bbL, + 0xaa0875df26342c19L,0xa585ead811d79d98L,0x00000000000001ddL } }, + /* 55 << 294 */ + { { 0x1779f234ef839635L,0x8f58efb1f40a5a65L,0x1d6206ab3e5cc402L, + 0x1bbd6fd7e912df6eL,0xc924914685b7d132L,0xff29fea43e802e9cL, + 0xc61175867bcf48b9L,0x6540528e9282c06dL,0x00000000000000e1L }, + { 0xd0f41b96127c2594L,0xe7b7f49987819eaaL,0x86005a00584aaa91L, + 0xb82005e47840a3b8L,0x37a7f2cccf139be1L,0x483a5922c7f15281L, + 0x606a1b81c55ae9bdL,0x93027e7d27ed3387L,0x0000000000000144L } }, + /* 56 << 294 */ + { { 0x095c54694caddfa0L,0x7db5b97d383c724eL,0xb4698e9a92ca76e8L, + 0xd964ac2001ae58ecL,0xd743f813b822b97dL,0xd8de0d8520171bacL, + 0x549e8ae5ea164044L,0x03c9d920f8426c21L,0x00000000000000bbL }, + { 0x794be56d907e69abL,0xd3c037943abe9f98L,0x9a5eae5c84e132d7L, + 0x9a7afb80bf979b72L,0x624910c94bcba819L,0xfef0daf5adb707c4L, + 0x7c9d0680ecc8bf66L,0x342fd8b9681d3792L,0x00000000000000a5L } }, + /* 57 << 294 */ + { { 0x31af9ea81c876e00L,0xf54b3306c17ce3b9L,0x9da29ba8b7b66baaL, + 0xe8f958bdec97fbaeL,0x9aed85e5d5d5ca73L,0xde6ee26dfcf4f1d7L, + 0xf4e2831640d20e59L,0xc344605f84d7d1c1L,0x00000000000000b3L }, + { 0x5488e1b958226056L,0x79c948a9cb124c25L,0x278af742f40ff7b7L, + 0xecc60fa1740757d1L,0x648f9a18476f60f2L,0x28d72cd41ec656a0L, + 0x0679149a291cc21fL,0x74d87b8355eba80bL,0x00000000000000e4L } }, + /* 58 << 294 */ + { { 0x6d48fb31ad6f4fb9L,0xe536f4b6b626e78dL,0x49aae08b0d7fdbe4L, + 0xcb3963457a9ed668L,0x041268b9b63b577cL,0x0b1593d8ea5b7d95L, + 
0x26c9109ca54dcef4L,0xcf5576e783ea872cL,0x00000000000000b2L }, + { 0x835382690ae7e08eL,0xe844c9b9f4c32ddfL,0x839bf854110c1a1fL, + 0xfaa5f6775c69fab1L,0x198e91e6e763e4adL,0x47da5f178f63cd42L, + 0x61c638a62c872558L,0x6bf1c8895618547eL,0x000000000000018bL } }, + /* 59 << 294 */ + { { 0x6a8cc986caae6ae4L,0x1d7c2d67d6d6435aL,0xdcdf751c1f27d040L, + 0x084a8ad907b0b209L,0xa03ce81aa4cb20a0L,0x8bd7ee7273b7ce3dL, + 0xc30a9ee4bdc436dbL,0x09c04ae46034aff0L,0x000000000000007bL }, + { 0xfabd79026c6bd45fL,0x799e004bc957c862L,0x5638fc917a905a28L, + 0xf0faba028a378c7eL,0x89e982772ac3baf0L,0x80c111429112a683L, + 0x134bf54ad1c0a381L,0x6981a134a371b908L,0x0000000000000165L } }, + /* 60 << 294 */ + { { 0x8cdf5496ea2d6f47L,0xd16475ce1384a82dL,0x443c6bbe79dce016L, + 0x520583ea11c229e7L,0xc345965d32d1ee47L,0x046c5c214eff5930L, + 0x9810075e19d48e5cL,0x3e1425909cc794ddL,0x0000000000000186L }, + { 0x254d003679cf471eL,0x87a8c5ed0d5a2f06L,0xbc16fd945516fbdfL, + 0x2e7c1bf26fa2a909L,0x28297856237ba960L,0xcfb6b336091507fcL, + 0x0cab1eb283a9939dL,0xe49bc8d8aec9cb63L,0x00000000000000b0L } }, + /* 61 << 294 */ + { { 0xc62b48a8982b000eL,0x8277c2682d1953b3L,0x19cfe475a9b47cd0L, + 0x310d9c740992a068L,0xc03ee94ec1dbed2bL,0xc26915c631c025c9L, + 0x46d1a3136d451124L,0xd8840ea94a5e33afL,0x000000000000005eL }, + { 0xbcb605537c315236L,0x01b759256d7c2f69L,0x31e142f17568be8fL, + 0xd39380b76ad30805L,0xeafc7a683c516d0cL,0x83a284ec418c61d5L, + 0x493a0f10045a330fL,0x548e81d57c31cc6aL,0x000000000000006cL } }, + /* 62 << 294 */ + { { 0xf3445d6401011685L,0x58c874941bcba0abL,0x4a5fb2a29e299226L, + 0xf76e578d815c50bbL,0x996d096f14f6347fL,0x6056b8185831b153L, + 0xda2fd3d05d5d22b4L,0x512217c398f9bc2eL,0x00000000000001bcL }, + { 0x82c87114979f39abL,0x7a032a31ca612d87L,0x81bd29f591930220L, + 0x361732d21fc75c2fL,0xff5fd67e43ce4841L,0x53b8c32d784a180bL, + 0xc666c0a34a359515L,0x78095258fd91743fL,0x0000000000000002L } }, + /* 63 << 294 */ + { { 0x22c1d1baa579fa74L,0xa3000c7398e2761cL,0x35fa789b86e47f27L, + 
0x78247dded7ceb040L,0xe86dae9bf0211813L,0x4526464eb9a8e680L, + 0x8a415c8dcf84b9e3L,0x2a3d5df6bc25d1acL,0x000000000000006cL }, + { 0x34af41b5b5ad0cd3L,0x18920068740f40d9L,0x3efc1ccfbd6c4c23L, + 0x7d188dcb889947efL,0x9b63949593e850cdL,0xce1d6ae38bf50f30L, + 0x3f8370629c1740b0L,0x80d89ab62e93fa82L,0x000000000000008fL } }, + /* 64 << 294 */ + { { 0xcdb8a92c817221d9L,0x7ff2643e780ed8a2L,0x5efcceda56fc9caaL, + 0x59b5aa12f823d2fcL,0xfe97818b9dbfeb54L,0x2becceafa13dde72L, + 0x5d5b53cee8692b1bL,0xf5e5ac69cae9ddc6L,0x000000000000000fL }, + { 0x6ee75741c692d9ccL,0xd97eb6f262a91d4dL,0x8858ba5de2924e75L, + 0xb8e1b22a517030b1L,0x651a68382af83dfbL,0xc1fb14d21c21be14L, + 0x87689d46c9eba60eL,0xe16cfc1a0da1a29dL,0x0000000000000107L } }, + /* 0 << 301 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 301 */ + { { 0x4021a24de3c91b63L,0xf7cbd01a0b896f6cL,0xb005e27e145743c1L, + 0x170d30313d2c0449L,0xb2369781715e87bcL,0x7ef5d8e0d6e85650L, + 0x1a5fa6c7b232c66dL,0xc0455caedb20ffd8L,0x000000000000001fL }, + { 0x3acf9bd305c7c488L,0xbf769bb0294df5b3L,0x8d5f90958fa692fdL, + 0x9b20f4c777aef48dL,0x312134317987ad09L,0xc0b03aead6a327e1L, + 0x5be9a205e6f56badL,0x2db39c5e6c7ff60dL,0x000000000000002cL } }, + /* 2 << 301 */ + { { 0x06e672911fbe4b1aL,0x2df4c01c7328b43cL,0x811847ea7350889aL, + 0xee53f7eff19988a8L,0xcc3fc6d371578d7dL,0x20682a7b7b0f48ecL, + 0x6979a4f21ec4cd51L,0x58272b7c845bc78fL,0x000000000000000eL }, + { 0xceec799c35e8ef48L,0xa01333eff9ec8ae5L,0x5b99b83ef9262e87L, + 0x37c4df170f427205L,0xb20cef3ff972dea8L,0xb63ac380c6c4c976L, + 0x0b915c3290866381L,0x8817b26a91eedcabL,0x000000000000001bL } }, + /* 3 << 301 */ + { { 0x7ace3143a092069aL,0x0bc9cd595e70d5caL,0x1c26df9860dae8ccL, + 0xf4358cefb881888bL,0xfeddc9766632ab56L,0x08206f72b870dda8L, + 0x46f76ea4cc81f687L,0x865b6cb3ca702001L,0x000000000000007bL }, + { 0xc064a2e3b099af17L,0x88beb2371d625dcfL,0x507be25af7b18ac9L, + 
0x46c8c36077f8ed84L,0xb019195fadf90251L,0x386ed5ce56c4eb40L, + 0x46427f9c870504a2L,0x156e3c6f36d0f019L,0x0000000000000033L } }, + /* 4 << 301 */ + { { 0x06e0332e000535eaL,0x5c58e86403bbbe55L,0x4d9a6db110b9b5a0L, + 0x7ef09cfc6a24db5dL,0x61d105f4819d4d78L,0xc5e243dd82d474a2L, + 0x8742d427886e0d59L,0x08cc598e079399e7L,0x00000000000000afL }, + { 0xb1af75d1c8e53592L,0x74857a7fca9dbfddL,0x2f7f11d8d49cf79fL, + 0x82c78c0d7c4073d2L,0xbe13602fab4c74c0L,0x4282184ed0d37ad7L, + 0x719bb29728289fe0L,0x08fe79e9b09427e0L,0x0000000000000192L } }, + /* 5 << 301 */ + { { 0xfb9970015eaa1129L,0xbec9a530e84da535L,0x133710955d6afc1eL, + 0x7156b33510b368edL,0x86d22c634c902f9eL,0x64ea7336eaf83f1bL, + 0x700571f6d4313851L,0xe6398407a8fb758cL,0x0000000000000191L }, + { 0x004bcb3ad7673444L,0xa0ad23ff6c5d7b10L,0x86969ed559aa896fL, + 0x80c4897508a9f27cL,0x37a8337cba7fc097L,0x295b4ece65a34328L, + 0xb14eceddf56efccfL,0xcf9265ea638d1620L,0x000000000000017cL } }, + /* 6 << 301 */ + { { 0x91a6ba108d64773aL,0x18e9f8c089ac1f66L,0x49f08c886ff68e9cL, + 0x57b6fb9131913442L,0xdce8f943d9be3a61L,0x03a6d0137c897a92L, + 0xa3cf84d469451d27L,0xf9d603b978585859L,0x0000000000000019L }, + { 0x9b651ee0bbe619d9L,0x6595951edf219411L,0xc4b2ffb73e53e02bL, + 0xc32b5aa6f8bd17f1L,0xe7788f4a97f05123L,0x31d33abeb624663dL, + 0x9e736612d8b9fcd9L,0x6b948711fa3eaa15L,0x00000000000001a5L } }, + /* 7 << 301 */ + { { 0x7f209595588ba3f7L,0xe2407c9a57ea6473L,0x9beac14648c8914fL, + 0xa76746da7281653dL,0xb98edf137b9eaf4dL,0x4af237b992b8e681L, + 0x9681674885d4493fL,0xcf67088b2d55fb92L,0x000000000000002fL }, + { 0xfc9967792f89cd08L,0x62bf8d1b54cbddc2L,0xd9395c418dff283aL, + 0xc384a2e9e7ffadd5L,0x39f0e602bedf1036L,0x0fc173dcd28e10d4L, + 0x574c346fb4a775a3L,0x9f62fef62b897266L,0x00000000000001cfL } }, + /* 8 << 301 */ + { { 0x163a9e62eec6e312L,0xd11432c1459162d8L,0xdde7d941a25d4934L, + 0x9067028e8d868aadL,0xb71b17085b6e3d21L,0xaed73637e95d3c7fL, + 0x51bcc93c1dce0d78L,0x2f45d06a504a9d0bL,0x00000000000000beL }, + { 
0xec12ea4b300b2478L,0x91ac6eedf2a48388L,0xfbdc1bba7a7631adL, + 0x5b2669cb79d5f4ceL,0x219d64f948c8f025L,0xadcaab29aadb5873L, + 0x3b07af1c671bd673L,0xe66e7c67b7f2d8f5L,0x00000000000000c9L } }, + /* 9 << 301 */ + { { 0x877a56c5e8c21c07L,0xaa9b34cc64389ebbL,0x05d0452d80b06ab4L, + 0xef13a83f48e78fb1L,0x8f1ccb089003b1f0L,0x9b6f68ae0e240740L, + 0x3b4e3941e661e336L,0x3d50ab4f9673bef9L,0x000000000000002fL }, + { 0xd84e8415839a034eL,0x9af0ecb379490c58L,0x3fa5698f53a7f6c2L, + 0x7ee6a1fe1f5f8a6cL,0x759f0ff62d9134d8L,0x413d44f634a2bc05L, + 0xecaf95ee0fc28762L,0x881b307762a3eb28L,0x000000000000001bL } }, + /* 10 << 301 */ + { { 0x1d2684d8817a08b0L,0xdb7dca26c8be8d55L,0xa069d7e07c8974a2L, + 0xc061be91d03247e4L,0x1c6b377d7bd92a76L,0xcd7e0d7f50330391L, + 0x6de4e84426762364L,0x07276c1516426e46L,0x0000000000000102L }, + { 0x1744a4f50c65db3aL,0x08c6a33f9b8fb672L,0x31f23108ebf4762cL, + 0x7b2c0ab0181df967L,0x53cd3abd2be95364L,0x0be1aaa87bbb3eddL, + 0xbb0cb0531b469e7fL,0xea338278d6e16032L,0x0000000000000047L } }, + /* 11 << 301 */ + { { 0xc7f394748381c552L,0x986d2d61d55f8773L,0x1748580c54b59ccdL, + 0xe071cdd56194dad0L,0x1440e99aa75ba866L,0xd162309c0cb3e564L, + 0x45786580289fc2bdL,0x1d4120f1da2a714cL,0x000000000000011aL }, + { 0x564bfb392e7a47d8L,0xc43ad5f9c38c2c08L,0x60d4dd7acc7ead2bL, + 0x89166f8b91fde4a6L,0x7f1ac6ee9f62d204L,0x1a5728aafbfd7551L, + 0x7997f14a2e75d1a5L,0xfc2804ea649488adL,0x000000000000019dL } }, + /* 12 << 301 */ + { { 0x2d511de52dae34a2L,0x280126a9cc783bacL,0x247eb07f3b00a441L, + 0x909b0ea7567b28f6L,0x6045d39b53c3600cL,0x41714d14c75815a6L, + 0x6bef6123c601eadfL,0x3683ccf3d6343c4eL,0x00000000000001b0L }, + { 0xcfce0c48b23f24c2L,0x8a051e71622cd9bbL,0x07ec1bc49eb85129L, + 0x4ca832899cabb17eL,0xf095659733580331L,0x88e7f968e465e3beL, + 0x61c5c643086d84d3L,0xcd236eee26444e10L,0x00000000000001c6L } }, + /* 13 << 301 */ + { { 0x6a3f660b0dc590beL,0x32d22798247e0247L,0xc90cd006d66c6982L, + 0x653993195369eca2L,0xa1346bcab0838f49L,0x3b5d8514fb956f90L, + 
0x17fa86f9b27e8c72L,0x61b430690dbbfbe8L,0x00000000000001daL }, + { 0x3d46ae770fb82755L,0xeac173262c02b0f6L,0x869be2347fc42178L, + 0x845d561ac8b457dcL,0x8295d7de2c8495efL,0x57e84cd5f079a84eL, + 0x30e443fe60b7d282L,0xe0340819a844632bL,0x00000000000000e1L } }, + /* 14 << 301 */ + { { 0x0cec34cc215c702cL,0xf9534c2acde28ed6L,0x5694c1addb2b1db4L, + 0x78834607f0ce1dd3L,0x0897479ede16ad83L,0x711db79e7ad8d6cbL, + 0x4fec3d6a30d12f46L,0x2fa068bab2925929L,0x00000000000000e5L }, + { 0x79ae08ddd6d413d1L,0x7afae372233385f9L,0xa9faea3a1c5915fbL, + 0xd1d81ccddbf76a40L,0xbcee5b60311bd5b4L,0x33fd134e0d295473L, + 0x8971c976a1499b9aL,0xbe778d06c2dd33cdL,0x0000000000000128L } }, + /* 15 << 301 */ + { { 0xe165fe45591537d3L,0x494e9b586f9e77daL,0x05048c2406b5f840L, + 0x27b142e0669c0541L,0x266d5823ebf3cba1L,0x2c9668a9f5f6117eL, + 0x92b1a670041d08abL,0xd2e4bc7d98b28dbcL,0x0000000000000110L }, + { 0xd96f0a4eb50c672eL,0x2a5ba3084f40bed9L,0x6ad427836ada6e01L, + 0x11d68fda636a6b24L,0x38d87a77bfcb526bL,0x3666085af7663abdL, + 0x9e8dd236688f4c58L,0x3f5eaea3577ddb84L,0x00000000000001c2L } }, + /* 16 << 301 */ + { { 0xf03e26842bc15ca5L,0x0b130b09bf196674L,0x3c27538756217f5fL, + 0xb26d17d4349aca89L,0x3faf18590269bf70L,0x3cdeb74fb288736aL, + 0xeba8f6007e38ea96L,0xd24052c851da8c99L,0x000000000000015eL }, + { 0xa5582bacb2f51cdbL,0xa325b5cbd0a7f7c7L,0x065a27f009931317L, + 0x1abd17b6cddc78ecL,0xd8fd426937962ce7L,0x5d8403c8002cf3c2L, + 0xe62f5305e3eb09a4L,0x1900f36ceeaf6fb3L,0x000000000000010aL } }, + /* 17 << 301 */ + { { 0x284fc126babba399L,0xda961c2d629b5b0cL,0xdb458a6c7acc2fc6L, + 0x2d048b73b07e4eb6L,0x9ee17cf76011f0dcL,0xff56041501eb1f54L, + 0x39ed447f8b55cf30L,0xcf2256f1d85dbed0L,0x00000000000000bbL }, + { 0x084a817403650842L,0x71be289cb89d85e1L,0xb9c9f5b8f0aa2e1dL, + 0xf454a8a2a0048186L,0x67e89b8f11c62ecbL,0x3fefb1f66eeb3419L, + 0x19c5c988db8b8343L,0xd477b615cb26d01dL,0x000000000000012dL } }, + /* 18 << 301 */ + { { 0xfebd0210b24b42c5L,0x4f8bb6ab782d99ddL,0x624530b516f9fb4bL, + 
0x9f9ac0dcf52469a9L,0x6cd360b7a7297153L,0xa1989d5ae3ffe52bL, + 0x493e7f8e086c3ae2L,0x598af31b052d016aL,0x00000000000001a4L }, + { 0xd22cd1eeecebf13fL,0x1dece9d773807b67L,0x9d68d0462573e787L, + 0x70f7848f24c5d05cL,0x2e846609ad526173L,0xc8a05dcd05ce91f8L, + 0xc39b7711eeae3077L,0xcbb8347d040614a9L,0x0000000000000127L } }, + /* 19 << 301 */ + { { 0x87eac716c5722d13L,0xc1afc869d61ef2abL,0x496b7a2aacf51364L, + 0x5d48541015d5ca0bL,0xcb57650b8772fb05L,0xd26d4f958869f973L, + 0xb320d08e025cc659L,0xa950e9363419e9a4L,0x0000000000000030L }, + { 0x5e80b7840e4326edL,0xe712d1b6118b4b67L,0x9188de4ab4e2f1d5L, + 0xddd7c742931ba11cL,0x19856e2d083a8b4aL,0x28fd9f4ec237b67aL, + 0x264aa6aeb17c199bL,0xbb708f00b04ee318L,0x000000000000009aL } }, + /* 20 << 301 */ + { { 0x9d1eae14e6c6fa68L,0xc10a9135e799dc68L,0x7abe45371d7af249L, + 0x50c5b3e44b78fb26L,0xba92be148298b03cL,0x6229f220227a8e85L, + 0xde2d6415248042a7L,0xeaaddb9bcbfda7ebL,0x00000000000001bdL }, + { 0x3495558bc0642562L,0x5649a710eb006b80L,0xdeb44b295e284cb8L, + 0xfb01767c2fb2ad7eL,0xb40225510f9ccfacL,0xbe4eb5c0a67be6fdL, + 0xb5d9f3e86d920a44L,0x918cde4238b14e5eL,0x00000000000001f8L } }, + /* 21 << 301 */ + { { 0xf14dd13f7e46d751L,0xde1faee5de539ce6L,0x1f580420a878c7e4L, + 0xa4536a613feef467L,0xa7e9291d7b545e56L,0xded28c89e2b7025cL, + 0x59506d85d042bacaL,0x14b37eee8a3adfd3L,0x000000000000000dL }, + { 0x89d83dd3823c023cL,0x8dc3c29fca288645L,0xdd64a1a648af0bd8L, + 0x79f885a65f285574L,0x26f5a31b60248187L,0x70bd4d6ace84d4b2L, + 0x3f15dbc4463583a2L,0x6544d8e78426710cL,0x00000000000001baL } }, + /* 22 << 301 */ + { { 0x76d2d5eb9381aba1L,0xb2322af89b7df6b5L,0x71f40e2e58f343b9L, + 0xb61e43f9efcc85ffL,0x0300fa7259a01585L,0x5988ff95db05e553L, + 0x2b62aa0f5e93de6fL,0xc87ff47b4b549154L,0x0000000000000021L }, + { 0xd77a8a93ff66a25dL,0xa68eea535e13f466L,0x794995e4f79a54c9L, + 0x6df911fc56076e44L,0x8d78ab8c47c83b15L,0x3507ec1cb38bbf1dL, + 0x613c3c1f6a89e612L,0xc07da4b905f7fa61L,0x000000000000009eL } }, + /* 23 << 301 */ + { { 
0xb88f49e250318e89L,0x0b6037ef58a8e5b1L,0x62b4464f8305be22L, + 0xda32be6c695d0c2dL,0x53718af39a3f3741L,0x7eab5da6e64b6964L, + 0xd3594c8663064d0fL,0x39279566f6eaee98L,0x0000000000000012L }, + { 0x4bbb1b13042a5dd8L,0xa70b5d62793e0d15L,0x30393d03d1ba796cL, + 0x4a21fdf287555c23L,0x9646d43c61ba2ab2L,0x6408cb45419993c7L, + 0xb9acd44b338322f9L,0xf283c84d4759b6ffL,0x00000000000000c4L } }, + /* 24 << 301 */ + { { 0xd142bfa992701a9aL,0x0b59d01447539e92L,0x7cefec6e8724475bL, + 0x5c77d10dbcfd641fL,0x4946dbabdd42eb47L,0x4ae66c3a14f1c33bL, + 0x05ba3e754c9f2c71L,0xb618a5071fc0f7b7L,0x0000000000000065L }, + { 0xdd94ee057c169a16L,0xa29e26195a337e5eL,0x30e106eb9ad816b6L, + 0xb91e66f71a17810aL,0x46abf80d5b672e86L,0x0f40f077f89626ffL, + 0xa474dfbe8b67b224L,0x9d3af9248053aa79L,0x00000000000001ffL } }, + /* 25 << 301 */ + { { 0x7d22cf775e1f232dL,0x548fda9e7e51a2c7L,0x11a9774149cf868dL, + 0xf0db0b7e2fb759c5L,0x1796b06efc5eb57dL,0x60ede66826f8e4f9L, + 0x7c37fa5cf37f1ae2L,0xf5348593ac10e958L,0x00000000000001abL }, + { 0x05239d1323d8b38bL,0x2c55a6e0d76a3773L,0xcea20821cd282bc4L, + 0x9e552a3a5ca82699L,0x3f1ae5bfa1548eb2L,0x56ca5414452552bcL, + 0xec3a3f85e1ba16dfL,0x91ed704a56240ce7L,0x0000000000000041L } }, + /* 26 << 301 */ + { { 0x525d2794aa3af70dL,0x62666bbfbf65bbd6L,0x250750d4b925995bL, + 0x050457f8e340c57bL,0x919451793b4931b3L,0xa4a0e728d223ec63L, + 0x9aa8b43380a43404L,0x9c52b96aaaaa7d45L,0x0000000000000149L }, + { 0xa4dcb38e2dc7ef6bL,0x115ba55675a63e19L,0x31541c6e90d13e8aL, + 0x4e0b09787401562dL,0xd81287b447cfebc2L,0x59ec023f074cc01eL, + 0x7086b3db94d89d32L,0x274daafbbf9b0f39L,0x000000000000009fL } }, + /* 27 << 301 */ + { { 0xd7fd9795758d15ecL,0xbe89bb8c335f014bL,0x3be772ba54b6b739L, + 0x2d652a561a35e643L,0xfd8b75fe36ab48b7L,0x89fa8e1d9c89c766L, + 0x75e124aaea145d8fL,0xdf889765299c9171L,0x0000000000000170L }, + { 0x103dda2e55540308L,0xc3c035e7cf05bb03L,0xd346e06a52488d57L, + 0xe7833e27f8c702d6L,0x25b942ca6be87eccL,0x7b0588057505ad4bL, + 
0xa6fa90a5c97e6686L,0xd1d89bee28a9c029L,0x000000000000009eL } }, + /* 28 << 301 */ + { { 0x2b1a24297e163accL,0x07a52207912df2c1L,0x8b00c607c28459a1L, + 0x172a4e12d0d1c992L,0xd7d28839f97852b8L,0xfa91f13ff8fd2bdcL, + 0xb40ceb9cc7c71e5bL,0x6dfbd3ef1bb25495L,0x00000000000001d8L }, + { 0x77d795eaecb9e7d7L,0x307ec55c034640ceL,0xc81199211dc90456L, + 0xf336a8d62e6c8828L,0x39f69ea7efbdf285L,0x9f9b88446f6376edL, + 0x9cf44f2767ef7e9cL,0xcfb6ea37759662d0L,0x00000000000001deL } }, + /* 29 << 301 */ + { { 0xac8e3a669a265363L,0x0bcc6be9a837586eL,0x1800b8af5b913525L, + 0x6bb8bc3eaf0766d1L,0x83ca07cb0bad6358L,0x29442dc1330633bbL, + 0xce862c876fbccbf4L,0xc5550669b96ea743L,0x0000000000000073L }, + { 0xf015f8b97920c9d7L,0x34a73daf4c203156L,0x97bf03c337bb3adbL, + 0x7a7f0014000c2070L,0x04a1bac2d9d1d2beL,0xbd7991e0b7297737L, + 0xdbb0bd6d8deac191L,0x8ddab47a8386ee08L,0x00000000000001a3L } }, + /* 30 << 301 */ + { { 0x3d87a8098b966d8cL,0xb435d15896c76473L,0xefbd14d8e443c697L, + 0xa1d59738435950d1L,0x53a2c7714b9bf3dcL,0x712bc49a2cf7a12bL, + 0xa73d130b39f96cb9L,0x4c00c64ead7eebcdL,0x000000000000006eL }, + { 0x75300d109db550eaL,0xa7235321f4f7bd1cL,0xfba90cd35f63a433L, + 0x49b56ce3aeaeca59L,0x1d562d49f8ceae69L,0x42bf5c058ae7ade7L, + 0x571ef4b5c6aa760cL,0x2beb1d379fdd6c7fL,0x0000000000000074L } }, + /* 31 << 301 */ + { { 0xf025bfb65b9ab00aL,0x5afcde150c317febL,0x2dbf474e5deefa10L, + 0xfa820330f1e3a088L,0x52d1d96aa577534eL,0xaab36a89d11cbc7aL, + 0xe5b59c3e1028606eL,0x11298e30688131c6L,0x000000000000017aL }, + { 0x4e1d910a80a78b33L,0x82826cbfb3ed3ed5L,0x6259a943c011cfa6L, + 0x81284e4503ee6f69L,0xc3b994d2b86b8f5eL,0xd4dda4b45f239bddL, + 0xbaf7541f709ad8a9L,0xeaa31ffd52390dc6L,0x0000000000000186L } }, + /* 32 << 301 */ + { { 0xcfdc285df79421d8L,0x263289988674686eL,0xf991086e1ed73cd0L, + 0x65046369a36bd06dL,0x9712bb0b72adcd0aL,0x9b2d422f5faf9c7cL, + 0x7e3989f851491c47L,0x9fc24bf809aeead9L,0x0000000000000029L }, + { 0x8584ee840370acb9L,0xed56c569ea40417dL,0x6443587557b4cb9aL, + 
0x752c5fa691d35c5fL,0xbbcf8703abd0eb6fL,0x0dd72d15c233db59L, + 0xf2b7a1ab692baae3L,0x6ee634377baadfbeL,0x0000000000000111L } }, + /* 33 << 301 */ + { { 0x2f00b1014d3f67f2L,0x02773442dabf0299L,0xb050dd0649b08536L, + 0x155d73c3ee3a8e0aL,0x0e468ccb7c29c66fL,0x3b7660e15b398646L, + 0xb6b2782165c7155bL,0x09cd57fcecb519d3L,0x0000000000000133L }, + { 0x3aa32f6097fcfd5dL,0x35d5448b279abb9aL,0x54bab5ce95cf663fL, + 0x9dc193ea1776f8d6L,0x6d2bed78f4d90465L,0x08da1c145e61f723L, + 0x68c8bb0604d91875L,0x03458fea8b3a3a3cL,0x00000000000000eaL } }, + /* 34 << 301 */ + { { 0xb40737908373a6e5L,0x9ce42cd2a1188649L,0xce446162ad0eb978L, + 0x322dadd09b7ffb70L,0xe568d26ddb803c73L,0xd74eb110c414c261L, + 0xa1be744270b5c055L,0x5f70ec901a420208L,0x0000000000000175L }, + { 0xad6727ca8c2a9937L,0xe9fd6825a03949a5L,0xe4790c050a8c931fL, + 0x80943e80b52d4b49L,0x104bd74362d38bd5L,0x0510772ffcc1cf4bL, + 0xc669adcd5f9185c7L,0xae818ff028123eceL,0x000000000000006cL } }, + /* 35 << 301 */ + { { 0x7a5717a10792b4faL,0x2c248be7fcfdf67dL,0x970a7feddcb61131L, + 0x0234eb04df345fefL,0x3eb4489ba2745622L,0xb145f3f95ed28a99L, + 0x0384b4cc064fe474L,0xea70e7d96931ea00L,0x00000000000001c8L }, + { 0x331e0fd9802e2aebL,0x42417e6c8732f311L,0x6006728b12056492L, + 0x3ce9e7fd5b979aa4L,0xcaaffb764b4fa416L,0x064dd6eca006759aL, + 0x28b047d186a9190bL,0x029d74a7595fc9faL,0x00000000000000c5L } }, + /* 36 << 301 */ + { { 0x657f1a4a0d3a6ecaL,0xe4131c3c9584cfd2L,0x75ecc937303e30e6L, + 0xb062e9f44c0801a8L,0xd9cf5f10d3221408L,0xae77108f25262e9fL, + 0xf5f313faaf40a968L,0xe40dcb7d2c4518f7L,0x0000000000000163L }, + { 0x949fdc6e98e347c8L,0xef10af84b013c3abL,0x4c48606d0e6d48feL, + 0x97e690a711fb595cL,0x9f47aea713f11f69L,0x1d670e6e31bae984L, + 0x35020db9e62e6909L,0xc43ff5489d07874fL,0x00000000000001d4L } }, + /* 37 << 301 */ + { { 0x28316d8b9441a550L,0xff7227210a759555L,0x5481756e2ab2ee17L, + 0xf3cbe2e8e3dd7974L,0x7dc4b0f78aa159c8L,0x9e5b4f561bf333e7L, + 0x63ff9f52a91020b5L,0xc76c6456bc08d5b0L,0x0000000000000103L }, + { 
0x69bcec9d3a833b1fL,0x07441e04281f6fa7L,0x08315e671838e4e7L, + 0xcdbae7275209d351L,0x14c1373de5e46a7dL,0x8e3601e092125731L, + 0x6fa6044488b745e4L,0x8ec5cb7e562f6bedL,0x000000000000004eL } }, + /* 38 << 301 */ + { { 0x9936081ddd58574cL,0xcd952445fe2c792cL,0x852d5476ed663b93L, + 0x4b13dd0faf869b0fL,0x8793e140290f6625L,0xdd61017faaf6d5b1L, + 0xe7d2fe4699c8aae8L,0x8794c5703e234ac0L,0x00000000000000e3L }, + { 0x2586bc84622848ddL,0x81816931dc799fa9L,0x84e590e24092ee76L, + 0x7c11f235d863e257L,0x5f43fd9706988708L,0xeb474948e569a68aL, + 0x66b18d86d3b8b848L,0xcaf4df955083487eL,0x00000000000000c9L } }, + /* 39 << 301 */ + { { 0x89d29ec37898d2fcL,0xd5e69b49b98c5a0eL,0xe3c7a979b49ac9f3L, + 0xebe0c817530d25dfL,0xbbec2d1a2e03e0ccL,0x6447dc091be011beL, + 0x1d990dee2d10c2c8L,0x8ad4a24baf507a38L,0x0000000000000197L }, + { 0x8f38e29bac7a283aL,0x3ce5725db29af2c4L,0xe17035ef8077fba2L, + 0x5b50a50390b970fdL,0xfca6cdafb8efdf69L,0x0d252e814d349a01L, + 0xfe068bd123d614dbL,0xeb14408f3810431aL,0x0000000000000086L } }, + /* 40 << 301 */ + { { 0x97defe3d7938d98eL,0xe2458feafd0bec69L,0x7df8d2b58b42ddbbL, + 0xa82aab40ff39914cL,0x78549b01d30f07e6L,0x253f8c461331557eL, + 0x771750d919648cceL,0x96ad6776d045bc36L,0x00000000000000f2L }, + { 0x4a2ee334e08b83f8L,0x3e9c4cb72e3086deL,0x913eae4b9936bab0L, + 0x3cf83551d23cd36aL,0x7c51d84adfd27d4aL,0x2595a90f4d006eddL, + 0xb2d5c68461cc7be7L,0xbb02bd59a4b310f9L,0x000000000000018aL } }, + /* 41 << 301 */ + { { 0xea273b8fd19caa8aL,0x8f2eb0e5fbdceb6fL,0x13e96cf4cc37e1f6L, + 0xf91f2f4d7b6607e6L,0xdb70dc3e919dcb16L,0x752725306a35e521L, + 0xd48d0a08e930983fL,0x62bb3d8f9e955697L,0x000000000000001fL }, + { 0xc492f1a5bc57ce3dL,0x0496b245370d6a7aL,0x2a0b281dc8ed431aL, + 0xb8a3ff7922cc9f02L,0xfb6c685d7d711d84L,0x68e508ad40b6b8b6L, + 0x437d076c1f1236edL,0xdab5a2ef69a9f09cL,0x0000000000000009L } }, + /* 42 << 301 */ + { { 0x45ad42dec9ebfcd7L,0x4054e68145eef6c3L,0x05427862b00d1763L, + 0x025fc05fa9852684L,0x717a17c10de39afeL,0x618c2b085586b46fL, + 
0xff0a80f7f6b71672L,0x3c7fa577f5e7c531L,0x000000000000010eL }, + { 0x33e3534c52ccb986L,0x9ddd2298303c58a4L,0x261e5b794a324d2cL, + 0x52c1c46af081f324L,0x0216cb8dfd3db922L,0x8e5b647c45a5a0e2L, + 0x1b1c0b847184888eL,0xc1297760e66e1c57L,0x00000000000001d9L } }, + /* 43 << 301 */ + { { 0x555d70c34aeab359L,0xc57ab8bb64f78762L,0xf95aba0b8d3e12c8L, + 0x8bf166223b0a34c8L,0xd9b90d4a97a02258L,0xd2718cd14e070653L, + 0xfc0b4a7243ae776eL,0x10b07a1f9e739e1eL,0x000000000000017dL }, + { 0xc53e4794dbf1d1baL,0xb82723e1ab28bff7L,0x5cc8876d993dffebL, + 0xc84384567c943552L,0x98d1a6ea3e1940aaL,0x1bb513db6e2214d7L, + 0x27a9e1daf0e11ef9L,0x000546decb247662L,0x0000000000000114L } }, + /* 44 << 301 */ + { { 0xf6820c898f7f0d51L,0x470f631b3e6bb71cL,0x1489563d67e33eb0L, + 0xf9a58c5e34260065L,0x2f0cbe5541d317c2L,0x3501da3bfae4b6f9L, + 0x25caef56436223a2L,0xe86cd4faaa1af452L,0x0000000000000178L }, + { 0xbcda46bd2a68b8bfL,0x7005074bf6e3ff4eL,0x17eb86a110cc5301L, + 0xaa01cafcfcf4b5a2L,0x8f4954e944dd4413L,0x485d7a8e3dbc7cb6L, + 0xd042f1f4fef97fcaL,0x8e1bd4289991c8f5L,0x000000000000007bL } }, + /* 45 << 301 */ + { { 0xc6611fa87a271022L,0x10ac4d7405f4fa97L,0xf94e7e96b448c4a7L, + 0x24a2e0ae49c62985L,0xa2d6dd08a74848f0L,0xf7710e0eef1b6b35L, + 0x3e6eca62ceae13e0L,0xcc1397bb0a3d98c3L,0x00000000000000e2L }, + { 0x4220e9e620923648L,0xecbcfc545b686455L,0xeb4580c51656d644L, + 0x9d1747a6a197f15fL,0xcf2b77a1f78eda46L,0xe417400145448aa3L, + 0x9a23f9a71649a48cL,0xfc6d16ab76272579L,0x00000000000001d1L } }, + /* 46 << 301 */ + { { 0xacf13584ec5f73b4L,0x14f5910a780e4469L,0x556672606fa1789dL, + 0x5866c80b9a4d28b5L,0x4a3a72f3217e2b8fL,0xc774aa4118da75fcL, + 0x9c9be5c98ca2cd13L,0xcfcbaedc2145b62fL,0x0000000000000070L }, + { 0xf78d77ee3833e520L,0xc536906550caeadcL,0x0392c83f4af31e0bL, + 0x9b9f30ce94537049L,0xb56dc7c2ef2c1d25L,0x820e464242363f57L, + 0xe70134f9abbf4146L,0x55264e69abc9ff2eL,0x00000000000001a1L } }, + /* 47 << 301 */ + { { 0xdd3503c5cee406e7L,0x4b64dd4333236849L,0xac777279889363ebL, + 
0xeec078a0cf6f0811L,0xfd559e756d243ab0L,0xaac1010f2e4d26baL, + 0xc0b719ef55e9c8f2L,0xefff2f164e29695cL,0x000000000000016dL }, + { 0x3717eb9cad47f17aL,0xb39b8c6162b05e31L,0xac892a92918390b6L, + 0xaa3c7f01650cfed9L,0x6a8f7ec0fba21f56L,0x4732e5129121bba3L, + 0x220ce23e1a14d894L,0x9199750e5dff1692L,0x0000000000000038L } }, + /* 48 << 301 */ + { { 0xcaa727068efb0157L,0x713a7f37f08f035cL,0xff58c6a1aa4fd5e2L, + 0x760b066000a48205L,0x4a39e66655f07cb4L,0x0f6bedac67558061L, + 0x9eaa88c266a118fdL,0x8d9ea75c97d29ca9L,0x0000000000000190L }, + { 0x50a4bc7abc6094f3L,0x105120ce68f17d91L,0x514fdeca4e559f9aL, + 0x414f4c0ee8d688caL,0x1b91555424de5ac2L,0x5d983c0e298823dbL, + 0xa537321243264448L,0x4baf77e986eede9bL,0x00000000000001cdL } }, + /* 49 << 301 */ + { { 0x5233363c3908e1ddL,0xd337db04f34868cbL,0x67e60b131108934fL, + 0x197bff64d6300ccbL,0x2ae67541ba25d4a5L,0x2cecde5e52500639L, + 0xc89f08d4f30776caL,0x5a8a70c2d54d3966L,0x0000000000000119L }, + { 0x69950ee38b87ac93L,0x82f8ef8a8aae9a41L,0xa7240350d2ceebf8L, + 0x89babd2121a729faL,0xb32954b0bf228627L,0x0dfef1aac46b78bbL, + 0x9ea9cd1f4c5f1ea3L,0xd23bbc8bc8c76503L,0x000000000000017bL } }, + /* 50 << 301 */ + { { 0xa095037ded6f2736L,0x0646535bad883679L,0xf786126e9a39a143L, + 0x7b0ca7e623b36c0aL,0xf24b3e4b927379bbL,0x9b7fb39b25e2bb09L, + 0x9274d841abe08b45L,0xf6c52548dbee12c4L,0x00000000000000d8L }, + { 0x50b4222ef75cb030L,0x166b97d284f9c593L,0xe850289b5a778cc3L, + 0x36ac8e7bdbacde12L,0xcf3371820d8b021aL,0x01f6d14c223203c3L, + 0x4680d14e6369d2ebL,0x6c0e46e4e17a4aa9L,0x000000000000011aL } }, + /* 51 << 301 */ + { { 0x7061a9f024b2f7edL,0xbb9981d31b3a2b57L,0x2656f1674fea4e43L, + 0xd302b1ff4e19186aL,0x99747d59605eaac2L,0x462f9060bb48ed8cL, + 0x45023c424c6159c8L,0x580520656cbe1bc5L,0x0000000000000041L }, + { 0x87e0f7951f1a47dcL,0x4ea66203080b099dL,0xe9051bdc1dce1263L, + 0x30ec89d3e469142cL,0x6daa81dab80471f7L,0x2e12e2fbfb7c0aedL, + 0xc395d4415db48de3L,0xedde9d18069032faL,0x000000000000014cL } }, + /* 52 << 301 */ + { { 
0xbcc582303e5ab417L,0xa60c1a36b86fa32bL,0x3918edf55e73256fL, + 0xff9846acf315fd9eL,0x31b17c0d4471f470L,0x953aeb49574ef0a1L, + 0x98879828271105fdL,0xc61be3605efdad16L,0x00000000000001f1L }, + { 0x64a9eff28dee2c40L,0x61c07dbb3ce994d5L,0xd5f17ebaedd78e35L, + 0x2cad1e605fa0d125L,0x4a769e59482f87caL,0xf9d4824301c7ca2dL, + 0xa283ff0e1070afc0L,0xc2dbfd1c7d19332bL,0x0000000000000165L } }, + /* 53 << 301 */ + { { 0x658850d16ce799deL,0xcc973847aef174f3L,0x6caf2e528de6a894L, + 0xb84584ad02fa012dL,0x1e78d607213be59fL,0xd9bf24ad7c2ee0adL, + 0x8fe953f726b076c3L,0xd38dc31c14cee17bL,0x0000000000000178L }, + { 0x8d0b066db1c6915aL,0x21ccd05457340145L,0x5885c54667ec6ddfL, + 0x41e073c91b2ec5ddL,0xf847703ff931f748L,0x20d56c94c21f1fabL, + 0x5dcb60b7021229beL,0x5193baddbaadd8d0L,0x0000000000000024L } }, + /* 54 << 301 */ + { { 0x37f1fc2d03a65cc2L,0x521d119a085fe64fL,0xfc0fa31a43e08d7dL, + 0x8f29f08e14fb6112L,0x79513894d29b5799L,0x8c2a5e15290f1bc8L, + 0xb0626ceaab7d477eL,0xde49612f5a1f5a67L,0x0000000000000035L }, + { 0xef75ebd052be58d6L,0x480a1e4323635dadL,0xb57c91739a3cbeb8L, + 0xe84cbe7311c90c9dL,0x53973bd42c272fdfL,0x5a69a9e16b1fc029L, + 0xf510e2b810568445L,0x8abe0317a03d18b7L,0x000000000000010fL } }, + /* 55 << 301 */ + { { 0xa24d79fc1f915fbdL,0x6f9b01cac405aff7L,0xa3f928dac9dc1424L, + 0xb6d7a76790832142L,0x1bffa8759c7ec089L,0xd8406c711f1d58cbL, + 0xc10c59f08ec01b2bL,0x0e6c986c5c4d7bafL,0x000000000000001eL }, + { 0xcaba835f0b06de2bL,0x82f3ad2b2bc71659L,0x987d851df34a59e4L, + 0x3ea0707f48bfc12dL,0xd79e3cbe936f8bdeL,0xf496192f8bd05c8aL, + 0x8c3ba6aed29848eeL,0x87f5784cba394b14L,0x00000000000000feL } }, + /* 56 << 301 */ + { { 0x47212cd83110ef21L,0x351356ca7dab31daL,0x5c31add7777a302fL, + 0x44844eb4dc66a1c6L,0x559f3fb428384807L,0x95dab974ff78dee6L, + 0x2fe12e92fb20857bL,0x41436000ef499bbaL,0x0000000000000106L }, + { 0x5e0e5f369df7b969L,0x2c96f308dc819d59L,0x3258db7411cf4119L, + 0x6246da5476676d2fL,0x5be278cfca079f54L,0x61ab6eb06e36452fL, + 
0xe7742741461fc9a3L,0x252fb6386a926fe5L,0x0000000000000171L } }, + /* 57 << 301 */ + { { 0xd15425a0044b1e79L,0x30c1da1c01c4e18bL,0x2a8fe18f5b56cf96L, + 0xa65a8c353ec8ae22L,0xd5b3902b1ce4eff5L,0x25dd04aa45191df9L, + 0xeeede6edd2ec0e96L,0x3dd71ef21178f647L,0x00000000000001a3L }, + { 0x61cbd8a0a99485faL,0x2e3a830310b88a62L,0x821545277d196b55L, + 0xee30afd7176227b8L,0x0d7f8cb387f3f451L,0x461ab2741c2ae402L, + 0xb510aa7e7c25eb24L,0x2064a9326d6cb16fL,0x000000000000013aL } }, + /* 58 << 301 */ + { { 0x3a647a701e66ebb3L,0x87eda648234fb017L,0x522f0e31ab81b9b8L, + 0x0b423e7514efd69dL,0xa4876674b8df1b85L,0x43be28d578427a50L, + 0x2959b6b085148f33L,0x49b1b83da5796dabL,0x00000000000001cdL }, + { 0xe42c30325d6b2eb8L,0xdfa003ce05f8e677L,0x59e1401917e9254cL, + 0x34009be012ac5202L,0x7e0ebbb0db58cac7L,0x590acd1a64a0ffdfL, + 0x100e7b6afca1d458L,0x8933fc7a84524656L,0x00000000000000ecL } }, + /* 59 << 301 */ + { { 0x5251639ab0e87051L,0xf6f979ce0ca338d5L,0x144d90c7b6e253b1L, + 0xf037aa216a68696fL,0xf89c93246540c170L,0x29ff81b785893ae1L, + 0x4adef9b90e2c8c1bL,0xce6b4390efee4a2eL,0x0000000000000002L }, + { 0x8ddf233de075825eL,0x6b1e1fb82bb7ce2dL,0x6b6d4972646ee9e4L, + 0x83b3d991a7723d1fL,0x99b83cd10ec203d1L,0xc03b08a4e02da448L, + 0x02a118288fc47033L,0x604306491e6f5d22L,0x000000000000018cL } }, + /* 60 << 301 */ + { { 0x9628c0db8ed909faL,0xd3b2a86d0580185eL,0x5735bd594bd03ffbL, + 0x4f83f8fd1cc44682L,0xbfa641e8a18ede58L,0x840c9d0a5ea97db7L, + 0x66b28600dd4a3122L,0x8e603955d4678750L,0x00000000000000e8L }, + { 0x7da4912cd87216c2L,0x1ff65b078fca0ee4L,0x7e1fd56ef9bc6ddaL, + 0x566b2e611503ac12L,0xda75d74167835799L,0xc0ea3d59dc72ec9bL, + 0x6e72e5f65e154614L,0x7c42bca592cee288L,0x0000000000000185L } }, + /* 61 << 301 */ + { { 0x745a839ffd28d6bcL,0x4589047acd47e3b7L,0x93b23d94bbb197acL, + 0xa34679ceecf4fc7bL,0x034298450d34fd44L,0xee153249190fa29dL, + 0x3ad1071072eda190L,0xcb61b38e3b741423L,0x0000000000000119L }, + { 0xaf04e1516a6a568fL,0xf197ced617e3e52aL,0x0940b3238f8d327eL, + 
0x5ac9433f0d86d681L,0xa5cff71ee57e66a8L,0xebe9a6ff3736d6feL, + 0xb84eef1671bf3727L,0x3c02e98d17c1cf71L,0x0000000000000088L } }, + /* 62 << 301 */ + { { 0x4c6238c497dd1d20L,0x3fa03dda3196ab67L,0x5fc72512a7f69b5fL, + 0xfa56e5413948db72L,0xe631f4fbd8fa32d6L,0x8495ca1d086e83eaL, + 0x715f2aad61ea493dL,0x7d8cfba3b12179dfL,0x0000000000000124L }, + { 0xff5705a163c29a0cL,0x6b7f470e4fd2f2a7L,0xd4a25c106860eb08L, + 0x479eeecf19028042L,0xdc108a08934796a5L,0x6dfd7872672cac03L, + 0x809fd3bfc86d5c23L,0x193e18ab4ad9c302L,0x000000000000016cL } }, + /* 63 << 301 */ + { { 0x5f0eb4740d66d7d3L,0xad88da12ff43b22bL,0x6ddd0823f87427f3L, + 0xa8753432c0af65aeL,0x8e4d1f970f47e6a3L,0xb9a187a3ec52c9c5L, + 0x7df606cae458ff72L,0xb5b21deea1c108f9L,0x0000000000000107L }, + { 0x14111844be16cc0eL,0x9bca8c559765b2a4L,0xf0649ee7e8f13547L, + 0xce303044c8254c5eL,0xf277fadc62c52494L,0x8634d4d0b31a1319L, + 0x3076ab431394abaaL,0x2ba1f7ed9e0ac32cL,0x00000000000001b3L } }, + /* 64 << 301 */ + { { 0x70c5acdc38693d3aL,0x9edee68238f054f9L,0x16e053f22960ceeeL, + 0x5926857c28bfe7b1L,0xb9f7420e6145f8d1L,0x9e0c42f59c0298efL, + 0x50b7210edd7d9fc6L,0xf9e4bd53d7be096fL,0x0000000000000078L }, + { 0x428c2455ed19fc57L,0x9e8f777d56aa75dbL,0x4caeba30a671d991L, + 0x5f927d79eae0f53eL,0x00d6ad369e4c2a5bL,0xc8811438a2e12d0aL, + 0xa36d664eb2aeb4b1L,0x74e76676929a8748L,0x0000000000000191L } }, + /* 0 << 308 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 308 */ + { { 0x358eb53667db4570L,0xaed3f999685fa5baL,0x4593c950cb571fcaL, + 0xdff7278c0411dc5dL,0xd62a44643bfe7e51L,0x53b87ca249f6f05aL, + 0x72327899f2c3d2dcL,0xbaa194aa9df4da5eL,0x00000000000000c7L }, + { 0x3e73accfb50a3e06L,0x0d8ebee30754f2fdL,0x3cd0ae954b3f3fd1L, + 0xc4a4eee0c08bb2dbL,0x7955296319d71e5dL,0xf91e1ab7872e35c9L, + 0xc85090f6f6c102afL,0x8edf8c5d1e42a1baL,0x0000000000000179L } }, + /* 2 << 308 */ + { { 0x56923e55f33c5a4aL,0x1c5624ec829a3c22L,0xed234e3870000295L, + 
0xa8467561edf3ff42L,0x717a939e9cec9497L,0xf80e28024e6be199L, + 0x5023bbec06fa7b8fL,0x16e26a9ddd95c07bL,0x00000000000000fdL }, + { 0x88091d673c3485e4L,0x0377260fd40b6b47L,0x6d9c6df416f7b3abL, + 0x65376516f71b1fddL,0x89cc11f991956d40L,0x58992a09807610b8L, + 0xdeb47aa2b2799c7eL,0x3ee5834c8a327765L,0x000000000000012aL } }, + /* 3 << 308 */ + { { 0x421b840e80905a7aL,0x97c7f1d394b48cbcL,0x421a23843b79f360L, + 0x1121a2d8eaf59ef3L,0xddbe9d9959fb65d3L,0x6ad2be2371d9d306L, + 0x36fa0077789af534L,0xb928a6e78013dc59L,0x000000000000017bL }, + { 0xb4c26798a38d78e5L,0x8884dd394cf38545L,0xb74dd46112a2023bL, + 0x1b242f854133277dL,0x401d32f2d29d7071L,0xcd7c9aa43d2e2a0cL, + 0x9df2157e06ba6ad8L,0x749bfc4c80e2c3c5L,0x000000000000018bL } }, + /* 4 << 308 */ + { { 0xccf8b57aeffb980aL,0x82ea22f5d0108b23L,0xcab6a69f6c902061L, + 0x7a3d74d843663f25L,0xb12ca29a0ad5df2dL,0x9d3ebbeb2f9fb277L, + 0x6f975e2189bc5273L,0x75e8bf950929deceL,0x00000000000000a9L }, + { 0x14493740e1af64e4L,0x9ce3ae66109961dfL,0x36651bb7e31f4ffdL, + 0x18b7131cff1fcfe2L,0xded3378b085a67b7L,0x28efb2896e283271L, + 0xb66deb9a73427ac3L,0x3c5fd6d2a9baa451L,0x00000000000001b3L } }, + /* 5 << 308 */ + { { 0xec1f6a5762efd94dL,0x1d80b5e87b95536cL,0x01f8ba60af47e2b4L, + 0x6e8ae542a3b0e6d0L,0x5ecb52d9e45fefb7L,0x2108032a327299a8L, + 0xc49fc5c5044ed33fL,0xd1fd083752cd0833L,0x00000000000000e1L }, + { 0x3f6ae9b6cdad11acL,0xad3f0fa813bb828bL,0x812626816ff056e1L, + 0x2fd37492a73196cfL,0x172beb76df90d5f4L,0x63f2c6e466144036L, + 0x3e1f497457f6d086L,0x5e0d8e49fb6eb366L,0x0000000000000095L } }, + /* 6 << 308 */ + { { 0x611f4a795e2d7653L,0x966e843cceebb0a9L,0x2e358cf506e8a15dL, + 0x2ce41dc740a74fc7L,0xa2fdea4f5eb455b2L,0xbbbd718f120fa4d4L, + 0x210bff82030fff11L,0x5a1b943aa68b00a0L,0x000000000000002aL }, + { 0xeb814a67ff037be3L,0x3b08220469f52c59L,0x3f72143e74ff3224L, + 0xe7e85cc436f89aedL,0x2afcbe04caaf314bL,0xf8c8bb32bf93fd35L, + 0xe41f8328ac1dae25L,0x5318c6d7bdeb2001L,0x00000000000000f5L } }, + /* 7 << 308 */ + { { 
0x630190bb95989889L,0xe041da504e659dd2L,0x5d88d99b5df8e753L, + 0xdbac25445ac2e158L,0x1ff6ece7699eda49L,0x3d0860a21051bd60L, + 0xd4d8685740c3a989L,0x2bfcc26c4002d915L,0x000000000000012aL }, + { 0x0c43772c2f4acc7fL,0x54963f2fe3753d32L,0xc699be8765dc40e8L, + 0x6dbc95ad81560a01L,0xc805d7411dedf025L,0x46b049c177fa74c6L, + 0x7c0f62fb288c1133L,0xa8a2e1f9e6f066baL,0x00000000000001feL } }, + /* 8 << 308 */ + { { 0xce24d42c1b72e95cL,0x7fa4289beab74b64L,0x42ba739e523fc1e5L, + 0x9293167b814c4fc8L,0x4418fbfd8623db10L,0xb079853f76a132b6L, + 0xec5bad8abf394947L,0xc78e5748e7b0a665L,0x0000000000000169L }, + { 0x6038ad95d7eba9ceL,0x421ecdc9572f3671L,0xc6b7effc60bc3cfcL, + 0x2c4b7b77a48fd001L,0x80280ec9d57795e8L,0x350815c93a7f6d67L, + 0xce30bcdd7da13b86L,0xe36da7b123c894d8L,0x00000000000000c9L } }, + /* 9 << 308 */ + { { 0x31105c7864ea99c5L,0x513faf09396e75e9L,0x34a79931a04b75a5L, + 0x3e026b0ed4c954f0L,0x70d2aeeb235bf7e9L,0xdd6c193bdf5682b4L, + 0x7cf6e8f29b1d9704L,0x41fc6b32026eaac2L,0x0000000000000132L }, + { 0xd09919ef5413aa96L,0xdb06523a393fc9e6L,0x8c6f8bcb74809cc3L, + 0x7d2a5b46b27e50abL,0xc4f0d0792a507c1fL,0x28c3944b4a751fe8L, + 0xcd55d261bf88e984L,0x56d48d0616da21b0L,0x000000000000008cL } }, + /* 10 << 308 */ + { { 0x2cf7fa7761bfa954L,0xb24bbea98e5118a9L,0xc5c77927689db327L, + 0x8d12557569c43762L,0x41f4bb986c359329L,0xf0715fbed02caf97L, + 0x3319662497538059L,0xfc8ae58b5ea0263aL,0x000000000000005eL }, + { 0x7245131ce020630fL,0x459c5721ee23ba9fL,0xf25f9be9ac36a41eL, + 0xbed1a35049589c41L,0xa72555b2c636e1cbL,0x35398149c006b213L, + 0x7761c08b6a446a84L,0xed021d8673e41c6aL,0x000000000000001dL } }, + /* 11 << 308 */ + { { 0xf8d97141473e6d47L,0xe73699d31e25f25eL,0xbb3aa86000986563L, + 0xd8158f88653b68e8L,0x1c7d943eb586d23bL,0x1074ee6888ee5191L, + 0x6f60808b0b60c796L,0xa7fa912d84dd4730L,0x000000000000002aL }, + { 0x589bccf26b6fe852L,0x902c7b946abbb0e2L,0x9145309822db1367L, + 0x6febda2d555ff37dL,0xb6fd9b3008771767L,0x968892ece5a8b409L, + 
0xfe1910a2e1233793L,0x13fe7f04213d591eL,0x0000000000000184L } }, + /* 12 << 308 */ + { { 0x59975b4fdfc4e7ddL,0x35d9e63a3fa2a870L,0x7d6653749901f064L, + 0xe89caa890e9dca74L,0x6eeb63913d92e7f8L,0x19d9af358a4ccbb0L, + 0x405c35c2928e5cf9L,0xed771157ddb633ffL,0x000000000000008eL }, + { 0x6ad1dcbc685b9a90L,0x479ab5746efb4e61L,0x27a023b6e72992c3L, + 0x3607decab94c54fcL,0xfb2f50d0fbf16aa6L,0xaf3f098398c7c140L, + 0xd1a20e9ea3814586L,0xf1bc43165412b23eL,0x000000000000018eL } }, + /* 13 << 308 */ + { { 0xa047b2e89a705668L,0x2fb2a1b74cd0215dL,0x90ef0bb734146de2L, + 0x340074458c103e1eL,0x4803ee28d95e743aL,0x574f1ca6bb762bdfL, + 0x703fec4e2ed0dc5fL,0x3f19a069d27172caL,0x000000000000002eL }, + { 0x3dd8f275c44bcf81L,0xa32f84ac517a8afbL,0xc065ba2bf26384b8L, + 0xb3fadb9619d14499L,0x6258447d07523eb7L,0x4c8c791afd107696L, + 0x041ab2e7d111b4d3L,0xfd008507e28a3a07L,0x0000000000000165L } }, + /* 14 << 308 */ + { { 0x75d00875fbdd51caL,0x8c2e5ff75d7aa834L,0xee4fbe0aa4a1a123L, + 0xdf61ce6939ea5515L,0x8a747ddaf3ee68beL,0x106c5c5d3cedff21L, + 0x51ecf5362c3519fcL,0x6581ec138cefa861L,0x0000000000000187L }, + { 0xa8668f9d0eecd8dcL,0x4dc8fbe96bdd6803L,0x5aff8cdd339f6f89L, + 0x7a39f0ab91d488acL,0x20e1dcc8d8fa0333L,0x12e03a970fb08c2aL, + 0x265c2724173af27cL,0x962fa034cc395512L,0x00000000000001bdL } }, + /* 15 << 308 */ + { { 0x5fa1aa9d9283f38aL,0xe0dcef924846cfa5L,0xea5d81e7e4c20234L, + 0x5959dbcadf690b8cL,0xb77391b0c6e466a6L,0xdf6353c23982757aL, + 0x6516e711b064d2c4L,0x6a29f216df8f77c9L,0x00000000000000e5L }, + { 0xb5fd98187a2702b1L,0xdc2a2e1d49291bcfL,0xf18db82a493dbb22L, + 0xaaef9aa431732b87L,0x6ce115f70f27262fL,0x4784c2ed93afc24bL, + 0x97df3c8932540ad9L,0xc01564c169f2a2c6L,0x00000000000001dcL } }, + /* 16 << 308 */ + { { 0x92401ab5853572e7L,0x957d2ca6eddea020L,0x7d54dd653f9fbd6fL, + 0xad0c7d092e05d05dL,0x6656fbab04183b30L,0x543b50170b716d5bL, + 0x2481449d9857d796L,0x86173b6de3a39722L,0x000000000000005dL }, + { 0x7130d0cae71e511dL,0x168ef9d935663592L,0x63a9936eadbf816fL, + 
0xaf63ee57d4f7a44aL,0x59f21032cd0ea152L,0x2f552046a881056fL, + 0x26eda9a621ba6bf5L,0xe75991c7d0a0caf9L,0x000000000000004cL } }, + /* 17 << 308 */ + { { 0xd892c4c47f86d0feL,0x56f68e92d138ac8bL,0x8b6cea6b812bef69L, + 0x65e6668136fb366fL,0x61708f66c5da12b8L,0x0f58c3a4c22b7b76L, + 0x51d69be4faa405a1L,0x468bd1ec9efef316L,0x0000000000000182L }, + { 0xfd9983c6150289b7L,0x7eb083abd79ab364L,0x2931bc8877a84d7fL, + 0x521e5eaa959467b2L,0xb6c94464c1b64119L,0x6012353f4c2cadbfL, + 0x134c778a4a2afac0L,0x996a73009c581747L,0x00000000000000e2L } }, + /* 18 << 308 */ + { { 0x33e1640091a78548L,0x0be887bfab17fd14L,0xd0a2c5b684e585a3L, + 0xa7263ff6c9aad376L,0x3043f2fece1efd0eL,0xa0680aabdef6a593L, + 0xe60970a1f0023f93L,0x3a37d9532721a60cL,0x000000000000019dL }, + { 0xa6f89b88c747ceb9L,0xef6327fbd2f9af42L,0x9fc2f4e140059ad8L, + 0x8cf625fb17fd0a67L,0x7b274e49d4b8e5fcL,0xabdf2b193b7de841L, + 0x33ace325cb52a6faL,0x245e8346cedd79e1L,0x00000000000000ceL } }, + /* 19 << 308 */ + { { 0x01bacaeabea17fe6L,0x59915363440c14b2L,0xaa37092ab5b2c0b1L, + 0x0d2bf652ab5e63ceL,0x78f5ca508ad78521L,0x22c92a71ed54e597L, + 0x25e63d45070cfe02L,0x4bd01cf9443e0e82L,0x0000000000000092L }, + { 0xd059eaebbee76889L,0x6bc4542fa6dffb4aL,0x249940849553d515L, + 0x2ce00cb4cc103183L,0x45a3fdc168016708L,0x2bad1eabbac0273eL, + 0x38ef326920c8a277L,0x849cd583f91eef7fL,0x00000000000001feL } }, + /* 20 << 308 */ + { { 0x1da8036a7a1e7dcdL,0xeba687ced11490f5L,0xc64142142c78ff92L, + 0xf74c827871e76977L,0x4f4870199295ccc6L,0x9f1f19e06d459a5fL, + 0xbd88bfbb1a7a5b0bL,0x64780f07e527efe7L,0x0000000000000042L }, + { 0x9e3e79f3e6b1281fL,0x0b7b6ea6cf470445L,0xa1b8c1490c2ab815L, + 0xc063df408bf69287L,0xc8db56473cda2051L,0x44a55be78b19b178L, + 0x359c27432185346fL,0xf88c095f67dbbce5L,0x0000000000000103L } }, + /* 21 << 308 */ + { { 0xb91e5920ac8ef011L,0x7a54d4adfbcdb74aL,0xfe11613ffc5098b5L, + 0x1336af032aef6001L,0x84605c56a061b0f6L,0x0f60d5e2e1fcaf04L, + 0xde8852e49907ac17L,0xb28a7d61ea6f8bdfL,0x000000000000005dL }, + { 
0x4004301e0f0e5fe5L,0xf197d689fc10a41cL,0x0b0fa151123330c4L, + 0xf03abc5f0d80b3fdL,0x70938e36c84adc56L,0x56e25d0682880d7bL, + 0xe4ba61eab767a4f9L,0xfec4056bec7805ceL,0x000000000000007bL } }, + /* 22 << 308 */ + { { 0x71b0470927f2d575L,0x4ce1b5a35ec90c8dL,0x1fa98cc92ed1c562L, + 0xffac24ea60a174a8L,0xa9eb67c721c17f93L,0xdff4dc8fc22a0e55L, + 0xd44fe45a97f21f99L,0x617b224cd9032856L,0x000000000000012bL }, + { 0x807472aea19c8053L,0x83365805a0ea3ce7L,0x15ade403800a5422L, + 0x1d7f62916bfeca38L,0xd3454c6c3975cdabL,0xba22ba37899c6b79L, + 0x46901af86bd4d316L,0xd7dc618e8259a72eL,0x000000000000006cL } }, + /* 23 << 308 */ + { { 0x2c1a594ce5e70724L,0x9827008023b65011L,0xc4bfcb78112d48a3L, + 0xe9f153719f70adfcL,0x95fe6dfa442466aaL,0x11f7ba5cc44e4c58L, + 0xbbb7c63059794602L,0x3caea52a595ab424L,0x00000000000000beL }, + { 0x7bc2206923fddef3L,0xdd953f62707e24fbL,0x4711583a602d304bL, + 0xb63e6c862ceb6dc1L,0x37485fd80d5380cbL,0x9dd8e21be6c10e7cL, + 0x47e4555b2bf9a466L,0x12bffd0346e86fd9L,0x00000000000001c3L } }, + /* 24 << 308 */ + { { 0xe2f4e66122495a58L,0x0f547f2112dedb25L,0xda70b4ccab617fa2L, + 0xdf8f0a8727716051L,0xda92b34e5b9602c9L,0x13f62ddb9edc1bb8L, + 0x4c9453427196c75aL,0x03d6a92485cc70b4L,0x00000000000000b4L }, + { 0x5b3255495f951e47L,0xcfdf328e31c6c06cL,0x4c747ca3b4409bc1L, + 0x82b37aedb2420014L,0x254fad7217af258aL,0x8520c05a9aaf24d3L, + 0x22c81bd52f2d8a68L,0x15f846222126085bL,0x00000000000001eaL } }, + /* 25 << 308 */ + { { 0x49cbcd516677335fL,0xff3b5c376973a2a6L,0x24fbdf0a76d9b3b1L, + 0xb90d67e23be472a8L,0xd3104c78a0370080L,0x2d0c35e019f99197L, + 0xc01ae666656ff6cdL,0x70357f51c10c44d5L,0x00000000000001b1L }, + { 0xe922e0ca48b5b2f2L,0x8f4a74eeba82bfd9L,0x78744832f23c0329L, + 0xdac7d5e418144f0eL,0x4b6c8a5573ab198bL,0xad8701552aeb0191L, + 0xcb25a018937740afL,0x217c8ebb064ab75eL,0x00000000000000ccL } }, + /* 26 << 308 */ + { { 0xc3e6e304b6017e8bL,0xe0f4658e93da01e9L,0x7c000b973650f195L, + 0xa252398f0db26432L,0x5b8c3d166a3e2695L,0x58aceeec15c35d5cL, + 
0xfbd04cf40c9d3e37L,0x91e494fb96955becL,0x000000000000006cL }, + { 0xf233b2294ef54821L,0xdf93fae3e9f9da80L,0xbcdd5253e7d7234dL, + 0x4b8a114bd07bb906L,0x32e091a2d6937e9aL,0xcc369893ad38b041L, + 0xcc0a70efd48839a3L,0x243948c57344ccffL,0x0000000000000070L } }, + /* 27 << 308 */ + { { 0x7d6c6c044e864354L,0x0a901df4875d7d03L,0xb5907546246dae1eL, + 0x6729f83134e1570dL,0xd764551bcfdea622L,0x313f1c743b5e6805L, + 0xa1481042669a54ccL,0x5f415fd1b868de4dL,0x000000000000004cL }, + { 0xd716453f41282c78L,0xd0195eb53fa4d88cL,0xe89c4289ab207259L, + 0xcde2eb67f7971651L,0xcea4c8f7c8412a8cL,0x0bc1ad8e99a02f0aL, + 0xae4acd7a8011b76fL,0xa1631045edd271d2L,0x00000000000001eaL } }, + /* 28 << 308 */ + { { 0xef3289b04ad9d4d3L,0x1642bc9a269cee37L,0xc43668c748c83a9dL, + 0x1273811b4be20506L,0x6ac04b3ad754dea6L,0xd7fc6015233d6ef0L, + 0xe1f876a478099339L,0x95f3600d848cc174L,0x0000000000000093L }, + { 0xd887337dfe703d68L,0x7fe8618d7fbb4be6L,0xcc8553bb5e92c232L, + 0x7c8c470d76d61087L,0x895e49a664c6ee9dL,0xd18e92679cce131bL, + 0x37e1526eafcd5762L,0xb36a29bace71ee8aL,0x0000000000000012L } }, + /* 29 << 308 */ + { { 0x5bd3e13724ef2377L,0xe78c3cf63fdfcae6L,0x4e142cabb58a6d07L, + 0x3e136d15d953e9a1L,0x0a7cc513153a91f6L,0x43b28b2c402eae12L, + 0xcc70a80949e328e6L,0x36f604d3e4e3b6d3L,0x0000000000000137L }, + { 0x9dab1378923289cdL,0xf78d3f45bde1d213L,0x5ab3e38b81422f9cL, + 0x1380b31a8ba8cbebL,0xe4a0c0d8f6d62dccL,0x05a681bd04c2e5fcL, + 0xc2dc99873ae76bacL,0x1b5b77687287ffa4L,0x000000000000009eL } }, + /* 30 << 308 */ + { { 0x313ffe575bed3c7bL,0x89efad76113b0db7L,0xa4068d0b23656bf8L, + 0x58b54b358440fcd0L,0xf012adff64e85c0bL,0x95fc8d711b1a0be4L, + 0xd8dda43290669498L,0x43a699b8cb319f35L,0x0000000000000030L }, + { 0xb47ffe3b8d9462a9L,0xd89b60f9768e68eeL,0x8679af2a3a0033a4L, + 0x585f61ee9dcca100L,0xaa093db603db700aL,0xe80a02fe08a588b7L, + 0xd6ad55849c2f6c6aL,0x0955a470748f1c99L,0x00000000000001c3L } }, + /* 31 << 308 */ + { { 0x6323f8eed9c72869L,0xd78b8344726240f5L,0x7eee40c79fcc91c6L, + 
0xfe0d1b6a43b91a09L,0x9254b68dbdf3e3e0L,0xf7ab62718f3b0872L, + 0x913f625223694657L,0x306927fe683200daL,0x0000000000000151L }, + { 0x68e2727cbf01e24eL,0xcfc5c7bd9ce8a665L,0x97659cc53e39c7f5L, + 0xeb81619823c6a080L,0x40b159fb9cc5fbe1L,0xed0d128e7072081eL, + 0x15146656cfa8fd34L,0x8a860e6be7cd5746L,0x00000000000000adL } }, + /* 32 << 308 */ + { { 0xed90e62b5e3ced2bL,0x160214bb5a919213L,0xdfb8ac078cb4fc2dL, + 0xea5aa6e7ea384db0L,0xc9492ed2f916fb11L,0xf40cecce1305bd78L, + 0xb98af5013f28555eL,0x4e9f84b268417d62L,0x000000000000014eL }, + { 0xbe791feaac7df2e2L,0x15c9429ce745ba60L,0xb02c1705198a2d50L, + 0x0a67cdce9b6e6a44L,0x640506295ad00221L,0x75620ea82b314076L, + 0x64c89ead87b02aecL,0x0e5ff515a92113ecL,0x00000000000001caL } }, + /* 33 << 308 */ + { { 0x2c19949ef2406274L,0xeb1e901251f176c7L,0x2695dcbb4ba80dc6L, + 0x2e07a7e0f5334f98L,0x9428720daf669d01L,0x82c33b4fefb29d88L, + 0xb2e43bcd57ca108eL,0x04a02cd6212733c8L,0x000000000000001aL }, + { 0xa88f7adc850c9f85L,0x3e05d4e2e6d0c323L,0xfd785dccf2abaaa4L, + 0xf4655ef4740cf540L,0x8ccd72a6db4f34c1L,0xff73999f3cb7f1afL, + 0x7a8f7afb67b965c6L,0x689736dc011e9fccL,0x0000000000000190L } }, + /* 34 << 308 */ + { { 0xe370e04bda8b5457L,0x63b60bea8169a63aL,0xca9c40db704d3577L, + 0x3232585b00e9f510L,0x917ba31c02396333L,0x7c5080f6b91efb06L, + 0x1eb3475cbf68771cL,0x4cec20604cf02263L,0x000000000000011cL }, + { 0x507d8e2d9a69e3c9L,0x5851d2297408ec4eL,0xb2ade543a8576e51L, + 0x37a8e2664dc34c8fL,0x3bdee95ac43ffaaeL,0x0a0c91250fa2332cL, + 0xe24013f395ee30f9L,0x5147e3805619b0f8L,0x00000000000000bcL } }, + /* 35 << 308 */ + { { 0xf004e4c8373d4c8aL,0x7822596b460ac6b0L,0x471aaf043775d740L, + 0xcf749e00d276f1e1L,0xcaccb1f7f2ebb22bL,0xb5588cee33256066L, + 0xcc12f8a3b5237d3dL,0xee1ddeae0b0be19fL,0x0000000000000019L }, + { 0x447a0ca173675ea4L,0x635562ac3d1ae535L,0xf0743ca5170b653aL, + 0x1104441f680762e2L,0xde5a0d91fdb1f4acL,0x2e7d6445c33ac495L, + 0x9dcd3ccb794a087bL,0x1d3f68a58696a6a5L,0x000000000000015aL } }, + /* 36 << 308 */ + { { 
0x2789d11789bd607fL,0xa49b3a3d1cf5731bL,0xb4d1ae40375eac7bL, + 0x7a9e3214709d9d95L,0xc7af5abb98b3eea0L,0xde6ea6f093831eceL, + 0x0a4230d2a22c398cL,0x7a2d8ce8484e09f8L,0x00000000000000d7L }, + { 0x41dd59f7641a8d21L,0x160f13b2f280dbb1L,0x5a3ccbd65a915aa1L, + 0x2bdc905e2c8b1919L,0x94c2351282289eecL,0xce5342af75f08e5aL, + 0x6fb1fe090c9c86e0L,0x8eac3f4599539227L,0x00000000000001f4L } }, + /* 37 << 308 */ + { { 0x3994c61f837a73fcL,0x05e46220c3c61376L,0xc0e2194910b886b8L, + 0x8359b1f1189ab28eL,0x13999cf923e8fe68L,0x04111bba06dab3daL, + 0xf18121394ca82f29L,0x5dd0e72bd9943929L,0x000000000000004fL }, + { 0x04858d7e9fe6d683L,0x2a004635afbc7265L,0x107e46cab959a351L, + 0x2f3e6d698b035e0aL,0x801c719799822213L,0x4fc2eb1161f5bc41L, + 0xf7c878ac5cbd4694L,0xb9dd8d02b6b9a266L,0x0000000000000196L } }, + /* 38 << 308 */ + { { 0x2cc54df39a952054L,0x47e7fb8b51ab4e98L,0xaff4e345b6e2cbd4L, + 0xd610c468f8c1d6a1L,0xa7563cfa992df027L,0x140c736189033162L, + 0x63971f30eb289c93L,0xe2d280352b4f7844L,0x00000000000001eaL }, + { 0x82228cb1bae96b69L,0xdf0d49c6a3083751L,0xbe69f9c9c81c58d0L, + 0x3cd796d57b379ce3L,0xccd51828bf65993dL,0x211b60262d97d104L, + 0xd4fc2ed814cae057L,0xc6ec10767e620833L,0x000000000000012fL } }, + /* 39 << 308 */ + { { 0x5f9c3dd34c7a6d9fL,0x980f2239fc7c6101L,0x2f9d1cdbce021a46L, + 0xa28bd3e778b27c58L,0xd945b2bc1c8ef989L,0xda48a77c0d1cb17cL, + 0xfd09eebd7610f5aeL,0x56a9dd2796b6ac9fL,0x0000000000000167L }, + { 0x849e468d1395f988L,0x2ab3d74264a87f6dL,0xf31f45c333b48458L, + 0x3919d83712166e26L,0xbd8adf1e79b58f44L,0xfb3b7a45366e3d27L, + 0x6290beabbba7525aL,0x7c682d245b4cecbdL,0x0000000000000147L } }, + /* 40 << 308 */ + { { 0xbe900c2b8b2cdcd6L,0x9cf651cf310d52b5L,0xd4a52f802935c768L, + 0x6e2e1c7e42edb029L,0xcdc8d47abefe41acL,0x199ee4beeda259faL, + 0xe706031f7f85ad33L,0x5a45278532b0f784L,0x000000000000004fL }, + { 0x2d3934ba35610090L,0x398169e89e7de644L,0x471618b4dd43f89fL, + 0x54961e3c7f1ef9b1L,0x46d59c5ed6fd43f8L,0xf5b2ecf88f2fb30aL, + 
0x6c423061b68e08a0L,0xb491e42bdf61a0e1L,0x0000000000000080L } }, + /* 41 << 308 */ + { { 0xdd61e4a3e3a78953L,0xbf87cfd848d529a0L,0x05aa387d57575951L, + 0xd7335ac15d8653b6L,0x5a87134fa75e619bL,0xd3b65b4ffb2975faL, + 0x60914e3730514133L,0x164480addf63e2d9L,0x0000000000000089L }, + { 0x46993b20c16d3f41L,0x56b02cb3f40e7e23L,0x40dccfe49fd8bf53L, + 0xd27718e9865bd74bL,0x7e67918f4f316243L,0x1496d27854c01a95L, + 0xe335a4aabce954eeL,0x1c65a2279c07cc82L,0x00000000000000c8L } }, + /* 42 << 308 */ + { { 0x88c2882bd4c52879L,0xf5a17282601e9b5bL,0xa2cdc54876b70fe4L, + 0x58dd0c4b6978da58L,0x745243423ffc240dL,0x24d69e0ea8be34baL, + 0xe8e520f3fa0a2e8bL,0x3dbd0ce31ec56e1bL,0x0000000000000053L }, + { 0x30f5a879ad6210bcL,0x2d50247054846592L,0x0fbd0ae1e24df628L, + 0xca9f795424f3e652L,0x33c6f50b4c7907ffL,0xc6985026d1598043L, + 0x05041dcd7b3e68c9L,0x16ce3f929c508fc0L,0x0000000000000012L } }, + /* 43 << 308 */ + { { 0x662b4d35c3726445L,0x5720f5305038de2fL,0x19da86837f89ec6bL, + 0x73da9d38c6ee50d1L,0xd29069f62da75c84L,0x946fa6989562b2a0L, + 0x39659a1c99802cc5L,0xeba11d668e39c13dL,0x000000000000013bL }, + { 0xe30e9161797cf75fL,0x77a27dc6dd246755L,0x4681dd15f948fc78L, + 0xce663c6999c0ffb3L,0x4b7a85cb661acc30L,0xa9a796cce444094dL, + 0x36c43bcb6c9f5061L,0x2545040379695bfeL,0x00000000000001edL } }, + /* 44 << 308 */ + { { 0xba81ed506a19d751L,0xd7d6c9a08ede7c16L,0x20a58314e4efb6faL, + 0x6cf44b6f17466134L,0xfabeff5243e52adfL,0x8bdd16b1c5f6f126L, + 0x4a30ac449514d967L,0x1b589fbac46ab077L,0x0000000000000180L }, + { 0x77cbae8d7a9f8a8eL,0x09f14f7997385badL,0x8136efa828b2c50eL, + 0x9d13c7d39d3f91e1L,0xbfa47812231393c4L,0xd5e2e14001104469L, + 0xd3e4a6876112a9bbL,0x3bb8410cfced65caL,0x00000000000000b2L } }, + /* 45 << 308 */ + { { 0x495b282a648a530eL,0xd97f888bbf1bfeddL,0xb8d68e2a724ef32eL, + 0xdcb78c0f092d41ebL,0x225506b0d83cb14aL,0x9697dda14cd71449L, + 0xbb1813e322d21a2bL,0x7e0e1f41ed79869fL,0x00000000000001baL }, + { 0x8da6a9cc02b9ce33L,0xd4e48938353fb0c1L,0x4071ef95164b3e77L, + 
0xe00f67a4c29fdca6L,0x24ac0818d8687a9aL,0x3f2bbfa3f2c95e4aL, + 0x3732ed47b24c03f2L,0x287b67fdaa853fa7L,0x0000000000000012L } }, + /* 46 << 308 */ + { { 0x71f8ab3171e1d201L,0x66af84451112f38eL,0xadf03cc091779ef2L, + 0xde8028abfd0ab5dfL,0x4fa210fea692662cL,0x145649061d83000fL, + 0x925492c383e1d5e3L,0x980ad314f0d5fb5bL,0x000000000000012fL }, + { 0x8d0164410565708eL,0xd4ee65af3b1c93d2L,0xf2759e7872ea4b77L, + 0xee155661b4384101L,0x354a5e0f85775c4dL,0x0d8ef755c8163ffaL, + 0x9ea27178c24bbdd4L,0x9b53bbce1a2571bcL,0x000000000000018cL } }, + /* 47 << 308 */ + { { 0x30766fec7f664c94L,0x7409db5ad21d54f0L,0xd49b56fe5dcd8399L, + 0xc78b523220b37488L,0xb2b34e987db34742L,0x453f193cc27ee29fL, + 0x819f1a9ce7ae5b8bL,0x3da6aced7c36808bL,0x000000000000009aL }, + { 0x6cfd9af22dd54b89L,0x774fa5ee9ff83338L,0xe4d38d08b9373ef9L, + 0xb7e1f43fe23f834bL,0x1333a5c20add6baeL,0x4fbffd23faa1e20fL, + 0xce555c822816888cL,0x34303fb8dd0eff4aL,0x0000000000000034L } }, + /* 48 << 308 */ + { { 0x52a60da401ecea43L,0x00e1ab1441bf6835L,0x785545cadd97b88aL, + 0xe2bc1326d2f2894eL,0x3c730457e398bf19L,0xa4c297531bdd3170L, + 0x7f01a08492ec7e73L,0x54d483b824bb9d11L,0x000000000000005aL }, + { 0xedc13dd6beea4dd5L,0xbcecc285125ec53aL,0x8b7de1ba5bcd72ccL, + 0x1853dbccddbbc9b6L,0x799b97b28f47fc1eL,0xc01253b8176cce73L, + 0xab6c5fdd5430bf2eL,0xf9250653cf5b31e6L,0x0000000000000042L } }, + /* 49 << 308 */ + { { 0xbad1e3b4b1814e8bL,0xdc80df020017a14bL,0x92e7c7f5d0caa98aL, + 0xd0fedadf154a72acL,0x83700909e5406a31L,0x009b49af34a6a9f1L, + 0x49747a7f5e86330aL,0xa14c39aabd0ef998L,0x00000000000000aaL }, + { 0x26eebaee360a5e0eL,0xfbf14808edfa126aL,0xe4d1dac6b7d01479L, + 0xf04493dbd8063451L,0x8429cb9e48a0421dL,0xfa1a9ed2c7323dfeL, + 0x1efcb833ed9d72e8L,0x3e8726103cf78a46L,0x000000000000010dL } }, + /* 50 << 308 */ + { { 0xc38371374a0bde6dL,0x675b967c2d8556a0L,0x6eaf1e86aae51478L, + 0x36675d0c822be9c8L,0xcbcbfe0f90625398L,0xcc96e4dff88a4d3bL, + 0xeaec101cdedaee78L,0xd87f4dc26c6fba50L,0x0000000000000071L }, + { 
0x2cffa71ef76c62cbL,0xe7adbdb562fc75eeL,0x991645e69f96589dL, + 0xb838a48a46b866b7L,0xf77a287f7526217bL,0x255a2868320f6b81L, + 0x59f9a54802fd4fb6L,0x3883508a2441ca7cL,0x0000000000000049L } }, + /* 51 << 308 */ + { { 0x6b348fd624b337a6L,0xa4e8dd141026a300L,0xae405b249069dc46L, + 0xeb322a385cd34d1cL,0xacffdda0bfddc93eL,0x2d3ead70a2004269L, + 0xca6ef046c923eea6L,0x95ec33c04d1c30e0L,0x00000000000000f6L }, + { 0x8a4391ac74ff127cL,0xed6f4c5ff3de50bfL,0xd956fad840e42f57L, + 0x078b1fbfab13da63L,0x53b8c6760f03b200L,0x7620a901916e1116L, + 0x60ba8e8d5d005bccL,0x5aeb871b8c06ea49L,0x00000000000001bdL } }, + /* 52 << 308 */ + { { 0x3a62717c0ffd6985L,0x1fd32f21733d9f3cL,0x011dbe5d8147aa6bL, + 0xcd9908fee54d912aL,0x3a816c571d8a478dL,0x2c3b8621546c8c6aL, + 0xeed9efbfe7163461L,0x486c1588b1748edaL,0x000000000000014fL }, + { 0xc74f435a42f32ff9L,0xcbb8a284762199ccL,0x21786a042b632b1aL, + 0x4f00521e9c0f441eL,0x9731b233a4693246L,0x61903ec24f45b460L, + 0x90d06bda1812873fL,0x233ebe9ac7dd899aL,0x0000000000000189L } }, + /* 53 << 308 */ + { { 0xd0cc6ac7d1604578L,0x8f7c03bd9a7cf248L,0xc3737964837f507aL, + 0xd2cc12f3eb1aec18L,0x08827d5b1130097fL,0x1345bcab91f0bea0L, + 0xad565af5255b4d20L,0xcefc35609f26fdcbL,0x0000000000000105L }, + { 0x8242ac7a2f183ca4L,0xa6018cc3f9824ea7L,0x6efb40a2aafae2dbL, + 0x34ac62097610e9fdL,0x86ec4e010120c604L,0xa63ab91f18999b56L, + 0xf56dd81f2c39681aL,0x5e77017c8a7397f3L,0x0000000000000048L } }, + /* 54 << 308 */ + { { 0xcee29d979fd8ac86L,0x100ebc46736e18ebL,0x772b082c315677bdL, + 0x9c1db95fb0a2e064L,0x396ce154ab199211L,0x1e30a3a2c6daa93aL, + 0x9a7ac319f96900f6L,0xf1e451de804c81cdL,0x00000000000000f7L }, + { 0xb332554cc5dcb3e8L,0x7417b3062c2bb6ebL,0x049d887506fac3fbL, + 0x534ef151175afdd7L,0xf342d62e3a58683aL,0xe583a4bfe09ea4c3L, + 0x5cf0b6caaf2483c0L,0x8faecd4f589f4692L,0x0000000000000189L } }, + /* 55 << 308 */ + { { 0xb73d4ba8c609a9acL,0xa3ae20b5e8cd2948L,0x6c9f7f2cf5ba63ceL, + 0xba195b445d88340eL,0xee88c9c07e1cee38L,0x7ba10f56fcfa1694L, + 
0x4834f9cb439198d5L,0xa4fa944dd69d83fbL,0x0000000000000045L }, + { 0x9488f1e0aee1243cL,0xf3931d8f731dc6a0L,0x6ea82c88b9a9311dL, + 0xcae86a35158843d9L,0x7d8f6b9789e3d41fL,0xe45413a917a8638aL, + 0x54fc84f72fde14adL,0x4889b4d4045c542dL,0x0000000000000137L } }, + /* 56 << 308 */ + { { 0x3b6ae375df5713a1L,0x507539c8eeb08615L,0xeea363fd79f23ed8L, + 0xd35de9c05a24a3a6L,0xd655632794c5e698L,0x43cfb402eea346eaL, + 0xf7f0d1e1bebe4d32L,0x3b36bd8c70a22463L,0x00000000000000b0L }, + { 0x78777cd9c845e984L,0xcb446e18a31e5dcbL,0xb4851ea3dabefc01L, + 0x18d2a234ef97b76dL,0x50a85010d5a74049L,0x14b78201acb00ffaL, + 0x0a8fc98c4cbb648cL,0x4be4551f9873fce5L,0x0000000000000124L } }, + /* 57 << 308 */ + { { 0xb363062aa7afddabL,0xd8268f1de5dd4407L,0x3ace2a091b790e64L, + 0xa0c92d9c3555f9a9L,0x0f752fb5384658faL,0x6749e84e52980595L, + 0x7084539ff40b0d0dL,0x9eb1046f552ed611L,0x0000000000000052L }, + { 0x4d47f6856686c007L,0x44bbdeccedbb1abdL,0x114cafdc075d0809L, + 0xf4f4eeea6a05c788L,0x75ddcddfd45dfcc3L,0x147b1787235c7924L, + 0xd492c7aaf5a4ab5bL,0xaa4b32a02bac18a4L,0x0000000000000075L } }, + /* 58 << 308 */ + { { 0x084a6b6311d23a5cL,0xab5f9756b3a73439L,0x252f50e4cf1a7bceL, + 0xa066a37e7a498ac8L,0x29206e9e8c559d33L,0xf9fe43ec8c602500L, + 0x2bb76d93c7c77774L,0x68609df4f9689fabL,0x00000000000001fbL }, + { 0xf6223435a31c6d6aL,0x016ccdb2eeabda79L,0x33ef208790e53bb3L, + 0x942ea7869d04b29fL,0x86905d345ba2a37bL,0x613723c8ac546743L, + 0xd5a462b093c3a659L,0xf133494680b4a914L,0x0000000000000095L } }, + /* 59 << 308 */ + { { 0x0e964bfae51c867dL,0x4f3f0bcc3ed4de99L,0x66c3bc1c46193003L, + 0x5ea89fcea9802a49L,0x1e81c4b4399876daL,0xa68d345dbf61cf47L, + 0x2a527a8bf4ac1373L,0xcde8a8a922975cf0L,0x00000000000000d9L }, + { 0xaad318a1d012c097L,0xca78d31b9b8d36dbL,0x2448c6ace479884dL, + 0x7c43e6d65bb93799L,0xd293de3aeeca6834L,0xaec6d7d535dd9091L, + 0x34f04c3b3ed39ff8L,0xa79da00f1ce455afL,0x0000000000000174L } }, + /* 60 << 308 */ + { { 0xd9e135d4909e5d9bL,0x1ae0a17c2e0bc5d7L,0x1fbfefd075076db8L, + 
0xe071dc39a834aeb9L,0x1eeb9d2eb5d650b1L,0xaa20dfbb76cddd0bL, + 0xc32c3efc3795fae8L,0x5adcde3c2a3650a9L,0x00000000000000edL }, + { 0xdb923c659f719586L,0x8c1b9bd1f69546b2L,0x4823b2184ffcc633L, + 0x841f3292451d0538L,0x52a64ca0ebeb45e8L,0x8a0b375b64d61c71L, + 0x0425f6cee13c6ae6L,0x98cf4b4d00827285L,0x00000000000000a7L } }, + /* 61 << 308 */ + { { 0xdcf7204a502a5841L,0xe8803fd9b3c2389dL,0x98af342ac7587e46L, + 0xf6d845cf1b1a333bL,0x1fbe34235acdf342L,0x8ad324448c6cef92L, + 0x4a48cbc2ac8608baL,0x105bf24a88f4bfb2L,0x00000000000000cfL }, + { 0xf5bd7fbb45d5cc83L,0xd251ae69b14bb0ceL,0x7f6c9961cda6e920L, + 0x5548da95b0e20d2cL,0xb2d81a5eeeb3f6b4L,0xd3c1ac3f83ee7b22L, + 0x1198f80d9dcdee2fL,0x0f1113270cca9beaL,0x0000000000000052L } }, + /* 62 << 308 */ + { { 0x681c9c9e5551d22dL,0xe1011410c24d40bbL,0x1a659728df00294fL, + 0x1d4ac5e4045c6bd6L,0xb61e6eb0c71e0ac5L,0x0aa6738fbd4c522dL, + 0x278bee73f286cbdcL,0x9a9ca40d64f560a3L,0x000000000000003cL }, + { 0xd18807d339aa5452L,0xcc5b2a69613b3726L,0x230b5e89c8957cb8L, + 0xd32a1ec3b346ab21L,0x8a21c20975cf8d12L,0xd5d16bd029962528L, + 0xc6f8836703130361L,0x034d6f15925aef31L,0x000000000000011dL } }, + /* 63 << 308 */ + { { 0x42e9edc80584715eL,0xd218b0a4011faf8dL,0x055e4eb095fd2a15L, + 0x68ea9b5ddccea016L,0x0180246ffe9af8f8L,0x38bd9a02b832006dL, + 0x9af977eca3414177L,0x0746ad2fa173e5e0L,0x00000000000000ceL }, + { 0xefef29e60b444e14L,0x4357b9377d74016bL,0xeb63316d6bec3801L, + 0x25164848dbfc9d02L,0x00d845fccd52497eL,0x8c54d90c615fd91dL, + 0x33ceba564ec93464L,0x7a9e51d9f6f85b80L,0x00000000000000cdL } }, + /* 64 << 308 */ + { { 0xc071157213cc2875L,0x422583136ec01830L,0x32cfef4101e95528L, + 0xa8741ca0af8116b4L,0x3195f7f358f1fa26L,0xf2e969281144142bL, + 0x5a9c331f6b179bf4L,0x45e363f236c6c42fL,0x0000000000000057L }, + { 0xbe2e55f12c04c8aeL,0xa522f7d53af16430L,0xeae2b2c42904630cL, + 0x58f5c76e8e151736L,0xdfa21ea65aee8e9eL,0x103b7ad681717f92L, + 0xa89e274a9b4464eeL,0x316f580f2daa3259L,0x000000000000004eL } }, + /* 0 << 315 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 315 */ + { { 0xd1ce126eec3d1383L,0x0c7f980a4805b18eL,0x65945086fc1b1f4eL, + 0xac1703ae092e0ca0L,0x834c77f98b5ee5c0L,0x3e722f576d19fbb5L, + 0xae8a944af6770bd1L,0xe1d110502a7c7101L,0x00000000000000d7L }, + { 0x62029fd2ca303000L,0x366c72c9acb52ecdL,0xcf89c1aacc8dd8a2L, + 0x95c235bf5d1b984dL,0x3434d10dd1a80d52L,0x094d0a8f77e95addL, + 0x03890027d1203660L,0x32faf27329791ab3L,0x0000000000000075L } }, + /* 2 << 315 */ + { { 0x18cadddda03efd8cL,0x9d59bb52aeb3a762L,0x571d86bed1fd3f91L, + 0x81642aa1d2f26d6dL,0x7d1c3bc999877251L,0x0518eff8d7c2fd25L, + 0x6a54f17037b6c641L,0x8ffedfc00d33a426L,0x00000000000000c8L }, + { 0xacf67416bb54847bL,0xa293a8d7f2b54b28L,0x32f2b11a1f7f6b47L, + 0xc11948bcc6d67a9dL,0xc8b86b1f47623783L,0x357326aa5c2362e6L, + 0xc298be1b6cf79126L,0xf07b3ea784c5a79cL,0x000000000000006cL } }, + /* 3 << 315 */ + { { 0xe4cc8937c5cfe5e0L,0xb01db28fe534260dL,0x3311127e76c1f2f6L, + 0x0f4f7cfcdd8e1644L,0xd788f9f85ae2b93fL,0xf64431f8007d4765L, + 0xb3ad8af287159ff4L,0x8401e32600a7857fL,0x0000000000000016L }, + { 0x49b5a301e78a8db5L,0x97b3fd44b4d74fcbL,0xf5e2754d19f77e5aL, + 0x908fbc9c7bf17a6bL,0xfbe48b086980179fL,0xc33a8480cd28c69bL, + 0xa74a2ebed11832dcL,0x572b5f3957d49627L,0x0000000000000115L } }, + /* 4 << 315 */ + { { 0x4379897720210875L,0x8e95ab32d58004c6L,0x33058f96c68a6aa7L, + 0x2db1fdb8914082e8L,0x7841a48993e12423L,0x051a4e151f9d2ff5L, + 0x618eeac54da88e65L,0xc52113cd512c1795L,0x00000000000001b5L }, + { 0xb6dace96e93af674L,0x4ea1073bedb5c3f6L,0x4352509fa0b861d8L, + 0x05451d6f7adb5cfbL,0x194e3acd9a3375d9L,0x6258a5ab33658d41L, + 0x2bc60cb7f542a8d8L,0x68e50e5a74afe0cfL,0x0000000000000187L } }, + /* 5 << 315 */ + { { 0x8ab1a9337e8401d3L,0xdb8a437374dd345cL,0x8fc17b06dfc52ec6L, + 0x53a4528c3a8cbd43L,0x791f0050ff746544L,0x97c15abf468923c8L, + 0x4ba5521cd71e2cedL,0xe7ff5cc44367a0f4L,0x000000000000011dL }, + { 0x2397c8b2ca828fa8L,0x2d18f0b05bbc8858L,0x8aaec79f7ccdcee6L, 
+ 0x058906aaef1284beL,0x2bfc7d36d3e0d3afL,0x497a175435bb97ffL, + 0xb270ae8ea8383a01L,0x3a7f70b8db1e5515L,0x0000000000000083L } }, + /* 6 << 315 */ + { { 0x5ebbd7015a0c9415L,0x7bcef0e229572d13L,0x539a78133f5a169bL, + 0x6f3a84a4bac0abfaL,0x822da09bea1d0b7aL,0x9b2d6a6c05c00dbfL, + 0x134fe65dcd8c7e6eL,0xe4622a9f6301e4caL,0x00000000000001ffL }, + { 0x5fbeb1f8073963ddL,0xcca648b0e595aad8L,0x5cc97e773d15c1d4L, + 0xf2d7697062989b5eL,0xf321c0c78afe8351L,0xbf361aa293a60c7bL, + 0xd9938c1174c2503aL,0xb05daaa5e409747aL,0x0000000000000008L } }, + /* 7 << 315 */ + { { 0xc6874a121147f509L,0x0882051c83b58e0dL,0x25553306823e9395L, + 0xba5751a97954be0eL,0x86056e04f06cf7f0L,0xdb057ca02c10577bL, + 0x8ec2f7fd400e24a9L,0x32356d2fbcf34c3cL,0x00000000000000cdL }, + { 0x931835514bb690a0L,0x373451d622798da7L,0x2474aa366b8da44bL, + 0x947d984cdaeba693L,0x3c3e3b2b21f6d6c8L,0x37b0234fecc78a44L, + 0x4b49e661959f6b39L,0x29cafa8b37bb5b1eL,0x0000000000000097L } }, + /* 8 << 315 */ + { { 0xc43876449d2224b8L,0x80c92ac5563094e8L,0xef919bdbf6d0542eL, + 0xc71a882d5c40a787L,0xdb3851be0880c1b1L,0x4f72ff0c287cae73L, + 0x0a628f172bf3efe6L,0x29db69240150e4a7L,0x0000000000000175L }, + { 0x7983f80acc367287L,0xfe2c8f3dc4633183L,0x41b0e222ae89f8c9L, + 0x93fdd59beb58ac1eL,0x5ee164c797538725L,0xb8e3b75b7591fb4eL, + 0xa2d0fa2e355a6a6cL,0x391a05540405f33aL,0x00000000000000e6L } }, + /* 9 << 315 */ + { { 0x1b0bf08bf55f34e4L,0x5fe51caa1de9b981L,0xc243b50297197570L, + 0x2cadcc3d699ac777L,0x80b62305dd9c8010L,0x93a003d3113ad5e8L, + 0xbb920d789e783dedL,0x2a2df2e8cdfc37d0L,0x000000000000010bL }, + { 0x14e3757112860379L,0x99f12790521bc74cL,0xc05287f50407d2b6L, + 0xeb6b8d61b3fd9150L,0xe41ab7595d637ad6L,0x8efa6e451fb70681L, + 0xe371b32cf025b1ceL,0x5bf35241e85fa064L,0x00000000000001e9L } }, + /* 10 << 315 */ + { { 0xfde8d0ed0254d2d3L,0xdc6868c5f24357a8L,0x1806776f26106991L, + 0xa3a1446d387d0f81L,0xeb83f7b319591fdcL,0x97678d46136e098cL, + 0x89292ad342e7487fL,0x53b24c0ab3c12865L,0x00000000000000b3L }, + { 
0x06fc68f37a74e056L,0x10354bae9d0dd6d1L,0x713509d967092c45L, + 0x5ddd0fba52c97523L,0x421e81604616eaf9L,0xacaf5cf0a795b907L, + 0x1ca067c8beb5a28cL,0xe07cbbcdb79cea20L,0x0000000000000179L } }, + /* 11 << 315 */ + { { 0x59900b5c8b1b724eL,0x8dd0246c388c09c0L,0xbcf033ea7b567f8dL, + 0xaeb6215272898dc3L,0x01d8cefa24f7cceeL,0xa34d4c312415a08bL, + 0x60315ab2bee682c8L,0x28bd510f85ec5fc7L,0x000000000000000dL }, + { 0xc031c759d505a0e7L,0x40d1a56cda783571L,0xc15647414ec503e1L, + 0xbd02b9a6621e3c52L,0x618d1cbbeb48b235L,0xdf1bdb37880c3090L, + 0x95d53d4b664302a0L,0xeaf9e9bde9acab6cL,0x00000000000001ebL } }, + /* 12 << 315 */ + { { 0xcc511efe456925a5L,0x4290a6e5f0a1b7e2L,0x1705dcce8756548bL, + 0x4c139046b509c290L,0x9d0bb0844cd17379L,0x4643b4bade17e05eL, + 0x12be04c35f7100f2L,0x3ce4459479197058L,0x0000000000000191L }, + { 0x065c3e87f32703cdL,0x5ae023cc11390895L,0xd2a40ecac01552a3L, + 0xfcd1382288ee5054L,0xf61ca6f773498d40L,0x2a4653be0dd5e19aL, + 0xa56016eef88e9af1L,0xbd5c973aa73e0e21L,0x00000000000000f0L } }, + /* 13 << 315 */ + { { 0x4416d2e6c4cb8040L,0x004946f92aceef23L,0xfbeb8979a86b1ea6L, + 0x878481c65fd3c659L,0x28f514fba8e572d3L,0x8b9e4bdd55d6f02dL, + 0x3e76d73546867a07L,0xa1050be7517430a4L,0x00000000000000c8L }, + { 0x6538fa01526ae111L,0xf277a4bb142b038bL,0x6be27430efe4dd66L, + 0xd8ed5e9f8f8d8888L,0xca67196065dc30b6L,0x1a1ee5afc652d2a8L, + 0xe1a803044b3fccbdL,0x0ec65bd04cbd0471L,0x0000000000000177L } }, + /* 14 << 315 */ + { { 0x72e348491df30301L,0x71ba20a6a7ae2709L,0x2ac2b9e7eac84515L, + 0x7db23e2c684fdd3dL,0xb7ff18f9b1b07c3bL,0x296f805422f64d40L, + 0xe7e74e51aca26121L,0x2382baeba4abf51aL,0x000000000000004fL }, + { 0x076d34bba03677f6L,0x42bcc9c76484f153L,0x7b5fd617d74dd1bbL, + 0x1ba3e99c797cf9d5L,0xa907800787403dc5L,0x98278850d6d1e062L, + 0x456314ca6cc37a89L,0x3d3cff1b0a6daf4bL,0x0000000000000117L } }, + /* 15 << 315 */ + { { 0x1a19978f485193f0L,0xadb5a7bdde0df861L,0xbaaeb9a4f73b9dc8L, + 0xc66335183779f809L,0x9b55d68f3edee638L,0xcb261cf96a5affdaL, + 
0xcb6a9ba2e39b793fL,0xa4ee0df08936019aL,0x00000000000000faL }, + { 0xe482665d53161177L,0xe3b2fc496ea0bdb4L,0x783ca2b5dabcdfdbL, + 0x9025498735a60e50L,0x6b9114ed0414d9ecL,0x2a9467665f239c27L, + 0x4623c042397b3971L,0xe683e6e532995c63L,0x0000000000000001L } }, + /* 16 << 315 */ + { { 0x088b099e8cdfe07dL,0x5a641cf1b905708eL,0xc281af16a221b96aL, + 0x8bfa8dc0f622f5bfL,0x432872735ecb0216L,0x13ea7e4d6dab8b7bL, + 0xa478fb012212b506L,0xcb4be166a0c1a955L,0x00000000000001d3L }, + { 0x38a2aa3dcf16c121L,0xcd70ea8a7f8eed07L,0x54532cd7e3a04c52L, + 0x8939f904ba78e53cL,0xc0132c268b563a91L,0x6730a0fcc093e75aL, + 0x1e6d961600fb49e3L,0x0af2a79f28587059L,0x0000000000000176L } }, + /* 17 << 315 */ + { { 0x866bec9ed63e2975L,0x8a29116c3e8d9c3cL,0x2b0d1b0436ea7fa8L, + 0xbf75196cf80c5e0dL,0x94dd18d370a51d7eL,0xdd9d61b9ea3f979eL, + 0x7ce80604db84f9c7L,0x4207dd17a593918aL,0x00000000000001b1L }, + { 0x5ab49cbf0b981980L,0x336f5442a93b94b9L,0x6568223fc5e38c54L, + 0xff80f21b3b2b4ef6L,0x6a77ff11c1e9ce9cL,0x07d4d0c02fca93e4L, + 0x955d771d76e3c109L,0x2f22c54d0ae199b7L,0x00000000000000c4L } }, + /* 18 << 315 */ + { { 0xbd742bb58c8601dcL,0x0d577d51bbd68faaL,0xb1ba3c3398b6e280L, + 0x1bfc6c723e8a148eL,0x59a16f766e41826eL,0x3ee5c3e7161094f5L, + 0xa98675bc24c86f2eL,0xb94c47abe2761752L,0x00000000000000b9L }, + { 0x1824673056dc8149L,0x846838c94ff82f9fL,0x23c95268450afb9aL, + 0x40cc51aa14fb3828L,0x28d1619711f0d733L,0x0efcce77513057a2L, + 0x9edb17beed0a7d9eL,0x4d6ed1fac7b17549L,0x00000000000000d8L } }, + /* 19 << 315 */ + { { 0xbaca7240c82b5aa4L,0x61e27f19ef5855f9L,0xb4c9237aea0e986fL, + 0xd090d6a554db69bdL,0x6b4fb63fccceaed7L,0x0cc89909fabad461L, + 0x9f7676cf0b9709e1L,0xbe92aa69fdda8413L,0x000000000000018fL }, + { 0xd99cc5a0410aa767L,0x39b6f4a87e75f2d8L,0x1980d6925eec5fa5L, + 0x9b314dc3b8846f32L,0x9714e192286dd13bL,0x94cccd2c467b8524L, + 0xd7b3d49005fba305L,0x75b7f38f7a5ff3c4L,0x0000000000000096L } }, + /* 20 << 315 */ + { { 0x15396e6b8c4141d3L,0x3ec8a34ef7755d55L,0xa9bddf95f39fb433L, + 
0x79e71beaed69b7d6L,0x51c722cb9d0122deL,0x8de4cb47e0e7b60cL, + 0x726ba86dcbb17e78L,0x2815c3bf5c037641L,0x0000000000000032L }, + { 0x3366bd3547050a39L,0xcf665be239de8ff8L,0x0b1b2dc88b657ce8L, + 0xac1e0b80db61f750L,0x09f5eeb8971e2a24L,0x92850350ee755facL, + 0x3ea8358d3e97465dL,0x755b7ed954cb917eL,0x0000000000000188L } }, + /* 21 << 315 */ + { { 0x374097fdd5c529c4L,0xeae8aeaf14e14b24L,0x6246c5fa5449b2cfL, + 0x99c13884f8dc4b4bL,0x2e72eaedec7b3eeeL,0x877887a6f9f0d5f4L, + 0x4f97725fc0e5a19eL,0xbd7971fe9bc83533L,0x00000000000001a0L }, + { 0x79302079c035fa1bL,0xfa3e954f075d1cbeL,0xb59fda4254310460L, + 0x35004b05cb8dc827L,0xfa4227dd598766a4L,0xd6610ed7257db68bL, + 0x91f34e57df3d7a8aL,0xbc586bd69c420862L,0x00000000000001ceL } }, + /* 22 << 315 */ + { { 0x930cad152d7a2680L,0x6d7a0454c719cf5bL,0x7478744ebbb88406L, + 0x2598c4885883c3c3L,0x466925014f5fab3aL,0xc803c49ddd8a471eL, + 0x10cfa5b5831d5062L,0x2bce8e3c8cd03021L,0x000000000000004dL }, + { 0x8a75f6cd09a2cca3L,0x44598d59b03cff30L,0xf49eefdc9b9d4ac1L, + 0x6d759aea116a7518L,0x0f330edc02b18830L,0x52612307c41fc942L, + 0xea4652fb41b5e855L,0x5dce0d5c3ad0f9a9L,0x0000000000000018L } }, + /* 23 << 315 */ + { { 0x685e156c633e8718L,0x664dbda487a0c479L,0xbd5bd2516b3e4747L, + 0x873fb05ad9204996L,0xe1c1b3745b9ca959L,0xaee1fe5ab48568b8L, + 0x2b8a77560ccc64aaL,0xe0fffa144a842525L,0x000000000000001dL }, + { 0x81ab0b04d7864a53L,0xd2ded9626dce74dcL,0x20f4f42ae6ee03b4L, + 0x216939d9b133502cL,0x1c164e2ae583d735L,0x558bb0247a01a682L, + 0x68e8d4b88ff470e4L,0x1b500f437a69dc9aL,0x000000000000000aL } }, + /* 24 << 315 */ + { { 0x87cd5f5b43522030L,0x9b95d7cca97925f2L,0xa97d2673c4f3d98dL, + 0xb760715ae3a33b1bL,0x6848b7f36f47818fL,0x19d3d312f4b35002L, + 0x94faf039b3f57798L,0x62a72bfeb76ae400L,0x0000000000000100L }, + { 0xdbbbafdc31877679L,0xa68c50a2945b79f0L,0xd5b6b7c1a457a77eL, + 0x1d5249e0e8ea38c6L,0xef61b6ab33b52300L,0x864aca06e3d8c9a1L, + 0x852087916060f8d6L,0x1827cac1e1dd6babL,0x00000000000000ecL } }, + /* 25 << 315 */ + { { 
0x417c93d92a097cc4L,0xe2006ea23311b1c5L,0x6265dae786c3a4d3L, + 0x22777fa2fd61a771L,0x21c8566ee52726a1L,0x5544b0ba525cbc66L, + 0x0227192516a552ddL,0x26f159fe26ee27ebL,0x00000000000000a9L }, + { 0xa03998cbaecc32f4L,0x3836f5f386372aa8L,0x0d708a1f5fb988bbL, + 0x8d865eebac677bf6L,0x9386ec40244e47bcL,0xf38e252310e055dcL, + 0x938b35a76ac7bd8bL,0x79cc344948973178L,0x00000000000001a7L } }, + /* 26 << 315 */ + { { 0xf8f4c244d0a59fe9L,0x0b34084b76402697L,0xe4240b863e032519L, + 0x7cf7b7aac2065720L,0x798134d64cb1ffe5L,0x78e358f275f60549L, + 0x4557430370582d54L,0x452570d4828dfbfaL,0x000000000000009eL }, + { 0x1b0bcbc468d79237L,0x8bcb0d804f5a6a6cL,0xd108b4f099609cfaL, + 0xe08db1ff71be7d63L,0x7ad777da27bd2d03L,0xce7fae775749c830L, + 0x1ad692f7c4565777L,0x93798a2aff8b8327L,0x0000000000000010L } }, + /* 27 << 315 */ + { { 0xa44bd43b7d176e2eL,0x73017a4eae0c21d1L,0x6f98fe80c170cbb6L, + 0xab5799b293df22fbL,0x765f4dcaa85dd542L,0x854eddbc619b4adcL, + 0xe1dc52446cf558e6L,0xc0a4f996e45f4052L,0x0000000000000150L }, + { 0xc1e0e94ea8a05aa1L,0xaf1abf91f3f24aedL,0x58c0603ec4d3ce6eL, + 0x9c496e91dc76c75aL,0xe6049fc196d9aa52L,0xd7862b4a2ed5b8c4L, + 0x41ceb99338f87e08L,0xe769c78f3c16ca50L,0x0000000000000000L } }, + /* 28 << 315 */ + { { 0xc50efa15dacda34cL,0x159e94dbb6e96210L,0x94b10231bd9319beL, + 0xa5914f55e2a461f6L,0xb75fb2ab47bb66c7L,0x4ffcfa6756b6e4cbL, + 0x559918efc40a2512L,0xad434761b66c07d4L,0x00000000000001e4L }, + { 0x9885be1bebae3f4fL,0xbd84a8ee4302a9a7L,0xbea75f26c373e684L, + 0x0da8fdf6472f2c17L,0x381a92bbb5dffe57L,0xc69646bad24160cdL, + 0x0577b8792ecad05bL,0xc59496c27fb62711L,0x0000000000000068L } }, + /* 29 << 315 */ + { { 0xab4afde7ff46e9d5L,0x33e45d21d2516ec8L,0x5561315ae08354cdL, + 0xde6e1582510407f3L,0xad202e0aad3ef0cdL,0x03c92749fc289a62L, + 0xc5022bb7bd3bf154L,0x3b2112f0358497bfL,0x0000000000000085L }, + { 0xd97a76c60e367447L,0xdd1c70ee13a73f92L,0xef80383807320e00L, + 0x92fa2d854bd797b8L,0xaaeec02b5287a9ccL,0x8dc7d907101f13d4L, + 
0xffebb4d5f8c6b255L,0x76ea86c8a78c99b6L,0x000000000000007eL } }, + /* 30 << 315 */ + { { 0x04cc8a0b9c2de291L,0xbde190d41b774116L,0x76fd915e0cef5563L, + 0x08980204a3ad15abL,0x8532bff00047227eL,0x93713303df68231cL, + 0xa4d4b863b1b0de98L,0x67686192e0cad308L,0x000000000000007dL }, + { 0x6748cd2672cf3e2fL,0xa342c5d7eb321523L,0x0466141c5021d422L, + 0x116bf2246a70d2d8L,0x7129a3a4ce05f6fcL,0x86600259d9185da8L, + 0x8a6db564dd965c37L,0xb073f80c79e8f792L,0x0000000000000049L } }, + /* 31 << 315 */ + { { 0x00da97d33d831a04L,0xd929cf2a1eac836aL,0xf7d5c4c77c7c118eL, + 0xde71253e1c4c51ceL,0x6d2474376ec3e2e0L,0x9b912b7bf31fa3abL, + 0x10f538043da99cb1L,0xfcf8a99c2a16e669L,0x0000000000000098L }, + { 0xa70d9571d49b2fc3L,0x081849f4834cd35bL,0xf1e28cf98756bb05L, + 0x2fa729aa5568d3a9L,0xbb5d124899bba4c8L,0xcd742b7c3ee032a3L, + 0x693342cbd87a7668L,0x9bb74be7bfcfe6a8L,0x0000000000000176L } }, + /* 32 << 315 */ + { { 0x5f47ddb9ab9923d7L,0x839865d48e3fac52L,0x0214e93f647424b0L, + 0x266c909c31d65c6bL,0x956deca451457facL,0x8e8d240ea85ce8a5L, + 0x736652c4a4d9b281L,0x131a4191b77a526dL,0x000000000000008aL }, + { 0xa848319532d8542eL,0x1f7a8e67c3f38387L,0x162ac248d6797f2eL, + 0xd26469e10a6a1118L,0xb33aff1ff203102dL,0x63a52154aadf0ae9L, + 0xf0431dbb4bb313a7L,0xf93c18fbca9da156L,0x000000000000015cL } }, + /* 33 << 315 */ + { { 0x39974a2672134c85L,0xd7aa0a9d4c7e0f6eL,0x1b9edbab3443bd65L, + 0x6cd2d98bdccd2e50L,0x632879884b109d43L,0x44825b34d397a0feL, + 0xd53353c27118c8b5L,0x268cd6d6c01b82dfL,0x00000000000000f2L }, + { 0x8c5babff457c84bdL,0x117faa3055b81ff0L,0xc124c04661bbd4edL, + 0xe136b9a5a787f1f3L,0xa06373d1bdafa50eL,0xea1b6c0844a7284aL, + 0x9330d2feb1a5b08aL,0x25aa027d162f9e9cL,0x0000000000000182L } }, + /* 34 << 315 */ + { { 0x568ec079d87b35feL,0xa08deac6958c3388L,0x715279434034a877L, + 0x0859576ef2fef769L,0x60bd705595201257L,0x9f19d411dd68e3c4L, + 0x595fb01498308e14L,0xa2a84c0d4fa07f02L,0x000000000000000aL }, + { 0xdf1e5d32e69aea45L,0x9557a618318f191fL,0xd4f95c503b1c6341L, + 
0xaaef0d9e6ae9e855L,0xea3847180c9cb2d8L,0x13e92ab81a52a30cL, + 0x1651ed2863bb271bL,0xd97ff1ee98a52306L,0x00000000000001d5L } }, + /* 35 << 315 */ + { { 0xfe80b99efbbf1e27L,0xfb7b5ee11955b0c0L,0x4f3a5833e1708fefL, + 0xf5a2e09a55e25e94L,0x9c6d38ed81d8c307L,0xe55d8fbc85262c42L, + 0xd65f395c2d4726eeL,0xdc540d4333cd1d74L,0x0000000000000149L }, + { 0xc010dccfd74a298eL,0x9e7056864a666f1cL,0xe0cfe3db254c3b4fL, + 0xb979124ff7b07fe9L,0x649ceddfb1b99c8aL,0x400cca2d83df18a7L, + 0xb0572105b99596b6L,0x30719cbf1edf710fL,0x0000000000000087L } }, + /* 36 << 315 */ + { { 0xa19953973af2d340L,0xfeb7bfb1d4141779L,0x64f0f9677a4bd845L, + 0x55dfb0868b2b75ddL,0x3b71a0508bbb38e1L,0xaf038a2aeba716a2L, + 0xc49b38c6a91319eeL,0x71f1e3cea8535e9eL,0x0000000000000105L }, + { 0x3f07f7e964c26f33L,0x953a5abb347b8f7bL,0x5104f896c06b5c17L, + 0x8a2e1b533c3e52c6L,0x2127f321ffba4ee1L,0xaf288b9499e502b0L, + 0x04fde09b29407d69L,0x29d8e20c86bf0d35L,0x00000000000001d6L } }, + /* 37 << 315 */ + { { 0x1aee0faf9fbec97eL,0x7d82f31e42ad7101L,0xc9f00d2750e8458eL, + 0x5c099fbe6b18313aL,0x96a679ef65d81613L,0x4fff394b8db6638eL, + 0x53a97eea9248e6a3L,0x707785311729be5eL,0x000000000000018bL }, + { 0xb8c6dd9e948f04aeL,0x4eb6c7f94a39815fL,0x1501c0bd1b7294deL, + 0x33ed433f9a242a2fL,0x3d50c608994d9f32L,0x324ac45bfda6fbddL, + 0x7d8a03ef6736a16dL,0x631d6c60ef0faacaL,0x0000000000000008L } }, + /* 38 << 315 */ + { { 0x01fe1042e436d76aL,0x2b65db559d05ddffL,0x48d26665932d9e5cL, + 0x01c106ce8a761836L,0xbd83cdb30e8c1caaL,0x9a02dca8ac1b3140L, + 0x6c27ef627d158450L,0x927980c2bedbbed5L,0x0000000000000123L }, + { 0x965261732483b8e3L,0x5665a308b841e686L,0xc8dd3428f4a890ffL, + 0x824c2b9522ade66bL,0xcf13a52fedb93640L,0x2af59ea913125d21L, + 0x975485e0deb818fcL,0x1c34566416212cb9L,0x0000000000000074L } }, + /* 39 << 315 */ + { { 0xb327a7bf9b17c8deL,0xa8ec6459cea7bcb7L,0xd6939689168426e7L, + 0x370eb9046450b774L,0x1f06ba383d71defcL,0xe185afa6aa16d5feL, + 0x3814a269b3148755L,0xcf44b1bd8bb9de20L,0x0000000000000005L }, + { 
0xcbb2da404b8ed855L,0x32b8240ebf15a195L,0xf0cccd5a739cc9d6L, + 0x2b6147c4d907536bL,0xada9b9b4b3b57e0dL,0x75abb8dff4093307L, + 0xc503586c3ecd482fL,0x60a0b02fcd1d3312L,0x000000000000012bL } }, + /* 40 << 315 */ + { { 0x20527cec16bbc6f0L,0x1af2887295b9b3caL,0xea34ee171e96e75fL, + 0xa45bba37e4b8a04aL,0x41f417225f3036dfL,0x4b86d3e223157bf7L, + 0x1e67259e1a02706eL,0x8c1a3d9994a77d8dL,0x00000000000001e4L }, + { 0x8c9fd4c4e3ae391cL,0x69e1f95801a97eccL,0x5e0c660b4f7bcd62L, + 0xf3952575a21f57d6L,0x6b7d050c79bf361dL,0xf7a8893ffc95fe58L, + 0xe28efa2075d0c333L,0x7b561de2ce39269eL,0x0000000000000042L } }, + /* 41 << 315 */ + { { 0x479800978563f771L,0x3410f8763c5662deL,0x5bc9cdab15e8afdfL, + 0xd53088caea947c39L,0x9443dc753bdd8982L,0x71da1712be12e18fL, + 0xdea9106b816abd0aL,0xc7145d12fe4cbce7L,0x000000000000018cL }, + { 0x9be617aca5361866L,0x540a22e459fd2228L,0xf93e692ee3292da4L, + 0x9f730056c797fbcfL,0x0f5b37dc080fb9dfL,0xee979d41dfd5be21L, + 0x2fe9bbd377da5594L,0x7392b500940c7fabL,0x00000000000001d2L } }, + /* 42 << 315 */ + { { 0x534669b6bfd08da9L,0x0131ce0bad7f4feeL,0x3d07710449c9f551L, + 0xa0519089124d6d6dL,0x9df5e6650e3c525cL,0x03e0caa51b0855c0L, + 0x70f2d1aeaed8b293L,0x5258a199d9adab56L,0x00000000000001ebL }, + { 0xab747165401dd32bL,0x5048cc1cab988597L,0x1080390a14dc9ff5L, + 0x71808335ec187b93L,0x894a24ab1a1eac91L,0x97f922c61c234f84L, + 0x245d34ae36d5595eL,0x8a43c41f6aaa4b14L,0x00000000000000a3L } }, + /* 43 << 315 */ + { { 0x295a4c6365d5cfa5L,0x5a8a43db51adce61L,0x25de9d5fed6aa752L, + 0x934b8f517f9df62cL,0x6534c400235bebd6L,0xcaefa633d24cff48L, + 0x4c55585a8562870cL,0x0a5d10180ddeb2e1L,0x0000000000000031L }, + { 0x58d16026be4d501fL,0x38874de4d04fcc16L,0x4b795f00352da735L, + 0x9e97091eac8c0b50L,0x91b153a99ebf81f6L,0xacf58b7ba5ec6a01L, + 0x22374c638e177fb9L,0x62b7622c28edeed0L,0x0000000000000183L } }, + /* 44 << 315 */ + { { 0xc24d78dd440656c9L,0xca4fb99cb3819092L,0x88caaa9847f6ea2eL, + 0x3ad7dcd37bf55007L,0x2a94011d87af1062L,0x176b3e376efcf26aL, + 
0x907e6108c362be62L,0xf00a10062b7c25a0L,0x00000000000001d0L }, + { 0x435481d3adb9d45cL,0xbba44b115b840331L,0xc7c2869d4c4de57bL, + 0x8cdffae2d43dab64L,0x1aa7c5f0c28d2e22L,0xb3f11b4bcab6c7aaL, + 0xa0153a91cb4e8df7L,0xddf80a114ca337bfL,0x00000000000000ecL } }, + /* 45 << 315 */ + { { 0x6dae6ce65d4bac9bL,0x7e0cef116b6b0289L,0x45c7d3063e081f88L, + 0xaba7cab9700ca222L,0xe8d94fef1fabd270L,0x0e76aac6f6b272a5L, + 0xfe456c4b4ca8b682L,0xd021e03d4d6a93ccL,0x00000000000001f6L }, + { 0x08355e745d39f92dL,0x6c1360477eb5dcfdL,0x90169cd5975ce76dL, + 0xd01445f219764b90L,0xd67c0cb3d06031acL,0x9d522c431c5f97b0L, + 0x5fde48bb040b9a24L,0x718a636325d29470L,0x00000000000000e4L } }, + /* 46 << 315 */ + { { 0xfd7f44ed23a3af11L,0x6333bd403ae53e07L,0xf1d2c8efe459a255L, + 0xa0c37f9d8799c62dL,0x37ef089911e1d53fL,0xef656bda08fca1f8L, + 0x0af3992870158e0dL,0xab14f9d1328c88d9L,0x00000000000001b5L }, + { 0x51b410993d600c77L,0x343fc57816ffb129L,0x248ecbda469c89baL, + 0x2c28ff288dc1b85eL,0x767141bf62952b72L,0xbcc3e57bafdcd8a2L, + 0x80051675cc86283cL,0xa19961c6af99a56fL,0x0000000000000182L } }, + /* 47 << 315 */ + { { 0x0e5ec8bfc833c8d2L,0x17a323039aeb80ddL,0x0c8054818f413e42L, + 0x53199f904fd9773aL,0xc6aa9d94e4fdce86L,0xe86019a45ea3d808L, + 0xb2b25ecebe5d522cL,0xe50d1357b9dc03e5L,0x000000000000005bL }, + { 0x18d688efd7d8655eL,0x9f297b45587aa095L,0xeb6488495ed1bfd1L, + 0x2d62357eb283a41fL,0xd95a7a38244d4363L,0x3e01047c58e32ec7L, + 0x4919ac6899283274L,0x84094ea76a033583L,0x00000000000000b1L } }, + /* 48 << 315 */ + { { 0xe54c6f706e842fc6L,0x19bf910e6f75efa8L,0xcc5b5fbd630e0ea6L, + 0xf157ce88092d7e53L,0x1022928d11cf8dc2L,0x8f6b3753d03e642dL, + 0x1cffc03b083e6d0bL,0x265c111b7e943f5fL,0x0000000000000025L }, + { 0xc6cfd943368cce07L,0x1a14c3f74cc79e09L,0x38d2c6d092562102L, + 0x98b25e33a91a0815L,0x0ac5666f3d7c75edL,0x1d89b14a9aa46d95L, + 0x8c53e6ca3e1d0cacL,0x7e07bbd31325bf89L,0x000000000000011cL } }, + /* 49 << 315 */ + { { 0x0df1c4977dc11a7cL,0x223a1262b01cb5eaL,0x25dd803c0810c4dbL, + 
0xcb11f08785e41656L,0x73ec3e8b2f1dae29L,0xe50ec09af3bb0e96L, + 0xbeb6b3fc3fc029fbL,0xdbcd195de9278518L,0x0000000000000144L }, + { 0x1e51439cff7c554bL,0x7dd293ce2201bf38L,0x6521c8b6a1239dd1L, + 0xd2200070b51cd909L,0x727d874744dfb20bL,0x57b82eaaca155e14L, + 0x10761f46b6a7c6beL,0xe2b999e41d4536c9L,0x000000000000005bL } }, + /* 50 << 315 */ + { { 0x22af766ca4d900afL,0xf9a8bad8a91576d6L,0x532fb69eaad75d1bL, + 0xc11ff92d39a81213L,0x51e63b1dc464eab1L,0x651b3246a2f8057dL, + 0xeb84666f3dbcbf15L,0x98ac1e1baac37a68L,0x00000000000001e5L }, + { 0x3083c4843eddc711L,0x0bc703f86ed884e3L,0xd1e76ede3d5e550fL, + 0x4c24d2caa561ba09L,0xb7c212f9f8192b2bL,0xc683270c6e012870L, + 0x1a5fe85a82b37cb0L,0x91289562a1818811L,0x0000000000000156L } }, + /* 51 << 315 */ + { { 0xe48906e35f228d80L,0x6216e75f94bedc7cL,0x9707ebd62f8e26a0L, + 0xaa3958ca1a673e7fL,0x01ac80f2df41fe97L,0xb23738e01bf96493L, + 0xe7684540e0b97882L,0x770056549fe02c76L,0x0000000000000151L }, + { 0x910af00f89327a45L,0xd534a172f8205cadL,0x959d2b243776c9b4L, + 0x3ac5f308cbb27ef7L,0x51ba3c3d49b5ab33L,0xe0177e6705861588L, + 0x3fd40125c93b0f10L,0x29f0bcbb828131d6L,0x00000000000000adL } }, + /* 52 << 315 */ + { { 0x6bae11c47053fb58L,0xfed4afb1535c8c5cL,0x620bebb4fb6dca6aL, + 0x5759da42ed7f3e91L,0xb537fcf726663077L,0x2829ffe0fbf0d31eL, + 0x93666d29f290f144L,0x4de9a65e5826718dL,0x0000000000000146L }, + { 0xbeff2ca3b96fd449L,0xeb3c9ddfbd865ac0L,0xd6c2bd3d851f94ceL, + 0xaa428d52f9f0af14L,0x5a1106774f5cc20cL,0x3e976fa8ae1e9de4L, + 0x82d0634b6dc9a17eL,0x4d4a7bd7afa5e744L,0x0000000000000171L } }, + /* 53 << 315 */ + { { 0xa3e8d517e8867df3L,0x59597f2323e2a3cfL,0xa5bcfb3e413462bdL, + 0x69942cb7f1f38aa8L,0x8cb693756d9a2565L,0x4e5914dde3fa45a6L, + 0x45ea2ce7124a4896L,0x0c9700171f1299f3L,0x0000000000000000L }, + { 0xd543d0a6ef855750L,0x04dd229ce72dddf0L,0x2c72b7c73ffe71adL, + 0x726f5c442305ab6fL,0x64438793d6704991L,0xc5a4c2667ff42b14L, + 0x76ddf391b3086b51L,0xe5b30282588400b6L,0x00000000000000d8L } }, + /* 54 << 315 */ + { { 
0xe5142d28595b12d0L,0x55e40067a14fe68fL,0x2b84189dd93da1feL, + 0xa65531bbbe6833dbL,0x7b7eea747c7d7e8eL,0x41c72bc07215eb28L, + 0x0a90cf0ce2d3c42bL,0xc3232468acb45de7L,0x000000000000007bL }, + { 0xf045e44eba15d9c6L,0x2c6f4bd762fce75bL,0xcc6ed0fab9430ca1L, + 0x2683da6990020f7cL,0x7c4cec7dca6d9077L,0x3e7e028b8fe80396L, + 0x06f3e60f0ddbf616L,0xc0fabfc1d2a2e8d8L,0x00000000000000c2L } }, + /* 55 << 315 */ + { { 0x092a8391a47dbcdcL,0xbae279ea63678f3bL,0xf395a2501fb0434cL, + 0xe1183e16105b8bc6L,0xb001cfbfdfd13240L,0x252d420549ae1be6L, + 0x27006420135a39f1L,0x2a130dbc2d859c73L,0x000000000000009bL }, + { 0xe638dc7141b6b9aeL,0x6a5f1007bc07e485L,0x7574919914cad421L, + 0xb6012f24c1f1d11cL,0xfd20f9dfdb1f7592L,0xd7f2dfb9fa2a3c16L, + 0x8ac19b7459368297L,0xb95552cd4e1622dbL,0x00000000000000c1L } }, + /* 56 << 315 */ + { { 0xe319a4da11c1ef91L,0x117ca85fa8ead392L,0x38784b6c757ae2fbL, + 0x457a99aea307b95bL,0x698e3e9c8bb14a8eL,0x7172c6b2571ef3c5L, + 0x02cd25bf3cb33498L,0x390b5efba243fce4L,0x0000000000000003L }, + { 0xad23a309fd0eb02fL,0xd22e490f36c9fa28L,0xcdf111a3ab8b5d09L, + 0x257bbdffe1c0b8c2L,0xd405532721284f9fL,0x46e4a9a534bb090aL, + 0x7284ab106b2b6998L,0x46aba7e6f57bde50L,0x0000000000000099L } }, + /* 57 << 315 */ + { { 0x7475a134e10a04bdL,0x2e024e9bde9287bcL,0x48ef65326be4a304L, + 0x761c87702ed78c9bL,0x450d1cc689b960dbL,0x2ba413a36e1855fdL, + 0x0af0c64be2a82c40L,0x43c4773285bd1f85L,0x00000000000000b0L }, + { 0xb79d374d5e6e57c3L,0x19d597c487b58f00L,0x2f08689424272b25L, + 0x6fc826ce4ba5e7dfL,0x1daa63e338f961f5L,0x792ed02f8f78a2c9L, + 0x93075e80fdb2ab90L,0x4b5aa382b67ba93eL,0x00000000000001aaL } }, + /* 58 << 315 */ + { { 0x4e9aa71ef43a6307L,0x58c0f6106e978e59L,0x093500d272236e84L, + 0xfc4ac660ffc659dcL,0x47c0fdad706b4f50L,0x2056f72fe095393eL, + 0x0b7adb25dd3ece91L,0xb6e5174297d8f944L,0x0000000000000023L }, + { 0x009f45c1d0aabe39L,0xd2041cbbdb8e94e8L,0xf0ef2bc26a4112a3L, + 0xd4b13654552e8c7fL,0xc2e819a41fa6f19cL,0xe9595ff52d299fceL, + 
0x2125ca74767e22d2L,0x99902ab316614806L,0x00000000000000a0L } }, + /* 59 << 315 */ + { { 0xe4fd64ae0cc153cbL,0x78e9e456424b39d7L,0xcdaf25a5d3fb88daL, + 0x8b5157c5d1d2090fL,0x3009df9409a3b59fL,0x8993e887afcc70daL, + 0x98d62e4b6aba6ca6L,0xae5bd1e9d0b55588L,0x0000000000000091L }, + { 0xe51727ead2a73231L,0xbb78c75a11119940L,0x46cd02d334fead71L, + 0x516fb90f79f074e0L,0x95794a415926aab3L,0x2acb4273ca95c0d8L, + 0x2a554c55046d8abfL,0x3ea635345e0541c8L,0x000000000000014fL } }, + /* 60 << 315 */ + { { 0x39e516db05bd6b40L,0x6326d151b3450a21L,0x3f0595f40cdbf670L, + 0x330c29d1933eccf4L,0xd4b341de30d980edL,0xcc507c0b49b16dfaL, + 0x7013862cb0cf6c26L,0xea9aa644dd0f4300L,0x0000000000000104L }, + { 0xaac1ae33a1a7c1c5L,0x990e19302e944d25L,0x3494435a34e6e768L, + 0x2bffd9d0fdea3e73L,0x3449aa3d63401018L,0xd0742bd47458c879L, + 0xedc90d58f893bf14L,0xc78a12f7ab8cd554L,0x00000000000000b1L } }, + /* 61 << 315 */ + { { 0x47b5e95d00a65e5fL,0x0d78b1d6649f9ff6L,0x9e2aa63ce37ee6a0L, + 0x58f817a994c11bf3L,0x8c170c9fd5ba3c18L,0xe27da94f98518956L, + 0x7e12d7a9bd858fd0L,0x913469c75403cba6L,0x0000000000000026L }, + { 0xc0dd6e546195786aL,0x618e56115011e7e0L,0x06e017b330513dd5L, + 0xd3af20882b48ca95L,0xc622ce0100e7de21L,0x40605d3ed4a05456L, + 0xe3ea86f3be2886acL,0xa32b791e48371f63L,0x0000000000000185L } }, + /* 62 << 315 */ + { { 0x7475dd141a434a2eL,0xc1e3790411388191L,0xc94b7692bdadfaabL, + 0x92ebeb2376d05a84L,0xead9002dddfc6806L,0xf8e3b5ab13f239d6L, + 0xe76ca47a2f934e01L,0x05a1ecc18aded0a6L,0x00000000000001c9L }, + { 0x545938b83172c0f9L,0xf0b5a269bd43272dL,0x8293c33a4c480501L, + 0xb10ab474e2acb95bL,0xfe46ba6ee7f9dc18L,0x4ee4b0fc931c6276L, + 0x9a2730d99b042b1aL,0x2d3175b3367479fcL,0x00000000000001a9L } }, + /* 63 << 315 */ + { { 0xf08f79df91452e27L,0x826c0321a1c388dbL,0xeea26b8de44c7caeL, + 0x46b654482b6c49eeL,0x2c7c6d5d512e6928L,0x3279f451651d9718L, + 0x8be44bdd001af2d0L,0x85bc67e29221e194L,0x00000000000001bfL }, + { 0xc3adab308c929562L,0xd50abbf382b1d07cL,0xcb2300bd7c0e0792L, + 
0x275fc0b89c6c9591L,0x906b1ac03c1ea028L,0x4251b8a186a2a725L, + 0x30af7c5057abc4f4L,0x865249727174416dL,0x0000000000000101L } }, + /* 64 << 315 */ + { { 0x8a9e56659d53ff0eL,0xe4c4fcd2c7ccbb62L,0x95a2b7a2b60b6fabL, + 0xcf39fde59242b4d1L,0x43983f305b49d9f2L,0x4b5fece7220ec7baL, + 0x10b1639c221fb872L,0xf5955c1df594795aL,0x000000000000008cL }, + { 0x73dd1711eb1eea2cL,0xf027dae274f9e73cL,0x11c87d1589586cb5L, + 0x16bc03cf1380814fL,0xfbbbe67a19693d82L,0x9ca4b47e5b37ae4dL, + 0xad59beeedc7860a0L,0x46ff6ded599ca348L,0x0000000000000031L } }, + /* 0 << 322 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 322 */ + { { 0x47fe51518f87ae00L,0xf49ebf83b2d64637L,0x11eca7b4a1e86582L, + 0x58bb53f9cbde2995L,0xa620bba204af7663L,0x2a01040d7221bc31L, + 0x1734531da243ac9fL,0x1dcb67d6117d06e9L,0x00000000000001acL }, + { 0x5bb6699b43275ffdL,0x4b141be73bfea061L,0x805d08b0dae58347L, + 0xbbbbbae321ee35afL,0xfa863bafef02b56dL,0x21460360b35e5127L, + 0xb64a8ac4dacdd7b7L,0x51582b4f92b58b6aL,0x0000000000000172L } }, + /* 2 << 322 */ + { { 0x74f71c976305b1faL,0x75c64449aae6788bL,0x072b8b0afc132537L, + 0xd66862f496fd8bc3L,0x32cb2f3a5ff59f77L,0x9667801b58ffe8e1L, + 0x227b57a4558302c6L,0x0c384f6e441664b4L,0x0000000000000130L }, + { 0x0cfd7a96c16bc635L,0xe8f67af8d846d4c0L,0x7a398e333637d199L, + 0x2648cdd111d7443bL,0xfe3bdc069843f169L,0x8192bff3b42d2c33L, + 0x7caddfdcfd2b8fc0L,0x42d128611456b53bL,0x00000000000001dbL } }, + /* 3 << 322 */ + { { 0x4c7066df84e9430dL,0x77abd2e8e1f59506L,0x784cde21534bda51L, + 0x84176c345bea6125L,0xc4f00c4c11104b82L,0xdbb8ef7087f79adbL, + 0x1e47fff55961812dL,0x70326449202d45cbL,0x000000000000002cL }, + { 0x84ecf48df169b460L,0x972348061a19979bL,0xde2caf4d9bdcf29dL, + 0x90acca5e218caca8L,0x66480bb594715145L,0x70d761509ba70365L, + 0xae45e69a924d4c52L,0xab48997cc321ca7dL,0x000000000000013fL } }, + /* 4 << 322 */ + { { 0x5637ceab569dfcd1L,0x0d1cf091b2c13802L,0x09115a1f731087b6L, + 
0xeda479febdb45b2bL,0xed883a7d8e6e5507L,0xbc9206ba30bd8162L, + 0xff2af28ec6294b24L,0x2058521ac8947bc9L,0x000000000000003cL }, + { 0xfcfa87a55a82fbaaL,0x988a9a6c1569f45bL,0xe65849f65d3d6aa5L, + 0xa3d0879f05446b48L,0x45571ccf76f480d1L,0xacd9a3230efd5f79L, + 0xf30e5ec4ae956132L,0x325306d0ea93f245L,0x00000000000000a7L } }, + /* 5 << 322 */ + { { 0x8582acf43008f8f9L,0x78d113721bd79b09L,0xab3aaaf5e6af3cc0L, + 0xecdc672eae9dabd8L,0x56f69a15ed65ca69L,0x51b4b25a3932ae63L, + 0x8047c54c6f8e81f3L,0x7fee87e2910eb492L,0x0000000000000006L }, + { 0xac401b28e398d849L,0x5cda81e9c78212ccL,0x1d38ac77ab650ca3L, + 0xf1c7e42583da6475L,0x2150ce594108cdceL,0x9a59ba89de85336dL, + 0x3bf646ba9551319dL,0x2db133dd2540ea12L,0x0000000000000068L } }, + /* 6 << 322 */ + { { 0x7c1d8a0eaae05a6bL,0x4906595862835a21L,0x66338e4d7023e607L, + 0xa8aec3272501507bL,0xe3eb9783283179faL,0x11908e2f9703f59eL, + 0x08ff92f55a86e591L,0xe4c7c4d922806045L,0x00000000000000c6L }, + { 0x27f1b842af9bd2edL,0x238d7152eb1c7239L,0xf02ca4f06935f71eL, + 0x528b57266888b5feL,0xb9965603e8e613dfL,0xebd0e8fe7e1b59f7L, + 0xeaf264d6f09bae67L,0x01ff76c9a540edcfL,0x00000000000001fdL } }, + /* 7 << 322 */ + { { 0x80a0e45b314cc74dL,0xd7fa3610b57c5800L,0x848d96d7b67e5f65L, + 0xc861fad13449baceL,0x68cb8a38ddf3f1adL,0x66d16b9c66a153ddL, + 0x94b81dd9e8236d66L,0x26b5570869130397L,0x000000000000008eL }, + { 0x0b0a8f43a6bf9b14L,0x185a239e96c9d7aeL,0x1119dec363fcebbaL, + 0x952da42323c83456L,0xbcecf110bdc4a430L,0xc64ba9f6b2a77ed2L, + 0x75d06bb1323b6059L,0x192de9edf709226fL,0x000000000000004aL } }, + /* 8 << 322 */ + { { 0x5f7d9850fb6f765bL,0xafe6dbcad0ccfce0L,0x9e66a5e9d97d6f42L, + 0x1b402928377f7e90L,0x44b0d87abfc71aabL,0x3cffd5151de366f6L, + 0x845d7d74b292c4f5L,0xe0822ef93f88e818L,0x0000000000000017L }, + { 0xfff7c1d7104d2069L,0xe53e122dec0fdb18L,0xf07a43395646b614L, + 0x3f2341d0b47765f6L,0xfde6dd5e2cb3def4L,0x87d324787a97d20cL, + 0x27235476bcb7b256L,0xb03f98dc9303c7d0L,0x000000000000010bL } }, + /* 9 << 322 */ + { { 
0x634e5bf3d3323dedL,0x3bf31a84f4d4e862L,0xdd6b8f2235e8b445L, + 0x0b82f8ffaa9b65cbL,0x34c6a6fd744b4cf6L,0xe1831208a43f35f5L, + 0x7cd47de06db12386L,0x845e032b5b4183e5L,0x000000000000013eL }, + { 0xc7b8b5ecb448fbe4L,0x779c1f1019d44cd8L,0x63be24d85e05ecd4L, + 0xc34e9ff989f86617L,0xe1b13059d96a53a8L,0x92101f9fec8a4021L, + 0x56150074ecb053eaL,0x802050cb44760734L,0x00000000000001d9L } }, + /* 10 << 322 */ + { { 0x29f559f30101f121L,0x755e6943457fc6c4L,0x4531ef607c423ebbL, + 0x2e214b5f5c1752afL,0xfbb45080bf760382L,0x4ac072f6fb9ec95eL, + 0x1c738977d3f9dc13L,0x8941f56b46cd8ddaL,0x0000000000000035L }, + { 0xd99d101d543301b0L,0x9cbe76c1eb88a1f0L,0x3a83d44c974b3e02L, + 0xaa0fad91d4c62aafL,0xa4393659c1aac52fL,0x5cc89ddeb695fc35L, + 0xd248c1bdc017698dL,0xb232bd2028af0b2aL,0x00000000000001f1L } }, + /* 11 << 322 */ + { { 0x09742d9a3320fef4L,0x211ff0f93e06ff4bL,0xd78969686f42e626L, + 0x6bd0c95a11fb5505L,0x28d29ee1288e9a9cL,0x6ff437b97d75ef3cL, + 0xaae59f1669ad35ccL,0xa8d0a7a2e20c026bL,0x00000000000000faL }, + { 0xdc4feadd783a9b51L,0x387f15e0622a4a07L,0x78005c174a207939L, + 0x2f1c8ed7a4ad0da9L,0xba0da149f12ae544L,0xd3085ef61db1ce40L, + 0x8185ae15c8027370L,0x672ae84ee7699af7L,0x0000000000000107L } }, + /* 12 << 322 */ + { { 0xc1d74d05ac5fab15L,0x6ea132c49b102d52L,0xd3ada4ce0d890cd3L, + 0xab7248f092823f62L,0x246c373023871113L,0xebbca0b998424f07L, + 0x0061eb2c2e62e741L,0xac1b03d25e90c5b4L,0x0000000000000136L }, + { 0x0748c14a7eefae68L,0x82525b3581f8a2ceL,0x31a64097398363e1L, + 0xcbf57e0257b6b33bL,0x523f0584807217e6L,0xd607efe780b9700fL, + 0x79afcc9435596890L,0x140ed6f46985a97eL,0x0000000000000160L } }, + /* 13 << 322 */ + { { 0x994d6f091dc32085L,0x7ef66c0ca1ec3338L,0x862f3f00d05f032bL, + 0x14b255ab398a3cd5L,0x15dcba7d14d0afb9L,0x426d2d2b0eba73c0L, + 0x49c12810c97b173bL,0x1b41ee58de677412L,0x00000000000001e5L }, + { 0xf95f8080e3f62ad7L,0x2043cc48def132ffL,0x675e320a4ac025a4L, + 0x70d97e051e334fb8L,0xf1f374fdd5c92698L,0x092930a4e24b9c4bL, + 
0x9df749fe264df791L,0xba72b3748ec13529L,0x0000000000000170L } }, + /* 14 << 322 */ + { { 0xd9b7a5b9e1c9df84L,0x709198f2b5280f15L,0xb34b95d48277fd56L, + 0xfd99a4ec9de8b8a2L,0xe382385a3a437ec0L,0xec27c9e21c556ecbL, + 0x40e3599d22df4e54L,0x01c11920edd21904L,0x000000000000016dL }, + { 0xd4049aebdd8453f7L,0x8c013265d4ed86eeL,0x97aff20706af369cL, + 0xa09fb20b053ebdd8L,0xa0864a12daa9bb5aL,0x046821b0487993d4L, + 0xc60e798c71b8e214L,0xe34ceba3b599ac41L,0x00000000000001d6L } }, + /* 15 << 322 */ + { { 0xa5828c36d7a89dc3L,0x469a1d568dd9c304L,0x66de18a2fd9cd815L, + 0x9c9dbdc6b2dc8e19L,0x1a498589fd4ac190L,0x5a56e72594b98ad2L, + 0x01af3fa4075c69ccL,0x1545c78e90dffbb5L,0x0000000000000050L }, + { 0xa65c03dcc589d32aL,0x17e9f0ce6060cac0L,0x0e4407a37302e7a5L, + 0x0b6d8617e87d0d8cL,0x6c4d924e82c0970fL,0xb8323cda4e7cf7a9L, + 0xfec1519afc7592f4L,0xe3ca60ad3b413603L,0x00000000000001d6L } }, + /* 16 << 322 */ + { { 0x116ca45b2e19e0b1L,0xa49d8f1b730ac8c2L,0xda037586eb7a1238L, + 0xeae3abd5e834391bL,0x8675c82a277eeb94L,0x975ac10634b19d06L, + 0x6ab169a96b83a163L,0x3ed9a40a967424fcL,0x000000000000013dL }, + { 0xa23d2b9b2082d8bfL,0x853ae3f39a7d0a3fL,0x04c7ce962879d8eeL, + 0x17da556d29c66703L,0x1b3a760f1fde40f6L,0xb98726458c5b2120L, + 0x59b18fee260aff65L,0xf2a8908d5df615dcL,0x0000000000000116L } }, + /* 17 << 322 */ + { { 0x0c565f6dbf610ef7L,0xfb3f938daf30f04fL,0x3e626c858b8f63a6L, + 0x46b39352fcc292aeL,0xf7d0a0563a488d11L,0xdcc803337f10c63aL, + 0x9ce707d99c6af943L,0x5a919c777093b168L,0x0000000000000198L }, + { 0xc635f424549b3f1eL,0xdffe84686bfd7fbaL,0x2ca865d6999c72f8L, + 0x904ff1826178fabfL,0xb431f3d908971a3dL,0xb6f8633bcbc2b248L, + 0x65788bf3b9173f22L,0xfb2c9aaa9e356216L,0x0000000000000091L } }, + /* 18 << 322 */ + { { 0xa0fecc4018e76bfcL,0xaba8add4477d51acL,0x41057a450ba7ff2eL, + 0x75b902a119cb81e4L,0x8e5b4a580e68c3adL,0xfcb68db8cc58a3d4L, + 0x0a7e44ce4f044a64L,0x40e4907ed0543f9aL,0x0000000000000006L }, + { 0x32195dac8df38660L,0x6e1d60e5377aa840L,0x847cf167ef79a408L, + 
0x3be397f0d44fbdebL,0x6bf1298cb6474131L,0x116995a6fd22822dL, + 0x5af45b34c8290ecdL,0xddde3eecb230c779L,0x00000000000000e9L } }, + /* 19 << 322 */ + { { 0xd3d86741f2c7d24dL,0x21d5116b19322c3fL,0xaf9d1a0819063e10L, + 0x5b594a50e31c6774L,0xe61198cd516f6354L,0xec9e8eb451088a79L, + 0xcc3031bbfcba2683L,0xeb08633e531785d2L,0x00000000000001f4L }, + { 0x539664466d67c0fdL,0x2a42f1e974ae1a80L,0x6c12467c885ea4ccL, + 0x81ab0c5b0efb792eL,0x71ddf7497b91b2e2L,0xec246aafead35602L, + 0xa0f4a61d2563662cL,0xc717f71c505df6c4L,0x00000000000001c0L } }, + /* 20 << 322 */ + { { 0xafc75c178ec70a8dL,0x32686ebf7b117380L,0x16050cda21e28c47L, + 0x70c05654e731b083L,0xafa2e57571d1f6d2L,0x0df1a032c1982fd6L, + 0xc0e7b1461e86f69bL,0x0df3c6bcdce1864cL,0x000000000000012aL }, + { 0x46fa0fa600b0d0b1L,0x1f0a42b21691a590L,0xf5a6e4e8ac8c1189L, + 0xa2cc987973a0ab49L,0x721ca290cf17f795L,0x4026f9c3ecf32b7bL, + 0xbabd20c019cd7057L,0x99378e8c5c30cbccL,0x00000000000000d2L } }, + /* 21 << 322 */ + { { 0xd0cbcc47c26a88c6L,0xc7b0cda13f2dc480L,0x405f4a1024029c61L, + 0xa79337861f262318L,0x009fa8def18a254aL,0xd8e1d3ff56e86d6bL, + 0x9d1c8cb1cf685730L,0xe9e94c8373d42c86L,0x00000000000000b4L }, + { 0x03d5e8cf263d6639L,0x008d0a7caf59818dL,0x030bfbf3d18597c3L, + 0x7e6faa51312d4f8fL,0xbe896d83b2d948e3L,0xd6a7d6786f4f8181L, + 0x2140942ed01ccf0aL,0x458145e3bf228206L,0x0000000000000182L } }, + /* 22 << 322 */ + { { 0x9925822b53727667L,0xb75905699f8ec5bcL,0xab4ec9d48d2a5fc8L, + 0x1fdf79f123996a2bL,0xb7c9dca72e618aa4L,0x70df99533cce63acL, + 0xdb24712df2c51a36L,0xd95239ec9d3a3f59L,0x0000000000000083L }, + { 0xd649c751d2ef6338L,0xb18fa99e47365d93L,0x1f96c9811d979826L, + 0x752db605cc8def36L,0xe723f8151830cba5L,0x11a2323f7123ba6eL, + 0x63fb9a0d0a390d71L,0x41a6131c77638870L,0x00000000000001daL } }, + /* 23 << 322 */ + { { 0xa3e39aae9386104aL,0xde389403c6bab3e7L,0x2aba3b6dc8ecba58L, + 0xaba666e342eda205L,0x247aa93c7ec04499L,0xeabe55e2811dc300L, + 0x955ad4c37f7c679bL,0xffa5d2930dd8edfcL,0x000000000000001cL }, + { 
0x6cd7828c8d795923L,0x93fdd33bcf549b25L,0x8a2a61d68e5ca30bL, + 0x5e0466db7302313aL,0x296faf9fba7c9c91L,0x4f8645cf5999f063L, + 0x589f67f837992492L,0x0bed28248081dc37L,0x0000000000000177L } }, + /* 24 << 322 */ + { { 0x2229e06cccd6d98dL,0xad78008c2bbc14eeL,0x361ffa4fda34a88cL, + 0x6023fcf46cf499fcL,0x72051f272a50f74fL,0x56cc61c8e9a59a2aL, + 0x2defb0a1bf0392cfL,0xa5ed8b89d29013a8L,0x000000000000008dL }, + { 0x9d396c29d3d7339aL,0x0d21b9dfca0262d1L,0x164b69e0a000d10fL, + 0x2f7f7dfce1147d4bL,0x3cc2909978310e2dL,0xdca71fac36ae5a21L, + 0xe0c8579aa0f4cfd4L,0xc05c548904582119L,0x000000000000013fL } }, + /* 25 << 322 */ + { { 0xecc1d27d517fd248L,0x5e8aa5936053787bL,0x46fe1e027a0727d4L, + 0xe190ed905b0731cbL,0xaf9197611b2c2c55L,0x6d3b24cf61100b10L, + 0x354826b3638b7314L,0x2ab60c42a335005dL,0x00000000000001b3L }, + { 0x60406971fb3548c7L,0xa39ee5b246ea496fL,0xbda9226bdea5de60L, + 0x125efd34ee468f88L,0xe04d9f1abe0b4fadL,0x123e18b7c72e7621L, + 0x0e4c5a0dc75a09c6L,0xb31d669e57d867c5L,0x0000000000000162L } }, + /* 26 << 322 */ + { { 0xf5747cbc85245da0L,0x120980ce618dcf96L,0xec629da41d48d206L, + 0x4419aa98d3952436L,0x01a4a6e1bb4544aaL,0xed247d13a94aa6dfL, + 0xbceaddb76a5924fbL,0x11fbff73e1cf234bL,0x000000000000017fL }, + { 0x224a152e21813c71L,0x6e2a6f4a20f47b53L,0xbf1a147535e50638L, + 0xf1f713223308616bL,0x64d14f69d2f8fef1L,0x1e8b9fffa246cbfcL, + 0x62bf5aa4d3763484L,0x7c57e682b86702acL,0x00000000000001baL } }, + /* 27 << 322 */ + { { 0x3a70dcd25f13c332L,0x576f41afe4313910L,0xd0b6b3c552430b5bL, + 0x0d5e9e48e8996f32L,0x207b17fe77e70a78L,0xd0d76189584a4c91L, + 0xd35b7d9ee12153afL,0x1b5f87f40becd4e2L,0x00000000000001cdL }, + { 0x4bc65a348ff1d0a7L,0x4eefffa356825c30L,0xa42098b2616e22dbL, + 0x46bc46e15e3e3225L,0x015f358ae05be037L,0xe02307bbd86e016aL, + 0x14bc3a329a393613L,0x5bdce022e6a07114L,0x00000000000000b5L } }, + /* 28 << 322 */ + { { 0x161b9770b7dc0f64L,0x42ad385a84f67076L,0x6a94116c710d4992L, + 0xddfa30033ec54457L,0xa17c61d12d520342L,0x7e673638f36c6f21L, + 
0x7654e648eaa1a95aL,0xc4b4a3c40dd8029dL,0x00000000000001cdL }, + { 0xd748525500c9033cL,0xcf0d9c3551acd2cdL,0x02022ded596a8678L, + 0x56fe8164d90b82ecL,0xe376a376863801c8L,0x559b0f3fad337694L, + 0x3c720b4f06e23e58L,0xe02dcd8191914c96L,0x000000000000003aL } }, + /* 29 << 322 */ + { { 0x7c7c5db9082cfac8L,0x3598e5560a7f3db9L,0x2601b07557884076L, + 0x6ff31de8b9759bd5L,0x8a065c1fe55fc009L,0x61b3433f212241c7L, + 0x20413706427bcc47L,0x11748db7fc48d7c5L,0x0000000000000004L }, + { 0xe5cf15518d4fd88eL,0xd68399f20567016fL,0x8e255e59ce2a0316L, + 0x5214f9d91ac90dc8L,0x1e0c990eef6bc99eL,0xd604cc6438467d13L, + 0xc3e5306af38a48beL,0xadf8009b6d167ca7L,0x0000000000000044L } }, + /* 30 << 322 */ + { { 0x21487ed2e126ce5dL,0x49bc01bc5bd1e210L,0x1aa5f2c20fa11fabL, + 0xa2242af403fff4b4L,0x973eca3c209db9aeL,0xf2db2f1b3dd561d9L, + 0x784606951bcd0ed1L,0x5510ea75e062faa1L,0x00000000000001abL }, + { 0x5e7d9dd5f2bebc17L,0xe4244808d597a75dL,0x0aa0f5a516b9fe86L, + 0x61d40d8a03e38a68L,0x934f4bef9e2e8268L,0x52a157da1ef90390L, + 0x7b1a6498cc5d0b9aL,0x95b9693300f06240L,0x0000000000000072L } }, + /* 31 << 322 */ + { { 0x7952dc96c4dcca38L,0x71e01c35fdd766a9L,0xd4ad5e22d4782655L, + 0xe4e886ca808c793eL,0x636e86b943abdc47L,0x00a2bee29483f877L, + 0x6a88f59186462a7cL,0xff8140be192bbe07L,0x00000000000001b3L }, + { 0xd8e44cf01ff8cf63L,0x82b1238f14439990L,0x9f1800edc4edd1d0L, + 0xae0de82d7eb239ecL,0x9b60b1dfaf69441bL,0x4e521d71e632474dL, + 0x964fd7357e9a7351L,0xed7cf4349f2ad1cfL,0x000000000000001eL } }, + /* 32 << 322 */ + { { 0x699f72e9e084ea4aL,0xd1c4bc45dc721bcaL,0x7eb43d78995db3e6L, + 0x76e84fae02088e1aL,0x1b061123e048084dL,0x7caf0d3279abe6feL, + 0x9340583753b13e9dL,0xa05d1313cc58f64aL,0x000000000000013cL }, + { 0x81d54e675fa5b0aeL,0xee551c40086b46c4L,0xe7b98e209b7daa1bL, + 0x8082803eb868c9d3L,0x036502f236731930L,0x075455aef9a93c2bL, + 0x080e704652a61944L,0x9f4bbcf0580494e0L,0x0000000000000154L } }, + /* 33 << 322 */ + { { 0x298a0e46721ade9eL,0x677e8b3ac7e30064L,0x0568fd0178016c49L, + 
0x994f0036d3ec1adeL,0xeb07215b15c5599fL,0xa97133b3e01597c1L, + 0x6c991e6ed54986a6L,0x2fe04bae2e971e6aL,0x00000000000001dfL }, + { 0x25c966cdf287199fL,0x7e9f51d01c337d60L,0x0c823f68216d1b44L, + 0xb381e0434d7ca148L,0xcb0cd817bcd421f3L,0x188b342f88fea786L, + 0x751d7c1bb2a40514L,0xb05475b4f0efd0dbL,0x0000000000000083L } }, + /* 34 << 322 */ + { { 0xf12858a030e60874L,0x885b746a49363fcbL,0x89677efa9e6267faL, + 0xda86b591948152a0L,0x13a52d2eefe62f36L,0x44a7c6cc22770a18L, + 0xb71c6a83c8b69076L,0x239a31f25ed31aaeL,0x00000000000001c6L }, + { 0xa91974f8adbb6beeL,0x0cef8b3d5fef0a99L,0x47accfa21999c491L, + 0x8d9fd072192c2a13L,0x925393d4d87514a7L,0xad667ad4e6c482d0L, + 0x812fbf47a5668487L,0xf1c2e82276e7bd3dL,0x0000000000000019L } }, + /* 35 << 322 */ + { { 0xbf8363be4df133aaL,0x00d7ebe6830d997cL,0x2059980566b5fdb9L, + 0x2b682dec73cd9f64L,0xc4ab8c87e5db0383L,0xfa2dc9686d539483L, + 0xb0ede1675e8498eeL,0x5d8c23dfa6a71bb3L,0x0000000000000002L }, + { 0xed62df962ddcfedbL,0x62dbb40ddecfde69L,0x3a847e28ea8d6afcL, + 0xb4c7f1db2fcf58a2L,0xd774982684eebdafL,0x0a5078bd6259f54bL, + 0xf9d2df2d9b7545e8L,0xa5b293c9ca1ea850L,0x0000000000000053L } }, + /* 36 << 322 */ + { { 0x0e4b463a886b2e59L,0xa7091bdde1730e93L,0x53693de2e485313cL, + 0x32137cec9d335d4bL,0x30983f46e7b8f7b6L,0xa8f5732c36785d2fL, + 0xfc96dd6d27f2db5aL,0x649054e25a562614L,0x0000000000000164L }, + { 0xa4b1b99714766d09L,0x93af1a132001eceaL,0x436fdf6a69bb5799L, + 0xb0f3e655eae264adL,0x1ff369c761838a92L,0x3780d328a24da60cL, + 0xe126d99241cade42L,0x9cc0e1ea9ea7d1b4L,0x0000000000000056L } }, + /* 37 << 322 */ + { { 0xe3f5c92af2c9c91bL,0x9fe56b3fc64b60e7L,0xe651ab8b2d15166bL, + 0xf7421bb890e51defL,0x0adde7402cacdf7bL,0xdaaa12e1cb9e11e5L, + 0x42dde46b5211a0ccL,0x621aea01f0da0299L,0x000000000000004cL }, + { 0xb1c259a1a55cbce3L,0x10fd90719f2f0057L,0x8242faffc92645adL, + 0xabdf00a3cbc7aa9bL,0x0b2deaa426b1272bL,0xc9edb172bc53e47fL, + 0x7e3c26de93e77e99L,0xcadf52633d8eb2e5L,0x00000000000000ddL } }, + /* 38 << 322 */ + { { 
0x89f03416a0f14094L,0xd56358745bf6b5a6L,0x07140f5ab2d9fec2L, + 0x3a6be1f713d6753aL,0x29f0b6713669ba9eL,0xd3e499809c9edd9aL, + 0xa1222f0a876e1af0L,0x7e4b2d8e3a539f63L,0x0000000000000197L }, + { 0xb9fb59ba3657e3ffL,0xb0bdaaca95a402e9L,0xd6f9607d9fc5fcb5L, + 0x08277d4ec95e8496L,0x9d6231d624a10303L,0xc45279e62ff82594L, + 0x925b8d7bff7c6659L,0x2ce594e8599f6b14L,0x000000000000009aL } }, + /* 39 << 322 */ + { { 0x0d0927adec179a82L,0x9bb9d81707986ba2L,0x4dde10ba9a1e77a6L, + 0x52ceeab0fa680f62L,0x7415c752e0dc60e9L,0x45bbf4dea0c28835L, + 0x926ccc195a41d082L,0xc18e0bf9fcaf9128L,0x00000000000001e0L }, + { 0x212b073ade970121L,0x2f3acba643d17f79L,0xc4f7513cb540cfcfL, + 0x5d4ecfec9dba50a6L,0xb6ac72945403b601L,0x445f1431a3e52e7bL, + 0xa4471581a24dd4c9L,0xbcac26f166e43679L,0x000000000000009eL } }, + /* 40 << 322 */ + { { 0xb3e7eabf951c2d0eL,0xbf9959209512c00cL,0x0f1d97a8dbf1f0f9L, + 0x6a52615c430981d0L,0xfb0f0d2097073d82L,0xd843bc6cc0430de7L, + 0x1bd02e5440061f87L,0xca4930b25dd88dc1L,0x0000000000000056L }, + { 0x92c1350259a249afL,0x1597c507b2ca76b5L,0x298751f3039592d2L, + 0xc50bb3a4e8a71d91L,0x4d3a058405f1e850L,0xd16f70a3764af248L, + 0x5d508a243f2215ceL,0x2cbcdaed889ab117L,0x0000000000000193L } }, + /* 41 << 322 */ + { { 0x33651b0fa901023bL,0xb284799cf74f9e94L,0xca2adf58ab42f967L, + 0x5682cd2f8f916cd1L,0x1fa28c120716ddd4L,0xc7fbd070fcce3af4L, + 0xe2584ea683c7d494L,0x292cf09563e96f86L,0x00000000000000d3L }, + { 0x978eab0d561ae9f0L,0x505bd69d87b3fa7dL,0x6da8f644e5f1eb0dL, + 0x23835289de97f4beL,0xcd57f90042407f6eL,0xebbf612c3210ffbaL, + 0x7b4bacd8094d959eL,0x8bff4d6940769457L,0x000000000000003aL } }, + /* 42 << 322 */ + { { 0x0afc9e7020994117L,0x0e57032cd363dcc3L,0x7524eb6c993e395fL, + 0x8ef74be384122007L,0x7d33ed5003a27726L,0x27513a4eb65c88c8L, + 0x6af64d0b7d9f370aL,0x4cb05a9b45c17a8bL,0x00000000000001ecL }, + { 0xfd7afd321f4198b9L,0x385d1c7039b3b13cL,0xab60c146d2b332b1L, + 0x50b827d7dc655444L,0xa2c7a8ddd25c4e3cL,0x0a27f3eab718ce9fL, + 
0x14ff04ece2dbeea8L,0x63f281fe2fef5044L,0x0000000000000074L } }, + /* 43 << 322 */ + { { 0x49fbd643eab88f85L,0x564f4ba7e8c9011dL,0x1d818e0bc7b1478cL, + 0x1803f387883b4833L,0xedba2a623bc03335L,0x634a781db6fc5a17L, + 0xe8196ce6811f8210L,0x76af8a379732049aL,0x00000000000000fdL }, + { 0x8c283abdc06054cdL,0x34308c7a8998068fL,0xde75159933710724L, + 0x992f8183afd84b44L,0x893f046a3e7c8ce0L,0x0a967e5f70f40608L, + 0x95c11e854bc01128L,0xa5e3708141fc13dfL,0x000000000000000eL } }, + /* 44 << 322 */ + { { 0x8ed9e22a9439311eL,0xd3f46e41314b0141L,0xbdb1570568382359L, + 0x149adebfb65d3aabL,0x4f73e3254fb4c8c2L,0xef7883ccecdfb62cL, + 0x7a0ecaa2123003b4L,0x8da68ece15a2a7fbL,0x00000000000000ceL }, + { 0xaac484e542250748L,0x5e1020b2e43c22e1L,0x755475a75f3c8e7bL, + 0xbc1fd16e533061b7L,0xd10d8530011d193cL,0xb8d9c97e4ae8efddL, + 0xf19de2648f813fc8L,0x3a0b5f66415daf6dL,0x000000000000009eL } }, + /* 45 << 322 */ + { { 0x9969b5fd3135fadeL,0x9bc95aa606e484cfL,0x355ddb588b0fc080L, + 0x1093422b3bf11d43L,0x5d5237f95bc21dfdL,0x25c184a661758002L, + 0x1625b15c318e6360L,0x32d2ae19b508e1ebL,0x000000000000012cL }, + { 0x722ed1b76e253643L,0xe9242c221809165eL,0x5c962af74b9630a7L, + 0x36d676d51fe4f40aL,0x9c6232022f1b1556L,0x209b0bc4daf3120bL, + 0x72726a0105689599L,0xc16d60ab500ef70cL,0x000000000000004fL } }, + /* 46 << 322 */ + { { 0x9808593467fbef8fL,0x22bc62a9f82be097L,0x98cded2d74bb7957L, + 0xf315e03a49c8bd33L,0xf9e872d565490065L,0xa5a6bd39857ea243L, + 0xded7128e8cfe8539L,0xd763a75b8905dea9L,0x00000000000001c8L }, + { 0xc9861044843cde89L,0xe3761912078c8f57L,0xefa422f7582d9659L, + 0xb50a0a959632fa91L,0xf955855a469df6c0L,0x0ea369f34669bc3dL, + 0x368ab00685599310L,0x2ad00a5eb7a8b481L,0x00000000000001deL } }, + /* 47 << 322 */ + { { 0x4c84c9f9ea65b932L,0x0b54ecf8bec4ea40L,0xd5de970a9e70ecd9L, + 0xc940c1ed40455e5cL,0xc8bc7dca3838fdf7L,0x63c4efb6ef33dfd4L, + 0x82b51dd6c47f4e20L,0x5cb41a27ff99b86eL,0x0000000000000123L }, + { 0xe7b1d8113117d1faL,0x517166515e4a173dL,0xc6d75c1834fbc8acL, + 
0x4646316c524b402aL,0x361c0a1b7b71c7b7L,0x777727080796b95bL, + 0xe6e931370451eccfL,0xb43aa00d237aacf2L,0x0000000000000108L } }, + /* 48 << 322 */ + { { 0x77b4759ecdc28bedL,0x2c9485511067b654L,0x97a30e9c1be3d888L, + 0x1b8ee1134d8a74eaL,0xec2a632c3b1697ffL,0x60ac030fa8d72746L, + 0xee82efee20b60c63L,0x6c7904a018c6ac9dL,0x00000000000000a0L }, + { 0xefabd68d11a97778L,0x0c8e7905d29960d2L,0xc58e496f0b58310eL, + 0x8bbcb95ce092e4e6L,0xcfefaa5cd7b6d076L,0xddb0d933600c7c58L, + 0x7ec76492d34b8f40L,0x6ad971caad23f589L,0x000000000000004cL } }, + /* 49 << 322 */ + { { 0x9c3eaccf73c220baL,0xecd040d222e53490L,0xe4e958c14dcc1e96L, + 0xdba4a808b1af5f74L,0x3106a290a95fbb79L,0xffb704293792882cL, + 0x7e595219f8432c48L,0xdde6ce6c8e43e77bL,0x0000000000000125L }, + { 0x5c5bed0062b3c950L,0x4287ec7870ec2a41L,0x31f56d23604c34a9L, + 0x39550dc938e5e90cL,0xce05656c3502e7a3L,0xf65501831c683d8bL, + 0xcb128c0981e26495L,0x4f4f4402cc33b9e6L,0x0000000000000053L } }, + /* 50 << 322 */ + { { 0xe5a77166be5da74fL,0x32f62c8e2fa814e0L,0x70ae9f87b6c207d6L, + 0xd803c9a7fce84376L,0xa7e7e9ec53846c94L,0xeb357fcc3fa01a22L, + 0x2fcdd432d264e780L,0x4ce115c9c352af13L,0x00000000000001faL }, + { 0x9543539d8912fd69L,0xe1eb19b8d14ed2c2L,0xc47ab4f0beea02a9L, + 0xa097de0020a38f8cL,0x182cb223511265f0L,0xcdaae98f01614e32L, + 0x9968eee31ece63d3L,0x6219a46db34a7e22L,0x0000000000000080L } }, + /* 51 << 322 */ + { { 0x9e12cfec9c3a4c3bL,0x779628a45f53855bL,0x7ac6e3494ee550f6L, + 0xc167283c7a4b2192L,0x58c2fa2063e39e4dL,0xbab2d6ae0b743e23L, + 0xa34a920cca6e9cb0L,0x7818f9f4ececa894L,0x0000000000000010L }, + { 0x3416933959b9f008L,0x0a9c68d604ef634eL,0x18a98bb9e6a78900L, + 0x124443d61e0fa784L,0x56668b018fc2d6cdL,0xb5ddc6d2faa5ef32L, + 0xeb89133aec245cffL,0x5b3dd85dc2929841L,0x0000000000000197L } }, + /* 52 << 322 */ + { { 0x42c6902d17001ab9L,0x6bcd13b0e3c3bef7L,0xe61d8161a3b5a78cL, + 0xa201774e05b85e95L,0x9c0940abd7e54543L,0xd8c739a71066397fL, + 0x2a086159a49687e8L,0x6e5a1b2592222916L,0x00000000000001d9L }, + { 
0xc2fe70be069126eeL,0xd337ff0cda37306aL,0xd91e07f157f8eb8cL, + 0x83eef5e6c3e8974fL,0xa0584968665f08cfL,0x377cdce9b5297c10L, + 0x581d1c013d1f2799L,0xa1b669d8bc2e1714L,0x0000000000000096L } }, + /* 53 << 322 */ + { { 0x087569b150902c90L,0xf91f6a3c073c744dL,0x7811c91daa8b077cL, + 0xa5ec0a6d68dadbd6L,0xb885e064f45ab631L,0x24b696dc452bd6b1L, + 0xa528be732e9ef035L,0xc5683c7afffda366L,0x0000000000000196L }, + { 0x944aef1b14b55baeL,0xf9b890250d064e11L,0x47c699ca4c807c41L, + 0x37312d6d205c11d7L,0x5d1546c69319437fL,0xdb86c05376c56164L, + 0xc554d7719937d348L,0xfd1f3aac9cff2d06L,0x0000000000000140L } }, + /* 54 << 322 */ + { { 0x4e5f70bd593eeec7L,0xa08a3a07c68adf61L,0xa939150808b6d600L, + 0x0b36d861cb6b65f8L,0xa5b6fccf274f5cabL,0xd821c75ed6a29c3fL, + 0x61f56a9e5d06fe0cL,0x818aaa46240372a9L,0x000000000000010bL }, + { 0x97e136c0c49978b0L,0x9ae5030e27f22acaL,0x9a2ab3a3ebb6f0a4L, + 0x4eccb9a421c62aa3L,0xb3c264e17e298f4aL,0x5681ce55f5126a1dL, + 0x2e55fbbb2dc003dfL,0x96eea8b06c8cdffeL,0x00000000000001bfL } }, + /* 55 << 322 */ + { { 0x225b4c7ca087d667L,0x1c7212bf5e019ad6L,0xb4d883fa55533217L, + 0x060a1cb3a9382203L,0x0cdd75b0c64dfca2L,0xbca3849f8129ca54L, + 0xea0f1541fdc4ba6cL,0x76420c089f8d1f5fL,0x00000000000000e6L }, + { 0xa53a534f6ce1b6a2L,0x2489f4cd43caa085L,0xce3afd72f0ea6160L, + 0xaf9e1e469665daedL,0x947a8d04312ec1beL,0xd802b36ed89e92feL, + 0x41a7f3301ed24021L,0x6b8789018bfa23a0L,0x00000000000000d5L } }, + /* 56 << 322 */ + { { 0xb42ef882816f7665L,0xb3afb61fb9084579L,0x3127e9a54745395aL, + 0x1fa6611f0126d538L,0x798fa552070097ccL,0x6f41901c704f16d6L, + 0xf528b063bd6749f9L,0x3ca460784b9fff68L,0x00000000000001dcL }, + { 0x0096eb977d54eb8eL,0x9de669afe24facdeL,0x1582b22ac6df403aL, + 0x8f4fbd44a91242ebL,0x1696fcc0e0ad2760L,0x334c9d4ad2d90e25L, + 0xe57995440d5893e9L,0x5074b63ed1077be4L,0x0000000000000029L } }, + /* 57 << 322 */ + { { 0xc11c1de6640b3b9aL,0x3075eec8641ac7c4L,0xa2484dec7b144ab5L, + 0x4ac49f44e53e597fL,0xaabefa613e6c80d7L,0x2d964c50b363a587L, + 
0xcb58a0f06d2d92f3L,0x2015b478e899fe65L,0x000000000000005dL }, + { 0x7c51a1a88df037fbL,0x86b9973bd5582434L,0x946f58e266867f0cL, + 0xbe9451d97ed59b38L,0xfecd089d4c5df184L,0x3f14866afe5f8eeeL, + 0xae2f927fc486af2aL,0x684917776c511735L,0x000000000000004fL } }, + /* 58 << 322 */ + { { 0xd8bfc4c4210815c5L,0x13012a4f8aeec2dfL,0x2fa0e6847ae5255eL, + 0x1088cf4336a2fa9bL,0x94cd1efe36ab082dL,0x128c234009a7804bL, + 0x159360ff1bb24487L,0x0a458b52ae3eab0aL,0x000000000000001cL }, + { 0xc4775d3d9576ecc4L,0x13a0078c5c9c25a5L,0x92ef8e4e4062c8e8L, + 0xf196769660375b3eL,0xd072937ba24b431cL,0xe5e3bf64d0420605L, + 0x68712d5bb23660a3L,0xe89fff9ae6358c41L,0x000000000000016eL } }, + /* 59 << 322 */ + { { 0xff0da5c42a22890cL,0xc044cdfc46bff2d8L,0x2fca586d876f2f60L, + 0xf06e8c8c14dd39adL,0x042dff7320789797L,0xed6a327aeb5cf944L, + 0x844d7c94375fa477L,0x710ee88704b7b91cL,0x00000000000001f4L }, + { 0xb8c9507ae8e1a9f5L,0xc273343f9575c160L,0xe3da5ae5991fee34L, + 0x712ed5679ba2b295L,0x9301bef49b3f465fL,0x8ed51c6c25950377L, + 0x5f46a90cad56c370L,0xb7694541fed996efL,0x0000000000000014L } }, + /* 60 << 322 */ + { { 0x5f4ba23b78e658c5L,0xa9463d66dd8f6428L,0x2b6792e9fb241842L, + 0x10dfa360934aa1f1L,0x7769322ab7bf8a36L,0x223c1474f127aa05L, + 0xebee2760d048d6c5L,0xf495b445e7b13526L,0x0000000000000032L }, + { 0x7baa637e8bb79c71L,0x349709051681a7b2L,0x12359173edd0293aL, + 0xbd9deb854e888c4cL,0x43f03f70563c6c2bL,0x42d131b2f20fe520L, + 0xb080e82cdcf32d61L,0x7aca6b8c0817ea86L,0x00000000000001f4L } }, + /* 61 << 322 */ + { { 0x430d75a3c507c83dL,0x4406dddeeebe1adbL,0xdfe966eb94d0023eL, + 0x7a403a862a422704L,0xc7f74e48d3fbbc05L,0x47ef018fe07bdeb7L, + 0xb69187f8e5ee7ca7L,0x9340009d4fda175eL,0x000000000000001bL }, + { 0x6eb88ceabc7b237bL,0x862c7ae95249e73eL,0xb517b7e79cf910d6L, + 0x2b780471297fe7b4L,0x99738346acbbc73bL,0xc0a18e55e317c474L, + 0x3fdba5cfe254a076L,0x02f6984cd7a4c08eL,0x0000000000000174L } }, + /* 62 << 322 */ + { { 0x70310eb86def0125L,0xdc681d69eddbda48L,0xcb29aa5c303d3057L, + 
0x93a7cbfff83bc576L,0xff9e461be48ded6cL,0xf0ba47061d15d104L, + 0x2700a174bb028212L,0x85b2f2b815ed0881L,0x00000000000001a3L }, + { 0xf48c263c758e2de5L,0x5fc91b78a810b626L,0xcc57c5471ef04f52L, + 0xa3bc05d6db2ff614L,0x5918d61139c567b3L,0x8af3817bbf54d907L, + 0x0f65e1a44cd3470bL,0x26d7c80b9d7f6cebL,0x0000000000000122L } }, + /* 63 << 322 */ + { { 0x6854aa7ee687f6e1L,0x74212a251b7b35eeL,0xadef57c11b3f79a5L, + 0x1a748b8e45511b30L,0x5725c9e78c892a6aL,0x94cc52b54d12a323L, + 0x633cda13a8b2824bL,0xdd069d05b24a8579L,0x00000000000000c6L }, + { 0x563f4058af114a39L,0x820ab6c934462a35L,0x4340b6d903cc02e3L, + 0xe8104176d06f02c1L,0x0b6583783c825b98L,0x07d0c3f46311ea4dL, + 0x34b6a707673e0f0fL,0xcc3e2922dea3d23fL,0x0000000000000086L } }, + /* 64 << 322 */ + { { 0x4d1ba365c000e4f4L,0xcf34a1a87b2caa8fL,0x0e4a58d5783e3c39L, + 0x1ad4b5ce2c1fcbd9L,0x6b30bfb9e8b9556cL,0x304ad2366301b3a4L, + 0xdacf31b8c8d571f1L,0x25b7a0fdec93e088L,0x000000000000012eL }, + { 0x2863bcbf3b6cc6ddL,0x70760671e9430f99L,0x46cce87b7586cddeL, + 0xc1f58398161a045fL,0xa84ca0cf291636aeL,0x11242d2726892bd1L, + 0xb1d85da78c971e9fL,0xc66e0e1d61aa5039L,0x00000000000001b1L } }, + /* 0 << 329 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 329 */ + { { 0xbd52d5127c1a3915L,0x88b64d2e59d9dc08L,0x142d5364a1da166eL, + 0x138ed011c8ef7e38L,0x90e189904d6d06eaL,0xb0081001147f3f6eL, + 0xa8e9f89711a1ee9aL,0xe7607f70e28c7a31L,0x0000000000000011L }, + { 0xa73dc4e4db957a5fL,0xb3c023c02fa7966dL,0x590ca0877aefc5b1L, + 0x63427f912ca0d266L,0x7bc0f1ef4eeb8864L,0xef9125d279239e3cL, + 0x12e924a1a90c09c1L,0x0fec85e3b3663a4eL,0x0000000000000085L } }, + /* 2 << 329 */ + { { 0x6c6119f6a1c88f3cL,0x924e5fec2ec6944aL,0x4c8aac605742ff2aL, + 0x60adde1eddb22c7cL,0x9728938cfa5d25bbL,0xfa5ac4f7ec117de0L, + 0x41f35ab7482929c1L,0xd1c4e8f90afd95f5L,0x0000000000000180L }, + { 0x2fc4e73da7cd8358L,0x39361a57f2a1c920L,0xf6f2f130ad94d288L, + 
0xe37e24662b6a78e2L,0x0babff8b79c262cdL,0x6cae01ef61b597b9L, + 0x9c1e33f0a60d4e64L,0x52a42280dd01f845L,0x000000000000000eL } }, + /* 3 << 329 */ + { { 0x9f19b3e18c730727L,0xa2231248d5e33b72L,0x0505b3dd5bde07e8L, + 0xaa30e5ace8a2fc4fL,0xb2d24a392f964eabL,0x2d9d22a460ca41f1L, + 0xc7183625e0e8dd2dL,0x87f3ab032653d84cL,0x00000000000000d8L }, + { 0x52e505e3c7fb02d2L,0xed5fcc2c7154e8b7L,0xc0df346f8e8397adL, + 0x1c9abde6f542ce97L,0x158cd908e2a2205eL,0xfbf550dfe60b9a18L, + 0x66a4aa43bd817d90L,0xeca35b6cc60dc534L,0x00000000000001beL } }, + /* 4 << 329 */ + { { 0x09bc56744c81da95L,0x9756c443f23b526dL,0xac041332abbece84L, + 0xd933aaa327696aa1L,0xfe78e91d3e4e9032L,0xe491ec63cd67fcfcL, + 0xb73746e7e8fe9c4aL,0xd2ca6f6a199f1eadL,0x0000000000000191L }, + { 0x86a7466aad125891L,0x4817dd6e42499ff4L,0xf5478e3b7c6a0addL, + 0xb92d8e5d2a394366L,0x5c5238e3bcdf955bL,0xfa855ff10489d6b1L, + 0xbb15d2fd134f7f2dL,0xd5faf13744bd6838L,0x00000000000000f0L } }, + /* 5 << 329 */ + { { 0x0fc11e4c84b0f4eaL,0x435c27d47d244a14L,0x3996925dc6eb93caL, + 0x0b699c9f09f7e1e4L,0x92d5807e1eb96a28L,0x772a095b9c14e99aL, + 0xda362fd51bc8f956L,0x157e3c0b9d4800b7L,0x00000000000000a8L }, + { 0x5efb3b8f90d9ec28L,0x8cc8c25ffa2e2948L,0x1ee212252668d00fL, + 0x37102fb19651d783L,0x341ab0e5a50af73bL,0x868464e967e0b7b5L, + 0xe264f59d5ba120e4L,0x661650aae80385c3L,0x0000000000000158L } }, + /* 6 << 329 */ + { { 0xfab2f74c3ef1eeb1L,0x71af5a9992007e36L,0x384d6a18a0273729L, + 0x03df77a9ab5eaf13L,0xbe0136fa0e0bdcb0L,0x5f4be468bebaa588L, + 0x2ebfcfb42840138aL,0xf8e098f02594311aL,0x0000000000000053L }, + { 0x84055186c73f0476L,0x92dcdd0e729b0b1dL,0x7c523b4806be9764L, + 0x95357b956a57baf6L,0x4b556882860ce9b4L,0xdc08b8f831882647L, + 0x11c9b3c738c3deafL,0x50b6fea8e68f20c0L,0x0000000000000164L } }, + /* 7 << 329 */ + { { 0x76b2671fa4c28ca5L,0x91a0d41bd0f62e23L,0x3db3924db448f296L, + 0x0925dc3fa60a9738L,0xa37774b00371c080L,0xfd182ac7b5167fe8L, + 0xce4795b714eb4895L,0x7c993d229887590dL,0x00000000000000b6L }, + { 
0x6090f452a0feb93aL,0x032de9c217e4f78fL,0x29ffff34a237d15cL, + 0x81205285efad6015L,0xcf90cf14ab143ecfL,0x56d1caab0dbd215bL, + 0x2648af04284b4611L,0x6df91b71bc20a51cL,0x000000000000013aL } }, + /* 8 << 329 */ + { { 0xbf0b5ea3d34b138fL,0xc49513813ac7cad4L,0x82ca5536cab6a319L, + 0xff56913688bb4ea8L,0xcee2512a4761d3e9L,0x4695e52155ef9647L, + 0xbb83316b9378f132L,0x1d447f048d43bb4cL,0x000000000000002bL }, + { 0x777ab201ce5889f5L,0x635d968f4c6f0545L,0xbb48a0cf872777eaL, + 0x33f4de62c616138fL,0x93e74192bc8d65ceL,0x532ca9bd5ebae1f0L, + 0xd783e104fbaba8d0L,0xb32af569bdf2d850L,0x00000000000001baL } }, + /* 9 << 329 */ + { { 0xa0bb7fffad4e0ec6L,0x6621d487b0fd2273L,0x7dac286992bb025eL, + 0x2daea181e142e35dL,0x6d2ad2a7b5585b04L,0x7c156b91c67b8de3L, + 0x018e5ad7f9a8259aL,0xa2fbf18af52813cfL,0x0000000000000176L }, + { 0xf5f511266c344a4bL,0xc8ca1d94c7d3e7f0L,0xdb27ea15da64dad1L, + 0x248a9d6eb2dd60c7L,0xc27b6461b6e10f17L,0xf64f36ecd01fa2bcL, + 0xd395dad1052a4c0eL,0x2ed72e101ac0c910L,0x00000000000001b8L } }, + /* 10 << 329 */ + { { 0x218712c5d244dd3dL,0xed19fc5f3849ea68L,0xe4bef41212cdd2e3L, + 0x4189ad768e675f49L,0x818140c606411e8bL,0xb13cdb88bd3e301bL, + 0x8696b6b929e10534L,0xf325c324cdba3b73L,0x00000000000000b9L }, + { 0xd41cdb59966ebb3fL,0xb82e5cba766a762bL,0x4cb02db9628fab85L, + 0x1092daf6bd4d732cL,0xf307aad8333ec4e3L,0x956be117c56ddc0cL, + 0x2e03ef708674b500L,0x9a540a11d836fff7L,0x000000000000010cL } }, + /* 11 << 329 */ + { { 0x5fe1030137825aa3L,0xa74e7065f13007ceL,0x7d3ebd9df6e9c94eL, + 0xcba3ac5535e6d681L,0x602758b91c29d435L,0x6199e8ffebf0335eL, + 0x338348e5e2747f04L,0xf3585a73bf365565L,0x00000000000000c3L }, + { 0xb3c68f52e99b1b1fL,0x4f91a78bc8085753L,0xc7e549a78c010053L, + 0x17912acc9790e0b7L,0xabbeaab400cedc5dL,0xe885041467136ca1L, + 0x166b73459b51dc45L,0xc12d21e36d037065L,0x0000000000000102L } }, + /* 12 << 329 */ + { { 0x88367eebb1714fe6L,0x7e62026793ba98daL,0xc32ba6878352de93L, + 0xf542af8c979410b0L,0x929bae6b0e5b7cf2L,0x1201a42abe4afff0L, + 
0x8dd7508567016e6eL,0x6cba441bc820de5dL,0x000000000000011bL }, + { 0xeda68f37476e21aaL,0x16fcf4f2e71dfdbeL,0xc43966b272a6702dL, + 0x2732f8f0d6af1eb7L,0x2bbd7368572832d9L,0x83807036b54abbb5L, + 0x1f34a2c505aa9032L,0x0840702abc5297b9L,0x0000000000000130L } }, + /* 13 << 329 */ + { { 0x9989f80307ef564fL,0xb8b2ad57d8354633L,0x6f40c68a0ec997bbL, + 0x1c7963bcff213ab9L,0xa9548ec25a844439L,0xe55816bc980a798bL, + 0x724de727d2674309L,0x5a50a29195e2a8a4L,0x0000000000000119L }, + { 0xf46573aa104d6a06L,0xff5f3f99d9d12718L,0x9d527c1889d06ed8L, + 0xdfeb3fef6ba0946bL,0xa156e73dd8fcc522L,0xcf5fb969177f3823L, + 0x98210af8c27cc78fL,0x452ce06048c56dd6L,0x0000000000000116L } }, + /* 14 << 329 */ + { { 0xce7489926632ad8aL,0x334efd2bdee28c6eL,0x5486cc6ba645df0dL, + 0x5f02c1614bf66fcbL,0xcf2ca9b5e228e838L,0xeb7b3e95c58148d8L, + 0x25f22a19a344184bL,0x8f522dbb73976e50L,0x000000000000013eL }, + { 0xd758bbbbe87ee0d7L,0x6d7c077ae946a6cdL,0xd9732f17c80954b0L, + 0xae1d674e56d292c0L,0xa1fba7cdebea75caL,0x533d65b08398242aL, + 0x22fc479e22740f06L,0x5c13ab6063ca9aecL,0x000000000000008dL } }, + /* 15 << 329 */ + { { 0x9941d21ab49db659L,0x5284918d18fab9deL,0xa325293ef612f99cL, + 0x2062c4dd8ebff205L,0x7b4b97775321cff8L,0x10d0c4726905b8d1L, + 0x1645eefea2c71febL,0x7fad527a27ec5eb4L,0x00000000000000d7L }, + { 0x36795144f93f74a9L,0xca5177b36115fcf9L,0xc49b404e5df5ccc6L, + 0x940ca8bc2bda601cL,0x60d8607073ae2f2eL,0x410c1e9c3f1321a1L, + 0x3181ce2c713e1037L,0x1d252106ec2ba1cfL,0x00000000000001a3L } }, + /* 16 << 329 */ + { { 0xd552c9d248351408L,0xa6d83a480ef56558L,0xad5e955a313b47c2L, + 0xdf3caff7f6dc03beL,0x3d62ff1951cc488dL,0xdb49f397966a34f9L, + 0xe10e72130bf6a9e7L,0x560e329abc5852d2L,0x000000000000004bL }, + { 0xfdad029af3017c8cL,0x6ca51acbb78d4d1aL,0xf984fa8679d7d7d3L, + 0x75d9044b911a4d3bL,0xc5b20c87ef955e4eL,0x413d6838f064fd0cL, + 0x37a5ec08ec7fb5c9L,0x634cb97f74561d5bL,0x000000000000015bL } }, + /* 17 << 329 */ + { { 0xad1efc90854c308aL,0xff83fb8276d035cdL,0xd1959a8384fefc47L, + 
0x62c1d0574906d383L,0x8634b4ac4f968d2bL,0x03ff550c1a5571d2L, + 0x76f35ffb3a99965eL,0xafa6323331f48bcaL,0x0000000000000100L }, + { 0xb5c0272bad54971fL,0x4cb20e716cd136bdL,0x6245003b6f9aae9dL, + 0x0be08260dd76ce6dL,0xe45b5015d2a01ad8L,0xa079ad1be686ab99L, + 0x6ab15c9d69f8ac52L,0xa52bdbd741f5309eL,0x0000000000000161L } }, + /* 18 << 329 */ + { { 0x0b617b8a00484cadL,0xd966cd8eedfb5312L,0x78daeed22e0556bfL, + 0x3770c07d5f740718L,0x0ca5bf4b4716ccf7L,0x0ed0493a9b99f851L, + 0x9fd997f26f7a9b17L,0xa0bd37177c8b6fdbL,0x00000000000000f4L }, + { 0x708283ffb63b3e84L,0x41f2f8872ab27c88L,0x2af970973e877cd1L, + 0x9ded7532521ef940L,0x939a47de0eeff999L,0x630c45887ae38086L, + 0x34268747773f602eL,0x9a91920734993ab5L,0x000000000000008fL } }, + /* 19 << 329 */ + { { 0x707b18b6c3928c9eL,0x465e5a820aa48af4L,0x2c18c4e7e68a74beL, + 0x9b9037c2ac6930e1L,0xa1560f34b3416163L,0x511406a69285d6dfL, + 0xf68235cb6e106799L,0xbd3d844a088a6ad6L,0x00000000000001e5L }, + { 0xc4f78bfd85e2f692L,0xc83f003671075cbbL,0x8a75bafa208b6885L, + 0x8b4097d0654cf76eL,0xf567fd4ff884572aL,0xf5dfb3b1ada26881L, + 0x4f423f8bd71ea04eL,0xb148a467cfdbf809L,0x0000000000000147L } }, + /* 20 << 329 */ + { { 0x98d7ec4574b83fb2L,0xda049f4667938ccbL,0x6abf18a43e04316eL, + 0x200bc564efa482a2L,0x5858ac6c029cfe0cL,0x2d81cea5a5bdc064L, + 0xe7ab7be81636eb12L,0x6e7bd59fd1ea4c1cL,0x00000000000000b7L }, + { 0xaee1246920b44e57L,0xeb14e358836b5462L,0x9b4f621d2b22fdecL, + 0xa6b8be6be8b1a5d9L,0xc43a3ec35e3da0a5L,0xd89e2333d43f2c85L, + 0xe534c8393fe85436L,0xc8f70bb54d6862b4L,0x0000000000000106L } }, + /* 21 << 329 */ + { { 0xe3f343d136aab671L,0x872612411057e237L,0xc649dacfd4686627L, + 0xac5c2e85d6976ca9L,0xa778bdd13322b77bL,0xab619e1c856b0eb6L, + 0x0b5fe3ad8a9941ebL,0x2f9b420a9c019561L,0x000000000000014bL }, + { 0x185fefbbeb33f10cL,0x1cf9640cc41725d5L,0x55b9968c85d74435L, + 0xede863e0504b63a5L,0xf59711270e07a7e9L,0x50d97b0da57bae09L, + 0x6dbd2b8ab0f76892L,0xf52601b7f929729aL,0x0000000000000179L } }, + /* 22 << 329 */ + { { 
0x9f97c2576700b350L,0xd002835d78f746faL,0xcf3ecd578da75e7bL, + 0x8768abd95e23f219L,0x3f9e66466cf8ff2eL,0x3064948b1f360d9fL, + 0x92bfce3bb129b55eL,0x4de97c12253a1789L,0x0000000000000180L }, + { 0xa94b1dff29e5efcbL,0xb9c77f91f225c1b6L,0xeaf061bff40e20caL, + 0xc7f20a1e5a880560L,0x1a49752babd0d172L,0xb2397b9b7704d613L, + 0x1bf06d60563fd774L,0xb6bf63b045039cd5L,0x00000000000000d3L } }, + /* 23 << 329 */ + { { 0x7ea8ee24b4cc0562L,0xb5885f8fe2af04aaL,0xaa012337275d5351L, + 0x76e54bb937e29944L,0x7f9b8f182ea56782L,0x4b3d999e28d442b8L, + 0xf8b0e3804718f1e2L,0xf137fa8334e560c9L,0x000000000000002aL }, + { 0x7fd295c0dadf8134L,0x4677e5ebe41515edL,0x5f4a4ea2b67de812L, + 0x083b497c8e609e41L,0xb70b8f2669132911L,0x62c6e87f984b49d3L, + 0x2549a999e54f6ee3L,0x16bff3e87206acfdL,0x00000000000000f4L } }, + /* 24 << 329 */ + { { 0x9fa30f39e43b6c21L,0x4d3822aab9fc57bbL,0x0230adcf69a8a55eL, + 0xdc8ab2796f87f988L,0x2d642a82b2af81fcL,0xb9e63a7730765913L, + 0x5787400625f2a77cL,0x0aa30a6f5bfee0f1L,0x000000000000003dL }, + { 0x796f6508fce49daaL,0xb586299fbf907285L,0x21ccfc555838145bL, + 0x20a0f8c0e6d43b4dL,0x17170a2be5e80fc6L,0xbb8f840688fba946L, + 0x5c226b0e411fcbe7L,0x35ace6e02ae8e090L,0x00000000000000b0L } }, + /* 25 << 329 */ + { { 0xb547200a9bb37cabL,0x0f91bad87af4fc99L,0xb714d3e6695a4975L, + 0x6f563effb9242c77L,0x6ee4ef17510f7214L,0x7119fbcc1b9f9343L, + 0x0fd99faf8ce55139L,0xa9595dd66c1d1f5cL,0x0000000000000149L }, + { 0x78fa8f186fb6dccfL,0xc9458c1fa23f4d29L,0x3c91cd7025d83072L, + 0x05d221aa5d471830L,0x2c8874b90e7270b7L,0x88b5608741995b6bL, + 0xbfa1e6af9c1280d5L,0x47e5b560d7d76623L,0x00000000000001c0L } }, + /* 26 << 329 */ + { { 0x35456330801af096L,0x8f66380bcdef18ceL,0xfd31fa5e4d12da33L, + 0x931659924b9ed4a6L,0xc5112dc875c17662L,0x326d6659939112b6L, + 0xab1acfab4f3be3f0L,0xac364fbb2e4d4d54L,0x00000000000001a5L }, + { 0x45c40db020574528L,0x6a6667eaf0339597L,0x6e4543629c83c9c3L, + 0xb385d150e06d9313L,0x3e135b18bcb29487L,0x5f5c63f833110b15L, + 
0x5b0e0ba2c9488f2dL,0x9bc7194609682bedL,0x00000000000001f8L } }, + /* 27 << 329 */ + { { 0x3472744006e20becL,0x7fac38c38c7ef888L,0x54d2cd7ecae2f3e3L, + 0x8d4cf97a5064f2a7L,0x3db5cda7e928fb60L,0xa7a160cf6269afa4L, + 0xd3612410e4aee8e2L,0x1a72c3961a2ec9d6L,0x000000000000000aL }, + { 0x95a203bdbd1ee880L,0x22c099e645b639cbL,0x67dd291643ae9aadL, + 0xdad086a4aa05dfd7L,0x8b832fd8c468741dL,0xd3b83b4b3b45c4c2L, + 0x99c4a32791dd0c79L,0xee55689e99f169b2L,0x00000000000000fdL } }, + /* 28 << 329 */ + { { 0x15b283fd125d84a3L,0x56aeb34bc1890bbcL,0xd3b9d608a09c584aL, + 0x0c2cb39dd4cb3bc0L,0x46db0f67fc3cc700L,0xf5cc2708db50f7dcL, + 0x5795e7521f0867f2L,0x7cc9145f0427b3cfL,0x0000000000000178L }, + { 0x2fe532cb31e66ab0L,0x2ac67713eee69b01L,0x38f6d515d77ed9feL, + 0xf275c56a2118c560L,0x21a9c7f00fdc638fL,0xc50b3c693254e3f3L, + 0x8fbbc2505f950b32L,0xf7122780014ae500L,0x00000000000000e9L } }, + /* 29 << 329 */ + { { 0xdd3ec700226f04ecL,0x0df90c19482e80e7L,0x68dc1e01c63dd9a2L, + 0xa4f28e23a15fe774L,0x565f36a73ca7a790L,0xafb1eabf67ad7df7L, + 0xd0cf7ff0d1c1da4dL,0xaeff6da8edc5e910L,0x000000000000005cL }, + { 0x3c053168041b88a4L,0x988833f5df29cc86L,0xeed26f752ed77d58L, + 0xf7f4788983991f9bL,0xa1271ee5e35e68a2L,0x312e0d918f01ec3cL, + 0x839f1a84dea08bfeL,0x387d533914668886L,0x000000000000002fL } }, + /* 30 << 329 */ + { { 0x3e48d19e46fa0eb2L,0x3c52023de9765fc6L,0xee7c3a58ac5628c8L, + 0xcdc6dc5fe06e780cL,0xda092f8a33c55394L,0x3f53c57f6eb9b6e2L, + 0xa2f3321bbbf66534L,0xf14d50d72a650a3cL,0x0000000000000116L }, + { 0x6e9f43d0f6b86d3bL,0xd845899a16fcfb74L,0x9ab03ba416f965f3L, + 0xbb61b4a2cc21c1baL,0x13f64e24d547c4e9L,0x484e226bb2b9ce9cL, + 0xddbc2681a05d3c18L,0x3c16171abbf49b24L,0x00000000000000aaL } }, + /* 31 << 329 */ + { { 0xee38edb2dd1bbe51L,0x80e6050243f2c49bL,0x74c3e9d891622e00L, + 0x5a8bd3cd4047f50bL,0x4dd146c4f8cdd703L,0x91e38765ec128a35L, + 0xaf641c18735f735cL,0xef96fbd12fcab7adL,0x000000000000000eL }, + { 0xc3cbc6ca45287b7eL,0x55aec8fea1ac0f00L,0x630fa637af6e4abeL, + 
0xd83b22d2587d5779L,0x84fa662a50afa01dL,0xbd8dd789b19f62afL, + 0x490ccda6f64aefedL,0x5008037e4af86aa0L,0x0000000000000164L } }, + /* 32 << 329 */ + { { 0x446b17b0e695916fL,0xb3b0dcebf9bd8da6L,0x6c76c3ce5ff86aadL, + 0x0785d5a20cde74d8L,0x2ce9fae12bb7ae7aL,0x87cd05685f32e59aL, + 0x8e229326e1598559L,0xa0bad54e34edfb01L,0x000000000000015dL }, + { 0xa54529c20e1290b4L,0xe7c719cab795dba1L,0x68cac3a5109b4740L, + 0xb032bd2ba68e8e4aL,0xce03d9321658337dL,0xb39d102ecc95d9dfL, + 0x79838d96be248c98L,0x02562708d7df0b44L,0x0000000000000127L } }, + /* 33 << 329 */ + { { 0x65202498cc143cf4L,0x09f80cd704445898L,0x54fe51bec4f61a5eL, + 0x29013981597d0979L,0x13b63add19e0ee5eL,0x1ea5e689341200cdL, + 0x1bea4d3663d7514eL,0xe7d322f47b46f902L,0x00000000000000a6L }, + { 0x349b1972cc60de8eL,0xed79aafaa2075005L,0x88f0a5b6004f0314L, + 0x8c0738bc46dee06eL,0x870acd790b63003aL,0x64a1205ca367811cL, + 0x8aa20bebd4a99dfeL,0x5f8eba3fc39ef513L,0x000000000000013bL } }, + /* 34 << 329 */ + { { 0x4cb51584855891a5L,0xfcd86f0188f33a98L,0x9ca889eb8e39e929L, + 0xc0a5ca46bdff8628L,0xba14fbf09e965cefL,0xce2474e21b5cb8ceL, + 0xde7d528bfa459a1bL,0xd741fd3992785092L,0x00000000000000dcL }, + { 0xeb956f25ddbc488bL,0x29bacdcb6682a201L,0x03cbba50fb38ad85L, + 0x0687794318ad03f3L,0xccb0a078e746ec1aL,0x4518bbf1c6d1f10eL, + 0xf769f2dc1f59b213L,0x3b8b1035e24118c0L,0x0000000000000123L } }, + /* 35 << 329 */ + { { 0xa52e2eb373c6ed28L,0x3cbcd662c077dde7L,0x0a2b9b2967eeb3d8L, + 0xcd61f644d697239bL,0xa934b899cd0e9bdbL,0xaf89b6ed95b01dfcL, + 0x7cf64dadc1fd5fc8L,0xf9ed347799099fe9L,0x000000000000006bL }, + { 0x473364deca19dae8L,0xb33f7926c2de1948L,0x9f77368bc9fef6ffL, + 0xe9d364e3aa467f92L,0x825ebc2192bb93b6L,0xb348b9bec4932331L, + 0xd66d94ed152467b3L,0xb6970ac7bb4591fdL,0x0000000000000028L } }, + /* 36 << 329 */ + { { 0x18584526dd5eaf27L,0x316d5c3591e8b4c8L,0xf0515b605b8d3ddbL, + 0x5b64260a45525bccL,0x4f05352788d9435eL,0xa92426af7aa1b06eL, + 0xac351063a5dc466eL,0x5b9ea7dd575a1f35L,0x0000000000000187L }, + { 
0xaca204a297772298L,0xdcd6314ff757e252L,0x2f9c3b40aa5ea9b6L, + 0x1cc338f109311e8fL,0x441c170303512e14L,0xd1da2ca59c64f91eL, + 0x9730d382c7ddc586L,0x84cea67d55dc73e9L,0x00000000000001e5L } }, + /* 37 << 329 */ + { { 0xbbc4e624ee3271b5L,0x6b5e21b6fe977a0dL,0xa3265ba66d0452d6L, + 0x1ec78dd87baf2b08L,0x2c05e3122eebc034L,0xaa1b947bea4f5ad2L, + 0x94f4933a06ebc241L,0xed58b946d50bb167L,0x000000000000014dL }, + { 0xaec8d2b31be81ae8L,0xd86c6d69b3d0e22eL,0xa1ac5934c12c7d6dL, + 0xcab83b6a54faf69aL,0xdde77363c47d62b7L,0x71f0a7769d919c14L, + 0x5606c72bfbf2ab5cL,0x453a2d5d9db0d55dL,0x000000000000006dL } }, + /* 38 << 329 */ + { { 0x998d5368a9b51f22L,0x1f15d19ff477ab5bL,0x37442775d68c28bdL, + 0xd64687ec2c0cd5c3L,0x90e88caf0f326169L,0x35fe08e939109538L, + 0x32f70606af2c556bL,0xc6fc69bd96a5aa0aL,0x0000000000000095L }, + { 0xff2f1fcae1d825bfL,0xabd7283905cc4840L,0x5e6ea842c3ceac94L, + 0xa37eb05d7bf5800bL,0xc7e8cd9218feb383L,0x2cb64fd5a0e35725L, + 0x14c7de98f5b53b67L,0x8c6f364a3280ab21L,0x000000000000004dL } }, + /* 39 << 329 */ + { { 0x052a61531ef9be5dL,0x2ce3b605a27b942cL,0x2d1982cec7a50a00L, + 0xd2ab12fc3938c01eL,0x1aba919ad4e38b6aL,0x6464772967f6c405L, + 0xcacf20a9efd8f7f1L,0xa8d90b0f84d73009L,0x0000000000000170L }, + { 0x20b77432d6e9b843L,0x47d93d17d9df2d13L,0x15ac011057912081L, + 0xa24fc7094dbc99b8L,0xe2f6ba470ed28e94L,0x6a9d4bb1e81889f8L, + 0x48035db36831c638L,0xbc7f16c9b7b511f6L,0x00000000000001a7L } }, + /* 40 << 329 */ + { { 0xa39d4ac8162dd91dL,0x5704769322e67a01L,0xdfe57f997c446028L, + 0x0f49df77d131a2c8L,0xc8f26682f7f779bdL,0x1696655818e884f1L, + 0xc5d9d516f390907bL,0x23356e77617f5f5cL,0x00000000000001edL }, + { 0x5e6830b9679deabaL,0x18bcc37f9fd1a70eL,0xfa84e86272225892L, + 0x2e6cd938c610763cL,0xc179376a671378eaL,0xe3b844b5f502ea1fL, + 0x74a390b75fb4dc36L,0x51f019400ed8d535L,0x000000000000018fL } }, + /* 41 << 329 */ + { { 0x829f07763b84874aL,0x43871c3c629f0068L,0x67ea591aabb86558L, + 0xcd9d158842e26ff5L,0xe790b7b4b87fc2b3L,0x8fe0b1aded7d3f58L, + 
0xbd60a61ed9c77f0fL,0xfdc080c3779dd449L,0x000000000000013aL }, + { 0x2a1419e0e10dd216L,0xc53633aedf69bf09L,0x8750156c554560f6L, + 0x45d18d5211057cd2L,0x668e9f5d6bc3b095L,0x23c0ca2bbef1182bL, + 0x66b5954e307147d4L,0xb55897fa6582e607L,0x000000000000001dL } }, + /* 42 << 329 */ + { { 0xc560dffe49b36a45L,0x93e588366b8574d5L,0x8181236440ae3131L, + 0x0b7a1cc90adbbe10L,0xf0300653f9d147a1L,0x74bd55b4db6ee70fL, + 0x04f147f86856ffd7L,0x62a12dea5e8fea8dL,0x00000000000001fdL }, + { 0xe387062af295523bL,0x3c88bb54562c7c5dL,0xc1c6d25554ab4cb0L, + 0xa2aa7dc600ede737L,0xf102965348769099L,0xe7fb6655931dbf6eL, + 0x9b7f1bca63b027c2L,0x1a04490be25853c3L,0x0000000000000061L } }, + /* 43 << 329 */ + { { 0x2cad6e8e7d7a3311L,0x192637a101d2ce9dL,0x54408f763b09d1e0L, + 0x056fbb0e6c9f1c9fL,0x5d86dd60e8a73041L,0x3cef3dc1649edc73L, + 0x2a5855721c3a9234L,0x6fc27608d555c011L,0x00000000000001f1L }, + { 0x084bbef69c34f177L,0x3e28813737096f4dL,0x4d163b57a397ab9eL, + 0x2b50f5310e1e07c7L,0x8440e162a93ad09dL,0xe8e444632593df14L, + 0x197660292b20316fL,0x90ef0d967f635186L,0x0000000000000190L } }, + /* 44 << 329 */ + { { 0x30ac47e8f7a5ad23L,0x9c7749b063788eb8L,0x74b0ccb37ba11421L, + 0x95738a0b1c6a3f64L,0x0aae5fbe05110374L,0x53cfe62bc0c6e8eeL, + 0x0c56162d6af02414L,0x3257726d88eb33eeL,0x0000000000000148L }, + { 0x3f56ab59dcbe894fL,0xdc75f5e0bd7699eaL,0xb8f7d880e6f78aa8L, + 0xffbb24f65e5e8d85L,0x7833e090f2011756L,0xcf9af908a0349a95L, + 0xd31658498856a70dL,0xfaf01d021845a0d6L,0x00000000000000e9L } }, + /* 45 << 329 */ + { { 0x8b9b44f29e4685aeL,0xef81af6a7b8cf6f7L,0x8adef463de2bcbddL, + 0x1b69518ffcffa721L,0x561f0c5a405464acL,0xe28b2f168bfc6e45L, + 0x9172848b090e79c0L,0xe3c6092f2826d317L,0x0000000000000129L }, + { 0xd95f1d0b79a9a188L,0xceac08db1f69871cL,0x93a8b01280b65e91L, + 0x48d3e503ce3e8c95L,0xed2ceabcdf74d29fL,0x71220652c2119809L, + 0x4bb4bca76642cbb1L,0xd228abd74d461a6dL,0x0000000000000122L } }, + /* 46 << 329 */ + { { 0x13cbe9d39086d901L,0xd71e1a2890ec6eabL,0xb8ae9ebb66472a41L, + 
0x4e3a88fd205e2c55L,0xab212c986f72621fL,0xcea45a44e27bb1b9L, + 0x9056afc557d42873L,0x9d2d273bcb3c220bL,0x0000000000000056L }, + { 0xf13ce2c9170143e2L,0x1f44bb4286d61f29L,0x005bf7e894eddae6L, + 0xc24a3c91c0d3e2fbL,0x553b308db50ea90cL,0x9e779eb0a6c0a0ebL, + 0x8d8a0321d980d46fL,0xba0df44d582cf028L,0x000000000000015fL } }, + /* 47 << 329 */ + { { 0x44c65a3ddcd99092L,0x68e73d6682c4c760L,0x19c50c79b8ca4e79L, + 0xbd60adfb0971debaL,0x0bfa8cc333ecbf46L,0xb178da302146a1a7L, + 0x1e60002d50fe9c55L,0x6ec488ea055d5140L,0x000000000000018fL }, + { 0x79affabd184acae0L,0x7fb90a1c9869ee93L,0x2f1f9a663466f356L, + 0x02a8273d4ef8be31L,0x8ba3132741874651L,0xc56ff44fd2483813L, + 0x5efdb725a6e5448dL,0xaded563f0992ce80L,0x00000000000001acL } }, + /* 48 << 329 */ + { { 0xc9a88248ca91f6f4L,0xa368fd8ac7d5acc8L,0x33d9fd5ccea11e10L, + 0x0af2f470816de633L,0xe78e8101ac186f1fL,0x0d97de7d5e3c4857L, + 0x9346af7407946bb1L,0x81c2bd5ec7044ea9L,0x0000000000000003L }, + { 0x04c2eda3696d947bL,0xc3f988f301efa843L,0xe839587792f22674L, + 0x376972a2c83396c7L,0xaa679a3144f2ad2fL,0x61a46ebc3112b6afL, + 0xbda02ce0c6b3224fL,0xb42aaba7dd4c7b77L,0x0000000000000060L } }, + /* 49 << 329 */ + { { 0xfd99ddf3949f1e54L,0x9e37bc9a42c74bcdL,0x667e6412b528c0acL, + 0x45b8bbdceb560efeL,0x2605f758266eae0aL,0xd45cba357019456fL, + 0x1e7abac6b80ad6eaL,0xccb23c42ba18ec32L,0x000000000000004eL }, + { 0x3e3b6948e4a456abL,0xe98cc81ccdfaf3a2L,0xc3a78e4acfc4cf5dL, + 0xcf981aefe80c61daL,0x4423a07d0a0699bfL,0x80a93eef8c2fa65fL, + 0xe7396c7f7cac8832L,0xd9866a7b731d27fcL,0x0000000000000042L } }, + /* 50 << 329 */ + { { 0x8d452f698f1ef9c7L,0x87eccf87baea3098L,0xfc3204e24903728eL, + 0x35db9b5ea004f3b1L,0x6094bdc2ebb31480L,0xfae255eb95e7fa59L, + 0x59012cfcf0cf198fL,0xc7622600b97968b3L,0x00000000000000beL }, + { 0x477ee91af42b1d5cL,0xf8a49e41b38f6e92L,0xc194985a0f78930aL, + 0x829ec9f4fa14de02L,0xfe13d75852882268L,0xda34194f3e1e5ad2L, + 0x0eec2380dbbe0391L,0xa33de0d430cc7f21L,0x0000000000000040L } }, + /* 51 << 329 */ + { { 
0xab573086122f0868L,0x76c0316c261b14f7L,0x83bf61c4050c5fd4L, + 0x41ca4dabb9bab773L,0x8071890301138744L,0xe790898da00fb373L, + 0x502ad727a7ee4925L,0x4f02eb663990928dL,0x0000000000000086L }, + { 0x73860d4fb81fa334L,0xec04cbff2fa2a1eeL,0xda93b91043041addL, + 0x2fbb81e1afc62072L,0x3534caef06e6fd7aL,0x38c3238d9bcc1697L, + 0x4ef619b141f85807L,0x97e4cb5432d5853aL,0x0000000000000140L } }, + /* 52 << 329 */ + { { 0x78dba3df1b03222aL,0xf9a19f4df743b0a5L,0xd9025fc728786953L, + 0x974c35dd0e75b160L,0xec2ad730190ab6e7L,0xb9c3d4efe7417b0dL, + 0x0e19df5b715a93ceL,0xac4cd49c6cb2d8e0L,0x000000000000006fL }, + { 0xff2080ed69898bd3L,0xc0e345b7e724b25dL,0x4632246b33da2d5bL, + 0xa031381d832ae2aeL,0x38d4d94fa7bf9935L,0xd7ca7e55f30d34fdL, + 0x5ddbe112c9320a7aL,0x69fa155ed90e6170L,0x00000000000001fcL } }, + /* 53 << 329 */ + { { 0xf3fcd3f329070358L,0x61bcf6a94766608dL,0xae11489bfdc834a4L, + 0x153265f486fa8a4bL,0xb39af52a03431002L,0xeb126ae4c8551498L, + 0xd606f364aed87f5cL,0x38f70cbcb9361319L,0x00000000000001e3L }, + { 0x6b1582752f3d2eedL,0x1e8d787cc83309eeL,0x4e7802fda22933fcL, + 0x98a126cb4e0120dcL,0x1701b21f14accf81L,0x94d92ff6e9dd2425L, + 0xeb6c98e6deda42cbL,0xea752ae39a83bbf7L,0x0000000000000088L } }, + /* 54 << 329 */ + { { 0x74c7966431e64320L,0xa73a381035e3541dL,0x1ef1544db34c3f57L, + 0xf71aa56d9a16c02bL,0x42d6f0ef06f35f04L,0x674e853d76201cd4L, + 0x4d416adc0eaadc1eL,0xe5434f2af51ddd7aL,0x000000000000015bL }, + { 0x6a5d1969438cbc53L,0xb97f4776646e4cc8L,0x79f5d3dc66273332L, + 0x93fbdc3b3afa59e8L,0x0c142dfb9af35764L,0xa27741a2a47ac0b1L, + 0xd3a0f40d48383b7eL,0x7d05a2c010bffaaeL,0x0000000000000087L } }, + /* 55 << 329 */ + { { 0x32bec69a22d06febL,0x20b57b7bd6ed9f30L,0x65ca9046547cb8e3L, + 0x689788bd1f69889eL,0xa4cfd9a0d2378606L,0x94db81ec364645eaL, + 0x554dc20ad90cf501L,0xfda49475b0075c45L,0x000000000000000aL }, + { 0xec832a2b513eebf3L,0xa3810d026f4c8dcdL,0xd92de6469737c052L, + 0x32f07ce28b7a8178L,0xdc9adea7f8e7ea6dL,0xb078a52bd7e3c857L, + 
0xf70c3ad51cdd8d09L,0xebe8bcc6aa541cccL,0x00000000000000b0L } }, + /* 56 << 329 */ + { { 0x8879637b3d70360cL,0x053c278f72f41087L,0xcc895e1ff07f009eL, + 0x4c9b8c6468db6690L,0xb45473981b97dac5L,0x0399cac8af65ff38L, + 0x295e09141fd0a6c0L,0xab605fcd871bdefaL,0x00000000000000d0L }, + { 0x3f2d42975ad63582L,0x28fda2bbdc7d3a40L,0x7f14436765672d9eL, + 0x9322ab082937e8c8L,0xf351279faf748929L,0x63a624f5149d6f5dL, + 0x3351f4ff989f5325L,0x0456187bbc7dd8d9L,0x00000000000001b1L } }, + /* 57 << 329 */ + { { 0xfcbf34524112884dL,0x7112c9ea903f7279L,0xa82dea538dfd3bc4L, + 0xad4f103abbd451a6L,0x49d6cb3f59f7f2f3L,0xffe87f02c526bf33L, + 0xf2b85e4ed9e6c585L,0x7c5777c158b38628L,0x0000000000000120L }, + { 0x250a9befb4b57ef8L,0x3b872628b7cb867aL,0x35b111a51a879fadL, + 0xcfd5cab5f73c98b2L,0xe484a6f6312c4749L,0x84f719e11c8b573eL, + 0x83edb875f8840f86L,0x915e1ef95d2dc42fL,0x0000000000000064L } }, + /* 58 << 329 */ + { { 0xa77f1d5f92bf4853L,0xcd9ce94099447d87L,0x4cd274fcba386d27L, + 0x075c64605c0cdd46L,0xfe26dec8d7583ed0L,0x4c0a447759e4255cL, + 0x247938de6aef2ed9L,0xd8dbd48a3d227000L,0x00000000000000f2L }, + { 0xfb51bb52916ce89dL,0xb7997a98092acb0bL,0x94bbe9d8ac2e55d7L, + 0x899feb11827bea20L,0x469b4834919b2a7bL,0xc994c41b1dab85a6L, + 0x8479555ce43d5b1dL,0x6635657f3b0925c0L,0x00000000000001c8L } }, + /* 59 << 329 */ + { { 0x81a0c97ada91e249L,0x5e1e5d32838076e9L,0x389ca1e02c7cf349L, + 0xde81e23cbb367fc5L,0x926f32b9e5e4a732L,0x061a6b8f92c1b204L, + 0x6e5faf35ed97ed0fL,0x4e1d4d170caf4fd6L,0x00000000000001b7L }, + { 0xc8dc770aa4583241L,0x12e4ecf1e1ef9744L,0x099f111cf7d6264dL, + 0x2b2307382e6772a8L,0x65cf0a517f27f826L,0x8ba92657ed871105L, + 0xe9d1d3f5887e4295L,0x5cbeaefe4562fb6dL,0x0000000000000190L } }, + /* 60 << 329 */ + { { 0xeb7316a61a0dfca9L,0x12d74d0102113c17L,0x518499bf8b72396bL, + 0x8151dceddcf70010L,0xcea6bb1168208deaL,0xa1127162501ab541L, + 0x3b35298f471895d1L,0x1e43d852103950f4L,0x000000000000009fL }, + { 0xe259a9b2a01e30b4L,0x8af1dc7b7c0b94bbL,0xb1020ba1b8d2d163L, + 
0x91d4953f11932c5dL,0x393b1a0c79689ea9L,0x42222ff8a18ad561L, + 0xad03a6dbdeccde82L,0xe2a11f661162399eL,0x00000000000000f9L } }, + /* 61 << 329 */ + { { 0x6bc7b50525db69deL,0x4482b033d8b4b0e3L,0x52fe701dcc9e8d21L, + 0x050c4edfc61dbddeL,0x7c631930d20dc520L,0x123db30c2e5b2f60L, + 0x2fd7e1ec35d20b9cL,0x8385cf8e2644151dL,0x00000000000001afL }, + { 0xb885a30bcc451719L,0x21c65d8e47e67d5dL,0xe971a878fa2b6a6fL, + 0xa60fa1b2838c9aeaL,0x4efefeb5e4efcd77L,0xee35b7474fc48efbL, + 0xa95f3beaa06a6906L,0x371c74645e0e6ebbL,0x000000000000019fL } }, + /* 62 << 329 */ + { { 0x5c59edc157066b91L,0x08407d6cc76fa7ceL,0xb3f0b31abfb5115dL, + 0x54a04e6483f65955L,0x8f95167b035d326eL,0xa0eebe160d24f491L, + 0xb1a1caf05e5b0ffbL,0x802645e2d08382c2L,0x00000000000001d8L }, + { 0xcc8f65eaca39e065L,0xea7e6b2a6df057d1L,0xa725e7f64dbc48a2L, + 0x60f36a0fd5c8d0dfL,0xd2adbc8c8f0cc4ebL,0x5cf276c19f0c2849L, + 0x77ade71c8cc56dffL,0x77f04f45d35bc65cL,0x0000000000000121L } }, + /* 63 << 329 */ + { { 0x927e172adf84476cL,0x850b77b9a02bc731L,0xb6ccc003cfd20fdeL, + 0x43ff11ca335e3da6L,0xf5fcebf888b42166L,0x08508ffce90a7019L, + 0xaa50439e5c4b02b0L,0x64666563af1407d7L,0x000000000000008aL }, + { 0xf0361fa97015d2adL,0x06c10c06c3f6d10aL,0x6b30ce607a0b50adL, + 0x905727d44499ab51L,0xadaabac64b4ca19eL,0xb97d87dff24f4c97L, + 0x9bd3bb9db808e478L,0xc0476491fd5b3b47L,0x00000000000000dfL } }, + /* 64 << 329 */ + { { 0xfab6fa734e185ee5L,0xec85b9edf15957d4L,0xa978adbd72ba04e7L, + 0xc7e3107e7986fb14L,0x0e97063de37740b4L,0xf26f51c225e29918L, + 0x66b9445312e6f22fL,0x6617deae68090e9aL,0x00000000000000a3L }, + { 0x86595b1dd2dafd33L,0xcca6cd7ff3c234faL,0xd24847edaac709ccL, + 0xbe089df8fc689536L,0x4fdcc1dc367bbbeeL,0xc2592739a01bef50L, + 0xff4d427aa53d6b64L,0xd3ddc2bc47d9af89L,0x000000000000000dL } }, + /* 0 << 336 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 336 */ + { { 0xadf5e06ea79cb073L,0xcb46aca98bf180e6L,0x5bf0d9eb835f8e22L, + 
0xd692cd207713f3adL,0xa33e9d7ff0d87f74L,0x9f2b6950f602a362L, + 0x27a4c70bdfa4f77cL,0x99c699dd3c56080aL,0x0000000000000105L }, + { 0xdfb1b25595338fa8L,0xabadba3ad59e754dL,0x2338452b436e70fcL, + 0x34cfb5b2e07a21b6L,0x25ddb684f81ea5a2L,0x98f463c0aa325fc8L, + 0x963bbc8094aeb02eL,0xa98fe976434ab9faL,0x0000000000000188L } }, + /* 2 << 336 */ + { { 0x9a7fc933204a966eL,0x7c1c59150d9680ecL,0x0643609730415e73L, + 0x574b06528ea44e8bL,0xd35d1340f4b8fe58L,0x81ddd1f4daa40ca5L, + 0x5c908486ac09937bL,0x467dffa614c180deL,0x0000000000000034L }, + { 0xe6f97acc8000602eL,0x2650e9570f527147L,0x3af58dd97c3923e6L, + 0x67f7102e69f5cafdL,0xf80e76c00ecb60fcL,0xd3441285197f6bdbL, + 0xdeed257a52050b0bL,0x8c28afea0703c0efL,0x0000000000000052L } }, + /* 3 << 336 */ + { { 0x2ea6e18456500523L,0x566ccdd0f44d63fbL,0x8cc0208db01114ccL, + 0xdb7bf61311e5dd4eL,0x65e4263eff83d7b1L,0x12d2924cd6da83e9L, + 0x093bd19078380401L,0xd2835bdbff97c4f9L,0x000000000000013cL }, + { 0xd3ae4c901d8e9521L,0xe2c92a4c42dc5bbbL,0x478e934b8c96057dL, + 0x0e526187ec6155f7L,0x3aca950d2bbe77d5L,0xe7edf96a6e492364L, + 0xfc039fd22fe76745L,0x68dfce9653928c27L,0x00000000000000f3L } }, + /* 4 << 336 */ + { { 0xab482b76e225fbd9L,0xd561f4cdd3c33a7dL,0x18813777ccb2f003L, + 0xa163377524cebce8L,0xddbe587d16adb540L,0xbc0d225f79e133cdL, + 0x8fa3d30032a4c54aL,0xded1ece4243dc5e5L,0x0000000000000024L }, + { 0x94ce92a3ffd4b7ddL,0xeac8e7a15a8efbd5L,0x0c842c3fa9dc648aL, + 0x19172f460d286101L,0xc8681e80beca770aL,0xfaaa1ab4a8795d3bL, + 0xe8625d4663944868L,0xe13004f94f3254ebL,0x0000000000000104L } }, + /* 5 << 336 */ + { { 0xb0fece2bb7f3d4b1L,0x8b6168aa47b042fbL,0x207d9b0f01658a57L, + 0x52deebf25cf0bb71L,0xc678dcc72fc808a8L,0x8bea9d0975a39999L, + 0xf26b5d2b7042c5a7L,0x58b1d1bbdbe12667L,0x0000000000000120L }, + { 0x10cf0b69d821c3eaL,0x045a9342c684e985L,0x2245c17f56babdcbL, + 0x7859852ab40c5f7dL,0xcbc11caf35b36fe0L,0xcbce03baddc4abe5L, + 0xfa4797269e11ee36L,0xc70cf89a8b993b9aL,0x000000000000013fL } }, + /* 6 << 336 */ + { { 
0x2e911bb30d9ea003L,0x50503fd6dd7bbb56L,0x23780725f43c710cL, + 0xf3854bc2d236028aL,0x0bb0cd7085f08816L,0xaf2ba06878e34910L, + 0xb0ab1008558b8babL,0xe0676170e24955dbL,0x0000000000000049L }, + { 0xe2b0161a87b571f5L,0x1676b3346209d9adL,0x402122ece2f86308L, + 0xbd0e364487d6f082L,0x587ef7c464747111L,0xe8edfeec8f3386a7L, + 0x2b70f9333a489db5L,0x819e1d7c442a278bL,0x0000000000000025L } }, + /* 7 << 336 */ + { { 0x731be59d6ee24007L,0xe96884c4b97efd86L,0x9e4a2bbecf73d698L, + 0x7820a2d72ef12084L,0x1ebbf048c5692055L,0x707194dac1b300f9L, + 0xf45fe3fec0d5dc37L,0x20cb9bf6df061c3cL,0x0000000000000000L }, + { 0xef76e09349841247L,0x7e963d5c5673eee7L,0x135d2ca7c5eab79fL, + 0xf0d3ac04ceeb60e6L,0xdb33fc361d07b5f9L,0x37ef4405f1443ae5L, + 0xe4716fae6ec33bd0L,0x3a6e33a14fd6f657L,0x000000000000003fL } }, + /* 8 << 336 */ + { { 0x6f01c26104e188ffL,0x0d9bf057c7ff2e86L,0xf9cd2dd6c8e36163L, + 0xbfc6200bec1562e8L,0x735836b48afb5102L,0xb7a29e92035e1ed7L, + 0xb16b14ca5f2e5924L,0xa77550a628630d59L,0x0000000000000032L }, + { 0x7b4bc3ef7418d27cL,0xdf5c38b623b67adbL,0x168d61e3ea3713c8L, + 0x806f95491abd92f8L,0xe806ab7b98eb3239L,0x47f49e50f09b81cbL, + 0x4324da1706fd5ba3L,0xd54dc8613193dd6bL,0x000000000000003cL } }, + /* 9 << 336 */ + { { 0x9f88ff35a3d49d3dL,0xe4284139fce247f8L,0x6c4d5c7af9dcb985L, + 0xfbcf4fa1161d138aL,0x2704ef59a06a95f5L,0x2a277e200d0e4d15L, + 0x3105f965c5de8f83L,0x10fd313deabd284eL,0x0000000000000133L }, + { 0x308bbdee0774ab51L,0x3e273e4200f231beL,0x843c53d8f8392927L, + 0xbbae99a3c0007797L,0x110b8e4ce1f84317L,0x456790c6e26fc087L, + 0x13144322bf6798e3L,0x35d44ed5217a52ceL,0x0000000000000020L } }, + /* 10 << 336 */ + { { 0xd7c148b3841f5e75L,0x4fb757644b5442a4L,0x95e65524dcdfb2dbL, + 0x2265e0edd3446c55L,0x7d95f3b17a24cbddL,0x852beaa10567103dL, + 0x48c62a902808760fL,0x5f872492fc689aebL,0x0000000000000065L }, + { 0x11fa99ecd6740dfbL,0x0b028e97621f47f7L,0x2a75276c3b2a21bcL, + 0xbad738bbb779b390L,0xa178ac3072cf8732L,0x4c515b788ef8ebc9L, + 
0x5a411714b745b0deL,0xb7c566faf0341a62L,0x000000000000008aL } }, + /* 11 << 336 */ + { { 0xad24d78c5774fc6eL,0x0ae7e57b8dccc508L,0x92d4a44ce70f8e39L, + 0x3915c8eb837de2f6L,0xf7fd688854dd047aL,0x13bcad45d1bbd515L, + 0x86e766a1302bee43L,0x788ea08a0986a53aL,0x0000000000000188L }, + { 0xe0117403716f966cL,0xf2ff910991cbdb03L,0xb30a2a3cf6368671L, + 0x29d49766adf90b6fL,0xe7333e2338d741c7L,0x11995ae97e584997L, + 0x0f46aad3bbc84668L,0x98ad5a3f89992582L,0x0000000000000197L } }, + /* 12 << 336 */ + { { 0x42814ac122ff19a0L,0xde569bc8b27c568eL,0x1d08a1226ada1db1L, + 0x3a3fde2e6e513dc5L,0x29c0813b1f856f5bL,0x13d3bc840b6cb7dfL, + 0xabba0aba2c40bc67L,0x1fb6edcc8a718cf5L,0x0000000000000072L }, + { 0x806a0ca2b312563aL,0xbb056c4891380b70L,0x0c90364b0a157f24L, + 0xf79016c3d9e3f68dL,0x8d12edf88944a622L,0x333d61b31ec7abdcL, + 0x372a693d50bded2dL,0x08bad7dc2dfefccaL,0x0000000000000081L } }, + /* 13 << 336 */ + { { 0x95aecad65c1c6ddcL,0xdd32cd749ae10bd1L,0x81882ab6a84c69dfL, + 0x8a7d8cbe8838d94fL,0x099ad520bcc1300bL,0xd63bb039c30e4469L, + 0xb3bef31417356877L,0x48fea9063238c2d5L,0x0000000000000047L }, + { 0x772aa8f68e8fbfbaL,0xe852c8f15845a5ffL,0xf1355ee5a907e9d5L, + 0x18c0ba0e5ba624e3L,0x32a3b125b72c700aL,0xe4e19fe46afd2a5eL, + 0x08009bd907e95944L,0x14a813f019294091L,0x0000000000000106L } }, + /* 14 << 336 */ + { { 0xfd9946a699102917L,0x62df6a373c73b904L,0x9998064f29c4664fL, + 0x626c7d71a38aeb0fL,0xebb4ba2d9ddbe513L,0xfb7b78301d72725cL, + 0xc1314481e4c19372L,0x90c5ec66ed09077dL,0x000000000000003cL }, + { 0x05bccd2f92b2ca1eL,0xffb71e3f55d4157cL,0xe4913c2eb5edf4e2L, + 0x185eb955a96247b1L,0xd70ff9614d7924b3L,0xe6ebce13ed6c45f0L, + 0xcbb30c53786c10d3L,0x37b6a2f2137354e1L,0x000000000000015dL } }, + /* 15 << 336 */ + { { 0x4d82915996367153L,0x60562a5910e88061L,0x2b2c480b942bd412L, + 0xc015240fefbbe7b6L,0x363ddb1df084fba3L,0x21739dffd1c2f9eaL, + 0xbf972d6fefe1ac54L,0xdfc5ff12edbef772L,0x00000000000001e7L }, + { 0x2f0a919a39250f37L,0xca4b3f9711cb4bddL,0x85909ebfc15252c3L, + 
0xef2421d0f81d74c4L,0x7f8a45d7faa5d679L,0xcc97e0caaf2c27acL, + 0x11fa8448f0393356L,0xd0727c3b091f3a2fL,0x0000000000000098L } }, + /* 16 << 336 */ + { { 0xd17bc05981fe2fbdL,0x8247a0c7cdfc711dL,0x973eab2f63ce74eeL, + 0x270a6bdc4191b5f1L,0xe21afb51637f5917L,0xa84e71c550326c7bL, + 0x14cef332f30c35cfL,0xff0e5f89b7407d5eL,0x0000000000000011L }, + { 0x7b23a55df8b6b176L,0xbc660c66c679c536L,0xad82144a897fde1aL, + 0x5082037d545baf12L,0x4bf2b1174448059fL,0x59c25c1bf1e8c52dL, + 0xc01a4b8d8e030e69L,0x45077803229951c1L,0x0000000000000184L } }, + /* 17 << 336 */ + { { 0x06ab930118ba92b8L,0x0ecfb20658c698f5L,0x0d07b9580c53b831L, + 0xeb8bcf504c6b8a70L,0xd6aa30dd46cfe9a3L,0xc0373457e824156cL, + 0x14ae78702b281201L,0x3c50eefa57c56e20L,0x00000000000000cbL }, + { 0xcd4eac45dc159da0L,0x3e09b24db817f40cL,0xb1a9ddb4f1ce719aL, + 0x27e01e5ee7c528a3L,0x6d3effe69f1aa448L,0xcc3ea2c66c529ff2L, + 0xf2ad16d9cf1095a2L,0x8497e6bf6795d3f1L,0x00000000000000d0L } }, + /* 18 << 336 */ + { { 0x5936402fea46c7b7L,0x611f48a4c8e292fdL,0xc9178953b1ef5c93L, + 0xf8c3b0bf45132752L,0x6f60ee45243ba68bL,0x7aae87523de7945bL, + 0x9a2526667e5b0f06L,0x39503fed24112340L,0x00000000000000e3L }, + { 0x8479c73c64655be9L,0xd3c0a16e00645b62L,0x4e71df09fbe33ac9L, + 0xa0dafba123f21934L,0xba16e195a3689496L,0x0dea634e05508b68L, + 0x6855cfe25ebe2694L,0xec373e0b7d85a1d4L,0x000000000000009eL } }, + /* 19 << 336 */ + { { 0xb8c3dacd4ca0c4d1L,0x8424abd597274623L,0x081fba1ddc429d00L, + 0x4ea6805c77022b62L,0x09561cf5610a6c42L,0x06bcb6857c4d0400L, + 0xbf9d4591ee3eda1fL,0x6bee800bc391cf2bL,0x0000000000000011L }, + { 0x5e41cb27c0611ef1L,0x7a25441cc7cd8245L,0xc5bd725ca22201aeL, + 0x554ff12ef99102f4L,0x55ea06a484c51d6cL,0x93cc43c1fa4d3a28L, + 0x1e930d0e95158c18L,0xdbbd3924f3aca4a0L,0x0000000000000064L } }, + /* 20 << 336 */ + { { 0xa929ab929b4fcaa8L,0xbd15698f18c196a7L,0x52ba54d9ed6b3cd1L, + 0xaedec8263afce3d9L,0xc26a7b6aa7a05e27L,0xbfacb969c40091d7L, + 0x73f5ccaec8fc6e07L,0x7fd306960a44e5f9L,0x00000000000000b3L }, + { 
0x00ddfd1f2a5bedbeL,0x417d4298c591c174L,0xf91f5a4c55f96920L, + 0x3a6623d3936f7a6dL,0x9b67aaf628dd7721L,0x5bdda9995184915fL, + 0x14cb50bf61a502deL,0xfbb382b2f0d3a5f1L,0x00000000000000b4L } }, + /* 21 << 336 */ + { { 0xd2e51c02358407feL,0x379ba801aa5d1f95L,0x78805d1d433bee43L, + 0xc8406d4e30368d9eL,0xf1afbc18181d8bcfL,0xe1b5418dedcc02caL, + 0x594140aade21f349L,0xc935edc90960aedeL,0x00000000000001d0L }, + { 0xb4bac9577c6b52f4L,0x193cae40a1f988eaL,0xd3611a5686c73777L, + 0x8861119d10907a06L,0x4080e88e97c5aba2L,0x13d48f856177a63aL, + 0xd98a14637d1505ebL,0x712f5704345cbb1bL,0x0000000000000145L } }, + /* 22 << 336 */ + { { 0x9602183a8656a9dcL,0x38d9cf9914e10f6aL,0xc4480df1af352f91L, + 0x312a0ee97284509dL,0xf8f1609428a89368L,0xdc60750c394fc38dL, + 0xa45097b8c4b076ebL,0xebc69f19188f7500L,0x0000000000000011L }, + { 0x0b82a33debf35521L,0x38cfc247822af398L,0x7e47262408ea1a06L, + 0xdbf9ae15ca3b03b0L,0x992f77515b8ec4c3L,0xa2d4411d491fd8aeL, + 0x00e5457f6d6707dbL,0x91e79e34a3d679faL,0x0000000000000123L } }, + /* 23 << 336 */ + { { 0x729069f3c2aac2e0L,0x0a3cbdbdcabc2ef7L,0x1ac994e853919f9eL, + 0x988345d28f722950L,0x6c446036b3eaed7aL,0xc8d1ea591af32590L, + 0xcb555a29c5f0298aL,0x2245dadc5a95ddc9L,0x0000000000000003L }, + { 0x28b43e4040fd5df9L,0xa37c16c1700ddf94L,0x57f4770188bdd8cdL, + 0xe46cf6913a7b7b20L,0xc2982480151c16ecL,0x064d58699ab5b911L, + 0x282e068b1e2e9b4eL,0x8bce73305ca7880fL,0x0000000000000079L } }, + /* 24 << 336 */ + { { 0xb8f663eeba7642c0L,0xd6de3ffd8e5f9f42L,0x5f7a9a7bb661e29aL, + 0x43e035a5e709fabcL,0x115ff746d241f6feL,0x9aff188d3a889406L, + 0x35a4d29a00ce6f54L,0xa43706eb19542e2cL,0x00000000000000d8L }, + { 0x6d4d00fe5307c22eL,0x97c82f0ac02f495fL,0x23857399a4563bb5L, + 0xf5ad0f92e621b072L,0x5e28217f73ed71d6L,0xb850554f7c532cb8L, + 0xb75dcba289b00deaL,0xa9252801fabb05cdL,0x000000000000016bL } }, + /* 25 << 336 */ + { { 0x113df94b7d7ca596L,0xd0d81f973a09c4e0L,0xfdf25b6dde5f749aL, + 0xb7a9ca9ff56f6819L,0x4e227c423c76e2adL,0x8f568606fb0c6c74L, + 
0xc68a1311e7d75727L,0x3cae257ea97c6a06L,0x000000000000003eL }, + { 0xb8fc17b67c4eec48L,0x859ababb5c2177e3L,0x372a76051058469eL, + 0x82bc83878198208dL,0x362b7058289b96d9L,0x008fdbb14b1ff834L, + 0x6754a93e001b5d35L,0xaf0588963cb788ecL,0x00000000000000e6L } }, + /* 26 << 336 */ + { { 0x7f0b78bf7e8da44dL,0x6bc7705a76e657e0L,0xf62df055522bd33bL, + 0xcfa310d2d5730259L,0xdb3c752706babe9eL,0xa53c9580c5b35c93L, + 0x88a085faa6e6210fL,0x2436bc42bfbb2a0dL,0x000000000000011eL }, + { 0xfa99d4eddcfc0929L,0xc08653aec8373758L,0xdd402a641820eaf3L, + 0x8fe28a88991a187fL,0xb2a8d2aa14e3031aL,0x5d15745af340b738L, + 0x03f304abb5d057bcL,0x42b9c992b2d32a29L,0x0000000000000130L } }, + /* 27 << 336 */ + { { 0x7bf6c878e6f3697dL,0x0da60fe25286dc50L,0xdaf811cf4427b675L, + 0x21263e1a63b81439L,0x6c1588eab23ad5d6L,0x67ae668ecf30a50fL, + 0x019d2c48dedb60c5L,0xcfb3ef72898225efL,0x000000000000010fL }, + { 0xceb271fc50d5ad72L,0xc590aa97a684a6a0L,0xf0c51ceb97b0397cL, + 0xb0abb09c6f549db0L,0xb04b4898b6adf961L,0x4e5b4948ec0602ddL, + 0x892d411e95a537b7L,0x613fc8af84b4dcaaL,0x0000000000000166L } }, + /* 28 << 336 */ + { { 0xc9eb8a3732462315L,0x91893c7c6e4e47afL,0xc2c9569bf9bbc7b4L, + 0x656b2ce30314c0e0L,0x90b866af7e012597L,0x235d7f2adf853ba8L, + 0xce824693266a534dL,0x3b362419b30f8251L,0x00000000000001cbL }, + { 0xf5c990529df9422cL,0x261a1d31ab69da5fL,0x29ac221f2d225cd4L, + 0x37090fc6ae4cec4cL,0x257983637417f8bfL,0xcd19cb04f6898caeL, + 0x98580a0a75239b7aL,0xd1a38556e19c424eL,0x0000000000000025L } }, + /* 29 << 336 */ + { { 0xc4b2e4321ec0f9bbL,0xb6d9c02b4f72792aL,0x692936bcb7745461L, + 0x70c8d81bf16ed29fL,0xfaf6602cead1e3f4L,0x9ff71ca731f17e50L, + 0xae87c59b5a4f7187L,0x8103cc5ccbbded31L,0x000000000000013dL }, + { 0x973df81487cb1debL,0x8d733cccf3e31541L,0x3f9a0a7bd66e3904L, + 0xd9deff68e1a92726L,0x15118a0f9a57ef64L,0xfc1e37f6a079c288L, + 0x1f3b8593d66e3a5fL,0x2f41c9df69904b4eL,0x0000000000000130L } }, + /* 30 << 336 */ + { { 0x2438afa71bfa8266L,0xe42c4d707aca9c58L,0xed95a6df3a0b8f82L, + 
0xc63e62418c218d8dL,0x17cb1bcf4a467ea9L,0x00add391e6a13b32L, + 0x84f65ead54bca5fbL,0xf3a6b748d44c6849L,0x0000000000000077L }, + { 0x46a9ffa49a19a7a0L,0x0e8a201247152131L,0xbf3cce1b9e569e70L, + 0x8ad6f57ecc68962aL,0xdea8fc4929a6daaaL,0x71a147d711e9686fL, + 0xb705d31d21d2ae96L,0x6bab44926ad7409cL,0x000000000000008aL } }, + /* 31 << 336 */ + { { 0x3bcbec5b43f1bc6dL,0x82f1c117e173ff5aL,0x86732bfc5f7bbae2L, + 0xb369f7b53b3a68bfL,0x09723468dabd0f05L,0x536a17524466eb28L, + 0x797ce5a43d685bdbL,0x5c6904da797af4d5L,0x0000000000000112L }, + { 0x0125dc59b2365e89L,0xe95c31e5e6b036b9L,0x025da8ff1bd84808L, + 0x980320c96689aa70L,0x7a7cf8959ce14041L,0x09fc1e84eddb13a6L, + 0xa562767a27a54095L,0xdd0ed2f995de4423L,0x0000000000000023L } }, + /* 32 << 336 */ + { { 0x6a55868f11c50119L,0xb78abb45c7912bbbL,0x00ec6edcf2598660L, + 0x45b396bf3b330dddL,0xf1e1b15d430b461bL,0xd7d7ad1de8c572adL, + 0xc236b2601ec38412L,0xb668a33699889cf7L,0x0000000000000109L }, + { 0xe39ab64b0bf6ef3bL,0x83496baeb87b2f04L,0xc9a9009ddd04f4b5L, + 0xcd813e028fbed8dfL,0x8766407ed49842feL,0xed08303dd72a0525L, + 0xed117dd7b47e882eL,0x62ec57f744ce2e25L,0x00000000000001a9L } }, + /* 33 << 336 */ + { { 0x2f7726ba8e8a63f1L,0xb0c2ad66ecb3ae83L,0xe57a64b99accb54eL, + 0x79eb94ce3968e980L,0x1030ec413a04e7daL,0x60900b75ae88b7fdL, + 0x6a2b74e30d22ebeeL,0x93e1657222357db3L,0x00000000000001c6L }, + { 0x0cbe946b8feb2f46L,0xeebf1e20d5d78ad9L,0xbf52e57dccc955e2L, + 0x03ca5245a84019c3L,0xd83c5de77390eec9L,0xdfb17932c7ca0529L, + 0x4408ddc221cf2ae0L,0x4b217712bd878077L,0x000000000000010eL } }, + /* 34 << 336 */ + { { 0x19c2fb35e68bfa93L,0xd194609c6be8cd17L,0x3d5bac21c0950167L, + 0xc6f452ad320fa097L,0x5efbc350c519d010L,0x04b5f5091ce6ff40L, + 0xbc81371fd33d1152L,0xec1882e91ec5cbdfL,0x00000000000000d8L }, + { 0x389fdd4ad929e151L,0xbc3bf1fc30b376ceL,0xb57789146458a81aL, + 0xa1b02703bd08823bL,0x6e0bafc8b8ead9bbL,0x05009a9da4a6510fL, + 0x788cd68402719b2cL,0x4abb7c12f4eaace8L,0x000000000000015dL } }, + /* 35 << 336 */ + { { 
0x924834880d6e1420L,0x0f6b842f02a9e525L,0x5f195b9ef2c23ed3L, + 0x872b7dab188c8a40L,0x6ea520e57a05f397L,0xb84f6f52a8318cf3L, + 0x7c20f13cb4ab7198L,0x94c07e8634d193fbL,0x000000000000007fL }, + { 0xa665378f640101c4L,0xf718aba2f23d6880L,0x7feaa46ae39f34ecL, + 0x0c5e49bc45be61a3L,0x6c9e53e91d1097bbL,0x1f8311795b9afba2L, + 0x7c91abbff68ea497L,0x84c407ffcfb9359bL,0x000000000000010eL } }, + /* 36 << 336 */ + { { 0xf66002a7532cded3L,0x12ae6fbd9b51ba09L,0x1877c71151a4511fL, + 0x75a3a374b468daaeL,0x09f3fbee0866ffb3L,0xfb535b7479a0e5ffL, + 0xa9f20777119d443cL,0xb0d45806818ba850L,0x0000000000000089L }, + { 0x9ebce29758c4764fL,0x25239275d673bbbcL,0xb8e731d526a3b9f5L, + 0xe00ea0a81b7a1218L,0x64e38615251ebaceL,0x1861ba0c43c768d9L, + 0xabffeab0faa8388eL,0xd7a8dc7e036e9163L,0x0000000000000138L } }, + /* 37 << 336 */ + { { 0x3a548ee6767f67e0L,0xf43995d96e7381e8L,0xfc613affe3f4fa78L, + 0x75b8421853a88ca2L,0x4efc41a4a34d77d8L,0x67704b6c6dfeccc9L, + 0xb6301c73ad702b28L,0x2257dc83b0bd12c7L,0x00000000000000c6L }, + { 0x01f144891820d290L,0xd2b2e7b74b212d5aL,0x2372e19e203ffa93L, + 0x7847f736afc48e54L,0xfd061aa2b0d47790L,0xeb5d269c56107e9dL, + 0x90e172f6f400c123L,0xf13bd3b9a924ac2aL,0x000000000000014aL } }, + /* 38 << 336 */ + { { 0x2f1e5d38c7d8a5a9L,0xc812dad7d2f00aedL,0xcccd0e9be04e41ecL, + 0xac5b0abd217a004cL,0xb1332777129d0219L,0x7ec641f09f912102L, + 0x53d6db38ef3b0f35L,0xc9d6b3cacabcfc1cL,0x00000000000000bdL }, + { 0x834b4b341b206ac7L,0xb6e4bc6ba8a9615bL,0x024f9e7d83fef8fcL, + 0x659a16fd427d6556L,0x56c796186ea415a4L,0x26b75d757aa919acL, + 0x01522021d880f489L,0xa250d22c6089862cL,0x00000000000001d5L } }, + /* 39 << 336 */ + { { 0xd90132009928e628L,0x1193d8578046a060L,0x4121d1e40ce4e473L, + 0x4c4d212253cd7702L,0xcd42376919823037L,0x696394e8b5506010L, + 0xbd26c4266e10bcceL,0xbb15eb862869fc0bL,0x0000000000000148L }, + { 0x24e36d1ce3895fb0L,0x99b1202f62a2e7adL,0x7be82d41360abc5dL, + 0xaa8b30afe64dda1fL,0xa5c5cdf693938b09L,0x8a0d8440db48efedL, + 
0x17db9cab6dbdc7c6L,0xe0f3f1a778477ea9L,0x000000000000014fL } }, + /* 40 << 336 */ + { { 0xa35909198b52c773L,0x76609450fa34c84aL,0xcbc53186e1eb0e17L, + 0x4f1f328a93f87b38L,0xad5a48aaf3e70effL,0xb90942f9338873f4L, + 0x8d9018db7c92f220L,0x883ee17010edbe69L,0x0000000000000090L }, + { 0xacbf26d4352788caL,0x6e1134413124e676L,0x8c23d996bf850dafL, + 0x8781a364b318b4f9L,0xe9ccfb474366d53fL,0xc36fddcfa2522307L, + 0x0d9d490f5c18063fL,0x70e6946a7c772f22L,0x0000000000000021L } }, + /* 41 << 336 */ + { { 0xceb65c253265473aL,0x0848d8c109e9c600L,0x7a1d52b5da02fa2cL, + 0xc9d059a187ee1d9bL,0x6fc44d08796f782dL,0xc986ddc0de4faa1dL, + 0x7ae99e1cdb221bbaL,0xecce252439e934b3L,0x0000000000000145L }, + { 0xebf9741ec8b2e015L,0x8291788dcc2829e9L,0x85e30cb4b92d367bL, + 0x6306f633d498e495L,0x982bf2b64b7d0e51L,0xec78717623b5ce39L, + 0x13813e58d7dbe278L,0xf35d5ab7de0b9f48L,0x000000000000010aL } }, + /* 42 << 336 */ + { { 0x3465a55880b2842bL,0x69ae8ee5fb5c4ab4L,0x0f60b67f1d9d3c2cL, + 0x43c637aba61d5441L,0xebb11af2616bb3beL,0xb233d60c8ebb368fL, + 0xfe78b8e8e666a6bcL,0x40c89e61199647bfL,0x000000000000017dL }, + { 0xdfd02f7d1a7e4004L,0x67ea0f94f0db6c8fL,0x75fb799d0b25ec12L, + 0xbf15842512e0b07bL,0x6232d56ccf7cb388L,0x62852eadd2b1cf35L, + 0xdebdf566d4d5a337L,0xd4224257c5d68925L,0x00000000000001f0L } }, + /* 43 << 336 */ + { { 0x3b3b236a2705c8ecL,0x18e596fe8f9ca382L,0x6cef674b595529f9L, + 0xdd4d79e523afaeb2L,0x96b687668c095755L,0xc20d270322f1303eL, + 0xaf55fa652118e930L,0xc1598fdb6ab3adddL,0x0000000000000007L }, + { 0x8c72b53a9f7ec947L,0x230c6c33ea8256afL,0x3631364728ff1428L, + 0xedd1a1d226269503L,0x35142f60f50ad51dL,0xfb1597d3365f7d6aL, + 0xc0d5b6da40fb6f18L,0xc3cf67973fa48c72L,0x00000000000000e4L } }, + /* 44 << 336 */ + { { 0x7d59c3f4712ddfbdL,0x839dd63f84f37f9dL,0xb7d4176f102b47ceL, + 0x91549e3ab3e2d34eL,0xf82494656176fa4eL,0x34cef89d534e8a88L, + 0x5752810fd05652fdL,0x05878d59e8109c08L,0x0000000000000045L }, + { 0x83e223e70f9b0af9L,0xd4094717d8b7e4e9L,0xfd6789140f16428bL, + 
0x39782a1591a6f1c7L,0xfc637c567d10b525L,0x396ce1a419b2719dL, + 0xe3c112a59d21f5baL,0xcc9423b1fec40d42L,0x00000000000001e6L } }, + /* 45 << 336 */ + { { 0x9918e21421c67dc6L,0xbbbe96db3ce73d10L,0x646749af353facceL, + 0x4471d11f941a1ae8L,0xdce491d8254be83dL,0x2a734d2b06044c3cL, + 0x015bb26f33e90f68L,0x2986a9179b397235L,0x0000000000000000L }, + { 0xf2bc225392d0328bL,0x3cf80ec41c01f96eL,0x7059378025f2b93fL, + 0xe218c28f8caa3c34L,0xed5b3b01acf08f85L,0x78e7130b675abf03L, + 0xdc164f62e61ea709L,0x5e9bcdafe7cce590L,0x00000000000001d7L } }, + /* 46 << 336 */ + { { 0x7dd978412969cc46L,0x464f831465c2a193L,0x0f0814d7556ba73cL, + 0x4e0b01c2a746479bL,0xf769a86a222abae4L,0xaf204519b06949abL, + 0x219f3dd4bc0cae6aL,0x41e7fd75bb04e014L,0x0000000000000111L }, + { 0x8c81a4171b4685ecL,0xc2cdc2fb56fcf448L,0x047f00b60ffe70b3L, + 0x4fb3e96353c0b627L,0xd6eb99c304092d40L,0xcef9e712824d32d6L, + 0x22438fe0661c9073L,0x1d24b9508b5d62a6L,0x0000000000000179L } }, + /* 47 << 336 */ + { { 0x776548c41646b926L,0x5da803c6cbd4f251L,0x9fb4cbbdcafbff5fL, + 0x136e5ab46b1967e8L,0xbf3187c655c2cc0fL,0xb5cb7010ce03caffL, + 0x75f960d74cba3a38L,0xf1f7da468c4de719L,0x000000000000008aL }, + { 0xaa9771cf8e145328L,0x955607127e9c7154L,0x7576c566c54ac3ebL, + 0x83a1cc22b9565637L,0x65a033e85f237916L,0x0fc5b3076eecda9bL, + 0x6a8a26805a6bbbc6L,0x3f5df88eace3f66fL,0x000000000000011fL } }, + /* 48 << 336 */ + { { 0x8ee76fb84b60e7d4L,0xcdcf289366df99e4L,0xf0a8087980648b08L, + 0x3c499205ae3ade6dL,0x73d8d03503d4984aL,0x4fcd2c1ce21a233cL, + 0x0b5c1e89bf51ca1cL,0x1b7a009f237792d1L,0x0000000000000116L }, + { 0xd2dca97044961ef0L,0xd5a6fbcba97a7b6aL,0x5e6da5975a4f98c9L, + 0x2421c548bca536b6L,0x7a591af11f7ee8a4L,0x320308818a8817c3L, + 0xd98a21782af67257L,0x31fb8f953828bbddL,0x00000000000001cbL } }, + /* 49 << 336 */ + { { 0x2fe629cce3886eebL,0x703263920694724cL,0x42ea1744001fde6aL, + 0x285ba5fcb4b7c99bL,0xa7474f9203317986L,0xfa84421405f64263L, + 0xe64e02dfea76091bL,0xe5356b2ddff26696L,0x00000000000000e7L }, + { 
0x47e54d6513506a2bL,0xb66069948dd04445L,0x86413fcafda61d05L, + 0x58a825310779b2ebL,0xaeebe93899414dd7L,0x838add05bf1cc63dL, + 0x98cbe079d98429d5L,0x7416b93677ceb9ceL,0x00000000000000d5L } }, + /* 50 << 336 */ + { { 0x196b992f7903265aL,0x3f8c505985ac43caL,0xa0542a9832f558a6L, + 0x34493872f8f15c20L,0x8422611411ff2331L,0xaaa95263dad5aa4dL, + 0x5ba3c89449618a2bL,0x7114cb67ffb0b7afL,0x00000000000000ebL }, + { 0x22896ed5c413488fL,0xb68be598e59cf5a3L,0x8aece01d5a9f0b73L, + 0x5ef6ef9fe44bf3c7L,0x276042132e665544L,0x5cbaae365b4fb862L, + 0x2bc1f35ed164d7b8L,0x353fe76e05a5405cL,0x00000000000000c7L } }, + /* 51 << 336 */ + { { 0x5c6259f3f2bd866dL,0x74f730eb67f990fbL,0xeda67791bac84b24L, + 0xd77e5d5ff0d8e9e1L,0x997d32d627f0633bL,0xe26e2cff845bd85bL, + 0x214962750bcec89cL,0x615c7f755ab41a61L,0x00000000000001c5L }, + { 0x5b5e76ff238ba9a4L,0x5a66c41f6342061aL,0x036bd57b984949baL, + 0xb8bd4d4c78893803L,0xade10dafc8bed951L,0xb17d2e19dd61db30L, + 0xa36f13476346eaccL,0x43ef15d396e37846L,0x00000000000001c2L } }, + /* 52 << 336 */ + { { 0x2e82f2e8fc7dfeebL,0x4351215c52c20206L,0x3b7875804be6deabL, + 0x480d4e87e24e7b8dL,0xbc1e73e71a2fee06L,0x9ee62e8dbbe97079L, + 0x729883c30c65ff12L,0x7d79b4450d40e7a4L,0x00000000000001c7L }, + { 0x0040e8251338a343L,0xd46631e703c25efaL,0x69038f5ec8d68911L, + 0x5cdcc85d67efed4cL,0x7bc6c0d960dec282L,0x0ff1944dc69bd184L, + 0xe9c1f924840c0a85L,0x8744d9a243df13f2L,0x0000000000000039L } }, + /* 53 << 336 */ + { { 0xffb073725731b8dfL,0x55b50655528162dcL,0xb06b7e4fbaf43a0aL, + 0x2384151f3205c2ceL,0x2fb3a0ff0796e15eL,0x145b6de137a2080fL, + 0xa7fc63f1eb9e8ed7L,0x282c8eeddbb2c666L,0x00000000000001deL }, + { 0x797ac3c0cdb70b94L,0x9ed6c19f8afe775fL,0x959e53d08fdf4354L, + 0xee3422904c6cd016L,0x878e3ce6da3d2171L,0xf91eb6cc0c0ecd98L, + 0xa4c9deb0352ddefbL,0xe0fbf7404cd91881L,0x0000000000000093L } }, + /* 54 << 336 */ + { { 0xcf3ab1a8c06a14cfL,0xe1dab4729134a070L,0x02695c59d307b78bL, + 0x6ff63c8927584f96L,0x0f2d84c83591c925L,0xc3f41483aee8afb2L, + 
0x9f1021819b9720d0L,0x990e1e26ecc16999L,0x00000000000000b3L }, + { 0x3bd5760e1240a3f9L,0xf40840d9bb9c9613L,0xbee95c7dccb1cc9bL, + 0x094f695218da33f7L,0x116776dcb55c954dL,0x7d5820b102e92e9fL, + 0x06fb44a56ee9fb47L,0x35771a46baeb1c66L,0x000000000000007cL } }, + /* 55 << 336 */ + { { 0xd0440862ec99e2a6L,0xab4a43a33ba340ebL,0x12bae2691f10a256L, + 0x4351afbd77fa12cfL,0x940ca3c3316a9a8eL,0xda41cfb0e7e77bd3L, + 0x0fa1738fa142b7e2L,0x00fc2c8afc50ace3L,0x0000000000000157L }, + { 0xb1d820d0714f689bL,0xe64117180c4c8717L,0x59083d80ba8d5ce6L, + 0xcc2f13ab5a3b4531L,0xaddaf07ad5a8ebbeL,0xba541abb21a54993L, + 0xbf86b7ca801cc68aL,0x23d9dae3b2f94878L,0x0000000000000107L } }, + /* 56 << 336 */ + { { 0xebe606acae034f8cL,0x818cdf076b6018d6L,0xfe55379b7608ef90L, + 0xaa46536f8ba09a70L,0xc013d0e9c8e4d932L,0xf6c3c883da15fbfdL, + 0x9d01956feee975bdL,0x12559e14ca159bd6L,0x000000000000013dL }, + { 0x13cd63821aedc90bL,0x0702dfbdbb2ab040L,0x30b7ab23ff22a921L, + 0x5e348016544e103dL,0x478ca80b00ef78b1L,0x5f56249af37d1238L, + 0x330515a9222de97fL,0x415dc934b0a87d9fL,0x000000000000015bL } }, + /* 57 << 336 */ + { { 0x1c4d07047b5bd6a3L,0x48c3ef051b90b22eL,0x70a08c1f1a64461dL, + 0x4a04e537dd99999aL,0x150e2d2272551b9cL,0xce73d701f738d935L, + 0x0075c2f9282e466bL,0xd4fb74363bbfab78L,0x0000000000000198L }, + { 0x4aa5ceeeb6af5e61L,0x3d5e02891ce71dfcL,0x4ec385390a795137L, + 0x26a82c384e4c960cL,0x853dfe6003e80d25L,0x05c38fe7f833c27fL, + 0xa0fc2cad0e5fdc8eL,0x651964f066b1409dL,0x0000000000000155L } }, + /* 58 << 336 */ + { { 0xbcd1b8fdcd2c882aL,0x972b6862b2561e9fL,0x0f3d3eed44ec7dadL, + 0x2f193dca33dd798bL,0x16923e1b3425be8aL,0x424ac2d0fcea5e3cL, + 0x04dae6a5d3088f6bL,0xb37290420a495a9aL,0x0000000000000191L }, + { 0x233de0d8c36dd1b6L,0x4201b3dd8a627653L,0x193ee18fa4ba6c52L, + 0xafd48153da4125b9L,0x893c89d06ef2233aL,0x1181fc422ad683d6L, + 0x6e9a5811f22ee65bL,0x2b60ec5e843e7be1L,0x00000000000000c2L } }, + /* 59 << 336 */ + { { 0x6906f9196f71d92dL,0x05811b7aa400e02aL,0xad0a7fbeb1147b49L, + 
0x14756b6e854e11a8L,0x2f1ae8e68e071b70L,0xa923d788938cf9e2L, + 0xa2798bc0dafa6f8aL,0x0762c1b7bcd2c30fL,0x0000000000000097L }, + { 0x53c32ec5438538b4L,0xb81a76f9225d6abbL,0xa6bfdddb1596d571L, + 0xe7fdd67a7bcbfa9bL,0x90bcccd91136787bL,0x2407ce31ef192712L, + 0x89dc7c1c2656dcdcL,0x613cccda158f3d14L,0x0000000000000044L } }, + /* 60 << 336 */ + { { 0x17f4ae2fa457f66fL,0xdcbaaf86f7f9b42fL,0x63f9b6a4512eae83L, + 0x0e7005e8906c197aL,0xa6dbca2a47d9a62aL,0xabfc7eb772e1d3d5L, + 0x67a98eeb1f26cd4dL,0xda8b076a0cbf90e1L,0x00000000000001a6L }, + { 0x6b0ad7ac20b776ddL,0xc866b4abfcd80552L,0xa61879f9df7a54fbL, + 0xed17d02b5d624997L,0xd91598090e81e750L,0x0950c09fb796dd7cL, + 0x2841052885acec3fL,0x951a064a247e3b6dL,0x000000000000007aL } }, + /* 61 << 336 */ + { { 0x92e45635293aa8cbL,0x632ad2cb47abe5b5L,0xa90124982269db7cL, + 0xb43095b71331e914L,0x4791600f7092ae50L,0x12270e800afa04b2L, + 0x6734fdca0a89a3b2L,0x703efce3cef04cdaL,0x000000000000004cL }, + { 0xa3a0c24dac19c42dL,0x56dd189a4b3e8f25L,0x9413431f3ed8d5eeL, + 0xb4f9626ec0db57d5L,0x7991275b8d43d9d1L,0x154a82d9aca25085L, + 0x30d558d279af6d23L,0xc37d14ed0894d400L,0x0000000000000152L } }, + /* 62 << 336 */ + { { 0x8a52f15018291f3aL,0x0ba052fb8e4a18eaL,0xb33beb1850947674L, + 0x24e7295dda7960d2L,0x9c881fa7b4252751L,0x141a115777d3eeb7L, + 0x98ba241b6d50cac6L,0x6e7627ae87169304L,0x00000000000000bcL }, + { 0x4d7bda103d0d4bcfL,0x22827deb008a4463L,0x0e52c2e79a70f90cL, + 0x6712a9fce39b6617L,0xfd8973c0505fe510L,0x11cc0de558c8409aL, + 0x210477f3c64f2cdfL,0x0e6e51af443cc0a0L,0x000000000000001bL } }, + /* 63 << 336 */ + { { 0x4e7381532c5b1678L,0xf1264efdf0c3ad21L,0xe6b67649c37f0993L, + 0xb3250481fdf7a6adL,0x34841e26d656f64eL,0x6c582f17eb5e5266L, + 0xe98350b4455c5d17L,0x58f8e6d373857797L,0x0000000000000156L }, + { 0x43289122e842a3acL,0xeb6f13b743590803L,0xc62511d75946d265L, + 0x68a0c27f718aebadL,0x0105232e8a8e7627L,0xf5c3f66c62bbebeeL, + 0x8ed456985e690cddL,0x3eeacd862e990ee2L,0x0000000000000076L } }, + /* 64 << 336 */ + { { 
0xee2f0d5e485da448L,0x1ed09705ff6f7ae7L,0x14eecc17280081ddL, + 0xa120ee797230161cL,0xcc768683ff269a0aL,0xf63e5dfc2a872fafL, + 0xbb5b1ae6acc10cb2L,0xd110650707a71c22L,0x000000000000007cL }, + { 0xa17256895a78b340L,0xc8802d2a8d805b2bL,0x4f1bd019e62fac2eL, + 0xa1dd8419806b6762L,0xe6808f48a9223942L,0x50a99d62a6a1e281L, + 0xfa71344852fbacc9L,0x09e7a5fbb5d82a8bL,0x00000000000000c8L } }, + /* 0 << 343 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 343 */ + { { 0x56b1560ac1c7ce9cL,0x619fddf45fedc58cL,0xe8d35dc7b59afa4bL, + 0x0c5f5ac8eda188adL,0x14d11e1b1a468155L,0x9a5fd6c91529288dL, + 0xf8b121d3e9da413aL,0xcd307365e231527cL,0x00000000000000e4L }, + { 0xda248cc0f086ec25L,0x800ea7d4306b0d1aL,0xa5011ab86b74d50aL, + 0x68bad0af749befe4L,0xc1ad06091bbbce0fL,0x363078eb2727c718L, + 0x2e2e4aec18c61d23L,0x41f8fd81c84e4120L,0x0000000000000137L } }, + /* 2 << 343 */ + { { 0x9a579dee796de182L,0x6f780aa7f6fd67c5L,0x8e2e0d6d6c566337L, + 0xd8d685b5e5314714L,0xbc0bcb3a8099f308L,0x7127691f585ce610L, + 0xb1dd4f51fe15ed8aL,0xab542e06217bd3c7L,0x0000000000000183L }, + { 0x67e189835f29dbd2L,0xf0092c9f74d67113L,0x3faa747c847dc935L, + 0x798604f27f5be1b1L,0xf766db9a8c76c26aL,0xa38d86f05c205ca3L, + 0xfec097778c6e65adL,0x1c2b356c156b3b92L,0x00000000000000d1L } }, + /* 3 << 343 */ + { { 0xfd161d574973e99dL,0x10ee5e335fd197b8L,0x931929e7e937363eL, + 0x3593ac90e29447f7L,0x69e629fa511c53beL,0xa9140acf40818adaL, + 0x2228aa6cdbc45f65L,0xdac3b76cb6956fedL,0x00000000000000b9L }, + { 0x6f387c616730915fL,0x167ce892b42fb653L,0x88ae15d58258e7e5L, + 0x5b5467b445c89d8fL,0x4689f145d122a7afL,0x47c3167dbd359cfeL, + 0xbaa7f767939f7d23L,0xd2457722cf62d4fdL,0x0000000000000051L } }, + /* 4 << 343 */ + { { 0x40e2e814378740bfL,0x63a78984cbd2bd7bL,0x372e13db8a8dac87L, + 0xaf09f900f60984c2L,0x5fcb7a8e79dd89c2L,0x29b8a792ff667cb1L, + 0x9f8db873adaa64f7L,0xc52129f85dcc5522L,0x000000000000016cL }, + { 
0xa7c053bf00c4f4cfL,0x236a5b2399f2b4cfL,0xaa4e794a7d8f5dfbL, + 0x42752a2262ae7550L,0xbf731493b6aa180eL,0x4cfeb68e0c875ff4L, + 0x0c30dcd7e0c5141fL,0x3dcf620376da59f2L,0x000000000000012fL } }, + /* 5 << 343 */ + { { 0x95dd4eb5f8656474L,0xd4669a971cd62198L,0x0cc8ef1e92d0b064L, + 0x9dcf50884d3ec93dL,0x98579f40ebb7f117L,0x5d355b938f1d1f2eL, + 0x84b5a27e9e66bea0L,0x813076efea829073L,0x000000000000013aL }, + { 0xc377cf528a995f59L,0x51d5d552504af77cL,0xe78a03839e855f7bL, + 0xc28dc8efd54d0003L,0xeca6acc485efd53fL,0x94835190e0a0125bL, + 0x26df5fda0ecfbf08L,0xdeb29864cb0dd7d4L,0x0000000000000046L } }, + /* 6 << 343 */ + { { 0xdfb64f02974d9b78L,0x31f9a91b41cce8d3L,0x34dcb6e34999e143L, + 0x01d57ca3739197eeL,0xc0f2108c78b9f00aL,0xadc6692c4a735404L, + 0x8358671e3bcc3b72L,0x3fc1ea7d6f09f3c5L,0x00000000000000e7L }, + { 0x9c983854e86d270cL,0x4d51aa04ae44b27eL,0xff192db6d49c136cL, + 0x8b4eb830e0f2963cL,0x839723cc894608a5L,0x39df449cfd6315ecL, + 0xa65f47eff69010dbL,0xdf70203694f69bacL,0x000000000000017cL } }, + /* 7 << 343 */ + { { 0x98a217d8a7979eb7L,0x2f4859d405e2e911L,0x37ec784f9f43d39bL, + 0x6047068b55a35243L,0x2d40c1194be6cdeeL,0xa28ca46cc2d0a82eL, + 0xf3f91ea1e53f26e5L,0xfaa1f1ee0ad5a3d9L,0x00000000000001c4L }, + { 0x8774d3bab3deb139L,0x94e79e50bfe48f58L,0x31425427b8a91318L, + 0xac77740efab106c0L,0xb496d88441a74a18L,0x45952361c5556450L, + 0x3d513f1474936fcfL,0x80f8d50ebe0c79c1L,0x0000000000000006L } }, + /* 8 << 343 */ + { { 0xb3eee6fe2a5a58feL,0xe8fac6485f1bd90cL,0x8c76159758fb2b1bL, + 0x58d5a7ec41103723L,0x33335bb4a9800518L,0xa2ded46025ed26e0L, + 0xa8996e7829e98932L,0x819f8f8bcdfb40d1L,0x000000000000006fL }, + { 0xb8b1d0edb98e803aL,0x29fd31264620cea3L,0xea10dcb49eebe49cL, + 0x29b7b6999ad55911L,0xc77d33dbf0ca87e5L,0xb9a166ecfe915661L, + 0xc2ad03b0eccf8c45L,0x7b16346d053affacL,0x0000000000000082L } }, + /* 9 << 343 */ + { { 0x916fd08c8984d1dbL,0x7c7870365b1bf3d2L,0xe148da9f2b88ff97L, + 0xa962ff45ac7c4189L,0x7c1ecc275768d54cL,0x33da8d97d43156d9L, + 
0xbfe05acc50f9f6c3L,0x0fd6737671323d62L,0x00000000000000ebL }, + { 0xe45e1307c547c707L,0x3db010d28499f1ebL,0xeabff4004f388db4L, + 0x0b46757cb6041ecaL,0xca91315ebf746334L,0x5980424bd4445cdfL, + 0x7ccb2bb7482799ccL,0x9c9aa96e3129e0d9L,0x00000000000001b9L } }, + /* 10 << 343 */ + { { 0x5e6daeb491bf9e59L,0x9dc3d78928978a08L,0x8ba213793cf5ab93L, + 0x13e05a849f7f32d8L,0xca46a51d24d864cdL,0x49da1d0958f27ab1L, + 0x3d44ac191dd5bb6bL,0x14eb042f960ba63fL,0x0000000000000031L }, + { 0x8c9a73c4857451b4L,0x61a5f60d29e1992eL,0x6b6cd67b1aff692dL, + 0x3b702f11b835afc3L,0x4270a9babfc9d3c0L,0x68c63f32ca0c7cc0L, + 0x841cf2826ea38e95L,0x85c69baf0d60d28aL,0x00000000000001eeL } }, + /* 11 << 343 */ + { { 0x45e6c6b23f5f1b24L,0x15c28586b7cce3beL,0xe985cedf965cfc87L, + 0x26747eb284cf2b49L,0x681700f7c15ce505L,0xe2d3e6344273ba87L, + 0x09e2e994691f61b5L,0x13dccaf844b47927L,0x000000000000014cL }, + { 0x8c7d0330798eedbeL,0xe0e7fa0ec3f4951cL,0x99e5422f22ffa060L, + 0xa977fefc3e202a9bL,0x40d939b29fda521eL,0x49c677526e65acd9L, + 0xc17c3b685171db47L,0xda5735369cc9843bL,0x00000000000000faL } }, + /* 12 << 343 */ + { { 0x085d3a3cd827cc1aL,0x66ade8b0b0a2b341L,0x4b810c4cb68dfc88L, + 0x77fe410ee5230a76L,0x8426dd8e389a3a1dL,0x2dda407d7d94bcf3L, + 0x5c661465415e7675L,0x442edb73273f2602L,0x000000000000016cL }, + { 0xe079d5bcb074acfaL,0xa72d9c62d08169caL,0xe1d49019f8828607L, + 0x6ec373632e99dad2L,0x00f11e7bc8707b4cL,0x8b3275a0face4dc8L, + 0xfd045d4660ad7b4aL,0xb59f6b352d870ba7L,0x000000000000005fL } }, + /* 13 << 343 */ + { { 0x8d68557b51272392L,0x5309f4ee45658c02L,0x3095b408efe64d18L, + 0xaa43cad23d85c797L,0x859fdd6a4466d136L,0x19522c68bdd1220fL, + 0x2def78829b8cf52bL,0x3374cc6bda0d3564L,0x00000000000001caL }, + { 0x70ba7af1481ff4fcL,0xbfec18ace52060eeL,0x193045817332c570L, + 0x9bca275ae05313dcL,0x792532b3e4688a8fL,0x73a7088b2cc15270L, + 0x0d5f57a3cdd433a5L,0x53928b9ce4932780L,0x0000000000000119L } }, + /* 14 << 343 */ + { { 0x360e5a48a3d933ecL,0x0ab120bdf7bbea20L,0x30b58f9adb780cdcL, + 
0x30736d9595285853L,0xd52795fb1b3839deL,0x61abb62ef74f223aL, + 0x7134f16b31f8ec5cL,0x716089459beced0dL,0x00000000000000a1L }, + { 0xe346fc2d8d5e323dL,0x56aa6ef5ae6c4aa1L,0xdea1e40d130796eeL, + 0x7685efdcd71960e8L,0x07e19bb51040e1bbL,0xb89e54e64709427dL, + 0xb09e9978ab677ccfL,0xd155e8cae4e79e41L,0x000000000000017fL } }, + /* 15 << 343 */ + { { 0x37911cec263d9d14L,0xf47b2325e6c4d1beL,0xda885016f0147d78L, + 0x0885ca03887ecb3bL,0xed86eb282d186bfeL,0xbdc86af2532d79c5L, + 0xb383a5e811c79f33L,0xe07db5fe06434eeeL,0x000000000000005fL }, + { 0x94ebda7ed0e9c38eL,0x890cbadf57eec5ccL,0x0307af50c3e7bb87L, + 0x7a736b3ad82a5ccaL,0xa11df6677ff0813aL,0xb6108387e88d8258L, + 0xbd705b30d4d86b60L,0x8236cf82dac7fb06L,0x0000000000000038L } }, + /* 16 << 343 */ + { { 0xc08891ef1f60c05eL,0xf9407f855915d764L,0xde8e7a4f0c96f04eL, + 0x6f2753d62cc9d658L,0x59c566b1ddd346f9L,0xf47b8329e3197938L, + 0x279a225bec0f1790L,0xd2001390302c441dL,0x000000000000001cL }, + { 0x71f6e57c20ea5e7fL,0x12786ba3c8eccc18L,0x31bfeee7563adb36L, + 0x688bbe01c99e4db1L,0xc1a39c8458226d89L,0x4a23db6dd0436b3dL, + 0x9009c835bcc4a5d0L,0xb5765b5b4bc5e903L,0x0000000000000147L } }, + /* 17 << 343 */ + { { 0xe6b4a598aa1bedf5L,0x961741ea269742a1L,0xa70c0ead730beb27L, + 0x6630a4735b0951b6L,0x7705bf6ebe99d329L,0x72c5d355f107c12aL, + 0x3339727727eb0146L,0x88057c43805888faL,0x00000000000001efL }, + { 0x0e50400725ef77eeL,0xe4055d7b45c6b5d0L,0x50fe7dc291aab5e1L, + 0x0a4a41f4b30dde14L,0xf79ef05566d0acecL,0x47ff1b08d751cbebL, + 0xb3b376414b70d1cfL,0x839901ca495f96c5L,0x00000000000001e2L } }, + /* 18 << 343 */ + { { 0x70f293f78d7a1a55L,0x6d47ae698dbd1bcbL,0xd972ce7e2a482890L, + 0x565b5a69ae414647L,0x1ff7f1f4fac08052L,0x6ac7e4f7a6b8ea59L, + 0xb00046d54c758546L,0x57fda89278c94b54L,0x00000000000001efL }, + { 0xc89c53d4ab9b1fd8L,0x120412c01939d37fL,0xacf8ab74178a657dL, + 0xf02ff62e6277e2f9L,0x6d388adadef2648fL,0x0a21d194d18d7ec9L, + 0x28a91fb9620af185L,0x9a4be9c4cb7ebf4bL,0x0000000000000025L } }, + /* 19 << 343 */ + { { 
0xbc45e6e4b2dae93bL,0x078b15245136d380L,0x61d2079bcde5d587L, + 0xdcb2f7bf4d18976aL,0x3134fa08f1163d89L,0x4548e1cc166d26d9L, + 0x372b77f520cf54c8L,0x21e059e7370c6405L,0x0000000000000090L }, + { 0x88f8eaa80398ad72L,0xef4dfabdc756e71fL,0x9793c8cd26ef090bL, + 0xa2183be477e6c7ebL,0x89687bae0c15e96eL,0x973c780afb7901f1L, + 0x98d0434c8c13b4e5L,0x5ac0079c7d545f02L,0x00000000000001b8L } }, + /* 20 << 343 */ + { { 0xb6cc18ebda4a348fL,0x75ec91dea1828b88L,0x33d09001749d90e6L, + 0x09157f665444d6a2L,0x3d99e23ec900287cL,0x5b7ea7f56e259af4L, + 0x23cf7c3d7f716c57L,0xf3eb640ffe71bb94L,0x00000000000001aeL }, + { 0x8244fc846e9cdcf1L,0x595beb65e002db4cL,0x5caff71941a87d50L, + 0x79b6aab4e3ccf8aaL,0x1c57658749d3eb51L,0xe81912249671a8dcL, + 0x64c41ec1651fc983L,0x9f8680d37983f157L,0x000000000000006bL } }, + /* 21 << 343 */ + { { 0x60ab42fbe05b2926L,0x1c587bf61d33e1dcL,0x96f5e70249ff79e9L, + 0xb962af43e24c2e89L,0x30042cb3be7c4f64L,0x79f652a81d63ce35L, + 0xd898a931ad6a7c44L,0xb2fb1570230d1accL,0x00000000000001f2L }, + { 0x2022d6a58be3811dL,0x00194f6b73c51c56L,0x3ef9434003d77a7bL, + 0x6d1989c6b8e27ccbL,0x03eae3f13a64841aL,0x41fadc74c63267efL, + 0x3e79f28722c3f704L,0x9d6bb8f454828d4eL,0x000000000000018aL } }, + /* 22 << 343 */ + { { 0x185cb8e721e2e26eL,0x1866dbbff82c3492L,0x5a5e2a8913fe27abL, + 0x2a97a225ba74bc51L,0x9699342b754d7055L,0x11cbb11a2395566fL, + 0x7a1a2e8d98ded0a3L,0x17986e2379b54681L,0x0000000000000003L }, + { 0x646f4099da8c44d5L,0xb738986d451eeaeaL,0x09dbb26b7a3b8625L, + 0x5b427f68db3935bdL,0xd946c20da7131ae5L,0xae04ca918a745c0dL, + 0xa1b0ef3a3f2341a6L,0x7b87fe01d6c790e1L,0x0000000000000098L } }, + /* 23 << 343 */ + { { 0x72170b94be3f7ed3L,0xbce35d7df87e5a5bL,0x475dc4f4a187c49dL, + 0x8f70214da1e51c4cL,0x4a4f2173b03f9991L,0x0f402769644bfa17L, + 0x1fcb1fd205185c69L,0xc73f4d6ad59730d6L,0x00000000000000f3L }, + { 0x718cba01d0af16c4L,0xf877dfc0321d02baL,0x92005bdd160a85bdL, + 0xa77c73b9fa3cc000L,0xd32784fa789d71a5L,0x1fed3e8451aa04d6L, + 
0xe5eff488d6838f92L,0xc6dd5be9b197e512L,0x00000000000000b2L } }, + /* 24 << 343 */ + { { 0x78a6a00515ccb3f5L,0x77707acad710f874L,0xa9f60018df76fab6L, + 0x7a05131eb0ef7585L,0x73076403c0347bd4L,0x5677f9b970f16928L, + 0x71e01afcacdb7c70L,0x2801531df2261bc4L,0x00000000000001b3L }, + { 0x2e0884c087ab879eL,0xcef683ae5692f07fL,0xe9378ca2ecb5108fL, + 0xb259e58446581fb3L,0xbf13bd74d9ab63e2L,0xdbb1a8660488e2f7L, + 0x308d1a9c791c614aL,0xc064c4b74bafecceL,0x0000000000000068L } }, + /* 25 << 343 */ + { { 0x3baa82b9d0344395L,0x61b0ae22f03f32d3L,0xfdf609fdb9ab6330L, + 0x8d2d4961e63485d7L,0x118bcbae7ed8bf75L,0x48084311dab22cceL, + 0xd878e073ffa72bf0L,0x2d31d41024088b64L,0x00000000000000d8L }, + { 0x1e2ad88893e537ccL,0x723c084c89496f7dL,0x154523b8e2095efeL, + 0x60efb050a2a64c74L,0x1da398ac0f97d458L,0x3a4f1f7c382044e6L, + 0xced99f7be34a1f0fL,0x7d757166ecebb070L,0x00000000000001f3L } }, + /* 26 << 343 */ + { { 0x2368670204035fbcL,0x494b7bd0a09ffd39L,0x24d81683d3d1a346L, + 0xd8717302a15611edL,0x67d48150235fbe70L,0x7056885b92733e93L, + 0x7088dd8d149e63c7L,0x9d1fa18c9fa2b660L,0x0000000000000093L }, + { 0x05ccc95bab42e341L,0xf8ddae598d84b663L,0xecbabdff710e5d05L, + 0x6bf408efe7d83fa7L,0x01e9fba9cc612f03L,0xb11b149a4b68351fL, + 0x2eb12cb1c441fdc5L,0x1c5aa7073f1779f2L,0x0000000000000079L } }, + /* 27 << 343 */ + { { 0x71a41f98b7c8df35L,0xc0956536a098c8e6L,0xba543f95b726e746L, + 0xc930b14a1d6414bfL,0xbe77ca4d1ffa41b9L,0xe2c9b33a01b6503fL, + 0x8f618a589be1f9f7L,0xc5352fdb58a1e937L,0x000000000000015aL }, + { 0xab583155d4d38a4dL,0x7dae65b732f456ffL,0xe6dceba0864ea025L, + 0xe1b8eedb2beb7e85L,0x1afa273c24ef0333L,0x15c04d35f74c8c1cL, + 0x3df77f47fe398bc2L,0xe6759438fca98336L,0x000000000000010dL } }, + /* 28 << 343 */ + { { 0x37097c79f3582e38L,0x32fb2f72acb94fd0L,0xe9f248615bd8e99eL, + 0x021bd377d89f261eL,0xc27ccfba354d698dL,0xbd90006363d0d0d1L, + 0x1a2ff3650ffe8c76L,0x2e06e2ac0a5a912eL,0x0000000000000056L }, + { 0x32d5f87321ac73f8L,0x6071df7f1ce57659L,0x7b26cfe7f3d02f88L, + 
0x49889f71ac4b7cceL,0x6a8e756e4aa74667L,0x68698ee6cb72bec2L, + 0xf10fc0e132a6e923L,0x25d53885dde3045cL,0x00000000000000d2L } }, + /* 29 << 343 */ + { { 0x2578f6e4c55d2794L,0x33d4f6c052c21979L,0xcb692ea7f0ac80ceL, + 0x4a075eb4d0d2bb1eL,0x2bdca085d766fb6aL,0x28800b66f9cef794L, + 0x79a504a18e1af8efL,0x87b7c42918f6d72aL,0x00000000000000e5L }, + { 0xa0315547c4fd70bfL,0xed90418d1bf84addL,0x9444ed07561cc0a9L, + 0xe218634f926e1d91L,0x1c2867c9c45cccb5L,0x8867a11b0088364aL, + 0xb33066483293a8c2L,0x5379837264319b9dL,0x0000000000000059L } }, + /* 30 << 343 */ + { { 0x6261f11a490fcb56L,0x80e1e5b92166a7fcL,0x2156e7b7410ca8f1L, + 0x6ba2949dcf083d43L,0x6f8c280e85f7a658L,0x86921a03c5362a6fL, + 0x212c2960a82059ecL,0xcd6e8bf65535b1fbL,0x000000000000003eL }, + { 0xde0debf94fa231d9L,0x3891ee50bb251008L,0x4f2e882b1d3cf2f7L, + 0x55ae8ab4a441ea5eL,0x1a2fd89697f3d7d0L,0xaddcebb8240f6844L, + 0x00c9e36a7c1ae8a1L,0x017a9763b8c7c299L,0x00000000000000bbL } }, + /* 31 << 343 */ + { { 0x7f8843aa27c1fd2cL,0xc7c12738b7767dd1L,0xed262ca40459641bL, + 0xcfd418f94ae40df4L,0xfea0e51e4b4adc47L,0xf71a6afbc1076188L, + 0x51c967efae2fb1ffL,0x10af8b7593d3d135L,0x0000000000000005L }, + { 0x60b155619802e32bL,0x1b766903875c51c2L,0x8e8f66b6afa3d68cL, + 0xb174d2ebe8cf549dL,0x514733991f65fa9aL,0x5bc23426688a0642L, + 0x7dafa87c49871359L,0xa01b3e316ee4c06cL,0x0000000000000069L } }, + /* 32 << 343 */ + { { 0x1e2df23635210df1L,0x387b5c569aace566L,0xdd8152c9ee401323L, + 0x56fe9ec99cac0076L,0x88ed7fc1e1a9f782L,0x6551487d681f0428L, + 0xeaacc10197689006L,0x626bcad13b7e7fe6L,0x000000000000015eL }, + { 0xe1b3574a7acb23caL,0xadda7c9e3636a1c2L,0xfc998cf89f3d947cL, + 0x38ee4df41ba0511bL,0x1f40cc1403e4694bL,0xccecf4e0800fb6d9L, + 0x021f708ae1665d06L,0x2bcd7975c492d329L,0x0000000000000063L } }, + /* 33 << 343 */ + { { 0x96b4d04fab8001feL,0xe57a3a7bfe85f93aL,0x8ddba50b81d1d925L, + 0xa721d664f4b81297L,0x97ed33af14009a3eL,0x566f67398d669f51L, + 0xe390958133398239L,0xd7a868f9b1e9107fL,0x0000000000000027L }, + { 
0xdc583df06773c4c8L,0xd92a6a1e5e741619L,0x9fc62275eb819e25L, + 0xf9854e0cf083005fL,0xecbd75ce7b51e108L,0x4cc07d2722a8eea2L, + 0x06b4a95fa203629dL,0x65458a822e92393fL,0x000000000000009dL } }, + /* 34 << 343 */ + { { 0x0c33804b49726318L,0x02d8b4cea9a6b656L,0x308eaf52e8cf960aL, + 0xb36b768adf3a112eL,0x8131fab42b004e40L,0x0559670c887c5137L, + 0xff0aaaa311f55b20L,0x08f4a3c37882a219L,0x00000000000000f6L }, + { 0x1e4c54f236e35580L,0x75b801076317c307L,0x08da116384ed361aL, + 0xda9c804e9fd2252aL,0x12f480031c027a95L,0x4c2edab64ba5b4b6L, + 0xc7e6ba6f307f959fL,0xb541004fdfb90d0dL,0x0000000000000099L } }, + /* 35 << 343 */ + { { 0x6e84f0d99170c45dL,0x6c18bb4d38aaf72fL,0x07982c04541fb2a3L, + 0x87b518be80d9c949L,0xdee66e2966394187L,0x31511309f8dbf99aL, + 0xa8778cbaf823156bL,0xf9faea959b606d0fL,0x0000000000000093L }, + { 0xfd31893563961535L,0x7986e24f3f52f702L,0x5b1159b45ddfae4dL, + 0xcd9fbfec2eab8be6L,0x7b02b3fc8041e138L,0xd6a03b7891c5a91fL, + 0x27bbfa95e2cb2f1cL,0xa9ce01ce466ef3e3L,0x0000000000000133L } }, + /* 36 << 343 */ + { { 0xee5cc4b56463339cL,0x5db849afd60e46a8L,0x376ec6edbb0cd719L, + 0xb25bc974b73d2548L,0x4d3b9651ce5787cdL,0xc9f56856fb8a6b9cL, + 0x40712dcf9997fafcL,0x1e9a979d88c38605L,0x00000000000000ebL }, + { 0x65aa240f387c6c7bL,0x8dba59ea2e4d5a46L,0x4dae7703cbc76db7L, + 0x616a9765ce122f7aL,0xa7ba80479be5f249L,0x0e3cc2bc559ff606L, + 0x533dc60febcf865dL,0x07f49e3224af8ab6L,0x0000000000000104L } }, + /* 37 << 343 */ + { { 0xdd77ebff685989acL,0xb6f95799ab8308a4L,0x4e3a55af398e759dL, + 0xa450ca615693678cL,0x54fc55a0b225faaaL,0x16584ec91dc73d17L, + 0x1a5d318febfd0546L,0x76b7d32f96c3c17aL,0x000000000000003eL }, + { 0x51be80cc328cc1fbL,0xa7a78f194965e6aeL,0x5381b42874544db9L, + 0xaff29152aacad99cL,0xc48bbcc61156add8L,0x363047efe566895dL, + 0x33d0a4a5bd032371L,0x880ec2cbff5f5959L,0x00000000000000a8L } }, + /* 38 << 343 */ + { { 0x9690d5e55f5e7c31L,0xef7369b18adef74eL,0xf74ee9ef2a7db0c9L, + 0xd59cb64addf21c43L,0x6eb9fb438da8e4ccL,0xd1962bb086d3a35bL, + 
0x7f8df70c107dd755L,0x4226aac2d64da7dfL,0x000000000000008fL }, + { 0xe8ba50a0fff053a4L,0xcf3d93f3aa55315eL,0xb3cdbb97d1f458b0L, + 0x4b08420dd5d406bfL,0x7d869582f55c2820L,0x6fcb33b1698de39dL, + 0x6b7745f5daaa8a63L,0x6ecec49a60db3a06L,0x0000000000000189L } }, + /* 39 << 343 */ + { { 0xe7ec7a73a6fb4bf7L,0x9c4cb462c58d8eecL,0x1774812b9e39b420L, + 0x3eb336168ef96477L,0xfeb07e85981c4ce4L,0xc5cbf961bc2806edL, + 0x558213f9c99082deL,0x6da1895930463857L,0x000000000000006aL }, + { 0x55fc5626e084de84L,0x1c14a1b6b1388433L,0xbcd81c8c13bcb966L, + 0xf3c19eac80ec3df1L,0x3d8209db319be8c9L,0x009dd32c97cb53ceL, + 0x11b045b569348a7cL,0xf062b5bf08a42755L,0x000000000000013eL } }, + /* 40 << 343 */ + { { 0x1c48fc22b0b3927dL,0x0bde722f69b0963fL,0xe408cacafa23304bL, + 0x0286346c33892f47L,0x4edfa1d344dca6d2L,0x4ed4dc92dc7e794aL, + 0x875f0a691bc2fa3eL,0xb6719990c61f95f1L,0x000000000000010aL }, + { 0xd69e90f6b9d1af63L,0x78a0578fb8525cdbL,0x423058d607e5dd88L, + 0x4566d2724e64b363L,0xfa878a8931cb06ecL,0x88ff5b4a1bea9ffcL, + 0x343e558867212ac2L,0x0e4cb5b440dd6e75L,0x0000000000000184L } }, + /* 41 << 343 */ + { { 0xe0d0a82ddc9454b3L,0xf487a58cd0bd7052L,0x872289d507f23cb3L, + 0x739561b660a4d759L,0xa785b509596c0ef8L,0x0013852d0842b0e4L, + 0xf52d1e53d66380ecL,0xbff385dd62bb1e96L,0x0000000000000113L }, + { 0xbdcb323fb841727cL,0x6943c4849a825146L,0x31893c76e74e00c5L, + 0x62c0e3d55e3801ccL,0xa692149105663bc5L,0xc521169e924ea71eL, + 0x3082a14de324bd5fL,0xa571779c8641c086L,0x000000000000004eL } }, + /* 42 << 343 */ + { { 0xfb39fb3373facaaaL,0x1305273c23052258L,0xa9205e1175193205L, + 0xef9fecbb8cc8098fL,0xdd0f157be588ca9eL,0x3364b687f8f2e14dL, + 0x3e0a4cb00594f821L,0x2697b264600abec1L,0x00000000000000e5L }, + { 0x5f1048d60c425ae0L,0x596a72abd8196cc0L,0x76dbf0a6ff36707eL, + 0xc7ab267255665a0eL,0x27ae6b082aacd81eL,0x4a2646471184702cL, + 0xb6420d7981ffcc1fL,0x7ac1e298536904d8L,0x0000000000000188L } }, + /* 43 << 343 */ + { { 0xab8b3503416f5d87L,0x4fc288e16651df50L,0x3f0adfd9f62ec45dL, + 
0x92cd8547172e5630L,0x434fbc19c3aca907L,0x38eb1388ccb55646L, + 0x19729f62dafb3a47L,0x8ced2ee49cfc34cbL,0x00000000000000edL }, + { 0xb08ba1c3cdbc6116L,0xd075c733b7ab4b91L,0x547d457ff97f1759L, + 0x919a38e18c601000L,0x2fae128fc9a3a7ceL,0x050486e31643b442L, + 0x9034efad2afd1aa0L,0x92702ff21af79b99L,0x000000000000007dL } }, + /* 44 << 343 */ + { { 0x68d0506218f40095L,0xe085992447df111bL,0xab2400df4b409e59L, + 0x5f8d22db555656f8L,0x606ced3146df6523L,0x4c92283116251601L, + 0x8b383573007bb6a2L,0x99bb3be83dd4c4eaL,0x00000000000001e6L }, + { 0x65838cf4d21d9bc7L,0xe07a59110168059dL,0x193d6b80c27c988aL, + 0x08d7198bb6411f17L,0xb229275330ee2a99L,0x4dba6885690e2ed7L, + 0x658b187cf10e2b34L,0x8002fb6b9ac63db3L,0x00000000000000aaL } }, + /* 45 << 343 */ + { { 0xf2aa7ff4fdb6b1f7L,0xfb45d9ab038645afL,0xfbe46a595d991f1fL, + 0x777ebeadd152a3f9L,0x830688a04a7e59fdL,0x205e03ef3a6d1d41L, + 0xe61a446a1ff8c82eL,0xc2557922ee29bd8dL,0x00000000000001e1L }, + { 0x9181192fabfa9a84L,0xbcb13e6a99b82faaL,0xa509d7609f24f727L, + 0x30f91875d7fcdb25L,0xf5e87646a279f077L,0xa1abe3b1afcd136fL, + 0x5096c338f5d393a0L,0xe18e7d97c77f082cL,0x0000000000000023L } }, + /* 46 << 343 */ + { { 0x99a8fbb5a161f30dL,0x4b936314a6dca904L,0x582fbb6285f072baL, + 0x638b964291f813ebL,0x8fbc4a5f33b67634L,0x75c4f8854786ce86L, + 0x12fe375cb2be82fcL,0x57b6c79e1af62afaL,0x00000000000000ffL }, + { 0xc584de033815bf50L,0x59284a85a5d2a462L,0x99205733a2db6069L, + 0xac414cedc789585fL,0x74c918a17924fc12L,0x4179d5c0e01a417bL, + 0xf3f6d46b2de7f1dfL,0x04d576ff993a462cL,0x0000000000000058L } }, + /* 47 << 343 */ + { { 0xe9599d6ac0d5f4e1L,0x175f50fe3ff2ad8dL,0x8387d0c9cbe62ffeL, + 0x19bd7a93f759c79cL,0xd6dc8c1718264806L,0xffcd7dc91f93bdb9L, + 0x272bbbde842b0f37L,0xa25c2429cea01bf8L,0x000000000000001dL }, + { 0x6a294073e4187767L,0x209d9bc423ee345bL,0x720ae363dea56eddL, + 0xe3bf827a23447df3L,0xec4fc4d9dbcb4442L,0xa523dea0df06b38fL, + 0xb7a20ff275ff70edL,0x758975429c985220L,0x000000000000011eL } }, + /* 48 << 343 */ + { { 
0xd6772399efd3e26cL,0x08fad2b4016cc649L,0x04b688e18c93ae74L, + 0xee4cd37f884e93feL,0x0adfc5f43f4161e4L,0x38b754b36a15d63bL, + 0x0a7c520fb63e3bbbL,0xc6d4a82a6bfb98c7L,0x000000000000006aL }, + { 0x3fa2075432fe9124L,0xbb2d3081b6ba6e4dL,0x21294a30716ec099L, + 0x930eebfcd2636892L,0x0f2d1febac11c6dcL,0x4a22383d797f5a02L, + 0x40c0453b4e874895L,0x1d1ea32146bf9197L,0x000000000000012bL } }, + /* 49 << 343 */ + { { 0x98adef8589638e94L,0xef3d1a9e3d38b340L,0x6ce88584062c0c59L, + 0x7a306b12e4d7ed2eL,0x3fd19f650e0ad96fL,0xfd800846bdd0f7f9L, + 0x41aa2176c8958c40L,0x662b0a5f6b80a9f2L,0x00000000000000fcL }, + { 0x3c1da866bc0ff89eL,0x8f659183ae7b7052L,0xf499f6a9c3ab5947L, + 0xbca8b00cb8e883ccL,0x71b521e4ffbd2095L,0x6d9347e395bc461eL, + 0x974c20ade248b5b4L,0x8b0e856009a7b234L,0x00000000000001e6L } }, + /* 50 << 343 */ + { { 0x0b011c32ee2b97d5L,0x79200cef3ad0b2ecL,0xea1decec50b54070L, + 0xec2fa4a8daf1d7c9L,0x9c2061eaa9203504L,0x01ca3089e8e76ddbL, + 0xf009b4ecb2ab1903L,0xd42e9d8d17418ffcL,0x000000000000003cL }, + { 0x05c5851078dcb449L,0x366e588014134c89L,0xd3c226ca0f9d0ca8L, + 0x90368b2231202cdaL,0x4900c6c679c08f36L,0x282ba56a42a6f1b1L, + 0x3146c4170b497bceL,0x4b6a7d5d8b761fa6L,0x0000000000000182L } }, + /* 51 << 343 */ + { { 0x9687fad20f33e22dL,0xee13cbf0f861fe0eL,0x7bfd7b51e420708fL, + 0x7e3b03c29408f6c3L,0x98e32d6cd2478ff4L,0x46c6a5ee34a542b3L, + 0xfb063d20a9d664c8L,0x0d46f2fa462cd3bbL,0x000000000000005fL }, + { 0xd864149319bdb879L,0x4c1eb5a09c32b27cL,0x7245a9ac8bb683f3L, + 0x6c0416d6147ac3bdL,0xf0b832fcf13b6273L,0xe4cf4607f2140e06L, + 0x3627fed4b74a3da2L,0x0b285a0d5eeec17fL,0x0000000000000118L } }, + /* 52 << 343 */ + { { 0x1fdcd60d607b148eL,0x52ba3a64ef0af894L,0x93d7e10f8e017e3fL, + 0x54586254a1c56691L,0x7ba1d3344dc39c87L,0xdb980aa5947633fbL, + 0xe005885333908ff9L,0x752e4a4cfc9c017fL,0x0000000000000111L }, + { 0xf00b5caadca578c9L,0x33e7b721e5e36c85L,0xe12eabb0d35b31b4L, + 0x7e7f6feb77020642L,0x593a80b27efc68aeL,0xd15b1c05b098be57L, + 
0xf2f8a3d4a680dd49L,0x41bcfdbc8743a900L,0x0000000000000152L } }, + /* 53 << 343 */ + { { 0xe941ed524f65e73bL,0xd3e1d57affc2f0daL,0xc6d535243702b0f7L, + 0x81cbf7778599b1e4L,0xec7ea8a9c500dfabL,0xba77eb049d0bfbe4L, + 0xa3d1cb95e1324dd6L,0xc8ef4cceede51d63L,0x00000000000001a3L }, + { 0x6c2624e5052f9069L,0x235b7957a284f707L,0x2cef19e959304063L, + 0x0e2ccfb01ea15b4eL,0x9a4c9bf0010f6519L,0xc1edf0173339bed5L, + 0x5677cbbf8bddb55fL,0xe487e6d90b86d922L,0x0000000000000097L } }, + /* 54 << 343 */ + { { 0x942ee9fa3f9dfb04L,0x98f99ee04bb518c8L,0x7e82e828b4c177deL, + 0xcdc34f88cb890189L,0x7662d2b1295eb6c7L,0x40f6e30126ff62adL, + 0xb09ada6615539133L,0x0240f1ab994c1ee3L,0x0000000000000119L }, + { 0xb788badd5b186991L,0x2f0f51d1feef6adeL,0xa6fcad6697ebc752L, + 0x7a41969ecb9d2718L,0xe8b76733a3690ba0L,0xfbb3978055a3075fL, + 0x905400f11e407ebaL,0x1cf6c26d29b33ca9L,0x0000000000000026L } }, + /* 55 << 343 */ + { { 0x82e0c91375602369L,0x37772fd0cdf88991L,0xed11013ce687811bL, + 0xadc2a6ef0e32e842L,0x9b95a8c33183cb5dL,0xb7ea194ef178fd02L, + 0xdf35cb1b0e373bbeL,0x4c70f3b2d5b46c11L,0x0000000000000154L }, + { 0x62570b6cc73964d1L,0x0a0b268480c4e024L,0x400b160c1205a6a2L, + 0x826be2f82f65aa46L,0xd10b64a20a054f68L,0x12bffa7464766cf5L, + 0xfe9420788d929d96L,0x78a8682b8fa29a3fL,0x00000000000001a2L } }, + /* 56 << 343 */ + { { 0x567357340ee20005L,0x1867e14953306288L,0x30a011a2a8c834cbL, + 0x85dd3b76e4be86d6L,0xe080a590059d6838L,0x27ba4bbc384fc1a9L, + 0x08bd92bc1fb5f754L,0xbfe47313795078b3L,0x000000000000016dL }, + { 0xd266058c646b3dafL,0x32edfdb12fbe6000L,0x641eba0fe2f9c075L, + 0x02c67e0c69ad2966L,0xe2f469e903ae6d25L,0xe7e4198ad0927943L, + 0x63e387902caf56f2L,0x563c3fe290ba1dc6L,0x00000000000000daL } }, + /* 57 << 343 */ + { { 0x986795c4e5fa0ed3L,0x6cacef9a47ffc3c3L,0x8406f9175b1dcda8L, + 0xfeb8ce4570d199f9L,0xbbbc1f37d1b1aa68L,0xbdab7e0ecf1643a5L, + 0x4fe791e953c4a8abL,0xebae700e4d20cc4aL,0x0000000000000177L }, + { 0x702b8bdb97b76b31L,0x73da6623d6e6fde5L,0xb36946a708ea1f3dL, + 
0xfb25cd3ff11e7ae3L,0x996cf485c8475ecdL,0xf3ac42d20ef1dcf7L, + 0xf218353842a60719L,0x3cc178281d61944aL,0x0000000000000191L } }, + /* 58 << 343 */ + { { 0xfcd47f6b67508a16L,0xe31427401bdf577cL,0x4895185967f0cff3L, + 0x7d6fcef807638e5eL,0x39b9d003e9235a94L,0x0f58af795a824938L, + 0x6cf21f76518b7880L,0xc485929558044273L,0x0000000000000186L }, + { 0xe0f75e1aca93ef08L,0xd5644e7b4340dbbdL,0xcf01ad2491a927d6L, + 0x43829aa53be7c5e0L,0xa0c72646c54fdd3dL,0xabedecc81d939f5aL, + 0x8bd70b024c607671L,0xa0dc64200adc7aa7L,0x00000000000001e1L } }, + /* 59 << 343 */ + { { 0x26a9889300d4249fL,0x65abdecb009278b3L,0xc118a3f337065e41L, + 0xea008486e7ce142eL,0xb98918b36b326823L,0x0f1031f33887e45aL, + 0x7530610e1629d0aaL,0x6193410a3bdd47bfL,0x00000000000000c8L }, + { 0xb1da190f84192eadL,0x61f8d02c3303a700L,0x6faa862b1b3bcdb6L, + 0xf55bc5e116da96baL,0xd95bdcfbc7109dd6L,0x1a75f2179573e7a4L, + 0x04ccd08b873cc228L,0x5b2039ee25a9f6f7L,0x00000000000001c6L } }, + /* 60 << 343 */ + { { 0x5fde9be18e99f004L,0xda1fca7fc4fcc705L,0x8776b448e77c338dL, + 0x12ae11e1e0384911L,0x28172fff338ac117L,0x7d32e433fc4479b5L, + 0x55f257f9c5c8cc74L,0x70190a52ed205691L,0x000000000000011eL }, + { 0xb255a18ad36a0c3eL,0x435abf4b3c166cdcL,0x51f796a0d75211c3L, + 0xfa43f06b521d35baL,0x47fae95d0575debbL,0xf9f7ac07626266b8L, + 0xefd12b4837e8edc4L,0x03ccbf8b6af450f5L,0x0000000000000001L } }, + /* 61 << 343 */ + { { 0x4c3a5b247b4fd7f9L,0xbe4f6aaa73cfa932L,0xf8720019c183b294L, + 0xa88e61113efbffd0L,0x02a45a2f505fa176L,0x5614def1ca32b335L, + 0x27d80f82eb09898cL,0x341f3eb60072f7dcL,0x00000000000000bdL }, + { 0x3b1074917ef9c7f4L,0x1db05b0daaca0f0fL,0xb4e8236c35fabb88L, + 0x9ecdb35c2d887bdbL,0xe9e477a4685ff351L,0xd66108c5c8693cb6L, + 0x3f45b6e0a0408fd2L,0x712b6b89197ecc54L,0x0000000000000075L } }, + /* 62 << 343 */ + { { 0xad5096b436373ad1L,0xc5c5d96a67e09911L,0x71d976bcd1c1bd0eL, + 0x734eef2e26276014L,0x39826b9398abb08dL,0xf97c29bb60504d90L, + 0x03e6b7bcc8dfe26fL,0x62fbcd06edab53bdL,0x000000000000001aL }, + { 
0xb8e19bcaba51fc6eL,0x51980bedfb188d4bL,0x947062215f9340eaL, + 0x55fd26f2ea1bf912L,0x9b4bfd5b52acf80aL,0xe0cfdad11517a40eL, + 0x0231b221bc0813aaL,0xd93ede1067c8de5bL,0x00000000000000f2L } }, + /* 63 << 343 */ + { { 0x14c36eed98352662L,0xb2fc535820cc7dc2L,0x4d9d0399afbf0b1cL, + 0xc45410e2bf803069L,0xadf0b1e408a75f7eL,0x32483ad625d55976L, + 0xe03ac79a40134cdeL,0x8a5e15bfeca20104L,0x0000000000000094L }, + { 0x54bb69bfb2ad7a77L,0x1c7e147a4c7a969eL,0x3abd4c100ca1cfc4L, + 0x25cebf8026549e9dL,0xbc93e23dcf7a3014L,0xd35c8df023b55805L, + 0x6febda25410f6adaL,0x724a6fb680ffb7e4L,0x00000000000001e9L } }, + /* 64 << 343 */ + { { 0x2fb204ffc6ac25bfL,0x04264d4a20b8ff3fL,0x95b64338a430a774L, + 0x573a4b367da8bba5L,0x950affbb5b83d767L,0x9254695bb560c0f6L, + 0xfcd8a4fbff22df77L,0x8625ed302d6e43dcL,0x000000000000004aL }, + { 0xed8010f61cac9547L,0x0612e17392c7ba4fL,0xaf48aeed051d9478L, + 0xf7067a1069a78479L,0xb17c4be898a1aa82L,0x2b21d42a7a94d7e1L, + 0x461b848fd38e3b49L,0x76af1bd6563a87e7L,0x00000000000000e8L } }, + /* 0 << 350 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 350 */ + { { 0x5a4ab9c18d71ef32L,0x08205f9bf4f4cf75L,0xfe64565db34a8c55L, + 0xd12be047169994bfL,0xd3c3f84f43f91e62L,0x1dae2e732601b686L, + 0x2a07fcbe2dee30e1L,0x25e562459a754926L,0x0000000000000174L }, + { 0x288455f32ce4d340L,0xa1f4e6b0e321170dL,0x9a36bac390039106L, + 0x1682b04d98c7ed01L,0x27166c68e9ebef5eL,0x41efe4f001ca803dL, + 0xf76898ac460a4c5eL,0x6a787fc31acbcda4L,0x0000000000000103L } }, + /* 2 << 350 */ + { { 0xdcaed4e0eb86608eL,0xfdfd21688d7ba967L,0xd4793142866e3436L, + 0x259b748bf9e22704L,0x86914896ac039548L,0x2236c40dd7c7eec2L, + 0xd76953aea1c8873eL,0xf3e9f9539cd08a3eL,0x000000000000013cL }, + { 0x2d653796535c50c7L,0xcb8b6a753478f1c4L,0xa2abfb2acf0ad848L, + 0x9cd60fbf7fc08e6dL,0x3706c38749f48743L,0x3ea91a1d3e0781e3L, + 0x8c19973bd007f364L,0x566d7cb522e5f10aL,0x0000000000000103L } }, + /* 3 << 350 */ + { { 
0xff04036328faa051L,0xefc95341172c6ce0L,0xe4028c4a7d53a624L, + 0x481fb2fce3aa1e70L,0x4671faf3d2d92ac4L,0x0d581a28cad49733L, + 0xcfab195da5e23867L,0xf6784d62c8bd1eadL,0x0000000000000170L }, + { 0x2ba73d671694ffaeL,0xbaf16d6d47d613b7L,0x5748fb2fe24a2182L, + 0x337d36ad8b9d5587L,0x07b92ce38cfcf82fL,0x99b04584fca2cb04L, + 0xe8fe5417a5db355eL,0xa29a83971e795369L,0x00000000000001f6L } }, + /* 4 << 350 */ + { { 0x674e392f0261ada4L,0x5af762f55bb103a8L,0x281ad6a5c9e8bf69L, + 0xa88ad2e45c57af63L,0x7fbce1f23b909ce5L,0x37883da84a8c86daL, + 0x94f7c5708fc8357aL,0xa1f2fb59c7c21ea8L,0x00000000000001acL }, + { 0x30c79e7b1cba57c5L,0xf71d84be6062fa39L,0x8fb70b03e2b9c131L, + 0xbb0f27e7cd862b12L,0x2fea10874ffd6a0aL,0x177871582a7a7b08L, + 0xf79606cc8653ce49L,0x9514c960629a5ff9L,0x00000000000001a0L } }, + /* 5 << 350 */ + { { 0x685cb5e1ee76561fL,0xc7eed1f695507f6bL,0xeeeab42cd2aff6dfL, + 0x32417d37c5c5362eL,0xf1dca5a7362b5f64L,0xa632d6b9fce1fb3aL, + 0xdf5ef2ab8d95fa9dL,0xebae375560e41269L,0x00000000000001ecL }, + { 0x18090f8b62102f3eL,0x1f92da5a150229deL,0x3c7262f9f45b03a9L, + 0xf12f42c84967dec8L,0xc68b707051a319cdL,0x5fb0202c58e88be6L, + 0xea58670dbacf6553L,0x4a218180bb263c69L,0x0000000000000038L } }, + /* 6 << 350 */ + { { 0x2df8ac505afa5f7eL,0x6df783774959ec90L,0xb26bdddb192bec1aL, + 0x1c8d1feb809b6d87L,0xbf8e71e3c90fd61fL,0xb779d00f9d3abfd2L, + 0xc9b25e7d40d5d7a4L,0xcf5dc7c2ca6c2dadL,0x0000000000000100L }, + { 0x360d9b18c50b6ba3L,0xdf5958ff1b4ca3ffL,0x1073b7f6e05fce33L, + 0x819739fa5936c3e7L,0x5d7bf0ce95b80771L,0x27043464d723e7a8L, + 0xf665f01f769d0313L,0xe8ba3b82d21e5578L,0x00000000000001afL } }, + /* 7 << 350 */ + { { 0xcc1a5d125c84085fL,0x689ccbbf11e07a83L,0x818e90645bd5ce0aL, + 0x7ff51fe77dd3b557L,0x244b2fc597c092fbL,0x56b49f61ce7dc3e5L, + 0x60465ba316e1cb43L,0xdad9e6d70bca87efL,0x0000000000000159L }, + { 0x410a5b331f2824f5L,0x7940293fff4bbccaL,0x2c37932925ce7150L, + 0x4b0fcc706add5653L,0xbc7fa7355ad55175L,0xc751542bef886eb7L, + 
0x4c8352570b74d18cL,0xae7eeb6061542f24L,0x000000000000004fL } }, + /* 8 << 350 */ + { { 0x1fb30b26534de721L,0x467328664123d0bcL,0xe4e83c0d3a143f8eL, + 0xe1f5f92a691180e2L,0x5fb00e76081f7230L,0x0e4e513d083c3776L, + 0x5b81ea91eb042c2dL,0x7a221cb200af7f4eL,0x00000000000000e4L }, + { 0x7bfd3f9131e75c9dL,0x8545adada2c68d12L,0x01c295b9f986ad9fL, + 0x186952a541eb1f2fL,0x6f7e3e3ee802ffc8L,0x704e75d0b98fc92cL, + 0xc233afee4800a8fbL,0xd0111a57549d5fa9L,0x000000000000010bL } }, + /* 9 << 350 */ + { { 0x703bdfc28fa5a117L,0x3324b4c7a72de61eL,0x66bffd4e189dd1aeL, + 0x0dd312e197be2e90L,0xc0f200e159e34175L,0x494e207fb97fc751L, + 0xd1a1b7a1bb27e3eeL,0xaffbced9b305b999L,0x00000000000001b0L }, + { 0x82ecc5d084711c62L,0x339f3eb9f8990ca3L,0xbf24f07a5e5cf050L, + 0x0e63379096b80f96L,0xb2713b232cd928d0L,0xdb2acbed1bec9ba8L, + 0x259a9461a584c845L,0xd024aa292d12b85aL,0x00000000000000b6L } }, + /* 10 << 350 */ + { { 0x162d7affa798f350L,0x904fd12e7ccf599fL,0x2e13439240e92c4fL, + 0x7439ad22afeb0edaL,0x2111df7dbb3a3984L,0x17da71600ba49716L, + 0x03c556c8ef500d81L,0x292974427287b40dL,0x0000000000000066L }, + { 0xc61ab9d32dc3165bL,0xa427265826fcadf3L,0xb8ccd6c0838ee2b8L, + 0x2b43ac9dcad6ed19L,0xc5e072a314ac704dL,0x148021b6b3289a83L, + 0x3da3fcb61d3e7f5eL,0xf6e0d0596fbc3f05L,0x000000000000018aL } }, + /* 11 << 350 */ + { { 0x07d7152724577876L,0x0ad731b07475d82aL,0x8181bc1ecc35789eL, + 0x47453c419ab56a3bL,0x87ae49281343be4bL,0xff1e1a462366665dL, + 0xd89941a533896339L,0x8603a0feee010861L,0x00000000000000e0L }, + { 0x9aa8f8f3ebc782ceL,0x0b8cdd59dfe2867cL,0xe6bd219d63a7e153L, + 0xb7748a8c62371590L,0x6f5ff5a8f67ba1d9L,0x9f057b843cc70904L, + 0xb3949d0a3aa770d6L,0x1d7417ebc36c1dfeL,0x0000000000000134L } }, + /* 12 << 350 */ + { { 0x238d0f45e5375e53L,0xb576c6ca60b1447cL,0xa3a372a662216556L, + 0xa36f6c4a8649c9d5L,0x00e80b35ab50c2cbL,0x11e5520e2556c5a2L, + 0xac58d6f1d3aa02dfL,0xd56bf8228b95fddfL,0x0000000000000094L }, + { 0xd172331a2a513cc5L,0x35eee6975ef6a6baL,0x4f02b17fb0acdc95L, + 
0x07ec621e1eb68058L,0xf266346cdac73d4cL,0xbe978e8aecab7ddaL, + 0xda0244e8968c2eb6L,0x50b38345cee91ed9L,0x000000000000014bL } }, + /* 13 << 350 */ + { { 0xe86714c444bbba14L,0x4a0ba67fe0ebe2c7L,0x4d7b6342e19e2833L, + 0x29e0766b768b7bbfL,0x109e7003d5b9f660L,0xcfaf2ddb39cb47bfL, + 0x4be8337c9fd257d6L,0x5ee652afd3082203L,0x0000000000000185L }, + { 0x0eace717a4e898fbL,0x936b7743a3ed5280L,0x13bb777fe20eb2d5L, + 0x6e3fe7ac2e467eeeL,0x8b1bc5234bf9a409L,0x8e6eb8d336a9bc8aL, + 0xcb22f89c060f98a2L,0x18a47fe4fe67785fL,0x0000000000000171L } }, + /* 14 << 350 */ + { { 0xfb7641f78dd2f484L,0xd3968fdc8458af7fL,0x49aae2a03f2256aeL, + 0x3868855c184b5272L,0x02207efdb0964247L,0xc2e07ae88f026dacL, + 0x6043c98a10fccbd7L,0xa0dbf1ec385aaccfL,0x000000000000014aL }, + { 0x8e75ed542cffc319L,0x83c12af3727fb8e1L,0xf3fb0e7d5665201cL, + 0x282b216e2c59469aL,0x7a71d0b07a2722ccL,0x3eab574727700dc1L, + 0xbec4c097b809482dL,0x5363ca88baa425a3L,0x0000000000000182L } }, + /* 15 << 350 */ + { { 0x3e16bd1f7200a8ddL,0x028318f4b10ec669L,0x1fa937c6b9c35fa7L, + 0x0ca277d969c407d8L,0xe0230f44ed5a32efL,0xa04df533e9dc21fcL, + 0x5775e2b0847b808fL,0x7319f97b14c0cb44L,0x00000000000001c6L }, + { 0x4f313cb5e7259335L,0xc2de5cf8a2e9a717L,0xc6daf9a41a0c65a2L, + 0xe304c3aec39e1b4aL,0xa9a178e03cfad39cL,0x35dd14cdd2555011L, + 0xdaa2701adf9b2103L,0xbf3d425b40c31b92L,0x0000000000000087L } }, + /* 16 << 350 */ + { { 0x01188022f8816103L,0x64e1c614db3855d8L,0xcd4bd4c229a7c0d2L, + 0xd7ec42894d5ece84L,0x947a8bdc722fc703L,0xa4c09186bfebacffL, + 0xf7fda81edcf3b746L,0x3bf6dc1b435034fcL,0x00000000000000c8L }, + { 0x319fc666878b4418L,0xd600347ccfa14a68L,0x5abcf8fc912b3c2bL, + 0xc82c5e12c5cd581dL,0x022f767c50a27865L,0x5c606862ead1f5f7L, + 0x2702e8192259d674L,0xae371a48e02204abL,0x000000000000001bL } }, + /* 17 << 350 */ + { { 0x97e034b8a9080de0L,0xd85ffb9e7489f5dcL,0x5e82572b143fa372L, + 0x0011ac86471e02ecL,0xedb1f0e68bd15910L,0x9f88d9fb76ba5f6bL, + 0xa93eec11e73fd22cL,0xf4980f348e89e8d6L,0x0000000000000131L }, + { 
0x6972155ccb678eeeL,0x8450fe29b3f64220L,0x92a56dd2a22f685fL, + 0x5938dc70bb05e6cbL,0x83b058f8c28d997dL,0x570df3b018f71a2aL, + 0xb7cea974dc59cccfL,0xdf9376c83099d0fcL,0x000000000000006dL } }, + /* 18 << 350 */ + { { 0x767a6f66b1d1d2c2L,0xa1322854fe1b6fa1L,0x4be1a1cc6461d755L, + 0xeed07ec250c221bbL,0x1ecbc0e0d9a306f0L,0xbe134f4df014a398L, + 0x5b391aae8a62aa33L,0xf89d0bd98f8374d9L,0x00000000000001cbL }, + { 0x834f7a49bd81ad66L,0xc0847d0c5b161a3bL,0x55ca398ece94c03dL, + 0xee52a9325f7e7da9L,0x3c0764552256b0f3L,0x70a087cd10daae34L, + 0x1e749e8b041e8275L,0x32e770866583c7c2L,0x0000000000000187L } }, + /* 19 << 350 */ + { { 0x1705841bcd474570L,0x8286faca79fdf349L,0xfc5cfc0fab4d7902L, + 0xeaa4781d494df4d3L,0x160dd228a8d6515bL,0xc5e6a4393fac5de4L, + 0x6f080b52e14684a4L,0xa1171a632a68c81eL,0x0000000000000185L }, + { 0xf6da51320bd7b821L,0xf02d821685f6e165L,0xe4fc78f80c8b9ae0L, + 0x0cb0278872ccbb8aL,0x37972865c36b4672L,0xa2e0aded0ddef47cL, + 0xcdb8736483760595L,0x5ca51c2951194739L,0x000000000000003cL } }, + /* 20 << 350 */ + { { 0x2af67b2b1d6f6e88L,0x02b012d358551562L,0x6b82b1fecafa5d8aL, + 0x7f82b4e2e6fefe9bL,0xac91d292e6b8bbdcL,0x1e330708aeb0467dL, + 0xe5723339d2226ec1L,0xdcb1dfc411ce4198L,0x000000000000017cL }, + { 0x4c55853aa74e39fbL,0xdeadb9343e55add6L,0xbb7f6b234d9052ceL, + 0xc39590032f3e7badL,0x78636279ad990758L,0xc476679e34bcb90bL, + 0x9ab2badcf9fe3f2aL,0x414a32c01b69ef65L,0x00000000000000c1L } }, + /* 21 << 350 */ + { { 0xf972e207226d5378L,0xcd708a71ff9ac89fL,0xbf32f744ba4d2463L, + 0x46fe3a5fe3e118afL,0x746a334f64442a45L,0x716caa321f011c2cL, + 0xfbfd3914b0a5fee4L,0xc1e251fd8637d992L,0x0000000000000086L }, + { 0xb45d3c09336dde94L,0x418b61d9bab03eaaL,0x1ec1facd52276f8cL, + 0x31a2838d276b4c3eL,0x5c0ac0e5344b4fc0L,0x84c5b86c7802d850L, + 0x7f2bde812d1f1518L,0x749429ab89820544L,0x00000000000000a6L } }, + /* 22 << 350 */ + { { 0x8fd557cda14acfe3L,0xa78896e226a144cfL,0x9171e9bb7e81425eL, + 0x1b03073ac232010fL,0xa2a67f9e4506d254L,0x9c74103f9b9e87abL, + 
0xf03b5f8cfd9e2110L,0x701e9602fceae96eL,0x000000000000008eL }, + { 0xed614e9542b8326bL,0x1deca584f7c276fbL,0xc1c4acbc5c38606cL, + 0x6c1d93a6a03fce8bL,0x114af997f1d63590L,0x63ff01ed89679610L, + 0x6c1091e92ef1af27L,0x7577c9b8e47fbe2bL,0x0000000000000115L } }, + /* 23 << 350 */ + { { 0xe1e509705873e203L,0x92537e522ccb0f6cL,0x57f7ce82a48a196bL, + 0x355baf4fa3f71850L,0x564770cb0fe17d67L,0x5f17428ce88a7b9aL, + 0x2ea41803c7ede7e9L,0xcb4b4df25e9ae4d6L,0x00000000000001a9L }, + { 0xd9149883a345b2baL,0x5f6edbf7aff49765L,0x6ccb568246bb6e78L, + 0x28df77696f46d96eL,0xba7ce7265c6d0f7eL,0xef69e4d2e8c413d3L, + 0x12be550be5d1ee20L,0xfee595f001ecba93L,0x0000000000000116L } }, + /* 24 << 350 */ + { { 0x114453e727f2f67eL,0x7199960246edb75aL,0x481cfc650714f2ecL, + 0x7a359e11bd48c221L,0x6807f0b5bf80fbdcL,0xdfe57fff51b3a6fdL, + 0xe58f3c47b6794229L,0x7f22cf6adef683baL,0x00000000000001a6L }, + { 0x551bbae28c39f9afL,0x5057c3a7b1e075dbL,0xeeedd6009e0d386aL, + 0x8d60b730715b2507L,0xd65f990b840be62eL,0x666437b2287cc361L, + 0x11c2019c9cfe886bL,0xbeef8eb4ebf4ce61L,0x0000000000000008L } }, + /* 25 << 350 */ + { { 0x71df4f783f6d4a3dL,0x6ac9564700648a32L,0xb4173a57d4684f87L, + 0xf0ae2e221d713b5dL,0xe8eef4e62c7e0fc5L,0xd646dd421ec5f774L, + 0xc5e4f3dc1af0e91dL,0x73fcee0ba0aeff88L,0x0000000000000123L }, + { 0x054667e6cb349e02L,0x2d76accf002f27bcL,0x639a3672d120b014L, + 0x8402c2d156548bc6L,0xfdcef81372e04759L,0x09aed35779d36956L, + 0x5d0843b61de45356L,0x16e122f7b2d31760L,0x00000000000001c7L } }, + /* 26 << 350 */ + { { 0x5ff6e5554594bca8L,0x85a83cd1bcb20291L,0x4202282673cb711aL, + 0x24303de0175675c0L,0x426f264d2756e198L,0xbf84530704f7f348L, + 0xe8203455e5137f75L,0x5ab0febd0a6119acL,0x00000000000000a3L }, + { 0x7e826f3fc403eb15L,0x84d63da08c0152f8L,0x4c0b1ad51370c544L, + 0x3a5afc691444a291L,0x4ffe236f29fec214L,0xb55331949df836dbL, + 0x816a0784d7e7ef89L,0x28a78c1c534192f2L,0x000000000000018bL } }, + /* 27 << 350 */ + { { 0x1ef828db53638a6eL,0x946738cd62a782dfL,0x126f1b39d74972fdL, + 
0xae08daf5321db625L,0x8ed69c8a39bf7767L,0x3c65874016d76e57L, + 0xbebf5778cac27566L,0xcfbcae3b36ef536dL,0x0000000000000088L }, + { 0x3f548dcbd43d8987L,0xcfe2a5921e3b8a98L,0x19ec85179e1c2d42L, + 0x9ebfff35ffe49227L,0x127821fc499ee1a3L,0xf02ebf0ae455b9feL, + 0x6e9864a56340b993L,0x615e97fedcc15810L,0x0000000000000012L } }, + /* 28 << 350 */ + { { 0x422833d5e2cce69eL,0x3c1c8bdb69ff8b37L,0x44d41dfbc9fb427fL, + 0x15bee2ddc54a1824L,0x9c804025e8ce4aeeL,0x5bcc7e5d9e5665b2L, + 0xf38636a040b6270dL,0x84cada1a60b6778eL,0x000000000000014bL }, + { 0x28b4411b9b47ccbbL,0xe4bc1e916c368197L,0xc0f36e6b2562c900L, + 0xc60d562888c55430L,0x2e9613dffb81c20fL,0xfdd934ed56872b5cL, + 0x6bacd3ad187bbdddL,0xf6f821d7d1e42d9bL,0x000000000000012eL } }, + /* 29 << 350 */ + { { 0xcd767d6025104f3eL,0xe0d35530b0b2f859L,0x6ca28dab02abdd82L, + 0x6067b9bbdf3510a2L,0xdedc9f658f189d83L,0x80496472f42b7bcaL, + 0x9c8802199fd57966L,0xe6774cf970aaf3bcL,0x0000000000000107L }, + { 0xb1fb2955713d14faL,0x9ea12ac28769fe2bL,0xb0a35fbaeff1af3aL, + 0x1fbdd1db98d8b108L,0x6d7db12ef9b6f0a4L,0x0a56e4cbdadac116L, + 0x08431700b1c6b49eL,0xea501d0b69c53deaL,0x000000000000001cL } }, + /* 30 << 350 */ + { { 0x28007de29245c149L,0x0befcc909b90ec07L,0x734c6339e8639957L, + 0x3f754697d0a82a41L,0x4336d11d47c0be63L,0x8325f084bda10defL, + 0xff63085e6c2de53cL,0x67ee3dfcd36ba132L,0x0000000000000057L }, + { 0x315b2f045951d063L,0x5c7f6d6eb6f8a257L,0x4783c43abe41ee82L, + 0x08bf94b9341eda90L,0x85f12aced8558536L,0xb6cef1f47c1a76afL, + 0x10c93a9304c6e842L,0x48967ee1f5ea5860L,0x0000000000000074L } }, + /* 31 << 350 */ + { { 0x66227cd38c9b254eL,0xc23c940e19fc7eaeL,0x20d5a008c3ccf958L, + 0xd81a909cec5f6eb2L,0xfa042069526ce89eL,0x9d5429428e7af449L, + 0x79cd81ded46860c7L,0x490a80a877275fc4L,0x0000000000000061L }, + { 0x056ee04b63c81130L,0x4c31c93ad967f41dL,0x4f91d3347b98d3c1L, + 0x5a584efc0f5d4299L,0xb591f98e4bb653f4L,0xa4bdd54c2f4e0cabL, + 0xe2b6ae4e8905a6eaL,0x0101e1289d3abe7dL,0x0000000000000087L } }, + /* 32 << 350 */ + { { 
0x15581925de0d59fdL,0x861dbef21e668987L,0x000ca03681fdaea7L, + 0xee45e5edb5007bf9L,0xd98680509485c5fbL,0xd58ca541e2a800b9L, + 0xa0d643df05e1bf66L,0x3db52cbfbdcf7da2L,0x000000000000011cL }, + { 0x0a1db94387af7109L,0x33754c60fd35138cL,0x2bdff118fc19982aL, + 0x2e8eb645d22ccc69L,0xa7873cd2c73e39d4L,0xd024363e1d04ff18L, + 0x492503d60cc6abdbL,0xade16418185ab722L,0x000000000000015dL } }, + /* 33 << 350 */ + { { 0x69f6ad632c6d6564L,0x547345e6aaa70f65L,0xfe220b531e796903L, + 0x9dc4208eac90043aL,0xdbcb2f9b1ac77eb0L,0x1509e41b952cb582L, + 0x5fe81cfafa615100L,0x271bdeea16628120L,0x0000000000000084L }, + { 0x09d9f31d93ba6f29L,0x87762e121d5b9333L,0x5c78a0f96729ad96L, + 0x252d6ba8fb40c1fdL,0xfa3b5aae976145fbL,0x3838d4e64345c9ebL, + 0x90a0427561b22f62L,0x3f2b49f9eade3582L,0x0000000000000139L } }, + /* 34 << 350 */ + { { 0x7e06283b88db8098L,0x5254f2d8bdc2ee3bL,0x19a4b99da7d1d2bcL, + 0x5b7cb7c9c823fcafL,0x1610c35b55920769L,0xe39bf6381629e60eL, + 0xbb17270b9ffdb484L,0x41f2041ae9758c69L,0x00000000000001fbL }, + { 0x51a46b85b2c0c563L,0x2b69e1e711275735L,0xb671220e53b3dcbeL, + 0x7340bec853362fa3L,0x6f57ddb541eb7772L,0xdf0ee415cb596fb9L, + 0xc417728bff8477c1L,0x684c4fa4c3d566e6L,0x00000000000001d3L } }, + /* 35 << 350 */ + { { 0x43631e362ace4d30L,0x476ddfb7e15627dfL,0xd855ee87e9821f21L, + 0xaafc352d13a160fcL,0xd491b1e722a31120L,0x2e330621a26e8121L, + 0xb50d21650feadde8L,0x223817649b3f8fdbL,0x0000000000000161L }, + { 0x577fc13560e6851fL,0xf4782a8cb3f655caL,0x74d5c579b0f5f244L, + 0xdc93688ba2d001acL,0xa28376ef70768956L,0xbcb66d8a81df4a2eL, + 0x2d1fd4b5f6ec5bb9L,0x95fa7d6ebc9d4619L,0x0000000000000069L } }, + /* 36 << 350 */ + { { 0x9c427efd472a1c76L,0xbb1abff22bb334b6L,0xec7c11468050c592L, + 0x3795ab5851c77dffL,0x403fa93cd0a8565aL,0x7ea1b2ca8560a630L, + 0x644b40bfd92ff341L,0xb83c8b3e25f2cbbaL,0x0000000000000178L }, + { 0xe46c901a355d1a67L,0x0c8aab17077d71ccL,0x7d04573304f187a4L, + 0x3849de53bd2def81L,0xb0b7dd7bb334041bL,0xb05f3b1d245df6e6L, + 
0x561b16a93433e78cL,0x25e1f0a1995c0111L,0x00000000000001b1L } }, + /* 37 << 350 */ + { { 0x998716bf439d38a7L,0xd7f8d80f14ec2b79L,0x52cc523e0a116fbdL, + 0xb6fa8f35c553eb57L,0x2a9daed4b1df475eL,0x0f2a03f56efb1ea3L, + 0xd87de564a207c6daL,0x0d29f1b85c150d6eL,0x000000000000000dL }, + { 0x35c9657e7f6bd7b5L,0xcc1b5f16e96359f6L,0xc90a1783a5f70bd1L, + 0x27e8319147906464L,0x107cc69df52d2fb1L,0x224e7abbf2dfc93fL, + 0x6e0e805ec41e54bbL,0x30574ac25e5db791L,0x00000000000001d1L } }, + /* 38 << 350 */ + { { 0x32b656e891d18868L,0xadfe8b2599eece6fL,0x6c153a31bdf1972bL, + 0x1b84a569401b1912L,0xa2dd8754adb8c7f0L,0x15bd0ffb165374c2L, + 0x7d7f5807ff9e26d0L,0xe009311e92a12880L,0x0000000000000156L }, + { 0x54d8f6bc9af03d71L,0x1126a5a5e0a9ef63L,0xc1f792885b103cb6L, + 0x5e67739473108a70L,0x9a3befb284db9b2cL,0x9333f1aca12fbb34L, + 0xa5d2a0f07cda2b55L,0x5372d3c2551e98a3L,0x00000000000001a5L } }, + /* 39 << 350 */ + { { 0x3d682a3d4db17b24L,0x7560eaf34eca0d89L,0x47e3a60c20dd7043L, + 0xab67577582a0d23bL,0x2714437809a33798L,0x117b3f8609a73afeL, + 0x15f677792e71e0daL,0x861f0bb2216749aeL,0x0000000000000034L }, + { 0xb6755fb0e3ecd9c8L,0xc2db4ae203f7866fL,0x5d4e3aa16690fbd2L, + 0x1e38e9a09b18ec9cL,0xc2c95824c703231bL,0xa8c457089d8a4205L, + 0x5504ec5e4e6d1e7bL,0x42b6c5bb54127e2cL,0x000000000000016bL } }, + /* 40 << 350 */ + { { 0x94dcb7602d3724f7L,0x092d781e5fcdc7d4L,0xc3fedca6dc823e29L, + 0xc2c19e31aebf4ac9L,0x2aeac69beaee6608L,0x2c2e52376c85a59cL, + 0x08949223a91d8cf7L,0x9ef9b311f0811914L,0x000000000000003bL }, + { 0xbc09e4ad786d0a50L,0x482bc9fd3b427aacL,0x614bde569d2ef9b9L, + 0xd9d846800763d7b6L,0xe1c9849c7685856cL,0x5761de36532a8b13L, + 0xaee9ae018ab8534bL,0x6297f7cdcffe238eL,0x0000000000000045L } }, + /* 41 << 350 */ + { { 0xfab2dfc946c1f383L,0xd3520f6418f56f5aL,0x443d47dcf8bd38b2L, + 0xdb480bec4f279932L,0x5bb596b154a485e4L,0xdcfb04fde2601b21L, + 0x0271c9d48a3b9c6fL,0xf67d082d938b7bb8L,0x000000000000015fL }, + { 0xeef77c2d42f4ac8eL,0x2dd70fb546d67b2cL,0x5e588ff9d68b1997L, + 
0xe29565b76ba5fa5aL,0x6764f0e9d4f57c19L,0xd6b78633e66716aaL, + 0x3483bdb27fd1a5a0L,0x24be2d70396089daL,0x00000000000000d4L } }, + /* 42 << 350 */ + { { 0x13ebb2e30333009bL,0xa842a7f9346ee656L,0xcb9fb67a1cb3b9f9L, + 0x9b5fc3e5903d755cL,0xe776dadc2c519041L,0x1a3bbea475246222L, + 0x8a1f073c2b03e847L,0x85beb02d5652c2deL,0x00000000000000adL }, + { 0xae2eada769e89348L,0xbc20bd57cb14c2cdL,0x079bf80535196234L, + 0xbb48228f19ee0845L,0xf53f5367fb15d09aL,0x256bd1e0244cd8e7L, + 0xd4a0ec81c471a7e8L,0x9ba40ccbd4b56f89L,0x000000000000000aL } }, + /* 43 << 350 */ + { { 0xd8f106f9a769db32L,0xfca92fe9493257bcL,0x32b2bed729a888b6L, + 0x13fc9590529f5843L,0xe2edce083bcd4012L,0xb316e6af3edeb588L, + 0xa0bd8dde9cbb3ae9L,0xddb5d928d8601cbcL,0x0000000000000053L }, + { 0xced907601d3ae473L,0xaf2f245cbe08d340L,0x15eef9c599466d91L, + 0x3bfb970e9a2dd09bL,0x03c79319fd4d1efeL,0x9c6204518ce8f852L, + 0x34b1b4dd37bb72ddL,0x225b8a0b6e356b38L,0x000000000000009fL } }, + /* 44 << 350 */ + { { 0x354d7d39ff3579ceL,0xd61a6c33687273dcL,0x8e79f8ba0bf66ea7L, + 0x0914b68cc532810fL,0x86a1f3be19d74cd1L,0xf663a8d2ea1de01aL, + 0xe890d4e8f989647aL,0x1c31bdc54c3460f6L,0x00000000000000edL }, + { 0xc48f0b654d05c7c2L,0x10f0830687586519L,0x3a28a202b0b0b6edL, + 0xa1ebb5b9a5655d5dL,0xbf19a5ab05790e30L,0x1e4ad7b17d237ad6L, + 0x829ae2daa98c9125L,0x5406f57a544704b3L,0x0000000000000099L } }, + /* 45 << 350 */ + { { 0xb9a01925ff611f79L,0xd0dea6882d74a7d0L,0xd80aae28a95f1244L, + 0xf70563196bebe957L,0x4ae651ef7898c6c5L,0xd293ea624619976fL, + 0xee6b9a710bb857f5L,0x360dd469c307fd59L,0x0000000000000060L }, + { 0x1a0bd0e9eb91952eL,0x30b79c523a77c2b4L,0x847f13fa4936db71L, + 0xcd9fe25186f14605L,0x7f06535625b761afL,0x01d47bae6eb335f1L, + 0xfdd96f8d6c1417adL,0x7dbd3cab6631990eL,0x00000000000000e1L } }, + /* 46 << 350 */ + { { 0xd83c97e2086cf3d4L,0x590b4f8641268fe9L,0x5c1bb1b8a6161137L, + 0x49a11f2f49eeaf5aL,0xc2cf4d03cbb75280L,0x21ce922a465592f9L, + 0x8c6eb3c8ee919d03L,0x5b9a98e8c6b5102dL,0x00000000000001b2L }, + { 
0xb4ef0faef120a8b7L,0x5e89cb627dcb8d5cL,0xcef0e7bef2f28c26L, + 0xb52b392db642cfc8L,0x040a684ef03da73dL,0x5a723b5297933d4fL, + 0x29fee2268dc82a55L,0xe3c0f2c4aca0067bL,0x0000000000000051L } }, + /* 47 << 350 */ + { { 0x8e229d60a712d633L,0x7f1fe86bdd051e59L,0xc11af604d4ee7fdfL, + 0x9c0795378a1c305bL,0x6a1303f3cb306a9dL,0x07ddfe2aa268a120L, + 0xc779ab13dae81354L,0x99f2a4b38406f8f3L,0x00000000000001daL }, + { 0x484fb852a158cfd0L,0xa4af6f9d2ca5e503L,0x4a8f9542f4809264L, + 0xe2c2fbd1146c0eafL,0xf06da61c067ff9c1L,0x33667ed5cd3a0fc2L, + 0x708523a842fcb10bL,0x18b6364260d97a55L,0x00000000000000f9L } }, + /* 48 << 350 */ + { { 0x6af20098edcc581fL,0x744e603fb1d8ef85L,0x77a155e23a4c70c4L, + 0x7cfec62980e719e5L,0x6ad16816771adb9aL,0xab2b9ae6917101b6L, + 0xfa1c8fa122b8606cL,0xc6ade9b6912fc59aL,0x0000000000000099L }, + { 0x5842a39a8bbc4442L,0xf7ccacc0937825eeL,0x8af901af76669dddL, + 0x4cf82e79e7ca6918L,0x379dab5022df76d8L,0x7070302afcc877bdL, + 0xa959a86b0dbbc81cL,0x38c984cb78c2c637L,0x000000000000008aL } }, + /* 49 << 350 */ + { { 0xe5e34c9afe2aa4e3L,0xbeef09e66cbd98ceL,0xf2326ace0f624fe7L, + 0x3145bf2e6b6bd9e5L,0x8e26b154371241dbL,0xa8d70886b20897bfL, + 0x240fcf30e4ec8a60L,0x065ad618b722ddfaL,0x0000000000000184L }, + { 0x04e64b08825ed711L,0x2bc526e052ae37dfL,0x0d76611b2229209cL, + 0xc5ec72065543bb31L,0x5d661329bd37b323L,0x4aee961c97f208f9L, + 0x296e8e5af8bcea25L,0x55be6c51c7f25a43L,0x0000000000000183L } }, + /* 50 << 350 */ + { { 0xbd485a2168b9bac6L,0x96f8ca66b92c9bd3L,0x54651d6c9d5cd2ccL, + 0x85a397f49b4d3558L,0xf9d46be109323661L,0x0345303a0efc8936L, + 0xd7cb4ab7b2193896L,0x3838fc1e8567d8a5L,0x0000000000000092L }, + { 0x9d27d6cc5888449bL,0x92fb49af08f1717cL,0x7bd684a61b7df8faL, + 0x8d22465d49ddd9f5L,0xea8a1ab00ce6997cL,0x8a1f6f92b62a9a5dL, + 0xba4ee8c067caa406L,0xeea34c787853c0fbL,0x0000000000000153L } }, + /* 51 << 350 */ + { { 0xcca88cfe71c9210eL,0x80dfea7f906985e3L,0x2d6d64511adc705aL, + 0x1f00f1cd6a679020L,0xc90876f676b76b73L,0xb6782716fda27e31L, + 
0x38816e90e3a35a4cL,0x68eccad2ba92fb37L,0x00000000000000a0L }, + { 0xe79f9da1ee9ca568L,0xf5177267a2f60f20L,0x51bd81c4442792a4L, + 0x16871ba6b31012d2L,0x423ae3d46c49f876L,0xc1761593ca51334aL, + 0xe3e4701cbf879981L,0x718001bdf5648e7bL,0x000000000000006aL } }, + /* 52 << 350 */ + { { 0xe05ad50282808e09L,0x7b5dc16001e4f0b6L,0x78d1ca1933a6eea2L, + 0x9af18043509cb3caL,0x1e796ce5ff953005L,0xcac11cada26b33f2L, + 0xfb9db0d67d84e525L,0x1b66890a8bb37875L,0x00000000000000adL }, + { 0x7bdca42375d780efL,0x0af21a0bd16d7858L,0xb56371f7cf42aee6L, + 0x0ed2542214bc7ae3L,0xafafd8948c5de97eL,0x340a2c54bb065e75L, + 0x4adb4195861c19d2L,0x98c390dba914dc62L,0x000000000000001fL } }, + /* 53 << 350 */ + { { 0xa7f5f77023a25854L,0x95c125267a96c4dbL,0x61219e73c1777cacL, + 0xc6fee78ab8df403dL,0x78e99f85fbf090aeL,0x9b7e068c86a1283dL, + 0xbe9e1b8565ed0dcdL,0xf7a0bb1948a40837L,0x00000000000000bfL }, + { 0xd389955952c2db22L,0x40469180e852e506L,0x450f976387d5db74L, + 0x7cd7fc5ccc0b418cL,0x1fb11db4a5fe3416L,0x4b72467f1c837bd7L, + 0x5619b345e8279f50L,0x4d0dea319ff61f46L,0x0000000000000107L } }, + /* 54 << 350 */ + { { 0xa948d7bf766acf6cL,0x8f741ac1004e4c47L,0x7f1a1ef4defbd931L, + 0x5c0d5e57e6cfa20eL,0x496aead973d9db88L,0xb5f11ff5d6f826d7L, + 0x668dd88800cd9347L,0x22f18499a4ba53dbL,0x00000000000000d5L }, + { 0x420e05d696fbb953L,0xbe035ecb7c5c8155L,0x8027eea1d72b42f1L, + 0xd679566ddb39e9c0L,0x60205b0969fc5b43L,0x9a8ba2d92cfe4776L, + 0xb6677a309c14db60L,0xf9a134b3f1fc12dfL,0x0000000000000024L } }, + /* 55 << 350 */ + { { 0xc696cf6211e1d5caL,0xbbb285ad84300330L,0xf241d535d08e2f4aL, + 0x0c85f24ee196ee2eL,0x2b070d57bf38d376L,0x7ef93bd06a98849eL, + 0xb45d3d7bee04656fL,0x5fe517a5ad03b679L,0x000000000000015bL }, + { 0xd3536e2059c48b4dL,0x5cf6b777913f2941L,0xab4e101bd476b4e6L, + 0x1f717cd6a1e63091L,0x65c565f6c58ba036L,0x72bc476f79c53ac6L, + 0x467ede3be42bf81bL,0xc31436b46e7c6917L,0x0000000000000161L } }, + /* 56 << 350 */ + { { 0xe8077e42ed76067aL,0x7a4d661b5885cfc8L,0xed03423d6e12c1efL, + 
0x2af0bd867d62b094L,0x5d5c959dbcde7990L,0x91f3292361c8c493L, + 0xd3a2d1371ddd8404L,0x43cf907ee9416bdaL,0x0000000000000179L }, + { 0xadd9d450f88ab3d4L,0x26abefd97b3c1b2cL,0x0eabdf60f0ec7168L, + 0x48584461499d981eL,0xe2024f8a65f1e952L,0xb13926bdcc1fc913L, + 0xe32e53c3ba02b960L,0x44b16d28b114c958L,0x000000000000003bL } }, + /* 57 << 350 */ + { { 0x0015027283ce0bcbL,0x5a6f1e8ad0a33983L,0x4d483980f1c66143L, + 0xb37a9caf03204939L,0xb5e0caab9891623fL,0xaa5e602a0af5cfd2L, + 0xdb19404a6aeb8e1fL,0x5e564a47d547c845L,0x0000000000000077L }, + { 0x6502ddab6fcff0b4L,0x9b3b917c3c4d269fL,0x4277a91c89572b60L, + 0xc4ab1a611c4fc851L,0x5e50a72e44e44005L,0xfcbb6af8780ad0dcL, + 0xb4a90497c67497ebL,0xe9db959ad12b1f98L,0x00000000000001a9L } }, + /* 58 << 350 */ + { { 0xdcad1a7a26b19fb9L,0x0c1c5ff9750b92a2L,0x0f1fa28b058f95fcL, + 0xb032e27098ee6b1eL,0xec3517835342789bL,0x61a075fe7dfa4090L, + 0x2f5bf3468f71a0faL,0x3c420b15ba67509aL,0x0000000000000179L }, + { 0x96facab3da8a643bL,0x1072c6a32b8a5245L,0x9b4459674b2a6bacL, + 0xe7f134edb7657d74L,0x6fa48f3fa4ef87d2L,0x716df761e3af3715L, + 0xe49d3969fc7f3d67L,0xd570e53846ba85dcL,0x0000000000000096L } }, + /* 59 << 350 */ + { { 0x47a2e7b45d421347L,0x049637bb55c6424fL,0x5213873c8891d0afL, + 0x06df936818937d59L,0xab5b355bb5d15bd8L,0xd7cf6264f203554bL, + 0xbc1ddb87b0e99f34L,0x8fbc558ac4d2b9e1L,0x0000000000000084L }, + { 0xb56dec543b99c8abL,0x41e629b48623e2feL,0x47827ed735ba9b9fL, + 0x23b57733e9486cfeL,0xa700b45f5fd4f31bL,0xb79d74ad710066b8L, + 0x7b479783645d8579L,0xee14d73cf1a07835L,0x00000000000001abL } }, + /* 60 << 350 */ + { { 0xe769d8f767a1352dL,0x49b1d2c3e9856974L,0x9c15a1fc86e54afeL, + 0x2d2c7e68b0e5b119L,0x3556ac15432cf4a2L,0xe25ebe20f39d2d76L, + 0x048843ae51e8c89eL,0xce4822e802531be3L,0x000000000000004dL }, + { 0x4992a988c947b6d7L,0x7eb5fe67eb717c00L,0xd4f2215be33b7f1fL, + 0x32b9fcc2a4511d0aL,0x4884419d4d1089c0L,0xd7457e3e3dead6f2L, + 0xf12c9f16c66123f8L,0x2ff7bccda2831727L,0x0000000000000025L } }, + /* 61 << 350 */ + { { 
0x0e0272a0d8d3b893L,0x63fb603b4c72d7cdL,0x2f171bdfcf3b582aL, + 0x624213d2708de1bcL,0x8a005cb66837c905L,0xd4146129e3c98adaL, + 0x1ab6660facaabcb7L,0x0db3d2d28ac26da3L,0x00000000000000d5L }, + { 0x48509cfbc07194c3L,0xff8f379fd2f9b6d4L,0x08de73963fc8d8cfL, + 0x01b7305c8cdb7e13L,0xc49a1ceb4975a9a1L,0x5d43e036ac7b27e9L, + 0xd966fb67695b828fL,0xc7ec9f958dbdffebL,0x00000000000000acL } }, + /* 62 << 350 */ + { { 0xb044c5b0a1f25fdcL,0x6bd4233dcf34c1edL,0xbdbfa1ee0d505218L, + 0x1437801986e88bfbL,0x954329dfc9a0bf44L,0x7fe788acfcddd582L, + 0xc89b05d567f5acc9L,0xfb5501ae95b0f029L,0x000000000000008dL }, + { 0xd48fc71fa2c08347L,0xd7b35093f58203e8L,0x577552bf58cd9e2eL, + 0x4832a9a2b63e6139L,0x6c51bff38d52008aL,0xb06b694411d85498L, + 0x617537ad72466c2bL,0xd62576bc4e09a213L,0x0000000000000087L } }, + /* 63 << 350 */ + { { 0x66ced69ad8552a05L,0x53532f6f8c357a98L,0xf59b886be8474111L, + 0xa434122383b36ae0L,0x5116daeee9fb4e22L,0xcbf240f8f9944958L, + 0x7f5ee08c446b39b9L,0x7b1439a31d6d5952L,0x0000000000000013L }, + { 0x1bd881fb009f21c3L,0xd0e41f4209dc3ff4L,0xa0a93d64bf8bd854L, + 0x4755443ddf4bce85L,0x2c7dd3ae5137e57cL,0xb46efb67abbc8e6dL, + 0xa9992f040bb87209L,0xf87c5f8f5b4ea762L,0x00000000000001b3L } }, + /* 64 << 350 */ + { { 0x58e8ca2cea2e4075L,0x5307ba37f271d545L,0x14de3b6e7ae7ad07L, + 0x6bf3dac6169efeb3L,0x7be665b6f0996ac4L,0x3cbd076660a81ea5L, + 0x93369d84b0876044L,0x33cd68dc8a489412L,0x0000000000000133L }, + { 0x44912d16dca5b802L,0x3c30f445808db8c1L,0xd59e92904d2b7d5cL, + 0xc1e20a6b2697a600L,0xfbbe023c3a028772L,0x8873d0bbc6e3b099L, + 0x5fe76ac9050828adL,0xcf2ac286c0ab0ab0L,0x000000000000003eL } }, + /* 0 << 357 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 357 */ + { { 0x5fc885130076c379L,0x85e6f65cf31e63d1L,0x5def4f28ebc87b34L, + 0x74d28f51ed0be6efL,0x1b83692d38771840L,0xd791f738a859b7beL, + 0xed0cc1fea02d058eL,0x2b1befc5fecd2e54L,0x00000000000001a5L }, + { 
0x63864b8948cca591L,0x38195d6129335913L,0x8dc688bcc799aae4L, + 0xb6d23959417331b4L,0x3d08b4f2743f8e65L,0xd46215b24fec2809L, + 0xb37d17000497e0f5L,0x6bb17cc026ee48a6L,0x00000000000001e7L } }, + /* 2 << 357 */ + { { 0x59614f8580c55f68L,0x9dcd662c60ced87bL,0xe6dc932bf852f5aeL, + 0x9db84a914f45a31eL,0x6062b2d6babea7acL,0x54a5881d28b65906L, + 0x551e0b60b0f80dbfL,0xdb468350b286f88cL,0x000000000000005bL }, + { 0x905656106bbb805fL,0xe38f65a46668e10cL,0xe619256657f2578fL, + 0xc26d80922a72a1abL,0x904a9a525119d939L,0x622f5869d6709295L, + 0x0a6ed42974543616L,0x8dad3c361c388e75L,0x0000000000000135L } }, + /* 3 << 357 */ + { { 0x8bf2fd7e6dae890dL,0x6b675c1fc9028f7eL,0x627b27a810d1a8c4L, + 0xd24c988bf522e753L,0xab18f36ae0be9869L,0x9ff0e1fa962cdfa1L, + 0x14911ef55945db5eL,0x6fd48fb37652ab6dL,0x00000000000001deL }, + { 0xf8184ce8fbb2e5ebL,0x02056b25b09791edL,0x2e11264024abee11L, + 0xcf22e0fec5119309L,0x64e4ed26136a9cd4L,0x16bbcc713e9b4baaL, + 0xf2970b1f83d64789L,0x82c66547ec9a3758L,0x00000000000001cfL } }, + /* 4 << 357 */ + { { 0x9bf40d7110205c46L,0x10f27886161df55dL,0xdda5b7624763c7b1L, + 0x4bce55110fbe9f17L,0x8fa954484bd30a7dL,0x302e33b31d27e799L, + 0x4ddde60f9752ffd2L,0x98ddb153cded611dL,0x0000000000000069L }, + { 0xdd9abcbb3e817caeL,0x3c6b3ffd52c627c2L,0x3209e7b3e28c3dfcL, + 0x376f4071edb287bdL,0x9ddaaa58af0fab10L,0x969074cff859b34eL, + 0xe16aec58c87e6ef9L,0xc5c7d470d113b6e0L,0x0000000000000066L } }, + /* 5 << 357 */ + { { 0xf0007020c199ebccL,0x1b8570d6f910a2a2L,0x35daadb9b2f82126L, + 0x49a1e3218ca81d3aL,0x608ed9749a7ad74aL,0x92ae9decbb27faf9L, + 0xea7db297dedcc552L,0xb61e57ff65e726f1L,0x00000000000001deL }, + { 0x8b30b706d4b6b7e1L,0x9dcfebdd75d5772cL,0xa1e51c660b5f72fbL, + 0xc594354756a7d61dL,0x45520f75912e1f45L,0x5c5df563771562edL, + 0x8a2ee7850bc64d28L,0xd2c21b1430d7b664L,0x00000000000001b5L } }, + /* 6 << 357 */ + { { 0x845cd18adeaefda9L,0x0e694ca269b50e21L,0x060bd03952e5334bL, + 0x4ab963a525bae929L,0x7904f9fd2a8fceb0L,0x535df726e889b7beL, + 
0x1fe0c30dab8d94f2L,0xddc0e7323f92e2a5L,0x0000000000000106L }, + { 0xcc4c26e5404844ffL,0x53c01a4d301aa56dL,0xf7eff6a4a13a9558L, + 0x994253bcb771fc98L,0x5b9323e8b1d9733cL,0x29ed522b1cb7c6acL, + 0xe537003a3e4426a7L,0xbd83f66c36f23bb7L,0x00000000000001afL } }, + /* 7 << 357 */ + { { 0x732634970e7e5ee4L,0x01e0819a4f8e43afL,0x04dc6911b2620d54L, + 0x5908bdeab166dd4bL,0xc27f597530b81a6eL,0x4f969894d47d8ce4L, + 0x7d26b944a9b9b5fbL,0x1c77d724f2e481a0L,0x0000000000000007L }, + { 0x99eafe7b9fefc298L,0xafeab30d35dfc4ceL,0xd9868f02b3fd8794L, + 0xe8470c347893a641L,0xbde8016f9a5f544cL,0x5735714a2d93e0baL, + 0x72d7a1dc41d47e55L,0xced50d7a89fe1dc2L,0x0000000000000137L } }, + /* 8 << 357 */ + { { 0x9dae6d7af8ece594L,0x7ad12f48cce0b041L,0x9d97efc7e7374970L, + 0x7b8b1668311f5e2eL,0x0d68a7b6bb741640L,0xf9ea3dea9be65aa4L, + 0xaa3e6811a11c78a5L,0x155ad4e50ac6e65eL,0x0000000000000062L }, + { 0x5936d4fefebbfad7L,0xc94f1262a30ed6c6L,0x6013faf4b2081a7dL, + 0x684ffdbf85a4e883L,0xaaff243819dbdad1L,0x8f3484441f91e40fL, + 0x145d06f61ecea707L,0xeb7a4980ecbade82L,0x000000000000004dL } }, + /* 9 << 357 */ + { { 0xaf2d0a71a39faa14L,0x8c192e6be5bb2afeL,0xd62e45a44cfcdd3dL, + 0x7b553314289c2da9L,0xfc4135f79db2af8aL,0x9f4be8fde9590378L, + 0x20cb71f77e1f9d5cL,0x7b970c069a6d8b0eL,0x0000000000000090L }, + { 0xdad63341ad2a5036L,0x8939b2fc3a6dbfb5L,0xd660a719b32c3bd2L, + 0x61fea0eae187ad41L,0xc6e6c4ea766eed17L,0x0e85a989e14c91a0L, + 0x8bf97e641167f7f1L,0x7a2eecaf37718ebfL,0x00000000000000d9L } }, + /* 10 << 357 */ + { { 0x374b1611a7274ad1L,0x2a0b82fdfe17b597L,0xa38209dedbe87483L, + 0x2d31a7607617b6e9L,0x8b68a60b8c2631d7L,0xe290dfe601490869L, + 0x9eedf0d622e67ae5L,0x17cdcb0f9c04930cL,0x00000000000001e0L }, + { 0x8946af4e6874b6ecL,0x76be50db614e6518L,0xaf1e6990ff304e05L, + 0x810413073edb16eeL,0x9d7160e06d0ff72cL,0x580b29bd12ef3a1dL, + 0x47c5d81c5af9a9f4L,0xf96ef0842d828b7fL,0x000000000000009eL } }, + /* 11 << 357 */ + { { 0xa49b5b4d854620c5L,0x9e05bef6a25f2145L,0x53939ef2c63ed521L, + 
0x436783a1db13d740L,0x38f3aa8913f1e4b7L,0x23317818c364e263L, + 0x301a26769ed21c63L,0x6e11a2342730950dL,0x00000000000001fdL }, + { 0xc0dd00fe5bb3e26dL,0x0af3994912689233L,0x7ac01fb7c309b0bdL, + 0x21ee2903f25bebf8L,0x9d9ae3a052f8b604L,0x3602803614c273dfL, + 0x26126518bf5a2cf8L,0x022b5683c773cfd8L,0x00000000000000fdL } }, + /* 12 << 357 */ + { { 0xad453d3e15710148L,0xd5a72dcf93265c01L,0x153d58940f137f83L, + 0xd15b25236577f8f0L,0xc378424bb1bd2bcaL,0x7ad83eb42dcc38e1L, + 0xbe6c5006dea00d85L,0xf381d7b3003be5f5L,0x00000000000001cbL }, + { 0x4290cf81a85be17cL,0xc4387af383bc4817L,0x81afe2281c4b8079L, + 0xf231b430a26513baL,0x1d86f33cd7539aeaL,0x1bb43b3e9c4253fdL, + 0x4eb49f8461000d51L,0xeaf801bc8115d791L,0x0000000000000053L } }, + /* 13 << 357 */ + { { 0x4fb83cd0f34cb82fL,0x69c93fd4019a99e9L,0xf9b3eb3e08b00516L, + 0xcf76e0e08510cc7aL,0xe20bf27cacf20956L,0xa953142555e33a65L, + 0x03e4e262d550453dL,0x493cf4e09dd063a0L,0x000000000000004eL }, + { 0x677087f92a9ac3feL,0x4e26f17fdd36a879L,0x35822e0e3ced41c4L, + 0x0e5d370bf3c6996cL,0xc7a460b123afc844L,0x2b4d9af0771476f7L, + 0x5c3fffd48f8c1c25L,0xb02bbf7ab8ab8cb2L,0x000000000000016bL } }, + /* 14 << 357 */ + { { 0xfafd5607c5208d6fL,0x827f270e00c9ba87L,0xe8e7a1c84b607882L, + 0xdd16443cf35d1a14L,0x2bd6a6a7bedfd9feL,0x62942d8dc5b2c568L, + 0x590c4759127313a4L,0xc53b5fc26501778cL,0x000000000000009dL }, + { 0x77c992e56221cd29L,0x1087ff0a2a5ecbeaL,0x1fca36f63f82e7dfL, + 0xdd6bd34d86fcbc98L,0x1edd8206d7088562L,0x2d9973d492bd31bbL, + 0x7f0e3aaba49088c9L,0x5a80616ca6b9082cL,0x00000000000001a6L } }, + /* 15 << 357 */ + { { 0x6c9c00cc9425d127L,0xb73e7c9e46995ad4L,0x63ea143a999706a4L, + 0x6c760fa3855b0262L,0x596b82bd2e3f833cL,0x6d1125d423701b52L, + 0xe44c7901fd4fdba1L,0x6e669d073ac0d4f9L,0x00000000000001e2L }, + { 0xa580b58e12c34bcfL,0x56f7058f44605f24L,0x10addb7d59c18830L, + 0xd6c8cebf0522df9eL,0x1d4e0092ce01eedbL,0x545e99de98246639L, + 0x5898dc3c288b42dbL,0xfca8d04f85fc96dcL,0x00000000000001e4L } }, + /* 16 << 357 */ + { { 
0x600c5b055bb88603L,0x9e4b5b739be3b3fbL,0xd485d76b4844ab63L, + 0xffa20a3320cd3b55L,0x6003ad11e40ec83bL,0xeb5423b4eed37ac2L, + 0xc25c87c09d4a0f44L,0x2de4bfe703c0e7d4L,0x00000000000001eeL }, + { 0xaa9ee44d14bc1043L,0x9fc3875a24c86d4eL,0x8f9bc8b6b7405ee2L, + 0xaede13e5b46a336fL,0xbf5de69f6651c1f8L,0xb8e7a34070dc1e30L, + 0x437d3592f4d76655L,0x2d2c90e1beee67b4L,0x00000000000001c6L } }, + /* 17 << 357 */ + { { 0x4664ebe900c657d5L,0x7a43a626fad6a0baL,0x0fae848fa1a45bf4L, + 0xaaba18c1a7724169L,0x10f5d5da8bd42cf2L,0x7e8c8325bf839af2L, + 0x8d93d80931e37653L,0x74a5f0c70d179a63L,0x00000000000001f1L }, + { 0xa04092aac673ef06L,0x22273ee1f1f44e71L,0x82b91d97d2424e61L, + 0xa9bbc3a3e15c0618L,0xe5affe60c664d826L,0x69a5df34ef461b41L, + 0xb5fcb32a9537f22bL,0xab073066f0b136ceL,0x0000000000000077L } }, + /* 18 << 357 */ + { { 0x291c30d8d404dbe2L,0xae586d4eba4aaab0L,0x4aa25d064fe7c560L, + 0xdd8a3fa74a2c320eL,0xb4d269773ce46c90L,0x095e11baa1e6595cL, + 0xbbc02c14dfcf4a78L,0x300d1c7c6fca5975L,0x00000000000000faL }, + { 0x8f0f92e04d75128dL,0x32cf5813b7cce9f5L,0x723bf4fb506f432bL, + 0x6d4165742f9bfc6cL,0x6d46f9a0afeedb54L,0xa80d2408ab7e42b4L, + 0xdee2b761092cd638L,0x5ca05ac471680b52L,0x0000000000000078L } }, + /* 19 << 357 */ + { { 0x67ac7288f9b1e2a9L,0xd39accd88a9618dcL,0xf2eea4079788f1cdL, + 0xad399fed231e29d4L,0xdd9ed7a7c3fb75bcL,0x5e209b29d3b22d47L, + 0x207a0f402d4f8b93L,0xd6c21ae9ad537dd5L,0x00000000000001d6L }, + { 0x12f0d8e20c6e991dL,0x9b2203f781f55960L,0x682308fa3fb0ba6dL, + 0x58c21718501385bfL,0xe7edb949f2e89261L,0x711b06333e12d5dfL, + 0xac5ff61ab4711546L,0x359f9e52fbe942ecL,0x00000000000000f1L } }, + /* 20 << 357 */ + { { 0xa7141cb5aa76edf9L,0x709fb9dbcdcda2d2L,0x49a8a4a4cafc26ecL, + 0x4fc050c2b2aaa10cL,0xffdc2eabf44d5826L,0xf762925f09ea8dd8L, + 0xd6378df3f8d1c25eL,0x26e9594cf8ef4a7eL,0x000000000000012dL }, + { 0x17199c483eece377L,0x629bc6f2efd1ac52L,0xf4d78f2194fc4c0fL, + 0xce45f81dc083c2b9L,0x26e4498e79b23374L,0x8e61a4c5d50a6fe3L, + 
0xde6b4c398df5a418L,0xc5b45d1908a404ecL,0x00000000000001a9L } }, + /* 21 << 357 */ + { { 0x59b3569b5557920dL,0xad74bcaee1289fcdL,0x1da74246b83a07d3L, + 0xb8281e1a04d7b322L,0xff7eb19711c785efL,0x401faa11f3120671L, + 0x757abd6ac64f7560L,0xba32b8d9912332e2L,0x000000000000015cL }, + { 0xbb6740b88645d3ddL,0x21300afd5fb11e5dL,0x52e8e04d824b3790L, + 0xff431ad6955ae754L,0x297251b6af3db93cL,0x815b56ad6e5fe4c4L, + 0x7bc72cb2302c1847L,0xb47872d5d8ae63fcL,0x000000000000010eL } }, + /* 22 << 357 */ + { { 0xef086d92606c5ccdL,0x0368a9b91ad1f2a1L,0x7dca53159a4216d7L, + 0x3b9722752ba7308bL,0x680fb882cbdcb20dL,0x8cf5b86093be3a49L, + 0x2c366b9798528bf8L,0x83e51a9a2a100078L,0x0000000000000039L }, + { 0xc4d52471a89bca47L,0x1b57283abe28d462L,0xd2435fb58f2b7ffeL, + 0x9c3b714e58596f3cL,0x29cc40da99689f33L,0x1f94e6936d34becbL, + 0x226148c7690e3096L,0x1b37296b5db2e1a9L,0x00000000000000d9L } }, + /* 23 << 357 */ + { { 0x86aaa7871589aaafL,0xa58a71afcf62397bL,0xb3f39935783c57c4L, + 0xb4bb84bd1d05d2daL,0x8dcaa3a8bcf38515L,0x0328ff676c302fbbL, + 0x2f6ae585de90d3bfL,0xce78807594b3fa66L,0x0000000000000082L }, + { 0x3020d12b6ae46a4cL,0xa4a0efdd0714efd6L,0x4ce6635189a1f1f7L, + 0xa42a3a2d17f8bbb6L,0x3e63a2011cd5084bL,0x0e34ebb1e4b7753eL, + 0xb0db48f4e104b70cL,0x6365edb85230ae6dL,0x000000000000016bL } }, + /* 24 << 357 */ + { { 0xeaa998a0008164d4L,0x15d52dcd413f4ff0L,0x3b180c19e4ff76a9L, + 0x879fdc34c30ecbf4L,0x7ec379cd518fd432L,0x8a51bb32e6161641L, + 0x92e85885d582e0e8L,0x5503550ae00847a0L,0x000000000000008fL }, + { 0xddf4c5cc039b003cL,0x0c8d3f93c4814d97L,0x213ca9b502075f02L, + 0x347d87b988920f8eL,0x96ef11c896dbf1b7L,0xb092ba1ccf927dc6L, + 0x6a12dcc7d068f694L,0xe98db68d16a6e447L,0x00000000000000aeL } }, + /* 25 << 357 */ + { { 0xc29f18d0d033e447L,0x4bf3a1eea22ae892L,0x92527cbfb0eb6f5bL, + 0xbf7f74b77873907eL,0x32e0d8d0c55a06f2L,0x801bcde493e6f6bdL, + 0x8ea36c4790dac3f8L,0xa17b404d8d084a01L,0x0000000000000096L }, + { 0xadab47b6278c74d3L,0xbd17de910fa2bbdeL,0xb85a0413ca875158L, + 
0xade2a656639a7532L,0x5434ba0b0f86e65aL,0xa4b00d71423a16d1L, + 0x2025b8db4f9d19e8L,0x774d5ba564a9c56aL,0x00000000000000e2L } }, + /* 26 << 357 */ + { { 0x89e6bc861181af3fL,0x989fcb1039d582cdL,0xdbe760be95374939L, + 0xc2a3b1a495db6d56L,0x00ca290f0bef3e32L,0xa80d17d5fb7e42c6L, + 0x37f23682391433b0L,0xa1497300fd3ba245L,0x00000000000000faL }, + { 0x6684500110b99e4dL,0x163a5154d25f774bL,0xc3cb0e0eedd0e224L, + 0xf3b428594639adfbL,0x29b757cb827a88cdL,0x3b93e712313a5d62L, + 0xe90427941f2b4ad6L,0xd96f3be6f339ce2fL,0x000000000000010eL } }, + /* 27 << 357 */ + { { 0x9d900733986982c4L,0x318552d9ac043e1bL,0xc31329e818f19cd9L, + 0xd4545cc6e6d8fedfL,0x584a8c309990ad83L,0x9822bf5cc7f5de2aL, + 0x88a4ab27a1a0595dL,0xf33dbec11849603fL,0x0000000000000044L }, + { 0xee18a6fa902af180L,0xbd6749d62d8a0745L,0x90f819e378988322L, + 0x77cbf6c1a8a89881L,0x2b25b032afc38279L,0x039111b11f85e6f7L, + 0x2d5c9c44f53149aaL,0x612b1f34576a5889L,0x000000000000017aL } }, + /* 28 << 357 */ + { { 0x57e0945b3f162106L,0xbf09909356b68350L,0x0dc60a427081a881L, + 0x83130a725c91ff51L,0xdfc2900ab5421031L,0x7adcbe4cda92cb5bL, + 0xe3b1b4e350fe781eL,0x825e00dc27e2e2f5L,0x0000000000000023L }, + { 0x69dd98e974ae5f26L,0x65920f1833082671L,0x9667d392dfeeace1L, + 0x080e764c3d330572L,0x2e4af049cbec5f4eL,0xc33a0fcd50d4523eL, + 0xea84623a4fd8106cL,0x030d9f36f111aca4L,0x00000000000001deL } }, + /* 29 << 357 */ + { { 0xd59b137dd906f557L,0x13e53d54b000c6f4L,0x97b11ef277220004L, + 0xefa864eaba1ad04bL,0x01bbe0c53b9940a5L,0x093438c59461c4f5L, + 0xb8bd4858f02e4c47L,0xab236955aacdcf32L,0x0000000000000177L }, + { 0x300f6e4fdc6fa477L,0x636c5e24479bc943L,0x162fb16b0a3037b9L, + 0xc9b34c7d8d1c8090L,0x7575aeaa39f26c09L,0xea576004e4a9b03fL, + 0xaa6ffb966b070d8bL,0x5f363e8ee19d31f3L,0x0000000000000156L } }, + /* 30 << 357 */ + { { 0xaec64b223ceb9d8fL,0x01c0e019c11322e0L,0x7fa117ffa28fed94L, + 0xb4fd42817951378eL,0x9272e9c11a7e709cL,0x778d28e9d6c3adabL, + 0x5dff9492913d9fb1L,0x5295e39d2db50e08L,0x0000000000000022L }, + { 
0x0793b7a2b251c930L,0x38b7ac80b20ea902L,0x63832322ecd86c5dL, + 0x7c7085f103041135L,0xc6a97824da3beea4L,0x243049099bbf9d98L, + 0x1dbbd3e6c5aa02c5L,0x777406cf81715102L,0x0000000000000101L } }, + /* 31 << 357 */ + { { 0x9ac223c2833f5b6eL,0xa41661e2c8ce1d45L,0x8e5c81882f76714bL, + 0xee29488c14400154L,0xf64aff1c7f02d81fL,0x2b93109a6b843a48L, + 0x88156fe61c2562efL,0xd4636076ee474fb2L,0x00000000000001c3L }, + { 0x42c4cdd8a6b2aabcL,0x0cefb9353c0ab8d5L,0x950ee61226769ee7L, + 0x516831ada2977cacL,0x1174ac00ed854041L,0x261d3bcee16cfcdeL, + 0x8706465a2f4e7899L,0x18f2aaaff290e5cdL,0x0000000000000037L } }, + /* 32 << 357 */ + { { 0x46f9436c06c9837eL,0xa4feb9d508fea5bbL,0xc733ef31aa2ff650L, + 0xa58f3ffe205e0f43L,0x54d1150fbf567988L,0x78c0ac9216052b8bL, + 0xed2e587ba2f59a1bL,0xf80d6e5c2d6b5969L,0x0000000000000105L }, + { 0x5029c675207ff447L,0xd8842988dedc6a4cL,0x5c3b90e4cf78d639L, + 0xa008ae416e8e9794L,0x098d2fe729023bd6L,0xf4bc574a003e7a86L, + 0xa7a024993d4ffa1eL,0xd55641098d521bd5L,0x00000000000001e8L } }, + /* 33 << 357 */ + { { 0x747b5923d475cebdL,0xe868d30ae7454df7L,0xc60fcf2ae8560236L, + 0xddcff0f2967f3f67L,0x84fbe85f458c2bbaL,0x3daf2c80fbff1388L, + 0x4799d4f27e86e7f0L,0x84482f9e8a74569dL,0x0000000000000018L }, + { 0x521f01352b57d1d3L,0x7b4a58b49c2977e1L,0xd4b814010c9765b2L, + 0x50c0eb1af6d91eefL,0x2ad82cb773e68f47L,0xd4768ebb16252241L, + 0x92b37e3e308599d7L,0xea44f24f999beabeL,0x00000000000001d4L } }, + /* 34 << 357 */ + { { 0x7ec4adf92400c234L,0x956e6df24207eac9L,0xd2676feac9e07ecaL, + 0xb1d7900cd44e5bc2L,0x7371f0078421b464L,0x9dd2c07c552ddfb8L, + 0x55b32fb91ab48f64L,0x4c7aa6352d161efcL,0x0000000000000016L }, + { 0x62af0f34d40817d4L,0x2bc7f399354cef0fL,0xf614ed51aef88f33L, + 0x2a8aaaf6562eb7b7L,0x3f69e05f9374c7f9L,0x4e8580ac24a5d254L, + 0x9fb8ded394aaed09L,0x47ee6861b44f65f8L,0x000000000000009fL } }, + /* 35 << 357 */ + { { 0xb88da0dd0cb7d987L,0x5803bf709d01e454L,0xcaa7cc1831cc5a05L, + 0xfd0c0e2094704898L,0xd20841e583df7331L,0x0dadf18abf70720aL, + 
0x0eee9aa2e95a1826L,0x47c43dbaf7356e8dL,0x00000000000001b6L }, + { 0x46f3881f01992a30L,0xf99d10419928c31aL,0x8cd5efdffba1c52fL, + 0x527ddf05123c3357L,0x46943c89233150b9L,0xf41e12734bbd1ea8L, + 0xa0cd7fce8c62e10cL,0xfd21a999192e00f3L,0x0000000000000183L } }, + /* 36 << 357 */ + { { 0xe6e75221cfe610aeL,0x1c98a33934eb829cL,0x1c8d8d909a4ac715L, + 0xace287a451bd9ccfL,0x24fd7cfcbd6b8384L,0xce03abe6093b52baL, + 0x42ac7a7ae38467d1L,0x97663c10c0df6af2L,0x0000000000000173L }, + { 0xeab753a507e4f493L,0xe36b7afafecb8668L,0x02694012da27628cL, + 0x8cf6fdf942bc520aL,0xe8549a7e7f995666L,0x5f0a927973d4a08aL, + 0x1a87664ab708a5f3L,0x68333933435d6adcL,0x00000000000000ceL } }, + /* 37 << 357 */ + { { 0xb577c1533f989761L,0xae35d211bfa3d8fcL,0xd47c40c9b1f4a399L, + 0xd5b0975b2364454dL,0xed4d9984b3237eedL,0x97c31b497df401f9L, + 0xb7b427e22bc4659aL,0xd9bf226f45a55ae6L,0x0000000000000075L }, + { 0xa5990f28073531a6L,0xe76bc93d8f8b9a4dL,0x588489cb74e5ed46L, + 0xd13ed6ab72ee0648L,0xc2577aeefed5353cL,0xdab9de078dc20accL, + 0xcac8e1f9d57b513aL,0x5e76607aad484dc4L,0x000000000000002cL } }, + /* 38 << 357 */ + { { 0xbcb19c11883359b4L,0x3ad2e194c4e7949aL,0xb773fca20ffa7882L, + 0xbd1a7603e60cfad7L,0xbea03ebea5b4d66bL,0x875da4b871223c53L, + 0x2e33d74ab5abef29L,0xa3ce7d910fd9b58bL,0x00000000000000d2L }, + { 0x54784134feeafab8L,0x0af5113379e28a21L,0x02245680f0f5e643L, + 0xf467cdb0027bfdd9L,0x9446e78a520929e4L,0xc0190cecb05f7059L, + 0x31cbdade20e99cd1L,0x970d57f4c9001716L,0x00000000000001d6L } }, + /* 39 << 357 */ + { { 0x19dfeadf318077a4L,0x646f8bdab99ea8cbL,0x9c32bf839bce9a97L, + 0xf054bf9ba74ef02bL,0xb51e016b44a4ca3aL,0xc6e07a7bcd3dc334L, + 0xab3b52ad878eb40dL,0x12aa8a92ed323d42L,0x00000000000000acL }, + { 0xa3436773633c177eL,0xa9c7bee1e2b5ef90L,0x175955d8acc698a1L, + 0x473dd3eb2725fc60L,0xa7b4bc94c65fecfcL,0x8767e77f84427009L, + 0xb719a53fe6c0bfa1L,0x66025e6e3b082160L,0x00000000000000a6L } }, + /* 40 << 357 */ + { { 0x8b335fa8eb78796fL,0x632a1e3598c921feL,0x8a81e3dc13ebb873L, + 
0x6ea0a1ca392ac9a2L,0x12ac8f7d7a111fe3L,0xe8019e481b74982fL, + 0x970840ab372bd012L,0xf08f46778030b6baL,0x0000000000000191L }, + { 0xf1daff85ff6ca70bL,0x83a30831c4c98b48L,0xb3d1402f4b44f090L, + 0xea42a852ffc5fc43L,0x64fb5a0e5adb2237L,0xa3f97d5cef91d510L, + 0xaa8a0026f0d6d405L,0xa9db41befedcd04aL,0x0000000000000061L } }, + /* 41 << 357 */ + { { 0xec4e0725f2d955e6L,0xf65913b6ba0ad53aL,0xb5436673f9c3c2edL, + 0xdbb1e9214c59fa00L,0x480014831381e59eL,0x688f1d33ba2b09c1L, + 0xcf5af8117a4e2a57L,0x11e023ee52198011L,0x0000000000000162L }, + { 0x62cf35fa3dfe2dc7L,0xbced5f18d977b622L,0x240d0df41edd8ab6L, + 0x7e047352991127e7L,0xdc9572f31df7bc5bL,0x97b0d175afbc0867L, + 0x6a42f136de3d2346L,0x1fc3c25f5158affeL,0x0000000000000138L } }, + /* 42 << 357 */ + { { 0xeab39f78787edb77L,0x56a60472ff2e9f55L,0xce7b46bff785cb54L, + 0x278a0c9ec7d39690L,0x0cf992f68c3e887dL,0x9e148e8051d0d1c6L, + 0x5b33c695f3755acfL,0xb0451f2f658905c1L,0x000000000000018aL }, + { 0xa6e028307fbc9643L,0x9338cbb303b27b50L,0x5c64eda85026ee72L, + 0x7f1647d2fcefe41dL,0x85619fccf3dbb4bdL,0x393e6efa01d6451aL, + 0x935171004e8d8ef0L,0x32c37325ffea99afL,0x00000000000000c8L } }, + /* 43 << 357 */ + { { 0x2b555d0e6fe03ba1L,0x773f545a8b03d806L,0xa31ffe71bcc6d9deL, + 0x446edf311990a436L,0xd8c4fc206592df48L,0x3317ee33c15ac3beL, + 0x41696fb6f63b4e1bL,0x83a4cba346027c25L,0x0000000000000084L }, + { 0xde69ed4512dd9c85L,0xd667dcced6c26d22L,0x4ded2f5b385619c4L, + 0xd2655d51821e0244L,0x1311765ebf2c49a6L,0x9415d8cc38383d4bL, + 0xe71bcaf8bf9db4b7L,0xb3e5627ccc381e0eL,0x0000000000000025L } }, + /* 44 << 357 */ + { { 0xa4933fbea92fddffL,0x7c32d97032b86443L,0x2e9a7398b183d623L, + 0x486972894721f8c5L,0xbee88c0cc969e75fL,0x842542613c4d5de1L, + 0x909eae9e787a1700L,0xe31ff428974bad84L,0x00000000000001dbL }, + { 0x984cb4ee752c8899L,0xa92f6ac567310f87L,0x47f0e7caa6a1de3dL, + 0x5c3dcb5fcfeb910eL,0x7dca121b1205acf2L,0x6399f030cb5ebb96L, + 0xe1c316b2e398cc1aL,0xfa8d37e0852051fcL,0x000000000000004dL } }, + /* 45 << 357 */ + { { 
0xa2b6eb345f87572aL,0x0732cec41782a0d3L,0xd9435216d41ea9d8L, + 0x543571ed5cc97716L,0xe3521d10956feeadL,0x750eb198f41fe528L, + 0x4af6cf943d29c833L,0x78e073e6436a223fL,0x000000000000018fL }, + { 0x49c4e5bc26897e4dL,0xb80996a2959ec43bL,0x13ae0906c00242e5L, + 0x9aee89d706f4670bL,0xd06613d5c8d05987L,0x201b5d15c8675f76L, + 0xd19defe0d4c74f5cL,0x043e361e6cb41c71L,0x0000000000000180L } }, + /* 46 << 357 */ + { { 0x3c015437c95ad2b2L,0xca901614d09999a6L,0xf8549f51d5fa15c3L, + 0x7fdb2f64c5630ea8L,0x1e4ee6802b7b842bL,0xf475e8cf313eb14cL, + 0xfcafbdb3d6c70e1cL,0x77bca78f4bdd4355L,0x0000000000000032L }, + { 0x1512a85e9b20aef6L,0x2728e6820c9decf2L,0x8b3cf87bbefd8a4dL, + 0x3a42b96e601b9833L,0x6f27e84d058180d8L,0x0cfbc003489fd730L, + 0x208b7f66f8db094aL,0xf5987e8fc3f1fc46L,0x00000000000001a3L } }, + /* 47 << 357 */ + { { 0xf79e90d3ba0cfa35L,0x3d442ee9cd0223cfL,0x21b64ccc2dc0768cL, + 0xcbe9a804db61bf73L,0x5871a42fa4d78ca6L,0x67e5f390e0b6be3bL, + 0x9f4b155bff116508L,0x73f1924ac84bc47fL,0x00000000000000afL }, + { 0xfb1d1caa6f41f665L,0x4e2717a196ad7cffL,0x1772f0581d490e34L, + 0xd52e78b067c82478L,0x53e323f99b612421L,0xb23240ab51b055c3L, + 0xb479a9cd2902f229L,0x1a63508d40c0e2b3L,0x0000000000000005L } }, + /* 48 << 357 */ + { { 0x56f46aeea79fec2fL,0xae7420e6caa8072eL,0xa8d82818e0909a51L, + 0xb6fde2a2d53b908dL,0x2c291216aa4b288aL,0x522c8be8315bc370L, + 0x27d342ed48cbe4c9L,0xcc7e7d2282bda6edL,0x0000000000000179L }, + { 0x28c243b826b789cbL,0x0ee9307a736b8f95L,0x6bb0026317c8d0abL, + 0xfb2563cb96c50f3fL,0xda495d9aff006875L,0x4fb2a94706b90869L, + 0xf00cd1a64aec98d5L,0x0eff72f28bd40482L,0x0000000000000197L } }, + /* 49 << 357 */ + { { 0x98b780b5760dcbf1L,0xf260047b515a1d29L,0xfac95ac3814e0ee9L, + 0x816e57a0fba262ecL,0xfc5cd59e48334247L,0xa341052546e7e886L, + 0xd1db72bdddb0dcadL,0x5624b146ffd69bd4L,0x0000000000000178L }, + { 0x35e3ace300b92a71L,0xc32822663c82793bL,0x53c3e94494ffd3b8L, + 0x9f71ff3e58dbdfd0L,0xa7a41d4df73e87c2L,0x78c91fa31039d1c0L, + 
0xb58daa057807f696L,0xa450af015c2cd7b7L,0x0000000000000035L } }, + /* 50 << 357 */ + { { 0x7619732f42b7fc50L,0x765e9ae03259ee5aL,0xc6e36b88f486fc6eL, + 0x7161e3133e057364L,0xb06faa190eb94e6dL,0x5018350dd7092a7aL, + 0x2cd31d11fb6c1de8L,0x9561568bbc8ec647L,0x0000000000000126L }, + { 0x839b1c55ed5d4d74L,0xf167bec107576342L,0x8afd2ae346e1cc51L, + 0x2546e4ffa5126110L,0x403b72bef10cc605L,0xe611d43086ab00f7L, + 0x6c828ae6ced04aefL,0xdac7bcf83ad5c8e8L,0x0000000000000029L } }, + /* 51 << 357 */ + { { 0x921b03409c60750dL,0x1fa1dc07a742d533L,0x2001a510cfe647c0L, + 0xfdcec65d559bac54L,0xb006791b6c9979ceL,0xd8786269d197b3b3L, + 0x8ba2d90530372529L,0xf0552a77063e1943L,0x00000000000001c2L }, + { 0xaee03501d5b06ec0L,0xefbc6b5a906d4e35L,0xcba825b51f8fe6e1L, + 0x098a5cf0e1db85e0L,0x0ca97bd479d75481L,0xe2090d3481d62082L, + 0x0ee36412b26ca32fL,0x349e9c106f66e6d8L,0x0000000000000110L } }, + /* 52 << 357 */ + { { 0xe4fd1cf233c62e14L,0xbd7961f581615b2fL,0x1b82c51effc3ea38L, + 0xa02c70e8e04b3a9aL,0xf8853863926b63d2L,0x57013d79e2733711L, + 0xb62f0fb7006a0326L,0x225a8b3b98a22842L,0x0000000000000175L }, + { 0x6718ed9f53021cd8L,0xdea4cdec2a3bf941L,0x0323bb3a5211899fL, + 0xd83957736e557ae1L,0x657633b4a363900fL,0xda61a25732721ca0L, + 0x9e53c3dbd9eb3821L,0x28972548aaf7a7adL,0x000000000000001dL } }, + /* 53 << 357 */ + { { 0x0e6ab03f3358352dL,0xaf91d6e912a5cbdfL,0xfc0c814d8350e169L, + 0x6d76035e025bc3f5L,0xc48c841c91cc2fffL,0x988a58692d378112L, + 0xe6b22cf41f1c5d84L,0x4d6a80c966be7431L,0x000000000000017cL }, + { 0x329989467452fe56L,0x7c949998851cde83L,0x62f685df83aae234L, + 0xebc255cf3273ce30L,0x5d9b1076cc1c544bL,0xdcb2cc0ba0afe211L, + 0xb4985ff2bf40b28aL,0xe6162860dd765e5cL,0x0000000000000059L } }, + /* 54 << 357 */ + { { 0xdd36dad51f7dbea4L,0x668dad4d6506ae83L,0xb38d22d45b93e93bL, + 0x8f01ab0073a449e9L,0x66d3db72d8e6e6e8L,0x19a9dd5d3d2427faL, + 0x0fd8d0da3d40a973L,0x479f47bc10ca67abL,0x00000000000001f0L }, + { 0xdf05de0bff3a276eL,0xc54847b14ffdc307L,0x56caf53a6fca2200L, + 
0x9fb352117ecda564L,0x9a0a56923369ffa9L,0xdcfbf647f8af9180L, + 0x5b6d4451678c3fb5L,0x5a60678c6ff73ed9L,0x00000000000001bbL } }, + /* 55 << 357 */ + { { 0xd478174e1742c389L,0x5a2383c346b36bc3L,0xfbaca1a26581793aL, + 0x2a1d1fa6f855f601L,0x14a376f0308d3c27L,0xee58276c0cc714f5L, + 0xcc4030a2344e36b0L,0xce095846f956b0c2L,0x0000000000000093L }, + { 0x89a622e8aeecd9c9L,0x206e4d6f27cd372fL,0x317476b49441b11dL, + 0x399a84cf983641d8L,0xe9f70bdcf169555eL,0xfe01d5c38ad9b9b0L, + 0xba6a96fc72ed19a9L,0x30239e4187057a78L,0x000000000000013fL } }, + /* 56 << 357 */ + { { 0x4ade586f26762e69L,0xa4d590d75c5452e9L,0x866d96d7a3d17b63L, + 0x4820299b54373909L,0x9c4970f071889f59L,0xbe409ea4a47395c5L, + 0xa1c14b17960a0ea2L,0x4a33c8a0307327b7L,0x0000000000000102L }, + { 0xe590fe7c2b54a6c6L,0xb4a1ab5bc8f7ed0cL,0x91f19a290051c927L, + 0xd9f5d6078eac2400L,0x9bc268f6f0cf556bL,0x823c77404112a75fL, + 0x1a8168497d3647c4L,0x46f70e49b3b45d09L,0x0000000000000050L } }, + /* 57 << 357 */ + { { 0x66bea937a3a32754L,0x780ed9ca96a00b32L,0x65e5ad8e2b5cc630L, + 0x7283fbc3dc7657b5L,0x15dc690b934d9824L,0xd6f130dba98a847dL, + 0x0096e9cf3f226e42L,0xe2a54507e1e7d57fL,0x000000000000014aL }, + { 0xee2fd2150bbc21edL,0xe095c08d4301f751L,0x32b119d43e5cb50aL, + 0xe435e70435ec749fL,0x83369dd530a40178L,0x594a72fe98292f0eL, + 0x118880ad3d2ea843L,0x9eb5cc991890318cL,0x000000000000011cL } }, + /* 58 << 357 */ + { { 0x361c1f3b4d6df598L,0x6956400f491b9e2eL,0x84efa820336843b6L, + 0x88c0cdee43925859L,0x29193ebb69c6047fL,0x13607b35f0aad25fL, + 0x20206ebd61dddd4aL,0x0e644f1eb9fc054dL,0x0000000000000108L }, + { 0xca58a7ac1436fe82L,0xa817cc24e935e783L,0x2345a558c8ad7900L, + 0x64c0aff682da4250L,0x3070a842a7e6b289L,0xab848eb00aecec1eL, + 0xa26ce7e0900a03f0L,0x3fcafea7f76a6eeeL,0x000000000000018dL } }, + /* 59 << 357 */ + { { 0xc93375d857c613e4L,0xd6e163aa792ef674L,0x5b8da9694c9f17d0L, + 0x51f0cb78ce67b932L,0x3c1667b022f10ad6L,0xb370de10b6a741d3L, + 0x5b359e3527254ceeL,0xa4699c4001436030L,0x000000000000017bL }, + { 
0x444d66615ab27a0fL,0x7a63495561218274L,0x7accc97c8b6bc0b3L, + 0x31e90f52e9feb480L,0x0ae5f490d73c2467L,0xb42bb6330947f2efL, + 0x12130e62ca8bdc7cL,0x87538842afc7773dL,0x000000000000014aL } }, + /* 60 << 357 */ + { { 0xa0e61ef29c860a3dL,0x82efb508a7a35111L,0x95110f183fdf7c57L, + 0xd08244f748f762fdL,0xe2584f839119ce6aL,0x0818a297f38db17cL, + 0x0e604711bc3bc9e2L,0x46d5fe361b183756L,0x00000000000001b3L }, + { 0x3c6c64217662e9b9L,0x1e057724d4a00cfbL,0x285c8771102bcc5cL, + 0xa606d16e09724e97L,0xab2089a86dd635dbL,0x6fdd9e810b59f6e3L, + 0x43d64d58ff787a8eL,0x7fd128ddd9699167L,0x00000000000000eeL } }, + /* 61 << 357 */ + { { 0x6e1e35308e1abfdcL,0x108178640e58238bL,0x91902f99aab590acL, + 0x5132a07808a83158L,0x530791584fdf05d6L,0x629edbe68ad80625L, + 0x32e9651d3f107300L,0x3e162ac228a30c6aL,0x0000000000000155L }, + { 0x32d00fc0bb96dfefL,0x14d6778057d671d4L,0xd644f22da23a3842L, + 0x68c9d17f7564276eL,0x4081b0b960b55ea5L,0xfb317cd3bd1dd449L, + 0x3ae6ef5a8a1905a7L,0x2daed738a3b1e642L,0x0000000000000003L } }, + /* 62 << 357 */ + { { 0xab014c8728aa98f7L,0xf140362355cf61a1L,0x193f4267b5240bf3L, + 0x07d29ec4df79676fL,0x7e5d124afef053f3L,0x577b7ef54d8a7c4fL, + 0x05e53aa25f9e45cdL,0x0306e26cc0709ec7L,0x0000000000000145L }, + { 0xd4609e0b47d36131L,0x0a27167e024b55c9L,0xc913e714bec0a1b7L, + 0x391fbba1aea3e853L,0x5541c7015910da47L,0x40e545fd1c083ebfL, + 0x73a6b44870eea6e2L,0x9578a3a138a320b2L,0x0000000000000133L } }, + /* 63 << 357 */ + { { 0x4d63d8568a4afc5eL,0x8dba9bbb93fe42a9L,0x2786def64388b3ffL, + 0xb178c71c588ff35aL,0x9acff904dc6b8ddeL,0x96fc5adfe39951bbL, + 0x36c16b128622694cL,0x8d3c04a120bbc11eL,0x0000000000000088L }, + { 0xe910f16ab73d75a5L,0xce8c56594d0c3e49L,0xf9682dfd11ec38c4L, + 0x1c98872dec673776L,0x65e891ea498fb63dL,0x9c743402270806bdL, + 0x773660181eafb44eL,0xb52ff43420582f10L,0x00000000000000fbL } }, + /* 64 << 357 */ + { { 0x768ed0f6f74fb22cL,0xc967f6567e169c57L,0xf6c74d22e59559c1L, + 0x0a419045556961cdL,0x97e83ef261c6f540L,0x434d28d7f523b49bL, + 
0x0f83e17117ac09eaL,0xfb02352b4a281f4eL,0x0000000000000160L }, + { 0x78577044666031b0L,0x2aa75f54d401794dL,0xabf814bbde68d202L, + 0xc1d18b79fd8f841dL,0xb68edc5e8c8449bdL,0x837b65b088a85d15L, + 0x31a2c34e13249fa7L,0x2e20348e54be5f59L,0x0000000000000148L } }, + /* 0 << 364 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 364 */ + { { 0x1d40359e91609a61L,0x9adf4e086890616dL,0x89dd66358b14a62eL, + 0xfaa34c0a49adeb20L,0x174689f7105a9bf0L,0xbf727a3da83a1154L, + 0x4afe90c10d6d5d50L,0x9954d9d01f2fe4ffL,0x0000000000000126L }, + { 0x027df2555d2ac4b2L,0xa61a555783078809L,0x6f5d2cd40237c9bdL, + 0x7cd9f911cef7c441L,0x0d5256efabc19578L,0xd86047aff338db58L, + 0xaa2a1b5014d7c98eL,0xbe9edffaf5e65a3aL,0x00000000000001bdL } }, + /* 2 << 364 */ + { { 0xfd4ac9a343d7f5d4L,0xefaf0979c99e0960L,0x49f0cee8309e568cL, + 0x72b9d2770e83b629L,0xc9462e6b6b2c1601L,0x20fd87a59014edf5L, + 0xf153ae0a4589a246L,0xdfc0c033d0ad9b0cL,0x0000000000000034L }, + { 0xea4cfb1187abe460L,0xde9d0bf08e491e8bL,0x8b4f7381e7b44c51L, + 0x5b470fd4582fc6f2L,0x351d31d09bd31114L,0xae86870521911e42L, + 0x8c25ead699c6cceaL,0x6372bb04f6408194L,0x000000000000012dL } }, + /* 3 << 364 */ + { { 0xf76d94ea2648a187L,0x65cd1ed8841945b5L,0xa8b8b52c6d3b41ccL, + 0x729f4f50c03e31c3L,0x2b315b9d4d930a40L,0x32c503647c78db5bL, + 0x53b03b69e0cab657L,0x1cb16125d343324aL,0x000000000000007aL }, + { 0x0d0cc220cbba148cL,0x21e13a15066281e5L,0x304904e6ae6bb049L, + 0x9ed34cf1580464daL,0xb1c66d4684a6cdaeL,0x6ad36012751e42a6L, + 0xfeeed4dcc89edbbeL,0xd9897853a31a337dL,0x0000000000000002L } }, + /* 4 << 364 */ + { { 0x9f89743c7de96ad8L,0xb36af506b0e3c020L,0x92ba9b369da8c48eL, + 0x3b06a6c914e3dda7L,0x4ad7dbd9d035a505L,0xfd87c28a5825e565L, + 0x1f117e6b5633716cL,0xcef5e5d6dcd99a7fL,0x00000000000001e9L }, + { 0x839ba6957fed2cecL,0xba25c337132c54ccL,0xf032353e857c3815L, + 0x9119f2e5b3b42940L,0xcf21d2c0fecbf8f8L,0x66b09ab9b5bafb4fL, + 
0xdbe05a703946ff07L,0xb83571fc2d9ba878L,0x0000000000000018L } }, + /* 5 << 364 */ + { { 0xf62c03a397de0e00L,0xf48d1de82611619dL,0x15d1a317721bc870L, + 0x6ac60d814ed06bf2L,0xb8bc2ce0890cdbd6L,0x7810822846f4b48fL, + 0x04ef830c287c7697L,0x32961e64a49838daL,0x00000000000000feL }, + { 0x97cd5e96d8551d19L,0x68053374b2b1fd05L,0xe3013cfcccf9a8f2L, + 0xcefdd29f981fa6c1L,0xef3a4ba402683ffbL,0x2e945f5855edc412L, + 0xa4a58e25bfd433f1L,0x621ef049513f74f4L,0x0000000000000120L } }, + /* 6 << 364 */ + { { 0x9e1fadfb16822e74L,0x601a0218e2fde79bL,0xfaf91c0fcc11fb66L, + 0x39b3d9cd38141aa4L,0xaecee4237b5e91a7L,0xe78c36baa418ff4aL, + 0x97ffc63f7f084d25L,0x8d9f7b524160ab02L,0x0000000000000079L }, + { 0x4460a50327772a7bL,0xca164baf7bb4b563L,0x576f7498cf894432L, + 0x62af7b73b7c21ef1L,0x6559cd67a1ff287dL,0x366d49030189b834L, + 0xce4f2dd602496d3dL,0x140a8383d27b4ffdL,0x0000000000000125L } }, + /* 7 << 364 */ + { { 0x16a826340847c82eL,0xb70042545f9a9912L,0x798f9e628d0b4dffL, + 0x65342a6503c638d1L,0x4f20eebe5c54e53fL,0xc14bbfc9f840e2ccL, + 0xb7bdaec4bf5122a8L,0x36f7770447b6ee89L,0x000000000000014cL }, + { 0x1a055b39452878a4L,0xec912e63a4357006L,0xfa8318c8ffc61cedL, + 0x2b0d1441f05c5661L,0x0b31a6b7f197ab70L,0x69d91f5999bc8257L, + 0x62ce77826b1fe057L,0xcc37b64e8fbf4a9fL,0x0000000000000160L } }, + /* 8 << 364 */ + { { 0x73c9ca8c32bf47dbL,0x9914d5863827df79L,0x594400907e5d5d65L, + 0x1fe4f4bdcc1f4322L,0x8e14f73d38ad9ffeL,0x4b47c892eea079b5L, + 0x753f974ac9bd577bL,0x4231ef4ec3271dfcL,0x00000000000000bbL }, + { 0x871424ac14bcaefdL,0xb1b24d5c97355e9dL,0xd38496396b934706L, + 0xe90a821ca8c892f6L,0x884df6cc145aa180L,0x0dc1730e4c7dcb49L, + 0xdad8a1ef67744185L,0x444de13badd5ca97L,0x0000000000000008L } }, + /* 9 << 364 */ + { { 0xfa51638c3342c8cfL,0x0f68b9af3860e485L,0xea4d914d06cd5f80L, + 0xd5a9b6123d548a1aL,0x19cf167328c71d7aL,0xf88496f3e3ea941bL, + 0xf7458d464cd8d096L,0x40a7eccdc3a72c66L,0x000000000000009eL }, + { 0x017e43b0f067b7ebL,0x0e89bb59ae8610feL,0x0c2437f2b2bc6765L, + 
0x58c0a84449bf2eedL,0x723b0a50c13a6053L,0x3c2f581a1d1b46a0L, + 0x2a930c3a68e9e875L,0xf6144dd459874673L,0x000000000000013cL } }, + /* 10 << 364 */ + { { 0x501c908869c2c724L,0x114cd1394d26e7e7L,0x2c8e71896128092cL, + 0xd379c77c98d95167L,0xf4dc94f8c54fd3c2L,0xcb5d6568d61f0a9dL, + 0x57e935c0222d29c5L,0x19707d68e5ebead7L,0x000000000000018eL }, + { 0x0ca3d650df704c4bL,0x3a11092f9e7fbd89L,0x94accaf17b93a0dfL, + 0x5e2ee6fc7a3cfa2dL,0xfe64b185e29c1ab4L,0x450622cc0dd624bcL, + 0x19b6dace885fe0efL,0xbca6ee1159121790L,0x0000000000000026L } }, + /* 11 << 364 */ + { { 0xc3561d8aba77ae3dL,0x9668544884e9a9faL,0xd02a219cfabb2bc7L, + 0xf05419ff19500a0dL,0xddec1e04fd9db302L,0xe8163eed27bb75e8L, + 0x167a448cf55bedacL,0x0674936fcf331bb5L,0x0000000000000112L }, + { 0x6fdd6a8f05915d9eL,0xd10aba566bc230bcL,0x3d08b2be6495b1deL, + 0x1a677d3e25d490f3L,0x5435cabfabbb4e9aL,0xb1b88fe09dd066faL, + 0x4143d97f96ad240cL,0x84d22c1a8105c161L,0x00000000000000bbL } }, + /* 12 << 364 */ + { { 0x50c7c1f5570b56deL,0xc4ec2d0e45e64231L,0x30d743c79fc8812cL, + 0xc156a390336a6837L,0xed05f8e2ab56ec07L,0x9f31cb93c6ea8f39L, + 0xb4e6d426fe637c9eL,0xa2940274a1068079L,0x0000000000000131L }, + { 0xe90e065d0de0a523L,0x6a732029e7aa66e1L,0x54d8e21335eff2d8L, + 0x316c567a8c25f616L,0x4926a680d4d8b337L,0x5af43676371babccL, + 0x3de2803eea35e392L,0x1a31dc8d2569c1f0L,0x000000000000015aL } }, + /* 13 << 364 */ + { { 0xe37816de9b725c0bL,0xcc2a89c9166bb8fdL,0x49ca0257d89fe85bL, + 0x48fd498a9b83fce1L,0x6ba525640cf52c0eL,0x1123fbde28ef21ffL, + 0x85f8991d4d0cdebaL,0xfe5797d8772b53b3L,0x000000000000007bL }, + { 0x3fdf5c44dd6b4d3fL,0x5321b885e2dcd484L,0x69ce5c11dfb203b2L, + 0xe9a482386f8935bbL,0x9fcd2c9c75cc99b6L,0x251ac714e76daec2L, + 0x772611564802f1c5L,0xaf4e8d575d6e5d84L,0x0000000000000170L } }, + /* 14 << 364 */ + { { 0x7fc39bb869546059L,0x17a87451dc30034fL,0x66bd74dadc7c93a3L, + 0xf511104bf8e0b2a5L,0xffc0b9cecf5112f7L,0x32da477959194a90L, + 0x37f686a5fc465013L,0x4418ddc33c921bd0L,0x0000000000000074L }, + { 
0x60e891417cca9624L,0x924e88461d0c45a8L,0x63575d9e03f81752L, + 0x39ffb99931b565cfL,0xd2f96beca087029bL,0xbf5c66a0dd1e6fd2L, + 0x3a3bb584a7da0587L,0xeeac3f724fc63a28L,0x000000000000007cL } }, + /* 15 << 364 */ + { { 0xc06eb52d95408d0aL,0x617f37aae6cd02bdL,0x605c3d6c1493be3cL, + 0x74f50aedf516ed28L,0xe57889af120f76faL,0xcad1cd04a4bbda56L, + 0xfd346ecbfc158650L,0xaee419b5ae3c475aL,0x00000000000001faL }, + { 0x53ab6494eba5c579L,0x89baeaa366501244L,0x76714c0a0e1dfcd7L, + 0xc31b6ae005ce3982L,0x38742418192bfb7eL,0xb303fdc69fa107f8L, + 0x50f6e46723401680L,0xed7828e6032db727L,0x00000000000001eaL } }, + /* 16 << 364 */ + { { 0x2a7e8ac175804da9L,0x0b41f6118d50bdc6L,0xcfa0a757d8fafb1cL, + 0x75540d94d231a6b0L,0x524440fb0c4fe03dL,0xb8665fbfce9738ddL, + 0xbb74f6a88f1bd64fL,0x063afa3cbb8d6c67L,0x0000000000000189L }, + { 0x259b9da4a864385fL,0xdbafc55c17c2597bL,0xde890acb56bf5d23L, + 0xe51182c0f8455b59L,0x75d51a03c456e1c7L,0x9c7929c6b318e747L, + 0xaf23a7f139b3ed84L,0xee136a2b910f4ab2L,0x00000000000001acL } }, + /* 17 << 364 */ + { { 0x0d6618c6b5262bd0L,0xd008879e083e8d42L,0x46be04761e2a0f05L, + 0x29355fcba5a5a5d0L,0xf2990303abcc3151L,0x2ce9c4275a2b71a9L, + 0x1c9bd913b773a173L,0x64748c121fbefea8L,0x00000000000001f9L }, + { 0x386d740f94f39cedL,0xa22fe8b4b4202079L,0x23754fe4118f988bL, + 0x3678c0d6fc32136dL,0x5fc81b01feb30812L,0x948119e46b9c9859L, + 0x5636de19370dac51L,0xf614eea049b1fab7L,0x00000000000000c5L } }, + /* 18 << 364 */ + { { 0xd019c1759fc99b40L,0xf967bba57f015d82L,0x3cb81a604805189dL, + 0x964ad8c7a00da205L,0xc888c43a64539ab5L,0x431eeadc9e553bdcL, + 0xfc5527a9fb748695L,0x208a7a8bd0016886L,0x0000000000000123L }, + { 0x5094299ba7c932fbL,0xaad7933f74be1be9L,0xfbc80552444ba30fL, + 0x4ab16553f035fcc6L,0x364f1240c3e85c28L,0xa682343c2bb08da1L, + 0x87f463e18186f10eL,0xa693ff1c33925907L,0x000000000000019cL } }, + /* 19 << 364 */ + { { 0x09f6995a1c48a442L,0x247f21612a842352L,0x31ab8596522e8ba6L, + 0x5a378b5cda550880L,0x3cd7546920a22f99L,0xcc2308ad2a1b6f3fL, + 
0x102b70a618c84da4L,0xd4fb60db44ee1f04L,0x0000000000000019L }, + { 0x26f00eb8c1395620L,0xb74ce39019a12c39L,0xe4942801abec4e92L, + 0x975d591b94361e1cL,0x2996b121f3a13003L,0xcf1d269e23c37980L, + 0xe140df82c6996c73L,0x10e9b26e40c5047fL,0x0000000000000054L } }, + /* 20 << 364 */ + { { 0x7dde1d1d5efb05b4L,0x6b37ab24499b96d8L,0x9a11b06965ca03b0L, + 0x26cab3b3efe4f8e6L,0x438ff1523fa08ec0L,0xefcf8f96378206fbL, + 0xab9c72cb8483c3e7L,0x0870abe23c21f2eaL,0x0000000000000045L }, + { 0x04b9b4a909d596d7L,0x45d7b711168e1715L,0x86f4b7024059f9e1L, + 0x1e23920b80207075L,0x247d24b7e32dcffdL,0x7fd7ee339b29daecL, + 0xae0501392e5b646aL,0xf15cc9311f0b3acbL,0x0000000000000047L } }, + /* 21 << 364 */ + { { 0xa7585ddfcda0c2adL,0x5cec5557fbb2fddaL,0x1548595a1e228a3fL, + 0x002f9003085c420aL,0xb68b0ae22c655b80L,0x9a41ed4526ea4931L, + 0xbbea439a2b93a6f8L,0x37e82cde83f487a5L,0x0000000000000036L }, + { 0x44fb9f9aa4688d59L,0xf43ccad09af2f558L,0xcb0bd99c421900acL, + 0x6b14194f74d5dd67L,0xa515fb0e19820676L,0xbe5d2afffd020877L, + 0x7861af502826917dL,0xee0fe35be172b2d6L,0x000000000000011fL } }, + /* 22 << 364 */ + { { 0xaed4feadb6cf4a42L,0x6a1bf5325717bfe7L,0x276babf2e055049fL, + 0xe01a9b211147d4abL,0x92e2937e4d68fe1dL,0x4c40460f2682f5d6L, + 0x7921148cbeebb6b0L,0x0f17c058d8a93c95L,0x00000000000001f4L }, + { 0x97b9609deaf84a2cL,0xc723aedd1812bc14L,0xa884415778c0b6d2L, + 0x5b68fd87c8fedc3bL,0x8109a51e6bec8a71L,0x476483e4a764bda7L, + 0x7a997c16a7945df5L,0xccbfe8d8ad8f125cL,0x000000000000019eL } }, + /* 23 << 364 */ + { { 0x3d7957be4e45dc3fL,0xb06a358bc65dd97cL,0x63040ec31388da6aL, + 0xe9adbf2a28e9515bL,0xfc0edda0f7900882L,0x4e1b100bb6465e9dL, + 0x331d94772276c413L,0x95d57b9c2e8f8278L,0x00000000000001a9L }, + { 0x163d561ce4c6e97bL,0x911f98435b29a4caL,0x1577698cc6de446cL, + 0x48e6b4f38767f7e4L,0x126e23c51ea0038cL,0x8d52323e03c2a5f7L, + 0x56e33850a0a2f55eL,0x5eb93209ba225457L,0x0000000000000124L } }, + /* 24 << 364 */ + { { 0xd797e61ad1ac73e4L,0x8b484aed7b9eb978L,0xc447ce99d1eb07a8L, + 
0xd899e82580667c06L,0xda95128239aa8ed6L,0x822cd6da7f6791f4L, + 0x50600320ded12afcL,0x8934ec503214d5abL,0x00000000000001aeL }, + { 0xf517b1b73737e867L,0x0ceae5828cf888ddL,0x450dce832afa92f0L, + 0xc5fd288ac704f4d1L,0x5743e5b0f2c5c86aL,0x81bf9379d3c76ab6L, + 0xbab459db9789efd6L,0x1d34125a28e098dbL,0x00000000000001eaL } }, + /* 25 << 364 */ + { { 0xe068ffa804c78d4bL,0x9d83b38317fe7e23L,0xa43422d05ffbd663L, + 0x784837bf1788e709L,0x6a19ae5ea3723448L,0xbe6915ec36bb4307L, + 0xe34cdcaaa29d7e4bL,0x7f3a28c031be1af7L,0x0000000000000062L }, + { 0x3c476ec90af51a99L,0x8cd5d84f4b33e5d1L,0x4a257f60e9cd7f91L, + 0xf6ce609ca61dd30dL,0x972ada9af1fc10e3L,0xf3e400881bbab939L, + 0x886722f3585887a6L,0x0cb7cdecebaf6fa6L,0x0000000000000104L } }, + /* 26 << 364 */ + { { 0xd7d4d4e717431ee1L,0x8625a46a4dd90921L,0xf9c316c0dcecbca8L, + 0xdbce57c02b95909eL,0xe97ae5d403ab6816L,0xab298218fd75225fL, + 0x3434b7146592f521L,0xe5971064d7eb9710L,0x00000000000000eaL }, + { 0xe1455ae54bfe5904L,0x15c778976d8f481fL,0xb400810f008ce4c8L, + 0x779d772955e45c01L,0x8db56fb1e7d37e5dL,0x16686881921b330cL, + 0xe1bce71f3b2e17a1L,0x07149446c5c3c7a3L,0x00000000000001cbL } }, + /* 27 << 364 */ + { { 0x3257b5c4f07663a5L,0x929494c319ff6597L,0x15af82a9765f4edbL, + 0x483993cb5143381fL,0x6943fcf0b09287fdL,0x2fbb3117c67415b8L, + 0xd7034c07b7c892f1L,0xf10b0db718d9a7a5L,0x0000000000000120L }, + { 0x76c9338468458d05L,0xdf2208aca2a5d536L,0xb5a419630a99477bL, + 0x51567562348c976eL,0xc695998909c4dcd4L,0xbbb28a3706af3fa4L, + 0x15191540091300dfL,0xad333a273f1804fcL,0x0000000000000065L } }, + /* 28 << 364 */ + { { 0x5de5079168e68512L,0x6eaf85bc7be5bbdbL,0x308bfd92de4b2460L, + 0x616a1141a7e52011L,0x9112ea97ef642831L,0x43149479aceeb127L, + 0xf1b98d034c36c7daL,0x8eb9ffb8b493cbf4L,0x00000000000000c4L }, + { 0x146df8d848853921L,0xf179eb875b1d03c8L,0x1a59f02db362eec2L, + 0xe83b9f80c763c091L,0x17683a39794f86afL,0x6cd845d8f501ac40L, + 0x873e5f099dff69acL,0x3442ded6af6124ccL,0x000000000000013aL } }, + /* 29 << 364 */ + { { 
0xc5590498d555d898L,0xc3d1e58af00e8206L,0x499ebd23f5f2de8eL, + 0x396cf1ae7ee84857L,0x14f61adf09b5553cL,0x8e6b51ddbf0b1843L, + 0xc49674c2948f473bL,0x95aebd3a86d2ba2aL,0x0000000000000036L }, + { 0xb804d00b393f0237L,0x0bb1751cabab5c21L,0x1413fa32a74a192bL, + 0xbbb543ce65016834L,0x2d8a08b90125f257L,0x3663a514bf052c53L, + 0xc69d8e20946815fbL,0xbfb6a7656272ee32L,0x000000000000003bL } }, + /* 30 << 364 */ + { { 0xaea6f72ff310550dL,0xf9e9ffc76380fb54L,0x074b3feda3542522L, + 0x7af0bce77943f24dL,0xdbd33292ce11b0c2L,0x2e4f83e8bffe48cdL, + 0x32a133ee388ae49dL,0x1b3521eab2f1f673L,0x000000000000007cL }, + { 0xc4ae5fe493f8a0c0L,0xfaf94f2b69efd7b6L,0xc5838b693296409dL, + 0x98330b32525a280eL,0x174ff22814c11c9aL,0xc8e5b0912047f8feL, + 0x15d70d58072b06efL,0x8dcfcc607c7eed95L,0x0000000000000088L } }, + /* 31 << 364 */ + { { 0x00d6f3b0bc8bc4c1L,0x369b84bf4048d8deL,0x0faa2409e1ade43bL, + 0x0fdb76c28c01adbeL,0xdccfe4b7706c272cL,0x4407382cb8f26255L, + 0x07a11518539912a8L,0x1dca7e6519fca10aL,0x0000000000000091L }, + { 0xd012c49634100b1fL,0xbbf05b6d90c5a9c3L,0x574bec951ae000d1L, + 0x0006bcb031af3f21L,0x4efcfd1c01b77496L,0x0d7de152d781ba22L, + 0x404117375fc20376L,0x4b1ef6553da0d8d5L,0x0000000000000146L } }, + /* 32 << 364 */ + { { 0x6e1b7d0b35e16743L,0xcad975b354d72c65L,0x78f7434a453b8906L, + 0x8d140cae9690fa05L,0x6c8e152a31fe6bffL,0x1721155e418e415aL, + 0x73b115dc77693d9fL,0x1274ea409bbecdf1L,0x0000000000000134L }, + { 0x5953c5fef3354b80L,0x1e60a291bcda146aL,0x1e20c5c571ebe5d6L, + 0x0bd949bfce4dfd53L,0xe489a64483280185L,0x819082883743f2a4L, + 0x6126ed3581ec227dL,0xa4aab2907ff6cfefL,0x000000000000012bL } }, + /* 33 << 364 */ + { { 0xfb1b1a6cf4b320c7L,0x8cdac3fdb49695b3L,0x7c408a7acaba7e3dL, + 0xdebef05ebf18e5f5L,0x806100c4ea74f06dL,0x3a0632a981be0875L, + 0x16729db36099042fL,0xda8ade1d095c77f4L,0x0000000000000115L }, + { 0x44e85000f9515d7dL,0x5ebfe250cb748746L,0x09543f257df28b44L, + 0xc39738d79bb0b64eL,0x75a2ff624e839850L,0x690b6ea356b25b71L, + 
0x6e23e1ff783dca86L,0xcb504c26dfe0ea77L,0x00000000000001a9L } }, + /* 34 << 364 */ + { { 0x08c2ce64b4d5547bL,0xeb6567bb7570811bL,0xd2d7337a3e351a31L, + 0xef2e55f1e1dcb4d2L,0x87a1f20bbd45eadbL,0xb378609624c1cffeL, + 0x4aee03b50bf03514L,0x56417387fd881168L,0x00000000000000bfL }, + { 0x8d047d51fca2895cL,0x82ae96e1da4c8e44L,0x95a9a423cfd2216bL, + 0x448c829927592c95L,0xf825d63f8fc79200L,0x074291671bf6baabL, + 0x0ff441a2842248bbL,0x20f55cb3f2c6bed7L,0x00000000000000a4L } }, + /* 35 << 364 */ + { { 0xd0dcc4d999045a60L,0xb160c98a07cee1b6L,0xbe8cff874c5b2888L, + 0x70d351c7798a555dL,0xad3b4ed6fe119e8dL,0x9757616c9c6c31f4L, + 0xc52176c7462106d7L,0x77fddc8df2ad67afL,0x0000000000000131L }, + { 0xcf84d1f92610cdb3L,0xbb418eb03591c4f0L,0x0b494ecbde10bf20L, + 0x70bf152917d05487L,0x566b9d733ea9d353L,0x93597d749bccb340L, + 0x803844ce3462181cL,0x1786dd011a14d060L,0x00000000000000fdL } }, + /* 36 << 364 */ + { { 0x9c89e7cd382cccb9L,0xe53e72d7ad6eeff4L,0xa3f5e6442aa88cbaL, + 0xb469c241d0a71546L,0x42273290f1c278adL,0x28a6cc29c0ea960bL, + 0xe580ae1c9ff3b57bL,0x1bc9463b7b46d573L,0x00000000000000a9L }, + { 0x810616e386692079L,0x486d7875765a87f7L,0x85a742aae0ea3788L, + 0xd2c1955ea491b157L,0x1258547a517fc3a4L,0x69856b22b73f61a8L, + 0x5dc2140349f00472L,0xafd5b26dde8c4751L,0x00000000000000ffL } }, + /* 37 << 364 */ + { { 0xaebc5a8a3c902e8aL,0x0039907bb2ff532eL,0x6bdf54a1854fbcc5L, + 0x47e47af3bee412e3L,0x2366a7967b1e5aaeL,0xb2727457a8752568L, + 0x2a5bb5b73e2d49c4L,0x43fd4aba846e36b9L,0x00000000000000dfL }, + { 0x07fd53c858bf3f4aL,0xf5d34130b3725916L,0x75ffa232665c44d7L, + 0xb016eb7dbc13d752L,0xfafa7dfaf6e37ef0L,0xdaa24625b805e1adL, + 0xb197266153688d53L,0x1641661f16cb34abL,0x0000000000000027L } }, + /* 38 << 364 */ + { { 0x298160581c03281aL,0x75d01a189bd0684dL,0xb81bb844686e7da9L, + 0xd71a3c0bc40baf8aL,0xa4b3a9cedf1cf9aeL,0xf3bea4bbb31c2e29L, + 0xbf969b09628d5818L,0xbb1228938a88b8b0L,0x00000000000001a4L }, + { 0x1a053aeaf0bdd2abL,0x3f1e00a781b5cb4fL,0xbf606681ee3eed67L, + 
0x30882b9db7a8958cL,0x4c4ba93ff0f63b80L,0xc55305cfe4dd1958L, + 0xc7980ec98839886dL,0xd0baf2020b84f8c9L,0x00000000000000c6L } }, + /* 39 << 364 */ + { { 0xc169b72d1a9696d0L,0x79d5eab70f71a373L,0x78392dc783d9401bL, + 0x27d3106e64b017d5L,0x2ce058abee26b0c0L,0xf22292a464858df3L, + 0x77df2276a825f6edL,0xf5c2e794c745605cL,0x00000000000001d8L }, + { 0x5842cad01e3262dfL,0x46c2d6695f54d3ddL,0x62f0553f0317e888L, + 0xbbd1443d6a562002L,0xadb380e016279319L,0x1748e13513a945d9L, + 0xd2681a0e43efa572L,0x38243ccf1668bf58L,0x00000000000001dfL } }, + /* 40 << 364 */ + { { 0x58b0739464e7b370L,0x20ef7659a93df058L,0x7ac8f45caa81fd2fL, + 0x90c96baf8a23f6faL,0xe9598d129d6246a6L,0x75d74776229c6106L, + 0xc6d98ba922f9ac4eL,0xf8792c97aab500aaL,0x000000000000012fL }, + { 0x3264168025617adaL,0xd86e42c0a6be76d4L,0xd5b8928f0df9cdabL, + 0xedf03a9f72787fbeL,0x257c8c9ab5556315L,0x97848014e655dc92L, + 0x06634000b2992b0cL,0xe138f022e211ecceL,0x00000000000000d5L } }, + /* 41 << 364 */ + { { 0x1cf9d9c562b826efL,0xde0751a348e1c189L,0x733c861c6c5c6359L, + 0xa75beab65162e6f7L,0xd84ce05b9aa1c7e5L,0xb69b230c41121218L, + 0xc120a79fc1206f23L,0xcdf5ec7b4bb91988L,0x00000000000000ceL }, + { 0xaf493f97a913ee89L,0x19e21de5ac7f1f20L,0x7f0754187bdd0e2dL, + 0x439565b4bbab0c3dL,0x84fabbfddaab4827L,0xd5a718839b957b40L, + 0x65dd01abff3ed391L,0xe83562f3a0eb441aL,0x000000000000002aL } }, + /* 42 << 364 */ + { { 0xf532d6f892fc2c85L,0x1a97475f9caf0c74L,0x44916ac4803e9c82L, + 0x80f014aac9945b50L,0x9ebcba5a4b8a8324L,0x9637f84bbfc2d0abL, + 0x1d7908a96150d329L,0x389543cd86b92024L,0x0000000000000035L }, + { 0xfa7fca4ff8d0e456L,0x248bb158f3de61a4L,0x1df7b5ebd5327ee7L, + 0x374c2f0c2af8ad25L,0x51f3a8052aa22e38L,0x373e647638aa0576L, + 0x2dd8f6d1b9f97a4cL,0x6ee1f4bee073724bL,0x00000000000001bfL } }, + /* 43 << 364 */ + { { 0xab26101395d029a2L,0x9f45cf6c13c9832cL,0x1e2e6b3ac91c3af1L, + 0x91dabc8d3d494d57L,0x038545e9d43ae977L,0x5ead75a54db2d953L, + 0x5efdfcc5311df140L,0xee39830768154467L,0x000000000000013bL }, + { 
0xb485d389e6691cbfL,0x1c58b2e497046a1dL,0xb29aaf67b741e05eL, + 0x6874c8a5612c46d4L,0x52630bffc1e99d9cL,0xe43b1e7b0eab9a81L, + 0x9478a8cba64b601cL,0xa70734a2196fee1bL,0x0000000000000002L } }, + /* 44 << 364 */ + { { 0x3587f767bfa255dfL,0xfc983ef16af83cb0L,0x5659537ddb44c3f9L, + 0x12c8477b4f59440cL,0x3e19703bd25c5fb5L,0x4cb6e3973476d63cL, + 0xe96e9d7e04a21d13L,0x02e48301348ff11dL,0x000000000000015bL }, + { 0x2940b770d2f12b6fL,0xcd619390a18a0f70L,0xcdd831ab0615603bL, + 0x0e20657a0b4b54a5L,0x8a8ff290eb63b419L,0x6e48d4c674903abdL, + 0x8478c94ff92d7568L,0x626c5a4784711d96L,0x00000000000001ccL } }, + /* 45 << 364 */ + { { 0xaa734cc4102a3487L,0x3ebd19d83a09e6d1L,0x7a5e991c510cc7d8L, + 0xce45833c76adfd7dL,0x77c3dcda166b0b39L,0x5ba724c159d056b3L, + 0x6b925b8a841ece2eL,0x568a6bf7a7f90edfL,0x0000000000000139L }, + { 0xd0b1c8c1322539a4L,0x1e34c638530b052aL,0x6905b2added5b43cL, + 0xe7b28e65808ea0b7L,0xaa066ef38d84deddL,0xdbd9101db7d82120L, + 0x9ab79b41f1b84018L,0xf4772c849e4ca179L,0x00000000000000cdL } }, + /* 46 << 364 */ + { { 0x38bc3f9dadeda939L,0xa31d9bba85bb4ee6L,0xced24b2e359d0796L, + 0x50df5f0d6683d656L,0x976d878c99e37319L,0x6c283c459b607d4dL, + 0x8ed89d1d9ce07db7L,0xbe380a9c0bb04d85L,0x0000000000000198L }, + { 0x32a3bf77e788dc40L,0x7efa9172473ae393L,0x405f0a92035ff1faL, + 0x92e3f9ce4bc2d463L,0x925514e40a2021f6L,0xcd3b486a24f17a36L, + 0xb3d53d04f3613476L,0x71c3f97fe77820f5L,0x00000000000001fcL } }, + /* 47 << 364 */ + { { 0x852f1fbcfb7f52b7L,0xb0cc00e8a5216d54L,0x8672df32b5963f15L, + 0x762e9282a02e8cb0L,0x4d7014de28e19483L,0x00d4fe7499924b2eL, + 0x3774ec31f8b18141L,0x0420a9c17157790dL,0x000000000000007cL }, + { 0x911b65954e573db3L,0x32acdb9fa20c4d41L,0x711a9ec71305a54eL, + 0x9c3d65a366d148ffL,0x625f52948fe247d7L,0xfab2043c4670bf1aL, + 0x12582823a07de38eL,0xafb1eded5b959f06L,0x0000000000000129L } }, + /* 48 << 364 */ + { { 0x5d8c23c4ef873f3eL,0x2a29d680ce7bae33L,0x5109e098bc00fc8eL, + 0x9bf92efc3cfad961L,0x272dfa3362036bcfL,0x5abb516cc2b13688L, + 
0xbfad4cccddcb08abL,0x05945b934f260709L,0x0000000000000043L }, + { 0x67fe093b08a5f87fL,0x5f8e248a6c6fe1b1L,0xf54f710ba3b887a2L, + 0x12e1b68790f7e972L,0x7002bd70429cd6c6L,0x0f010d4ae896b58bL, + 0x44fb054b1a7f5713L,0xb9f3026865508714L,0x0000000000000173L } }, + /* 49 << 364 */ + { { 0x65483a1ab5c1fbc2L,0xa155ccbd53bb27d9L,0x9094f0ed3d5359f1L, + 0x362abba1d9f40d89L,0x8d7bc1e7fa134421L,0x636633a976bdfe89L, + 0xc6ef5d639c5869b1L,0x8e7ba642480bb0a0L,0x000000000000000fL }, + { 0x88d645e1d0feed4dL,0xeb4adfa1c20d0d63L,0xe2209996fb1b2e2fL, + 0x87d28ed192f8ce53L,0xed470981ceaa7a0dL,0x85aa8a0d90cf06b6L, + 0xc60e34c958714ef8L,0xc8981d2c3809d2b1L,0x000000000000012bL } }, + /* 50 << 364 */ + { { 0x72fc8211bef81f27L,0x537a557025bf42deL,0xcf4897acb921c86dL, + 0x5c8699ef53e61dbeL,0x8035b6a21224893dL,0x2898a3da7493e0eaL, + 0x513c67d938bb7c30L,0x061bd9489096e40aL,0x0000000000000045L }, + { 0x6dfd6e4f28d8262cL,0xa5e899779626f8faL,0x96ad23672ccf3544L, + 0x942edf03854002f4L,0xbb5cc34da9661773L,0xb80cf020b8d08e21L, + 0xaf4b59e6c43a0450L,0x808e168e090b9341L,0x00000000000001ffL } }, + /* 51 << 364 */ + { { 0xa21298f924d00a8fL,0x72bfd3d362d44b6cL,0xca5b5295b39fb777L, + 0x5825f273d6c47ffdL,0xff94e450245eb6afL,0x6bc3a25b526f6540L, + 0x7e2ab869326c3a3aL,0xfe19c44598793894L,0x0000000000000048L }, + { 0x458eaf5d312c9130L,0x799818f21bc835f0L,0x9a0379fca84cf15fL, + 0xe1e881b333eb47eaL,0xe8f388c96c148464L,0xdfd0331bf918dd3cL, + 0x0fe9948d37c326bdL,0xa2e594354fdeacd5L,0x00000000000000b3L } }, + /* 52 << 364 */ + { { 0x167a44a4f48d1a6fL,0x17bd533c0674b096L,0x608ef0ff851af68aL, + 0x853711b006c5a744L,0xe673db4b76e6aafeL,0xa818d41e84bb4967L, + 0xce0648d4874cd99bL,0xe54897de3fd7bffcL,0x0000000000000121L }, + { 0x4459a34f07254e0dL,0x134eba30898bf5e9L,0x7ee5ba719c1abf50L, + 0x66a612de34bf7003L,0x78acbb6315d9f945L,0xb6d28e14bdb7f451L, + 0x689d24f8b95c7028L,0x6483c9b4ebadf135L,0x000000000000004fL } }, + /* 53 << 364 */ + { { 0x822f612e68d5a09eL,0xacb6e113a31bd8cfL,0x2c3ad4eae8d17d74L, + 
0xab51cf8edfaf7e2eL,0x449183963772dca1L,0x62a617efa96de321L, + 0xf09187352e20c698L,0x6e7a487e6bb591eeL,0x00000000000000deL }, + { 0x1d33a89b7db906a1L,0x4091e01471dc64dfL,0x0540b534a58cc14dL, + 0x849222c707396a5bL,0x58aef7eb775ea99bL,0xce7c63f65f302c6cL, + 0x7a1e02cd1b867273L,0xebc4857b3743b723L,0x000000000000017dL } }, + /* 54 << 364 */ + { { 0x0671c7e112ca894dL,0x6b0a2c88c57f111aL,0xca88bfaf1721a2c3L, + 0x61cb31da9a514c04L,0x4514bd8599b99880L,0xa9f46871a2059f8bL, + 0xec4e6edad93af5b5L,0x1cec9d601d560d16L,0x000000000000017dL }, + { 0x6257f235a79d25f3L,0xb1979a667f90a7a1L,0xb38545ac0679b79dL, + 0x0703d6507dbe20d3L,0x5b88e91e64a68386L,0x7a5629c6c91e584aL, + 0x0915e551ff339016L,0xf8a9e56d23a0500cL,0x00000000000001a8L } }, + /* 55 << 364 */ + { { 0xee27859c0521098aL,0x970c4051d872b16eL,0xad0ecde9c3c5bbf1L, + 0x019f0c2c4b149f15L,0x9a5a4e5503a6d0d6L,0xa52621d2895c64f2L, + 0x707aa7e689956400L,0xbd0e5a053795410bL,0x0000000000000181L }, + { 0xdc51bda6b2a3e2a5L,0xde72d588cfe02f6fL,0xb18b4bec1be05194L, + 0x68f8b786d6037923L,0xa494a1c4415279c0L,0x0e51e168a5aebb47L, + 0xe79a88f04a96a716L,0x1feb112e1d788449L,0x0000000000000186L } }, + /* 56 << 364 */ + { { 0x281825f0af90c0edL,0x508b8ef58b11c46cL,0x3cb0e5a0ff67b87cL, + 0xee6365c9a50bdce9L,0x3f933c55f589fb98L,0xf7950a283567064cL, + 0x21e677926924a0ffL,0x6d5be0aa7eaceff1L,0x000000000000005aL }, + { 0xa914263d816da5c1L,0xf5a6429d6a464081L,0xea57f436d831fafeL, + 0x23641c3901b9ece7L,0x661d8811e2403f65L,0x023481f7816a7a7bL, + 0xe93d5b84dc13eaf9L,0xacb875252dee995fL,0x0000000000000011L } }, + /* 57 << 364 */ + { { 0x7bac8a1f71752fe1L,0xb6ee08d1ef8dbcc6L,0x3fdf644ebb0a75a5L, + 0x51e3c1eea521e7afL,0x920792c578e167dfL,0xbab8522244800674L, + 0x02e31bbf3cb5b8f9L,0xf0bc9665b24b43c1L,0x00000000000001b8L }, + { 0xc3b8ebc338cf85bfL,0xb1c104d25af1dd95L,0x2bdfd6dcda6cbd8bL, + 0xe6ac454268e06800L,0x468c05305cbf5287L,0x65a8a23cdc2a274cL, + 0xe44faf739e3692d8L,0x88b9600a9770e1f0L,0x00000000000000b9L } }, + /* 58 << 364 */ + { { 
0x78186f42d20e93e5L,0xf882de3b52cb40b5L,0xe02c79387365549cL, + 0xbc1fff4ede0f24f6L,0x4716829f33e8fb70L,0x487999c200e2d58cL, + 0xd32e04f967046452L,0x5c6c7d0f139cd04fL,0x00000000000001a0L }, + { 0xf5210c69ac33a880L,0xd32472219054b79bL,0x399bf1bfb9b9b279L, + 0xb145a7345fa2a328L,0x70cb655f6a7cb853L,0xbb5c9d5edb9862fbL, + 0x239fe8d6762ffc06L,0x912d2a518fed6745L,0x0000000000000084L } }, + /* 59 << 364 */ + { { 0xcb67ef37ff6c372bL,0xe9425af5c428b56fL,0x7854e7f36ff3a356L, + 0x5965c0105029b282L,0x493d32f7fc87cae6L,0xc7ad7ff9dfc472e3L, + 0xcd9ab378d8bf71a9L,0xc49c4707acec3621L,0x000000000000005dL }, + { 0xe4c80149cf6db5aaL,0x52602b84909b1173L,0x36ce666e6714c0c4L, + 0xc72ae2b5f3bdddb5L,0xbb0d0ceb584464efL,0x78bd6e2e92cdc8abL, + 0x77107721019f02edL,0x7dbc999bec0397b9L,0x00000000000001d5L } }, + /* 60 << 364 */ + { { 0xf252109992d52590L,0x30ed7b6d4f408844L,0x4955bd8bbba75995L, + 0xd37937658cb5aec0L,0x1261cf5df69bde61L,0xce174a491155e67cL, + 0x7a1dcb85672e120eL,0x01af5025a31f4618L,0x0000000000000194L }, + { 0x5d88154d535c4264L,0xcf3544f8262baf6cL,0xcb20720cd283226eL, + 0xbaac302321f39572L,0xfe4b16de5314be46L,0xfd21b184e3413cb6L, + 0xd03c87359ac85ca5L,0x0ca26710bafa0f60L,0x0000000000000084L } }, + /* 61 << 364 */ + { { 0x502241e716f77078L,0x6bfa20aab341ccc0L,0xf1778314d15149d2L, + 0x7bcaa76057a09879L,0xdf92e0fc69388438L,0x203a14ae7744871dL, + 0xe7327d6a7937eee8L,0xa319cc7ba9d12fdcL,0x0000000000000126L }, + { 0x909ce004523922bcL,0xd1da8fd14423b041L,0x7166e840ec1fd1fdL, + 0x7c04a7938d916182L,0xafa25c31611e6ef9L,0xaecab8083296fcdcL, + 0x5483703b06078065L,0xd008cd2df0c882a0L,0x00000000000000f1L } }, + /* 62 << 364 */ + { { 0x0e7d946f1dfa75e8L,0xb1e329eb0eb8e7cbL,0xe1df4442ba80dccfL, + 0xb3a29cdfc142cb7aL,0x3712a3cb0e5e7f49L,0x7037fc67a1f9236aL, + 0xf049ac787833129eL,0x638fe65adac94ad0L,0x00000000000001a9L }, + { 0xec8ac9e743be67eaL,0x84d8f6db517473c1L,0xad9de5dbc4ec6612L, + 0x2f7ebb78e15058b4L,0x129353c9adf4076bL,0x7dab2a69d2683ad0L, + 
0xff049895a4228a12L,0xd0348267bb32f462L,0x0000000000000067L } }, + /* 63 << 364 */ + { { 0x2e0d4e523c99d98bL,0x2ef5e71343e2da00L,0xc0c2309974663959L, + 0xc9df3fc467c2cd19L,0x5da2f192ba2721c8L,0x8702415febb5f418L, + 0x4db2fd402eec0530L,0x2295bd8643c2146aL,0x0000000000000174L }, + { 0xe7a701ddd762e18eL,0xca06d33a84fe83ebL,0x168dafbfc46cc2ebL, + 0x398e74542544fc12L,0x4bce2b7d0e00ca94L,0x2488fa21756ef9e6L, + 0x8e5b79ce4e3f6937L,0xbf90406a0a5d699aL,0x00000000000000a0L } }, + /* 64 << 364 */ + { { 0xd414309fa567339cL,0x4ee5c7a8dff5fbcaL,0x33d8e6c3064cc887L, + 0x15992eb4ca553cbdL,0x3d733bb4459da20dL,0x98c2de2ea682b305L, + 0x68c13bed5602ed92L,0x639b79f1c3bf7b46L,0x00000000000000b5L }, + { 0x93de5e06289407e1L,0xa8f2b4e1b05a27beL,0x9d07927a9f5a6be0L, + 0x9a5d02e2f1b97f94L,0x320db20df2dabbb9L,0x991313330c14980bL, + 0x44845e16f888e761L,0x1baeb18063a5538fL,0x0000000000000196L } }, + /* 0 << 371 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 371 */ + { { 0x55417ffa525abf32L,0x1ff40e1678067dfcL,0x431a0ae12795ea6aL, + 0xc3de691cef54c597L,0x60a0c51b95f9d2bbL,0x30840ca982772f1aL, + 0xddaf460496eb0344L,0x03a9f6f48615d783L,0x0000000000000035L }, + { 0x74598cb233edcfa4L,0xbb866bc9de9dccdeL,0x0c4831162a673bb2L, + 0x3eddd388fee306a7L,0x8785a002c80eb5b9L,0xe2bd81f71937371fL, + 0x501648595684a105L,0xf0f3fdde1f26e0dbL,0x0000000000000119L } }, + /* 2 << 371 */ + { { 0xa8c68b9d76bcd267L,0x95f78ca896362c64L,0xbb8302f303603f09L, + 0xf15b0d54272622b9L,0x4cebfcd197692826L,0x4d29d2b119c758fcL, + 0x4703bb56e11ac4feL,0x7152abd536b53bdbL,0x0000000000000060L }, + { 0x68ac983427bede63L,0xd5c6c405e54a999bL,0x3d763e32231e8eb0L, + 0xe36ca2ff47c7edb7L,0xd9f185058d2b6016L,0x4e576eb060d55105L, + 0xe3e9f24976e3f591L,0xd5bf8f0083ea6a2cL,0x0000000000000157L } }, + /* 3 << 371 */ + { { 0x19167b042f78b31dL,0x0bc39ff3ee7cf690L,0x1ba8b94e84229ddbL, + 0x3246debb20f057feL,0x48ca85514add6cc9L,0x8b7f118143ae3103L, + 
0xb668a2b5a6c8bb6cL,0xb64cc5c6b9096bb4L,0x00000000000000dbL }, + { 0x9c55192350f09302L,0x9d4b2f64b0cd8facL,0x5082aef750c652ffL, + 0x8bcd4095cf098fe1L,0xa0548bd149d1d0daL,0x88bb345714f1bd75L, + 0xc0d5ee3412d51f96L,0xbf0bc99b3428d838L,0x00000000000001dfL } }, + /* 4 << 371 */ + { { 0x43164d3a02079397L,0x2a3fd379490d948fL,0x79ad8714635e0c5aL, + 0x5ba9289871b5cf85L,0x1babc5a1956bead4L,0x5408a355ff6d701dL, + 0xd924d7a369570fe3L,0x60d41a46831755afL,0x000000000000000cL }, + { 0x7ccfe1a4ec2f26c2L,0x549a32be85ebc29bL,0xe183ff35d46e8b7bL, + 0x0e8dfc66d421604eL,0x1ab4332cb30ddee6L,0xad4b6b316c35cea0L, + 0x1d9c4df7856f9258L,0x65882c90850f81e4L,0x0000000000000052L } }, + /* 5 << 371 */ + { { 0x55a5a6ccdddca192L,0xb38bb7287b3e7f50L,0x7b9a20026c38d9d2L, + 0x1d2b51021b932082L,0x999e6e2237f9c375L,0x4479906f28599828L, + 0xf2f530c4a190dd77L,0xedf7a92bd90a48c8L,0x00000000000000a7L }, + { 0x1f671a64995e4d4cL,0x600bfd00452d4641L,0xba5ff839569ac924L, + 0x0b7be6fdbe11e6a8L,0xd6254f776167c3fbL,0x9ca2fb561db1833fL, + 0xeaf23c8a2f38a1d6L,0x5ae79637580328b8L,0x000000000000007fL } }, + /* 6 << 371 */ + { { 0x14b97da09278f093L,0x77b62eb2aaf50774L,0xc94c47e5595788ebL, + 0x3f6fbf17e78a7acbL,0x8beaed91d648cdaeL,0x1402478db0281eeaL, + 0x06f48d3e057a9cc4L,0x9a75cb7b56f972cfL,0x000000000000001fL }, + { 0x74749b84a7049117L,0xee6187df275efa96L,0xc8a4dd7fa7daefb7L, + 0xc2a5680ff26b4deeL,0xe64ae23802c8ed96L,0x55d8e7bdc75ab457L, + 0xc29f93f613daad11L,0x7e485b3f10ef96b4L,0x000000000000001cL } }, + /* 7 << 371 */ + { { 0x1c9e957789aec483L,0xba5e770a1bf4467fL,0xd7526f872451b462L, + 0x75016fd27032ba28L,0x2dabd34d28d9400bL,0x061fcf0b059924b3L, + 0x7b10c93f7ffabc41L,0x0036de6be31e7493L,0x0000000000000138L }, + { 0xbc1e36684cf1f0a5L,0x7ee63407d00afe1fL,0x6cbde6d008365960L, + 0x547354bc69072183L,0x6034bec0b1ac8702L,0x87fd85e523bfc10aL, + 0x4324629f3e70e47eL,0x297e13f8b293881dL,0x00000000000000a7L } }, + /* 8 << 371 */ + { { 0x27cdf0121f91a935L,0xa3a2d0ab9b1da866L,0x62bc6ad520c6250cL, + 
0xe637b6b235233aaaL,0x3921e6a2e5109b06L,0xa24b99c558d73641L, + 0x33073b25d3bd99f7L,0x60fad5fec0498cfaL,0x000000000000014aL }, + { 0xb187185263080bdaL,0x8270739112d3fcf0L,0x770b113c7b3cd673L, + 0x2cf6e741ca843432L,0x77c74d8dd75af1c0L,0x979e1fd4a0c24279L, + 0xbf05a641f335ef51L,0xd7ec205d26b269b7L,0x00000000000001e0L } }, + /* 9 << 371 */ + { { 0x4a39cd1170a3f57bL,0xf7fad21a6293478bL,0x764c268bcdefc0feL, + 0xa00708591e0f3357L,0xede58f82eb1922d0L,0x892fff77f01c6ff9L, + 0x7560da0994b99265L,0xfa2a8b7655cd790dL,0x00000000000001a0L }, + { 0x654458aebe04cab0L,0x54ae24c556d74126L,0x57a824ee98adf4c2L, + 0x40bad2871153f8faL,0x6ccafa8041c1dcfbL,0x04ca0dec0f0d346fL, + 0x823065b4b8909c28L,0xa2e9d0ed691fc92bL,0x0000000000000108L } }, + /* 10 << 371 */ + { { 0x12df8924cdf52775L,0xd89749666e3bc06fL,0x7f3310ce3e68cf93L, + 0x0a012c7f97117fe2L,0x65288f5ba16adbb8L,0xb1e26bb892bc7054L, + 0xa6833ac98a63d1c7L,0x5f3b945a5de7f156L,0x0000000000000176L }, + { 0xeb8958990dd3a366L,0xef27b635958aa6f3L,0x8ef1df5f1dabec19L, + 0x871fec8b0f53bdd9L,0xfbb413be9efe0c6aL,0x06e372b6aa16add4L, + 0xdd08c66fa945e423L,0x66c1b806f3b12febL,0x0000000000000166L } }, + /* 11 << 371 */ + { { 0x933b1a154bc2cb58L,0xac8e7a848ef1d6b4L,0x70260e4618701778L, + 0x57de74846479f950L,0x978936a560f27f7aL,0x4e7bb94ff6be0506L, + 0x1457516e3fb3ea20L,0x0441479c4f869273L,0x000000000000017fL }, + { 0x685fe4c987220437L,0xfa0bfbb2e943a125L,0x6fa0b3ed370ece4aL, + 0x93c118d6dfd5cc92L,0x6030a716ddc2f6a4L,0x0a8ac606b43868e2L, + 0x297b0af0c5884f2bL,0xd1a4017575bdb5b1L,0x000000000000007eL } }, + /* 12 << 371 */ + { { 0xa200e9eefc3de00dL,0xe02a24d1d3a22751L,0xc793c92ead1898f2L, + 0x46ab743bb70f915eL,0x274fed3cd9a716c3L,0x292a43535c0d17a2L, + 0xe7619f937874c664L,0xf21dbab53e3c8169L,0x000000000000019cL }, + { 0x6d19c5de10f6e0d1L,0xc8d263ffddffa70cL,0xac7886e640a77aefL, + 0x22bd9cf6d4a9191fL,0xc53a3301624532d3L,0x8edfa100fa74a13eL, + 0x15f04d3357701dd1L,0xd5cfd2ed793d4da4L,0x0000000000000170L } }, + /* 13 << 371 */ + { { 
0x398ee0581ecd73b7L,0x2f0d408133d6ed24L,0x1fdcc54b5fdbdcf0L, + 0x0395f23a4c72a862L,0x367f52079a32bf9dL,0x5b0e3a0c52b644bbL, + 0x1a611bc7d98749d5L,0xf33a5068dbd98abbL,0x0000000000000171L }, + { 0x2e997900353531a4L,0x61b43baf887ff703L,0xc7f77d0a186f6196L, + 0x1848d88b69925319L,0x5714b501bc49f4fcL,0x8fea9fe19f096741L, + 0x967d954bbe08a7bdL,0x3924cd2b8379598dL,0x0000000000000187L } }, + /* 14 << 371 */ + { { 0x627f65fd51ed20ecL,0x1c31d4a36dec3a4eL,0x8f222ec0ea76b32cL, + 0x890e15c3bd5a76a5L,0x236174697a3e2d67L,0xbed29d6dd8178f45L, + 0xd61f7707c90e2df7L,0x31c286dd91be7739L,0x000000000000008fL }, + { 0x7ea9ae7ce3d9f5a5L,0xcc26143e9dda23c5L,0xc231022efd1ef741L, + 0x66e91f63dc1fa1dcL,0xe831781e364d03e9L,0xf2247b2b629af703L, + 0x08f7eb82ae2b773bL,0x7c543051a0a208b0L,0x00000000000001d3L } }, + /* 15 << 371 */ + { { 0xb761f9dacad555a5L,0x1158077a4c08bebfL,0x20eb5340d9178836L, + 0xa017580fc9cb0f56L,0x6398ac5dd6a9dd78L,0x8c61b607b81a037dL, + 0xaeff0fa89e9b6cb8L,0x87114bb165251d54L,0x0000000000000047L }, + { 0x54fe66cde6d7bffdL,0x691934c952cbec15L,0xeec20a71b2822a05L, + 0xca8eb263df0d5327L,0xe64d4637b7c3f818L,0xeafa8d4f41fffc65L, + 0xbeca4d821438206fL,0x5b1e7299b08e6da8L,0x00000000000000d4L } }, + /* 16 << 371 */ + { { 0x55a64b0a30239552L,0x447c6e084bfab408L,0x43cb43277063035cL, + 0x68afe121d293b9eeL,0xa975707bed50a41cL,0x15a6a8be4488f96cL, + 0xdbb6a3d81bf798e4L,0x0de2f103b5e600c9L,0x00000000000001f0L }, + { 0xc070dab445bb5352L,0xd590d895afdac74cL,0x679fd21d37bfade9L, + 0xe8dc87e7a0aedd41L,0x9e48f870b31dbd72L,0x21afacd8c7ea78b4L, + 0x8ac51f26fbcf9d22L,0xb52293afde2b01dbL,0x00000000000001f8L } }, + /* 17 << 371 */ + { { 0x3758f2af7936635aL,0x74b798c72b40548dL,0x8f746cb7bc08951cL, + 0xe7c48c0db90c67e5L,0xb3aecd64547b6065L,0xa7abe0f2357ebb83L, + 0xf7870c3f514cd9bbL,0xd4d390c38af455d5L,0x0000000000000026L }, + { 0xe3adfa666f5d6b82L,0x8aaf5a4741ab8606L,0x9f3a4e2e5c7cb83bL, + 0xbfb5e5f97f817e03L,0xa1774513f962e421L,0xd8530e824f95199fL, + 
0xc634291ffd835e8cL,0xfccc2dcaabd346f0L,0x0000000000000086L } }, + /* 18 << 371 */ + { { 0x7f428dac77331596L,0xbf977f5fc255949fL,0x3da177d5c4d98be0L, + 0x7cb4b3b7a8527c4bL,0x040683908032ca6bL,0xfab16224bb04c171L, + 0x6660197a9c5610cbL,0x8fe2ad3091c7c2fcL,0x000000000000018fL }, + { 0xc8df54981bc9c007L,0x9901a26b6aeeb5ecL,0x0c6cd18f85b8d02bL, + 0xcddd3b72972a7ea2L,0x603843318c6fec2cL,0xd4200454211f8358L, + 0xc7f5670ee44ccacbL,0x9fcb7d2d97a7417dL,0x00000000000000bdL } }, + /* 19 << 371 */ + { { 0xd144b65c368e0f94L,0x2aa6fd1861635627L,0x3f31fb42fe180a54L, + 0x8883d6b30264febcL,0xe08156372a275187L,0x1b41b0ab2efeb608L, + 0x7d4d5c8cd9209a9eL,0x059b7e52fb5e4ae8L,0x00000000000000f1L }, + { 0x99f230bf1ae6d4a8L,0xd4305aa703d12070L,0x500e9eb3bfe7d196L, + 0xed0419172cc4d4c1L,0x86b8ce8a7c13ad0cL,0xb98fbc926325540fL, + 0x40a2b38df38b9857L,0xd9db5066e09d37d7L,0x0000000000000079L } }, + /* 20 << 371 */ + { { 0x00c31d64181f278bL,0x582351a85e31f839L,0x825b8006a74edfabL, + 0x3d9bcb438e3019c0L,0xa05cc896415f4421L,0xfa32c6f82e7c6507L, + 0xbbabcd82d133935fL,0xae65b063ca469a57L,0x00000000000001b7L }, + { 0x6c872de99ba01391L,0x2905a70571855ccfL,0x9cd262192d4210d1L, + 0x9de7f89b7aa6af53L,0x8b4cf2b2be39c3f0L,0xc563fc954e4d215bL, + 0x957fa0bce34c2924L,0xfa619e7a4af69b61L,0x000000000000013dL } }, + /* 21 << 371 */ + { { 0xe46ef9e2f940c3e6L,0x54d48ea1dfd4f480L,0x35065f580b11f229L, + 0x18835d382a2631f1L,0x986d96526284ec7fL,0xd7edda2e522a1c50L, + 0x351d9e0abcef1fb2L,0x42a4da050d3f2923L,0x0000000000000048L }, + { 0x65621778920b9d2cL,0xa4e48292ca599b9bL,0x705f6b5f09a51b05L, + 0x17e2e9be14fb9f4cL,0x626f13c1fdbae0a7L,0xe0fff527a6c97623L, + 0x0a492326e3b401a9L,0x14005f55d4c7923bL,0x000000000000014eL } }, + /* 22 << 371 */ + { { 0xe86339bafd804ef7L,0x0167369276bbf53fL,0x41236f2e3918ec65L, + 0x1043b6b3c14159d2L,0x732bcb46ab04bb98L,0x88d1cb4c30d86216L, + 0x3dca271b85ddd190L,0x927fba7b58e758caL,0x0000000000000182L }, + { 0xca047c60f46e0e28L,0x915c93fc92b64674L,0x8fd233c6794d425dL, + 
0x028132b544a3ec1fL,0x975c8aa816509efcL,0x041ef627622584e8L, + 0x1989a692938c99dfL,0x7704d7c7bfbbc611L,0x0000000000000192L } }, + /* 23 << 371 */ + { { 0xa7eaf4c895142d4cL,0x61e607a03cd97db9L,0x4763744a14981a6eL, + 0xf54972914ede7722L,0x149141767b2054a3L,0x214aa26ecbf81d43L, + 0x0f6799c64e524018L,0x0973267933808997L,0x00000000000001dfL }, + { 0xd8603475f3af64a2L,0xf5e79fd58ab0f9e4L,0x691d0e631fe48f7bL, + 0x58500c202bae9eceL,0x8867f3767fa96563L,0x5ec76c8c31bb14b5L, + 0x4a24a4cf719e74c1L,0xa55ab3dc3018ac71L,0x000000000000008fL } }, + /* 24 << 371 */ + { { 0x8013eb783899c832L,0x463a9c46e6ba78baL,0x6d3a471ac9682ac6L, + 0x599a9fb58dbd293eL,0xfabfd28a1897913bL,0x1de9fe55b9a23b31L, + 0xdd90ed9ad55d93c7L,0x07581309290a5eabL,0x000000000000000eL }, + { 0x54443bb9978f36b2L,0xc2c254c99f6c65c6L,0xaf9b5009f7b984bbL, + 0xc5416eec0b97ffd9L,0x1107f9bb5ebb7853L,0x0ff4c94c9d487ffcL, + 0x7b4f0985507ba23bL,0x0accf6dd5eb91f71L,0x0000000000000044L } }, + /* 25 << 371 */ + { { 0x192da2cd26f4c420L,0x572cee9631d8df5dL,0xdab3e7f5bdcf0a04L, + 0x6a1d4dfbce93d59dL,0x61e82d140038c2cfL,0x8dcf11e453b76f40L, + 0x0a4a89fff6fd7ad9L,0x1e290ca5ef8283c0L,0x0000000000000115L }, + { 0xa9681e1b041ae86cL,0x8ed63e5f3f833d4dL,0x3610987bb4d3ddb4L, + 0xd2ccde6e391b2249L,0xca83117b8d72067eL,0xe1ddd26fa6bdb2afL, + 0x15ae36b72c70e306L,0xf98cd43ea7758469L,0x00000000000000beL } }, + /* 26 << 371 */ + { { 0x1a07bb1c0ac926c8L,0xdbdad1e77cde114dL,0xd677815bc1f3a574L, + 0x5758e60628dff5e7L,0x95c76291a7501407L,0x5cd2407fdcb4d1f6L, + 0xeb6d28c4b9472593L,0x0092a86b0b6fd187L,0x00000000000000fbL }, + { 0x961f53cb791dec5cL,0x77e3a3129a8cd9d7L,0xcfc28c0775f8fc57L, + 0x748ab16f531e8336L,0x100e37fe05192627L,0x9477fb71d0292d0cL, + 0x924c1e0546a38ba8L,0x2018a9da4543f219L,0x00000000000000c8L } }, + /* 27 << 371 */ + { { 0xfc15c19894d6a58cL,0x6f96632933f600a9L,0x739ca01ad3897cceL, + 0x4d55aa32f9951c45L,0xba939be9855859c4L,0x6036fd9015e2f163L, + 0x5d186f9beeb5f81bL,0xa4fcf9103c005c91L,0x00000000000000d6L }, + { 
0xfbc5c0d24972f122L,0x98ba7c8a5c76ac91L,0x2ee0549a178b8a08L, + 0xa4b87f877d08e3a6L,0xb79bb97e78826c20L,0xf4d47cc987e0716eL, + 0xaf149859ce5232fcL,0xf06f3b636558ee4fL,0x0000000000000078L } }, + /* 28 << 371 */ + { { 0xdca43ef744ad140dL,0x18c95116f62a3beeL,0x766d5e214e144f65L, + 0x1087ab025d99574cL,0x3bae4ddc4954da34L,0x673d2cac0ddb3e23L, + 0xfc2f18ff40cd4d2bL,0xb67bbdbf2e578cdcL,0x0000000000000142L }, + { 0xa3352aac08191be9L,0x24523f454fcf86eaL,0xca79a37a6eb6b7ebL, + 0x24b8db952ce7f66aL,0xb8ba67584e8f293eL,0xf38b987a8bde44aaL, + 0x0f3f142659cbcb5dL,0x647440675a6bc1fdL,0x000000000000001aL } }, + /* 29 << 371 */ + { { 0x4f553f1e50084eafL,0x17d18de4080f9a12L,0xc601052c365b8689L, + 0xbcbc88ed44666175L,0x520b172e6e738779L,0x17cf5474d9331305L, + 0xe5ca54f6af37d665L,0x729f3bb20505c180L,0x00000000000000acL }, + { 0xb10c1bff0f04c79dL,0x521302ad827da287L,0x2beca71081205674L, + 0xe817a9cc8bfc4bc6L,0x22bec5a6eb2b7888L,0x57bc24e031e4912aL, + 0x1a575f46f1fc81a1L,0x52726c48a7f20ee2L,0x000000000000006dL } }, + /* 30 << 371 */ + { { 0xd9d203c4598d6047L,0xe1356b6104351a31L,0x8657a9c458b1e12fL, + 0xcc26f6378ed5aaf3L,0x7272b2e03b6c0450L,0x2ac17670212aaa79L, + 0x354b375423e6acbdL,0x22b9deae2a4f7adfL,0x000000000000005eL }, + { 0x20421426b405d02bL,0x62f72a9f20ed4116L,0xb93c24e0308d525dL, + 0x5489564edbc245ecL,0xa904ab8f3b6d7c99L,0xe4c11c4d99f5d096L, + 0x1685220588930daeL,0xc2569fe5faef9b94L,0x0000000000000079L } }, + /* 31 << 371 */ + { { 0x54b7d472b9625eedL,0x96bf80ab1a3fb182L,0x29904fff4ee84a0fL, + 0xdd03b6edb1a2ee86L,0x940f809aa2dc4daeL,0x2afc6d8d553e27baL, + 0xc40f2a4177ffdc58L,0x8af0695509881ffdL,0x0000000000000052L }, + { 0xbfd40b6420913ccfL,0x4fc3e9d8f708e344L,0xe401cede8f9c65a3L, + 0x524d923b6757c00dL,0x34ae158b58ac6a2aL,0xcca4076ede97ca62L, + 0x2504275746574675L,0x5b8c8366883cbd67L,0x00000000000001faL } }, + /* 32 << 371 */ + { { 0x0079a845a2fda60fL,0x6dfdf102da8db63aL,0x7d2759600148a559L, + 0xe089c346684b9737L,0x09f865b2820bdb1cL,0x000daef6891ae57eL, + 
0x384f5eeea4f64f15L,0xdd68e4903da7e79cL,0x000000000000013aL }, + { 0x20b7457d514ac5dbL,0xa03028fb954721fdL,0x6f20d5185c77cb17L, + 0x28f92697ad74495aL,0x630e01566b327dc8L,0x6c5067d665832ca9L, + 0x9c9305fbd3f6db89L,0x681013fa905fffdfL,0x0000000000000188L } }, + /* 33 << 371 */ + { { 0x052a6504fcde276cL,0x9ff83d3e03dca446L,0x6c456483086044c5L, + 0x400568d5025b2693L,0xeb70c97d3644f851L,0x1c742ab829e4ac6fL, + 0x6af46714baae2f04L,0xd4479f7478f947c6L,0x00000000000001cdL }, + { 0x1e1fbf4082e4fc01L,0xeaa68ae275f50a5dL,0x499c4cf8e48656c0L, + 0x3f40e8ea21124f0aL,0xc566ab479726ab46L,0xfd51c6e3f33cf47cL, + 0xf8534cc9851da00dL,0x2d986dc979325a3eL,0x000000000000009fL } }, + /* 34 << 371 */ + { { 0x2d1cc7a62d628095L,0xbcbecaccf7084f83L,0x78bad38b2160880dL, + 0x1df37a887240ee90L,0x0b59cee4ffc4d943L,0xd4bf72533ff81538L, + 0x13ddd2ed2d735a8bL,0x70db19b11ebbaf08L,0x000000000000009dL }, + { 0x7aab738e2d9699b2L,0xf14683cb590a2690L,0x70724263822d9936L, + 0x814b413b358857e9L,0x9b284542f4d1fbe3L,0xba341ce4894486c9L, + 0xecfa29df24e844b1L,0x8c25f4197db398e4L,0x00000000000001bfL } }, + /* 35 << 371 */ + { { 0x0989fd6fa3f8bfbaL,0x02117ba75cf7c0eaL,0x4c5844374540f23dL, + 0x1710744e9e8e6c43L,0xdc1372b3f6438f10L,0xa0a2cb729c4e8561L, + 0xd1bab93a68368006L,0x08b2d51c5111f2d7L,0x00000000000001e7L }, + { 0x705118155d2f0743L,0x7d1b044ea896f737L,0x0f91e692474ba5bdL, + 0xb3f073c2df57596dL,0xac9fdc864f784ce3L,0x935a5e29481b5c27L, + 0xa971f3adde7a4bd6L,0xa1cffafcecf9a22dL,0x00000000000001e7L } }, + /* 36 << 371 */ + { { 0x1b626aed7e856756L,0x4c02b4f05910510fL,0xa2a90de9ad139464L, + 0x83ea60047ae191d0L,0x1928f46a9ab9cb7aL,0x5534f73888c91d27L, + 0xe0031a2f2b97814cL,0x85b9dbf6280ac4a1L,0x00000000000000b1L }, + { 0xc4c2339403d60f12L,0x946f2de54e7af304L,0xf6586026d0fd54c4L, + 0x19818309ac2c7138L,0xd9892eb25758d774L,0x451fc66b1c44714fL, + 0x3f25ab9540fc9e5cL,0xe56efc7a8aac7a22L,0x000000000000016eL } }, + /* 37 << 371 */ + { { 0x84f0e7a3cea4d0ceL,0xa7728bf113707f9aL,0x1d933350247981ebL, + 
0x34f0f2bb7bb094eaL,0x2f1819ad7e307990L,0xc52e823c730a56b0L, + 0x29203f56f2cc5b6bL,0x065fc11932c1430aL,0x0000000000000079L }, + { 0xc38deb590a0c113dL,0xa8e60a9b5c85b7fbL,0x1b4a907f44e567caL, + 0xa57c9bff9092f1d5L,0xfa8d7fa56e0d6c5bL,0x559ae140d6c660a4L, + 0xfabdb4288cfb6e11L,0x54cb7688b87dda9dL,0x0000000000000065L } }, + /* 38 << 371 */ + { { 0x4cdf8a5640802856L,0xb093e241a4a7480fL,0x576c0cf150b6457aL, + 0x14ff4a8b058e9d35L,0xb21d8c190109ed61L,0x7e5665920c1db4afL, + 0x36ba4ef915791634L,0xcaf371a71d77afa0L,0x00000000000000f5L }, + { 0xad4eb0fd4602316aL,0x4c1a0bfbe55635f8L,0xce0ed653ee1f570cL, + 0x8073dd5e35096165L,0x98a7d8c3635ca5ddL,0xc5c250773c9e3650L, + 0xe1fcd377487c2433L,0x3319733e097d8560L,0x000000000000013dL } }, + /* 39 << 371 */ + { { 0x304c6914e1dea7e4L,0x41d73698a178321eL,0x8282c3544499c318L, + 0xc89303ea889a1aa1L,0xf22db4e07f57871cL,0x4674f5c53ce03a4eL, + 0x6691881b63513ca4L,0x94a96d0fe012c78fL,0x00000000000000c9L }, + { 0xc3ecade9ef232dcaL,0xddd2a751d4c84690L,0x2f25657a9d3d585aL, + 0x87c2944a846e84c1L,0x1d698bf57907e091L,0x65c42161a2f220f5L, + 0xcff188228c247a7cL,0x3391b401c1c4a35aL,0x00000000000000ceL } }, + /* 40 << 371 */ + { { 0xab14f586b69e066eL,0x3d8308fe198d67f6L,0x9bc53a0cbb4ecfc3L, + 0xa2057c4059a3b558L,0xd21e271d18789187L,0xd6d29ba1562cf2b2L, + 0xa247ac0da5521075L,0x419bb80c0b39367fL,0x00000000000001b3L }, + { 0x0950c5ca7fce595fL,0xd07ff0ef4ee87d4eL,0xa2ed463145b980f1L, + 0x6bc63c99587079e1L,0x285d306134f889c3L,0x93c029ade97b8a76L, + 0x048f151b1dc86800L,0x69c5d19225bbc4ecL,0x000000000000007dL } }, + /* 41 << 371 */ + { { 0xd3f0afc8bdf94481L,0xc2e80a6c7e877adfL,0x640e4c028b332b2cL, + 0x48e6df1ddadae6eeL,0x2d21985cbe6727cdL,0x508bcca2a0c64c31L, + 0x2357969462e25c17L,0x2d968e90833629fbL,0x0000000000000074L }, + { 0x0b400771bf0004d1L,0x0dd2ba8f732ac6c7L,0xd2763f3ee9c8ebcaL, + 0x5a8ce5f4b4da65e4L,0x1e35a7e4fe30cce6L,0xa06e97884b791927L, + 0x73f83c9a9857643aL,0x67fa51e3abdb7475L,0x0000000000000180L } }, + /* 42 << 371 */ + { { 
0x4b13370446e00296L,0x58469825ecf84fdcL,0xe74203b87facef7bL, + 0x54e0d777083cae38L,0xa4793f4b884beec4L,0x9a8ad2accc02badfL, + 0x290844ff9959c92eL,0xf20beeebf615c267L,0x0000000000000004L }, + { 0xe05d9d8869641a28L,0x612febd9d4942cb6L,0xc4beed25bc912c67L, + 0x2823c041b15d0758L,0xb4a3b23607b1ef19L,0x6d93041ca6fcb0e9L, + 0x291d2e3a5fd96b41L,0xc8d023c0ad731fecL,0x0000000000000177L } }, + /* 43 << 371 */ + { { 0x69db450ecf05454aL,0x525c58ba75dcc6b7L,0xabeeb3bd0f61f7ccL, + 0xa7f6efe0d44b4aeaL,0x18229e9f49553eb7L,0xb2a463ea04fec5cfL, + 0x79b8b43a90bd5c0bL,0x4d51d7f82413ad3aL,0x0000000000000077L }, + { 0x0a5e7644b24c675bL,0xb2bc3292baddaff6L,0x7e9b435708757e9fL, + 0xa9fca74e7dcf5d67L,0xe472ad966fb18401L,0x0bf3fccaf63d812aL, + 0xfb195c5c96de8f40L,0x2301c12a4d363556L,0x0000000000000164L } }, + /* 44 << 371 */ + { { 0x853466f88199c13bL,0xdd186f3bb0442634L,0x6d825347e8a180a5L, + 0xf311344bf05ea1c4L,0x543f9e173f1923f1L,0xafe9831ddb31fbf5L, + 0xaac76e2e3fe85ce1L,0xcb2c4b1739f665e6L,0x0000000000000026L }, + { 0x3f57a6d775b32737L,0xbab11b99a81e2c0aL,0x8b08cbcc651dac3bL, + 0x889d9229f60dda2dL,0x01f560868c1dda9bL,0xfd2f6e052a872c52L, + 0xd3107bf129a751afL,0x1817e8dd467418d0L,0x0000000000000142L } }, + /* 45 << 371 */ + { { 0x7c9274e1d1cb0137L,0x01a8534b7cbb09a6L,0xc6c9af57e5131af1L, + 0x8fab2771d276194fL,0x28137f5170cb22baL,0x5da5963a1723f388L, + 0x7e695abb9271a05bL,0x5b529cb96f26897cL,0x00000000000000f5L }, + { 0x6740acd14d086e85L,0xee755ca560ddae0fL,0xc088e52bb4ff1a7aL, + 0x297fabb504dc5ab7L,0xff60a2d3d794648aL,0x6c6db4ae62e808e5L, + 0x0d8e99f3839f623bL,0x7578d594663b07d6L,0x00000000000000e2L } }, + /* 46 << 371 */ + { { 0x5dd4c07bf5fdc2eeL,0xbcec9e0dd1d27deeL,0xaffa6445e7512c4aL, + 0xd6b231ba13b92689L,0x0bd338d613334d2dL,0x9bd0284664cf3419L, + 0x0d52aaf854f6723bL,0x976e912d4b5a9b89L,0x00000000000000cbL }, + { 0x0948483cf4895aebL,0xa34bdf41f41c95caL,0x85e0ef7f3df7ae66L, + 0x2815167549268058L,0x78edc00d1b0440b3L,0xff99353b86ea5b8dL, + 
0x6b678541865c84ebL,0xc40ec092b9391588L,0x00000000000000a9L } }, + /* 47 << 371 */ + { { 0xa7b60f40bc65e597L,0x37cd630e90841defL,0x58baffce898b2bc7L, + 0xe885f4a0581ac6c7L,0xf964d08319b9fb1aL,0xf4a5ad855b3ac282L, + 0x90717aa56443f6a9L,0xac47726ca0dd88c9L,0x0000000000000182L }, + { 0x3cdce286373ca424L,0xa7aafe97d673367fL,0x0c49b41131deeda9L, + 0x847dd95df6d479a9L,0xaa72287549401096L,0xb2a054cf08697d21L, + 0x1b72fdcf99963174L,0xb2cca97313acd04dL,0x000000000000007cL } }, + /* 48 << 371 */ + { { 0xa0d6c9190fdc0e64L,0xd937bde3d6679591L,0x9fe10f20a7296b3eL, + 0xbdf41b3b5deae5cfL,0x6e5b59b969a28c2bL,0x17c81d3b37854490L, + 0x61256f231f7560abL,0x2696b1a3d3960a12L,0x000000000000018eL }, + { 0x5ec3263c732ac301L,0x6a9d57cc6da756ffL,0xb613de1323d4d5c1L, + 0x90e38bc60196d425L,0x68ed94b6587d8458L,0x332f6403561f02e9L, + 0x0b27d3cec8e40caeL,0xe9390832106640feL,0x000000000000019dL } }, + /* 49 << 371 */ + { { 0xe79632c57658c9c8L,0xb1f037013bb06475L,0x5bb0edce86ba01b4L, + 0x06572eac16fbe169L,0x2c8e64c3a5924068L,0x7ede0ffe23732feeL, + 0xa4591159b92acbf1L,0x1b4f74c83a486e3bL,0x000000000000011aL }, + { 0x0d82a706815e1bc0L,0x06a4c7551a01cb3cL,0xeac6109da5a2752fL, + 0x9c365f91d94f2e37L,0x20cdddbd3616120fL,0x92b19f0c53afa50aL, + 0x7f2c06d314dc5f36L,0x5a35a24d05dfe73eL,0x00000000000000ebL } }, + /* 50 << 371 */ + { { 0x2940829b80478312L,0x6d88f23799ba7c9aL,0xed75717d55f305fcL, + 0xf6234707f1c984b6L,0xfe0a83681e1eadd5L,0x793a987e7a7b7a56L, + 0x13e0532a73a9a0d3L,0xd848c44c039e4c62L,0x000000000000004dL }, + { 0x31b935621a681f5dL,0x0d47a8022edb93fdL,0xa221a02ed1f56820L, + 0xf9944066e96bcabaL,0x6c3e8d317fc20290L,0x2e17289b025a2ff7L, + 0x868dcd9301d72466L,0xf3c79dec7f37ad64L,0x0000000000000080L } }, + /* 51 << 371 */ + { { 0x80badd631af7163aL,0xe2608a8f629cc578L,0x1892410d5406ae87L, + 0xbc09e8683d5d72e5L,0x14574558087607f5L,0x00948800d06fba2eL, + 0x060ad156ab277f3fL,0xc764bee6823c305fL,0x000000000000014dL }, + { 0x917cc72654d505f2L,0xe7795a1811309b88L,0x081915da5c4cc728L, + 
0xe3effbf55402b2c3L,0x508c65bd11febf13L,0x816b40557ac06302L, + 0x4c50c5415428384bL,0xbaca344ca425f49dL,0x0000000000000111L } }, + /* 52 << 371 */ + { { 0xefebc3458f3735c1L,0xefe2d438ca5765afL,0xfd5881b2e7d0155dL, + 0x307995a912eee27eL,0x7b9d4f553b608fb7L,0x708a3f06ff1c1eafL, + 0x211f548861ef1913L,0x7bca0d46345312e5L,0x00000000000000eeL }, + { 0x4015390c5a59bc8eL,0x669a2ef16c9ea8beL,0x37929e3112ae532fL, + 0x6de9c6360a87cc7fL,0x676cd09d6c8c1ed2L,0x35aa3cabb7391859L, + 0x4a95c14768a7d508L,0xda6e0209342e55abL,0x00000000000000c1L } }, + /* 53 << 371 */ + { { 0x85cf89c2f55b9236L,0x96d932f8623e2b82L,0xbbbaf4ed76def064L, + 0x15ba1b2b59036e92L,0x1a2bc54b01f493c0L,0xb1dd70b1dbf80e4cL, + 0x165dfa44fcd048d4L,0xc5f38ed45a7a26fcL,0x00000000000000efL }, + { 0xffdb58f7756de9cbL,0x562fdec0964e7b0eL,0x40aff120746111edL, + 0xcfc66bfe369b2314L,0xd4bd6928d1966fc1L,0x0e4f2a7e7aa9bcd0L, + 0x7e4b8b6bd7888e90L,0x0bc4574aa4d094b7L,0x0000000000000192L } }, + /* 54 << 371 */ + { { 0xd94caad6bf3e4eb9L,0x940077c745546cf9L,0x23828c7cccb72d42L, + 0xa06e1dd3e7fd28f3L,0x736bbc49c6a02e4dL,0x0aca0d8e8dd80a94L, + 0x1e7355bdceb1aba9L,0x33cf05a96f071875L,0x00000000000000f6L }, + { 0x87b3066dd067d6e7L,0x05db3ffab697139aL,0x681dcd62d8d90d65L, + 0x9710f9de2cc68eddL,0xc7d09e4f9b997b9aL,0x496208d612c858e7L, + 0x78dfdea3f1692d2bL,0x8ac90137e2dc87f9L,0x0000000000000114L } }, + /* 55 << 371 */ + { { 0x28dc28c979900f5dL,0xa2e881e24e099d79L,0x0d7885bbc60cf219L, + 0x72bc84f5273c738dL,0x9321cc7f255f875fL,0x8e9b118bbc4969afL, + 0x6886bf6250d698a4L,0xa31bc597d162494dL,0x00000000000001f8L }, + { 0xdbea4f4c2ca3c41dL,0x7058c9c296f26b6dL,0x6ffbc7e3d72b2797L, + 0xf15734a58075e016L,0x8ae59627d4df5cc5L,0x942ceaa51c4364bbL, + 0x63d829e0d5a8b20aL,0x214a81864f8e049eL,0x00000000000000c2L } }, + /* 56 << 371 */ + { { 0xc0df77af85724c67L,0xcc83a425dd94fbe7L,0x6923e6ffe4c0e0ccL, + 0x3dad274320f8571aL,0x7ba73c28b62ef81bL,0xd60f9232838d2f78L, + 0x49a10395a7a6f55aL,0xc655ff3e8f133f51L,0x0000000000000075L }, + { 
0x5789909e2d6ab713L,0x5410badca52cb233L,0xbd3f37fe63454a5bL, + 0xc71998f0977b8cc6L,0xc70977414b81b017L,0x1d37afe2393bbc02L, + 0xc1f809b6d7cc8c20L,0x18920c3706ca3cd2L,0x0000000000000154L } }, + /* 57 << 371 */ + { { 0xdba5d98ececb3b2fL,0xfb97226090e743f2L,0xbd6ce397fe42c950L, + 0x28b067328d584404L,0x980fb92181815117L,0x02054320454978d5L, + 0x0563def626ab7bf0L,0xf531752a8c7146eeL,0x0000000000000140L }, + { 0xd5cb7c6a87f97b5aL,0xbc242e6e9e1a8cd7L,0xfdfb12e765cee902L, + 0xd2e3803a56a42f2aL,0xe8ac69ade9206ed8L,0xfb57c402668b14a8L, + 0xef8f858c226e171cL,0x5021b98bec8fb4fdL,0x00000000000000d5L } }, + /* 58 << 371 */ + { { 0x2449088deebada79L,0xa23a4de43ccdafb2L,0x78b6631a789d4228L, + 0x419af03cb6ffabffL,0xfa3699ee13133cb5L,0x737ec4d896c22a36L, + 0x32edef00af55b6baL,0xe6f1c72e96e0d6a2L,0x000000000000011eL }, + { 0xc0d0be8fc4de6c1aL,0xb7eede852cb83932L,0x65c116ca5f89a922L, + 0x77b8e6aff6c6c162L,0x7127e0625a82e691L,0xb9ae11c241062230L, + 0x64716a12cad22949L,0xd6cb5ed80e142596L,0x000000000000000aL } }, + /* 59 << 371 */ + { { 0x5563c8044eaaf6d0L,0x83785e884d61282aL,0x2d19641075b9e703L, + 0x738abe2a67031bddL,0x7351a702392114fdL,0xd74621a2b438377aL, + 0x224da5806c76f049L,0xa7ce99ec732477c6L,0x00000000000000aaL }, + { 0x6e0e5a3a38bca0cfL,0x576efbc8ce259960L,0xfef2bf2e07a2c533L, + 0x0c374e7b37f23263L,0xd8436d82f9603370L,0x5afdbcd6f047d841L, + 0xa40d0067db50a26fL,0xfbc47928c1287fd3L,0x0000000000000133L } }, + /* 60 << 371 */ + { { 0x10c10e89a4f78e1bL,0x0bf7c82eae25d4f8L,0x198522759bb8dd0bL, + 0xdeccab587ef26c90L,0x51d20addcf49361eL,0x997a0f9909827779L, + 0x102a745d200dafdbL,0x98e1484b723114d0L,0x0000000000000034L }, + { 0x3df6bedb00b69cd3L,0x7bd05f836f56e429L,0x9d31ea08762b3f9aL, + 0xef84ee087204e805L,0x20d6c20de5a264e8L,0x85b6881ad7fc3279L, + 0x8cff29870dd12086L,0x43898d6cbcca4f36L,0x000000000000007cL } }, + /* 61 << 371 */ + { { 0x8fdc9befa560413bL,0xe9ea9e2031f2730eL,0x8f3b05243a61d0e5L, + 0x2932274c6f9d4628L,0x36d58f3218df923fL,0xf66083b38611bc64L, + 
0x6348e55c2d53743fL,0x69059a3b79693090L,0x00000000000001abL }, + { 0xd72bb101f0d34585L,0x22d4ada32f8309f1L,0x19c6543bdb11c225L, + 0xab0ba09d5349e6fbL,0xfa38b0c8fd6bef5fL,0x40e20ab68c5ddf83L, + 0x28351a4d2e87ae87L,0xf51f0b52b8ff6cf6L,0x0000000000000024L } }, + /* 62 << 371 */ + { { 0xf768cfa5b3ce1936L,0x82c193e7cb0845b2L,0x0c2bbb74fc4dec8cL, + 0x6f0684557438b1bcL,0x7d7535bb62d4a96fL,0x40c7498bcced8820L, + 0x1ac733eed2c44657L,0xbc9de98f03f077d2L,0x000000000000004dL }, + { 0x0db45688d55b796cL,0xa2f1857c7be53b69L,0x4df73c3c4671711bL, + 0xfe01dc6cd27fc313L,0x2f770bdcc11c8592L,0xcc8eb844ac8593b7L, + 0x8163d65855437a70L,0xe6072e3b4b436e88L,0x000000000000014aL } }, + /* 63 << 371 */ + { { 0x6a90b471803837e5L,0x18d7ef7345c32e26L,0xe5f4a8575ee5bdd5L, + 0x5d0201d0e7e31757L,0xd856baa14b111dacL,0x6e346ca66b462db1L, + 0x45ea4fb338b88b95L,0xde8cf5a046477f38L,0x00000000000000f1L }, + { 0xce05bfba7c2ff5b3L,0x27291f1f1f7a8bd0L,0xc7620f3025a57de6L, + 0x1847ea5c759e1d2cL,0x02ab7ec1e3774792L,0x3807be6e22b81177L, + 0x389ec8496c206771L,0xf5c4fd984a375d6fL,0x00000000000000cdL } }, + /* 64 << 371 */ + { { 0x9d590ca72a708597L,0x5ea564a217c45d92L,0xa7098ca409f8d031L, + 0x231c716117a9a6d9L,0x5919a777951ecc81L,0x9fb5df367ab0cd85L, + 0x7f4d2d7a2b6b445cL,0x73a2764668251f9dL,0x0000000000000037L }, + { 0x7bc56153e9e14113L,0x4747084f030af56bL,0x865abbe91efa69ecL, + 0xaf6534b5da5d565fL,0x40549f0bb0001866L,0xc451348f812351afL, + 0xe7323ae9190eb107L,0xfb3fd76fbfc098f1L,0x000000000000001aL } }, + /* 0 << 378 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 378 */ + { { 0x7ef1f62ccf0f33caL,0x225317b7184de5d6L,0xba7645c1260fc821L, + 0x522c34f2d057844cL,0x53282114c4a6dff0L,0x6d3d84005f6dbdbdL, + 0x165e4194f3fb787aL,0x210a607ac8c575ebL,0x00000000000000fbL }, + { 0x68c834c2832c8deeL,0xcc4ff8611602f932L,0xdcfa537d456ed4bbL, + 0x68f201750c079647L,0x972d770b8f28ecd5L,0xaa8fdbe82d099800L, + 
0x06285716ecaddb29L,0x67fa9a61d5dc6040L,0x000000000000015bL } }, + /* 2 << 378 */ + { { 0xe12b714e622ed5c2L,0xb1e036a7cb699185L,0x3920a7371ab44645L, + 0x9844c8d9f5a325e1L,0xa91bea02c02de758L,0xaae624a018ade985L, + 0x3a700e40cc31937dL,0x47c0b398bfdea449L,0x00000000000000caL }, + { 0x04213750a9e1b74eL,0xa1d25057b3f7ff5bL,0xf8c7fdcc259dc673L, + 0xa837e675953ca2b9L,0xe23e1e1fabf04630L,0xafe1f1b99889ea3bL, + 0x40988791c4db25cbL,0xec8592e4f18669b8L,0x000000000000014aL } }, + /* 3 << 378 */ + { { 0x1fcf2222c7504de2L,0x7a048586b37c96d2L,0x75a974ff2600255fL, + 0xfd8effbeea7922b7L,0x871c04531c3aeb4aL,0xe9445f23f167a3eaL, + 0xe8b8f95431d66e47L,0x3ba0fb51b46bc080L,0x00000000000001dbL }, + { 0xfa565b995ec9920dL,0xfeee4e89a25ec3e5L,0x8428e8ea5fe155d8L, + 0x31b55db4caadb703L,0x1c66ace9fab8f8f9L,0x9190e7bf0f688545L, + 0x36695f2f7ad75316L,0x6674f106c6d6a5c6L,0x000000000000016bL } }, + /* 4 << 378 */ + { { 0x18caff50efbf6b21L,0x0eeb65ec51f2ba1dL,0x2cd8a43f296c458bL, + 0x04d795b29747ceabL,0x52b0bca6dc306c57L,0x39fe8ad517ec9c28L, + 0xa4a9e3fb81a0cb57L,0xdd6933a9a638d6f5L,0x000000000000017dL }, + { 0x9893eb50d39ac021L,0x3bf7087339bd296bL,0xa10440a7792dec05L, + 0x96be97e7a67ad18fL,0xcf2175ae17ae5f6dL,0x70fbb8bf0b6f0503L, + 0xbb565fadf7a755adL,0x8904f9a093ccfd37L,0x00000000000000f8L } }, + /* 5 << 378 */ + { { 0x971b01fc2042af7cL,0x11b2c0307449ebbcL,0xe151578f6c456ea3L, + 0xc31adfedd9445535L,0x5dc3e787b13a3d98L,0x980db984e1bc2ac0L, + 0x97f837868ed9752dL,0x0eccc88119cc9947L,0x000000000000010cL }, + { 0xe7a55af26e4202cfL,0xb38c0786032e736bL,0xe315cca3db912a92L, + 0xe02fb77c5d424e69L,0xfc196d63cee674b6L,0xd19ffd2edb4f40e4L, + 0xb9f89932b6e2749cL,0xfa1b8b625345d087L,0x00000000000001d7L } }, + /* 6 << 378 */ + { { 0xbbd6b9b6fcb957a9L,0xb8cd2aac28e9a858L,0x16959f6e00533aa4L, + 0xa81de300f05c87eeL,0xd64c0234e01f1fc6L,0xa35e2db9035bb306L, + 0xbf57719d17545c33L,0x67cf28a2fe2c9778L,0x00000000000000baL }, + { 0x747839247b2ee141L,0xb20fd87638107770L,0x38f43f8a115e285dL, + 
0xe4dc2337be44c1aaL,0xc0a9fdabebdb2590L,0x83b1017260412a11L, + 0xbc0797cb6c8e3144L,0xde47a666ebc61f01L,0x0000000000000060L } }, + /* 7 << 378 */ + { { 0x68c60c44993f8222L,0x3242a8afcabf4bf6L,0x6151a88033d5edecL, + 0xce5e0d4b5b66eb95L,0xd5a8ddd2df34b84fL,0x0a57559677e14d2bL, + 0x6b8e78f78bdbc216L,0x1f6b94ac13b5a167L,0x00000000000001b6L }, + { 0x5a252b95aaa7bcc7L,0xb0366a61ee28e333L,0x721a9b24e1fbdb73L, + 0x4e73cf1cdc60cbf5L,0x7744e830ddbf9b87L,0x0bc943e99d8e60c4L, + 0x48c37b3ef7aeaeb2L,0xea23eb3ac6610f28L,0x00000000000000dfL } }, + /* 8 << 378 */ + { { 0xc7c5d4242b72359fL,0x9aedfa4ed89718f9L,0x74773e4f858b335cL, + 0xcccf6730b5020d9eL,0x7f2b567baaa8bee0L,0x929b3d35d1d4c7cbL, + 0xbf1c55591d6602b4L,0x58fd96154782966dL,0x00000000000001c2L }, + { 0xf87aa9d87095c048L,0x145af97f67c5dc33L,0x1541a34b5808379cL, + 0xd5be204572f16afcL,0x008a4aa0a10e4d9eL,0xf75936efadb03c27L, + 0xcda58468a6d93005L,0x1817e5bad0cfa2ccL,0x0000000000000078L } }, + /* 9 << 378 */ + { { 0x7fd99a18a2f628e5L,0xeec261701304cb4fL,0xf99f01680146e6aeL, + 0xf3e3762978f53f23L,0x5fcabd21f138d376L,0x7872711aa084d16eL, + 0x96911ccd5f3b555aL,0xf959c94ce8c39ed8L,0x00000000000001d6L }, + { 0x2470f0a5f551f7aaL,0x7eef9b723dd57d82L,0x04110c484b42ac7fL, + 0x6c1a12a04eb664c8L,0x47f1715711e0fd68L,0x4e51416c90d00893L, + 0x38843dcf77b01086L,0xab132a0fbda87debL,0x00000000000000aeL } }, + /* 10 << 378 */ + { { 0x6258b29f813fbb71L,0x9f43e2a3d4201179L,0x04bce1fd4ae85168L, + 0xb6919e51b37d3de9L,0x147eea3e85fe635bL,0x56f4e6e7898401b9L, + 0x711ff12cb0b8a1deL,0xa6fa12cd8720cbd8L,0x00000000000000c1L }, + { 0x4b5a3c99e8a60e98L,0xd8e57a658a00f03fL,0x91a8dcbbcaebcec6L, + 0x44b04cf5355e112eL,0xacc706ab9cd62b2dL,0xd24a1f6ff56e7060L, + 0xd5d6a2b9b0814270L,0x1f19697e0659fb13L,0x00000000000000e8L } }, + /* 11 << 378 */ + { { 0x6cd1913f5c97d8d8L,0xab72e99fbb94b154L,0xc288647a9049a264L, + 0x6e9d0a5ea9bad9afL,0xc2c7f9c684af75dbL,0x171064782b1355e2L, + 0x8b29dbe3352a6b20L,0x179974899e7c0e22L,0x000000000000011fL }, + { 
0xef8d33f97c34be01L,0x1469347f10b46c03L,0xfd1e25cbdfc7f736L, + 0xdc1715fd11482a6fL,0x041c0ca1f7afc700L,0x513277296b2e7b33L, + 0x820b886909015f92L,0x7fd2bd94ba584623L,0x00000000000001a8L } }, + /* 12 << 378 */ + { { 0xf669c4582cff741fL,0x558296d0cb12d0f3L,0x4ca064d770be227aL, + 0x02b8d5cacf89f9f2L,0xb4e066608f1e5a13L,0xa030d85a9c08b765L, + 0xdfeb77d6922e04e8L,0x65930c729603967aL,0x000000000000015dL }, + { 0x70cdeaff4ff2e952L,0xbaa81ba88b6332a9L,0xe573d5b4798836fdL, + 0x5b68b878605c5eccL,0x3f0a6abd47bc37ebL,0x823a54e1dfa7f440L, + 0x2fc0aa7a5c500937L,0xb97b588c35953c9bL,0x00000000000001abL } }, + /* 13 << 378 */ + { { 0x7d789b9087b7dd4fL,0x4976f243c24b301aL,0x0f6198c9640d22cdL, + 0x7a23eecf5f188165L,0x822ff8c203fe646fL,0xeac1adb321eb3279L, + 0xd9d1b22e196e1874L,0xd06dc84c8fe3f31bL,0x00000000000001f1L }, + { 0xcff3200080218f0bL,0xf427e062189bbfe1L,0x11f7040839650e1aL, + 0x78cf93c09bd7d3abL,0x297ab5cc4bb1dc5cL,0x3cafe6cabdeb897eL, + 0x38ba64a35508aeceL,0xdfc60e77a99847bbL,0x000000000000013aL } }, + /* 14 << 378 */ + { { 0x727469356d9aeddcL,0x0063b3643e937a7eL,0x4451a20aa161dff0L, + 0xbcb491a8a3e9efb9L,0x878d694f5134d997L,0xed8ee2f3215771a0L, + 0xe231e6705410f9ccL,0x72716750ec1fea93L,0x000000000000010aL }, + { 0x254ea4bca45d8095L,0xb3d903725398d742L,0xe51298cff66aedfdL, + 0xa530fd64c377e046L,0x68b31f366c7f5f39L,0x7e157769c86662bcL, + 0x8ff3a79cde21fb57L,0xdd498d1d7c37978fL,0x00000000000001dcL } }, + /* 15 << 378 */ + { { 0xf72ba696bad7f9f1L,0xef0bbbbfd7bf9dd5L,0x8b8bb6f20e7a7b7aL, + 0x86de41c7cebdc20aL,0x3be5c2c2b0e47167L,0x2cf82c5efce45a8bL, + 0xb309beb60f8be277L,0x485f837752133ceeL,0x000000000000019bL }, + { 0x3f0fdc6f16b88e7aL,0x6e4ead572b6edd43L,0xe1e73101ebf2ff39L, + 0xbd513613ff830b18L,0x5803953eae63df26L,0xbc4b8e2d0f2a108eL, + 0x624c2bdb7feb9952L,0x91ce40cc4a8ed81fL,0x000000000000010dL } }, + /* 16 << 378 */ + { { 0xd89de8041ba719b6L,0x9a84d6145b7139e2L,0xc2f91c87d16e4f4bL, + 0x7adcb047bb8175eaL,0x4c301c4b3c2fe568L,0xec7dea0ce6e82790L, + 
0xcc2c78869ce824abL,0xaa0f82dc64e12c47L,0x000000000000016aL }, + { 0x3c6dca8ea0b4332dL,0xb161c4e9d82dc4dcL,0xfda8729063029487L, + 0xace5d0b634209806L,0xb8c940b78968ac6eL,0xb61329e9d0c0973fL, + 0xbcb0a143b1aa99b6L,0x96b831932211ae06L,0x0000000000000057L } }, + /* 17 << 378 */ + { { 0xcc654c3af4fab0d0L,0x3a7193d45e93c2bcL,0xdc03184c037e85acL, + 0x994431edc77bfee2L,0x7f840982a1d69ea6L,0x64144b55732776e9L, + 0x6dfdd076f3767206L,0x10851baf67f36980L,0x00000000000000a4L }, + { 0xbc53bbcd0a6119d6L,0x56f7202f83f4b808L,0x9bb6c525fc7d8415L, + 0x4e2c95178310de3cL,0x571b5e0936d1c854L,0x624cc7f6fabd72e2L, + 0x71eb58cdd9dc56bfL,0x9d76432a443b8059L,0x00000000000001c5L } }, + /* 18 << 378 */ + { { 0xb9118d534b29393bL,0x8bf825e2e9376465L,0xecc73fc2f6b988f1L, + 0xb0431205a33b16deL,0x91cc2704f5e72a40L,0x072fa1aca15f1e6dL, + 0x414dfb0e57824766L,0x86360addb12159a2L,0x00000000000000afL }, + { 0x9dd10faff209743aL,0xcf7178eb7cdd7a84L,0x8e5d30ddc7e5e621L, + 0x1a6ce1502049e4b0L,0xa18d488715cee376L,0x0db2ce2deb316c23L, + 0x0941fda5c1b42ea1L,0x895b347c8213b66dL,0x00000000000000feL } }, + /* 19 << 378 */ + { { 0x13fca9016d4b4f9cL,0xd76d3bc50fdeff6fL,0xf389e515b1be5101L, + 0xda9a159ba6ff67a0L,0xabe611a18258a9b7L,0x300fd58ae44d3117L, + 0xf942ed560729163cL,0x6a9c601afd9b49ceL,0x00000000000001dcL }, + { 0x01dd23e56a998f6eL,0x1e8e8da62ed942caL,0x141780e73437bc31L, + 0x5f81d0587b25adacL,0xd166b90f7126e231L,0x9c4af4b145aaf0d3L, + 0xb3fd98f0f33f7481L,0x98f6cb285307b8d2L,0x0000000000000037L } }, + /* 20 << 378 */ + { { 0x39a64d5d2fd93ad6L,0x667ff12d2637e7f1L,0xed31162e5151441bL, + 0x9abdf9168277e2bbL,0x1b7630f0ec0a5f93L,0x89a828cc5b002930L, + 0x912428adb246befaL,0x632b0787765dbfe1L,0x000000000000018dL }, + { 0xebf44ba86693bafaL,0x3ad447318a12bb84L,0xc0e12abb5d02d2fdL, + 0xccdcbc009ac44290L,0xfcc3f910ea70a61bL,0xcaed56197dbbd680L, + 0xeca08d0b2d159763L,0x4f51a9af6d9dc6dbL,0x0000000000000095L } }, + /* 21 << 378 */ + { { 0x3730726c925f6af8L,0x524421fae7224ab4L,0x1aec24f0a5f4383eL, + 
0x629f850c80fe5b2dL,0x8aaca9471188c165L,0xccff714298ede9e4L, + 0x43c517b119ab2bb4L,0xcaf6524f70094fd5L,0x00000000000001f6L }, + { 0xdc1ec8c7ccff9006L,0x2b848d94bd3c886bL,0xf0450380462da9cbL, + 0x5fc95f0aa7246725L,0xd8c6a8f472969208L,0x47615458614cff26L, + 0xceb01f21dab37dd7L,0x2d5fc9084a992e5bL,0x0000000000000095L } }, + /* 22 << 378 */ + { { 0x0a5c77dddfc1712dL,0x94a283f3cdb8fe3cL,0x9180c2149c7ddf13L, + 0xc61e53c7fc061c25L,0x3f2d5898ed6b66b0L,0xf956d429f2775780L, + 0x009936172599b777L,0x275022d3315c4538L,0x000000000000006cL }, + { 0x09ffdfe9389149f1L,0xdf706504130dcff6L,0xd5e9e831a43853f6L, + 0xcb16e7db2c76d906L,0xfa644e731a6023bbL,0x93ccda7c56319253L, + 0x0f36a540e75c969aL,0xe5824f21d1c63fc4L,0x00000000000001f1L } }, + /* 23 << 378 */ + { { 0x12a31002e3beda68L,0xae12c484bc59084cL,0x21f82d81a6320c77L, + 0x82a9e0e6c37dd1e5L,0x672d57b4396dc114L,0xde55fbfb692cad7fL, + 0x58613e19cea6fcb1L,0x6c1ac62e9322e880L,0x00000000000000c6L }, + { 0x92578cedb2d1f22dL,0xdff3a47d86773c2fL,0x6bd302c02edaad87L, + 0xa320a18615e3ca42L,0x2f12c817bac6dcf9L,0x59fab6f0e98fb56aL, + 0x378b36fb6afdac5aL,0xb71742e57cba0062L,0x0000000000000141L } }, + /* 24 << 378 */ + { { 0x6372245924ff803aL,0x4c6743967c6401e9L,0x322820a88efb71adL, + 0x49a9bb7e18386499L,0x809f13b4011be153L,0x21ea3bed1caa0e61L, + 0x062a8eb9cb8a3c34L,0xdad37ca0d371b482L,0x000000000000004dL }, + { 0x517e7e99425737bdL,0xf0d5129c74652ea7L,0x46e5f98b493390ddL, + 0x889c952faccd9071L,0xc01d4483732246ddL,0x6e3982ca191b7f2aL, + 0xef390898ec4ffa9dL,0xac0c56044dcf0ca9L,0x000000000000011dL } }, + /* 25 << 378 */ + { { 0xa89e9095f20520f0L,0xb661e4bf9109833aL,0x59a9216a15746d4bL, + 0x66734a014b9b7b1aL,0x92958d4d87b01cacL,0x1fd10ccd73679175L, + 0x12a3f35acc92c829L,0x60943f7e876f0493L,0x00000000000001cbL }, + { 0x8920cfd084536908L,0x27c0cc691195d0b0L,0x8203d7edf0d640ebL, + 0xc2aa233aa5db5d5cL,0x13dca3e4097a5f7dL,0x95698b5cddbee822L, + 0x68aa3cfad37f3c60L,0xe481f4454b5db726L,0x00000000000001ccL } }, + /* 26 << 378 */ + { { 
0x4faf81a13da0fcd3L,0xc4f41b64bf5eedc6L,0x83413747016dba9cL, + 0x3e386d921053b061L,0xbc1f8c328779267fL,0xfae5180521378daeL, + 0x4c355e52cf8aed54L,0xbf4cc913379b3965L,0x00000000000001fbL }, + { 0x9681bfb7b9829287L,0x1f8e11d1d0a0cab4L,0x5eabefb67b24dd7aL, + 0x49d30515db2856bfL,0xb7fdc303403cf7b4L,0x319eb33d9de58853L, + 0x7c0a5c445949bdfdL,0xa4de3e0f2cc2f57eL,0x00000000000000d5L } }, + /* 27 << 378 */ + { { 0xcc57d8f66ee81af4L,0x0853a12ac61b41b4L,0x1c2547e7fe3ce741L, + 0x5c804f085be4d27fL,0x470ef59f6b35897cL,0xa0ac230bf6f2a082L, + 0xdae8427f0e65b7a4L,0xc72903cbe4637dc2L,0x0000000000000170L }, + { 0xcaf13ad329899253L,0x4ac375937ff415acL,0xeca18182a3e41fefL, + 0xce65d25d560f469bL,0xea0fd26a373b6eb3L,0x0261286e9b094b32L, + 0x93f9f0b4d6c2e004L,0x6cca5edad8c82ceeL,0x000000000000010fL } }, + /* 28 << 378 */ + { { 0xf0b9ede34dd2d2d3L,0xb06079398c4d1ceeL,0x1ca4ec570611df8fL, + 0x7f32d19753b914d6L,0x99b6f882dfa4df49L,0xfbe161cfb265f518L, + 0xc6db288e40c76bbcL,0xc8970e3f281437b1L,0x0000000000000139L }, + { 0x9d76d4a51a18db0aL,0x59e76809238445c6L,0xa0f52e646a48deaeL, + 0x3692a8a8338904d4L,0x3fe48d48ba28d02aL,0x9f2130aab958b23dL, + 0x210a547ecf8cd8e9L,0x46e7623e5179029aL,0x00000000000000bcL } }, + /* 29 << 378 */ + { { 0x53f5f5178569de0bL,0x4e6537efa3b8228bL,0x08bfd7df2c494a3eL, + 0xa3bcb0835d18c56aL,0xe4af9a2015abd36aL,0x09e5e04722ae49d3L, + 0xc8e36b51c397412eL,0x29196125dfd6ffdcL,0x0000000000000037L }, + { 0x6693d42066e82af2L,0x4fb5ac25fadfecf4L,0xb164f341bc2eda30L, + 0x04f0da03df7e18e9L,0x53e083ed285c2522L,0x2f841c4607496d5fL, + 0x747ed61b427e3224L,0x42d48a2449331c47L,0x0000000000000090L } }, + /* 30 << 378 */ + { { 0x85e622f707cff9c0L,0xb8f17575de26c2c4L,0xaa599c8cbd1f2537L, + 0xa43898660f10a87dL,0xd884accdd54bd70dL,0x4cb6081af85b6a22L, + 0x795d0348af6068caL,0x2fa1f312111fb967L,0x00000000000001c6L }, + { 0x80ed98b42ce0216aL,0x11debd5b76a91579L,0xe6e2957b8b468ee6L, + 0x4342a477d5b0f947L,0xea4dce18e48f5922L,0x52271ff6d5e930f7L, + 
0x957be36854819139L,0xd46357cc2613a69cL,0x0000000000000055L } }, + /* 31 << 378 */ + { { 0x2110cec0704bb446L,0x5227cdf087881081L,0x471af5acf3bba746L, + 0xc4c1c63bddaa6a2eL,0x411d5b30c65b5847L,0x428e1d0e0b7a1558L, + 0x2b3ee41fa104910eL,0x6dfe7d92a5cb24d1L,0x0000000000000071L }, + { 0x2d6421d20c322db5L,0x3da59b47ec01fd37L,0x0297ffe5359e020fL, + 0x772575eb9679d700L,0x22d11608e029b068L,0x6f2151099c5d9d69L, + 0x23ee93a156e9b96aL,0x3dd497407cf5309dL,0x000000000000011fL } }, + /* 32 << 378 */ + { { 0xbe58970d503cfe9aL,0x72000a862dd620a9L,0x5825b2836d0ef833L, + 0xb88d53b7c706f2c7L,0x5f7df7cd0501f0f7L,0xf1fd2e3b2fa3b809L, + 0xf3afd9fb557798c9L,0x9254426ab970a87dL,0x0000000000000135L }, + { 0x577560bee8947e84L,0xc60b998cb0f49a8cL,0x81d18bef9829431bL, + 0xeaa5fa385f48ada5L,0xba2cb72612db61afL,0xfa9de7ddf3d26bbeL, + 0x97876d0626881c14L,0xaec866638c84185dL,0x000000000000015bL } }, + /* 33 << 378 */ + { { 0x8e48e4e19306d591L,0x4b6d69bfe9712645L,0x7247afac5d8aee55L, + 0xddf6c68c41f8fc2bL,0x084113863c7c87c0L,0x120957078b0f0c59L, + 0x46947fbebefc7642L,0x863bdf60f346ed41L,0x00000000000000cbL }, + { 0x75b2e77916cf6d59L,0x4e9533e6bbea42ffL,0xdce016328f701e08L, + 0xba39be402e0bc080L,0xa2f9af3ae20f673fL,0x40267291822b60a5L, + 0xbc77e4c4dbc55825L,0xd05a9f0c76e1abf0L,0x00000000000000a5L } }, + /* 34 << 378 */ + { { 0x63fa956e2881ab6bL,0x5f97abe5a6d783f0L,0x344bdf96bcb29243L, + 0x0472af080c53dc0fL,0x548e9be3e496461eL,0xe42405b45c491e0eL, + 0x511c368dad995038L,0x8598ee29b972eb7eL,0x000000000000019dL }, + { 0x0fdaeeaaa63dbc35L,0xe5afbb55acd56541L,0xeb3f3ef7dcfa1178L, + 0x8ef2f4777f100925L,0xbd0f868ae3e055ebL,0x375cec42990b7aa7L, + 0xd9208d18c2d32362L,0xdfa689dd976f791aL,0x00000000000000c7L } }, + /* 35 << 378 */ + { { 0x6f5a60e4fb6e4091L,0xe2350a12bb9d8f61L,0xc03c9d59633d52daL, + 0xcf0b5d046e9e386eL,0x3ec86484059f0bfdL,0x26f396363c158dedL, + 0x608edd54a4550c89L,0x6cb66b6cb7317194L,0x00000000000000c1L }, + { 0x1cc8c7fdab732b3eL,0xf1855941bc3597f7L,0xcadcddc4802f2f45L, + 
0x95a6da93f7af2141L,0xe555f720ed61d1aaL,0x2a080de97297d73aL, + 0x8e9cd2c7d3e8e814L,0xfb38e2637857c2b6L,0x000000000000015bL } }, + /* 36 << 378 */ + { { 0xc988f588852cd09dL,0x67d21418af218601L,0xda5f50ec0c10a857L, + 0x504418a30e2b2960L,0x9f44ea6bf4038971L,0x6b5d406a2e458d2dL, + 0xfc240795801e5fa5L,0x0356185cf7584734L,0x0000000000000102L }, + { 0x7a948fd177a57d4fL,0x5de3afba2fbb8782L,0x1ae44706bcffc674L, + 0xb97083b5563f5c74L,0xa91850575466ebd5L,0xf9c3a26cea623a4bL, + 0xc917551994105039L,0xa570c1c1819c3cdaL,0x0000000000000075L } }, + /* 37 << 378 */ + { { 0x46dc6fa89638c421L,0x4cf437b157d0f402L,0x6c0b0445701342c8L, + 0xf0b14b15d6de77a9L,0x0330a9ea2c12d1f0L,0x7ac4e03e350b8b49L, + 0x332483357702f7f0L,0x24c3fa9da7f3df83L,0x0000000000000134L }, + { 0x40137a2bf88c7b4bL,0x00fa0f35cc5d6687L,0x873e236070c137a3L, + 0xbe8cf5ee9f32b2e5L,0x818f219f5482134bL,0x6c068d2515968a1dL, + 0x7426988417e92272L,0x8754eee4604a22f4L,0x000000000000004fL } }, + /* 38 << 378 */ + { { 0xee6b7f37fad88237L,0x2d162f469c14454aL,0xd67666908adaa0c2L, + 0x7aad29a762504b81L,0x550d67ce1623cf44L,0x7d50d506ae816ec1L, + 0x56d5a4f9ffac6535L,0x0bde908295e83917L,0x0000000000000177L }, + { 0x56a58061dd9c907aL,0xd9d7c49ecd4c54bcL,0x4a9907a4bc6a0d7aL, + 0xeb86db82f6346080L,0xab70905fd6cc22b6L,0xe14a687600342763L, + 0xc033b0ad1dfee808L,0x2396008039a9c5dfL,0x0000000000000089L } }, + /* 39 << 378 */ + { { 0xf1f290a881e90f0aL,0x458336c79f36c486L,0xa956a20dce22f976L, + 0x027fc962a8516eeaL,0x7e14bad2c9b08021L,0x698bfa124cc598ddL, + 0x4bcf3d871d01279eL,0x1b94b7b30114d592L,0x00000000000001bdL }, + { 0x02fc0a2093157584L,0x92faf06480300f42L,0xb05d7c3f5318f4dfL, + 0x1fa3eb6f3723e3edL,0x94aac7051da5cedbL,0x50d945d9cc65e3d4L, + 0x651a7b0c7daa7bf6L,0x4e497b4681a86469L,0x0000000000000052L } }, + /* 40 << 378 */ + { { 0x297e708e61fbdc72L,0xf59c64fb4dcb9e90L,0x4a61f848814f38e2L, + 0x081dabdced48623fL,0x7ca592c953b0cfe1L,0x1462ce03c049d136L, + 0xb5642b78b47bd071L,0x99e3adbb87d24f68L,0x00000000000001dcL }, + { 
0x643879618e44f3b7L,0xc1910e8c3aa42510L,0x45de7844b07adfc5L, + 0xdb7910a633804208L,0x77d0ec7574be91e4L,0x3e015cfce842cac5L, + 0x11c32fa6554fec82L,0x7c43fc38829dc4ccL,0x0000000000000062L } }, + /* 41 << 378 */ + { { 0x02ee8b2122993f93L,0x4223000d4723ef89L,0x1c6bdfbfc16af026L, + 0x116239f3ddb9aadfL,0x14eeb9730c4acc54L,0xf50134a6ffea830cL, + 0x9ac616a5f1f1d7feL,0x33ffb2cf33eadfeeL,0x00000000000001f7L }, + { 0x68c32c5dc98eb88eL,0x258448994073d0f5L,0x885f35f066b25598L, + 0x8410c3f9269b7cdfL,0xee851ec9fb4d1244L,0xd94cf16a0f45f2d9L, + 0x18846fd1320d195bL,0x31093265cbb27bf2L,0x0000000000000158L } }, + /* 42 << 378 */ + { { 0x36f55d29067f0144L,0x31518706a7088a9bL,0x58c145700c4992a7L, + 0x38d22933424edaf8L,0x75e07014c6ebc015L,0x153a99adc1b7c05bL, + 0x94ab96cb56b197f9L,0x86645eeeb5eea03dL,0x000000000000001fL }, + { 0x5f97f0b5125f3db1L,0x981e52e815dc479dL,0x7ba639b09f915ec4L, + 0x5c27dc763e5a1a0aL,0x350c904109dcfd90L,0xf571c49b5507f709L, + 0xfa19d576fda7d982L,0x2b60baf9b4533fd6L,0x0000000000000182L } }, + /* 43 << 378 */ + { { 0x3fa53846346c78afL,0x3ad12e905ed12162L,0xcecabd5a086ef430L, + 0xdcc55887eaeef2deL,0x47ef2f7bcd688b2aL,0xd4c09587a1d916a7L, + 0x0959a988121ec022L,0xf419bde5980a39d9L,0x00000000000001faL }, + { 0xfed8228e2d3ca021L,0xdb264c8a05dd7141L,0x637f4aa24e4d3e80L, + 0x49ce73f1be791a76L,0xa9a2d7b25eeabfc5L,0x4961a76bd9e26135L, + 0xa9af174a2caa9899L,0x8cfe47a6f2b4ec6fL,0x000000000000016fL } }, + /* 44 << 378 */ + { { 0x3c3814b8a3acdb65L,0x42cea8683ac121c9L,0x95ccdca1554edbd9L, + 0xc8127847badd4e7aL,0x8249a372c95fe784L,0x1e5998d520661224L, + 0xf88a1a0815019264L,0x501ba0a9619de345L,0x00000000000000bbL }, + { 0x27bce70edabd6eccL,0xbdbf47ad41144962L,0x949d92fd302c2f1fL, + 0x7ee6077edac7d399L,0x4b1ca3ab70cbd239L,0xbe03cf169c08640cL, + 0x67d91ddae90bb15cL,0x989110bf3584fe3fL,0x0000000000000033L } }, + /* 45 << 378 */ + { { 0x1ca61c2dd76f0dc8L,0xa128cf5611e0f9f6L,0xfe51323b25502465L, + 0x13ab91d00bc94de6L,0x28079c8f3b1e6a02L,0x38d9d80583cc5869L, + 
0xd6a83dc6dd952429L,0x37bb912864101dadL,0x0000000000000028L }, + { 0x86036ada8abaa4f2L,0x8f29ab0d2138c5c1L,0x50ae84e55a3f84bdL, + 0xc1cbb22a565e767bL,0x9b9f37f45a6bd205L,0xad2a69778bb3fbb5L, + 0x53dbbc715b169cc1L,0x8695b45396820288L,0x0000000000000166L } }, + /* 46 << 378 */ + { { 0x957c58ab585f2470L,0x78c7130743ec2283L,0x3c996caae6b95670L, + 0x6361761891d24b79L,0xe08b5b76a4281338L,0x77269a7a2091999aL, + 0x83e93389d6fac12bL,0x4599815c57805e7eL,0x00000000000000efL }, + { 0xddcafd581b749d60L,0xea0911b95dd775ceL,0xb375a06039909bbbL, + 0x9e821684d7a4a2a4L,0xc1f34df74d520003L,0x427a57eafda0d68cL, + 0x92e8fd75768380feL,0xc70e584b69988733L,0x0000000000000083L } }, + /* 47 << 378 */ + { { 0xbcdc8a861fc73886L,0xbce7b5b885f4eabfL,0xd536699712d0b54fL, + 0x76ab760e8ac35fafL,0x5da79c8b25cb3d53L,0x435013bd284b22b0L, + 0x7a9059fffb9c2d44L,0xc8c98a74c8283091L,0x00000000000000faL }, + { 0xd68585686a560d41L,0x21648ff9f86d0cadL,0x79bae93500f23e11L, + 0x92b6a5633f41bfe6L,0x1ca24a5ea6ff9d19L,0x12138d86b3a31dbdL, + 0x7a593f3c4180e5f6L,0x7a93e4a3f75d9718L,0x00000000000001aeL } }, + /* 48 << 378 */ + { { 0x77d57155881b7b9aL,0x41639a4746101b26L,0x06edc878203c9899L, + 0xe4ac1d9648d8ec5aL,0x7cec08752db1051dL,0xaf9facf257c860a9L, + 0x94ad13ccfa3dd49dL,0x853273184e345924L,0x00000000000000e9L }, + { 0x9956ad2c84311ec6L,0x3d494579bfb208a2L,0xaa60bb0ed8ccd0a5L, + 0x6d3d4df518418197L,0x56386035884b2cb1L,0x7a104900464eebecL, + 0x0785d58b72ebaddcL,0xf5ad930908356589L,0x0000000000000191L } }, + /* 49 << 378 */ + { { 0x9cd32d496a0eb239L,0xffc5d2115f947b28L,0xd8fc9f9147a7c456L, + 0x0aa962f8dabfa006L,0xde49f418f9dc0c43L,0x019ab5542bf89f6eL, + 0x1feafd6ce086e756L,0x640de15457b70d2fL,0x00000000000001eaL }, + { 0xa5b450dfb341f2e8L,0x2b3b8dc21fa4d7ecL,0xfe158c5b65700970L, + 0xc76f0c6c14a4d28cL,0x22b2faa32a1c0cfbL,0x63df95e94f73de9bL, + 0x649ebafd52ce6fceL,0x1e1e64ad89f379c8L,0x0000000000000128L } }, + /* 50 << 378 */ + { { 0xcd7ec65fe314b91dL,0x794c5c9db6d1882eL,0x3be845c193805243L, + 
0x442f9269a76572cbL,0x6a284b3606390457L,0x5017a685566b2b8dL, + 0x3c429a7e068147d7L,0xe6e676dc3d7d6573L,0x000000000000000dL }, + { 0xb0e3e213d011d3d7L,0x000fc6342254ec45L,0xe1ae05778f4dba98L, + 0x051781505eb4cfe7L,0xf48317f98c099058L,0x4aad616562e46888L, + 0x8b24c7fae08df65cL,0xc6fdaf0725f0f3bfL,0x00000000000001aeL } }, + /* 51 << 378 */ + { { 0x54b4c7c446b8ad3bL,0xa20b60629f0c5ef3L,0x6bfd32c40430b0bcL, + 0x6a9e2a3514c0b78eL,0x0de18d29d1bc6260L,0x9e69f8b6dcacf599L, + 0xf46879ae0342f0d4L,0x3a97c39069b78c8cL,0x00000000000000e3L }, + { 0x0d979410c49c5e32L,0xc8ea1cec45e6c311L,0x535c6afae971af26L, + 0x2d1dfd6cecf4a950L,0x542ddbffbb1970d2L,0x8e8a91100eb842aaL, + 0x5b8b960b6529ed29L,0x6783066072a2a6ceL,0x00000000000000cbL } }, + /* 52 << 378 */ + { { 0xa58e52d8f7a4d317L,0xaf23bf92dc6f7aecL,0x70121c821c520880L, + 0xf68499f37bc182b5L,0x0edc4e6fb02b3e34L,0x4378badce425564dL, + 0x3b74b05fe75eb0bcL,0x5a234bae599d4d45L,0x000000000000011bL }, + { 0x3094e3c091572efbL,0xc370662f6f509233L,0xc741ef37beb6af1eL, + 0x5e1d62655bf3bbd6L,0x39044bc61029591dL,0x05ff006b7c1724f3L, + 0xde62a8fca492bb41L,0x9682c9ebd3f4a832L,0x0000000000000159L } }, + /* 53 << 378 */ + { { 0x06101e47154beb06L,0x8a47bae12dd56ba5L,0x976db80f6a6e6cbbL, + 0x301b4afc4cb3e0c5L,0xdd64084a266c37a9L,0xa07718a2cb920e87L, + 0x9ddf6bc5d55d3754L,0xaaedcf9b5f737e92L,0x00000000000000dbL }, + { 0x635303c5597c9f7dL,0x1808eca68f6f1955L,0x4ab48d642f11e00dL, + 0xa458c021206d04abL,0xa008af7312ba62a0L,0x5eeaf4ca985ad01dL, + 0x1bcc70a06bfde38bL,0x2e6677736abc5c8cL,0x000000000000009bL } }, + /* 54 << 378 */ + { { 0x989800d0d51106fcL,0x414919311337369cL,0x1789e03117476c07L, + 0xc38797d77a766687L,0x1eca11cde24df708L,0xe7932afec333cebfL, + 0x521256dc587ca6d4L,0x175eeb6278afc48bL,0x000000000000019dL }, + { 0x9bcc1e605a409618L,0xb09ed90848452cb0L,0xc5b18a20839bb71fL, + 0x1137bdbf58d1f0ebL,0x4a020da2d9349073L,0x86cb98f90426e19aL, + 0x734d928504ee89f2L,0x2f82c2c9578df861L,0x00000000000001ffL } }, + /* 55 << 378 */ + { { 
0xe119b7ec4dc900efL,0x35ef1e2ee7f6b120L,0x6c02b0384a8f5961L, + 0x8b99d41eea69e482L,0xe43de7767de16897L,0x48c7f64aef564cc0L, + 0xfa0d48d3b3691c55L,0x27688e1d0cebe168L,0x000000000000012dL }, + { 0x192bab59f1e8aa56L,0xd9bce84222664c4bL,0x73dee69a5fbb90dfL, + 0x973907e9678eec7cL,0xa1568020dcefce7cL,0xf0a37f14c72b6736L, + 0x3c5ce56d115d70abL,0x4c444963700cde74L,0x0000000000000027L } }, + /* 56 << 378 */ + { { 0xb32047f1686b8e76L,0xc292c8960fead8feL,0xf552fe3f3d1bab8cL, + 0xdaa248af483f96f7L,0x2ada20d3b898320aL,0xc0e0325ce0ee1e67L, + 0x4769dc2e72f55358L,0x33cb916bbf19057bL,0x00000000000000ffL }, + { 0xc1673b3e8a2a8023L,0x39484339b3d830bbL,0x91f2b0f7fee1a7e1L, + 0x0e2da8d5eeac1c7fL,0xaa77624c0292b469L,0x0a57d873d551849dL, + 0x6c78f50f595c052eL,0x33be2fc03744d368L,0x0000000000000000L } }, + /* 57 << 378 */ + { { 0x2a44d59cb95bfbc7L,0x02721445757b2e4cL,0x9a35c0d0c4ba0fd1L, + 0x588679287bbf16cdL,0xb250c3b6cd5c4373L,0x1334f593a72dc1ecL, + 0xc729aec7e9dd1186L,0x7d783ee76442117fL,0x000000000000006bL }, + { 0x94d753e01d0c3d58L,0x3c96b6b638fff1d0L,0x0165d4cf9f22aedcL, + 0x565ce447eb09ecb2L,0x916ab8c27d1b5fc8L,0x759049861fe28f47L, + 0xdc0172730766822fL,0x295b6799ba9ddf82L,0x00000000000000c2L } }, + /* 58 << 378 */ + { { 0xeb2dc700fd394524L,0xfeac25e1d144a193L,0x9b9e20d4d700de34L, + 0xb7de3dcdcae6c578L,0xd2b130fc64b8c44eL,0x2f38b9c6c0a24c78L, + 0xbc0c4d2339c51692L,0x09fc3bb7f0a87c29L,0x00000000000001eaL }, + { 0xc0146d59288a18cbL,0x0390fff82057898aL,0xf50646cc1a248697L, + 0x2866b5786190c9bbL,0x2179a3e5f18b0d82L,0xa1467d4b6d1550d5L, + 0x3f764ac4a485f508L,0x713e87e7b16d9964L,0x0000000000000038L } }, + /* 59 << 378 */ + { { 0xb78b208d153147d8L,0xbb8fda83f5aba599L,0xa17bebf0692f8e10L, + 0xea3ce68fe2f61fd7L,0xce34d743f27289ebL,0xf8dc8834a6ed4d37L, + 0x7823bdc45a72a376L,0x84253be985ccab89L,0x000000000000001bL }, + { 0xc52b1b18edd17c50L,0x855aa457278702eeL,0xa4df8b06f0d89cc4L, + 0x9711592b7d65576fL,0x75ddf111029ef7e8L,0x66e25d8b3d858f44L, + 
0x61753b0680ca5a3cL,0x40b270c67025f431L,0x0000000000000026L } }, + /* 60 << 378 */ + { { 0xbc3f4c479d702e02L,0x6a06f96d86c216d1L,0x9469cef28c2dc6f3L, + 0x753d64d2c98d9271L,0x7c7ec13b051508c9L,0x7b94098d95fd48e2L, + 0x724358006bb4f8d3L,0x01d3f3d188eee18aL,0x0000000000000039L }, + { 0x764baacf1a6a057dL,0x7d880282f7326213L,0x37c2394043d2615bL, + 0x7e09836a6eef57deL,0x3f7e265cd790bbf6L,0x10025e4549a4f757L, + 0x515d51bbdcb5b373L,0x05699be5f534f5fbL,0x000000000000005eL } }, + /* 61 << 378 */ + { { 0x7edb67b24d9df16eL,0xbeb1fd5b9697647eL,0x47a1a16ea9c068ccL, + 0x2c834cb2b36fc492L,0x120019eb5e7d068fL,0x34074d5a2e4afc37L, + 0xc2e75a089febffadL,0xf789c01d12a0b113L,0x00000000000000ecL }, + { 0x7cc8481c4f63e95dL,0xb6f5e67341ec8492L,0x4feee2f212d6bbaeL, + 0xc60be12a62737198L,0xce48ba8d9388c8aaL,0x7e6b28eed47186dcL, + 0xd62fb8a0145ae15dL,0xb08e7f37a6edd2adL,0x00000000000000b3L } }, + /* 62 << 378 */ + { { 0xc9101947f1a2cfc7L,0xe3b88442fe8ff8b9L,0x61ba92f3ab98fcceL, + 0x58bb6747b2128502L,0x91ea81ead23a937eL,0x8d0846ad6a138d6cL, + 0x9d4b901e5ec186c5L,0xe2cf1d36f0cf52fdL,0x00000000000000a8L }, + { 0x3cd95aa50fe009e3L,0x933f220bc246d7faL,0x325b953ec0d3f179L, + 0x366196e9d25f08d1L,0x1b47a8966b247f04L,0x7e304fe85044a3b6L, + 0xce4199cc201f4438L,0xd816ed7de850cf8aL,0x00000000000001e2L } }, + /* 63 << 378 */ + { { 0x8052bcc2eafde3c6L,0xd80dca3e9b9351c6L,0x957a0b78e510ec55L, + 0x6cf7a4d7d0f3d49cL,0xb55b5ed2e3708855L,0x94ce53a450515ef5L, + 0x752e68fdcccde771L,0x95b49c5d4b31604eL,0x000000000000009bL }, + { 0xed84542a33b84ba0L,0x41656dafcf833a49L,0x60eb8d9113262619L, + 0xd351476cda206701L,0x67d4bb1c5e340587L,0x9a8ae0da903f6673L, + 0x5801b4bb90cff4e4L,0x6a4aa36bf8cf6d47L,0x000000000000002dL } }, + /* 64 << 378 */ + { { 0x442ebf300bcf3b88L,0x4b863857dc5bfd2bL,0x632a1056c50960c2L, + 0xdbf49444c9d74020L,0x335f464b78287d19L,0x0cf8d5c75c490a63L, + 0x4f9d9bd7d7d2d2c6L,0x1490fb61ad52d8a9L,0x00000000000000ebL }, + { 0x42aabe7434a088dbL,0x871248c89e5b69eeL,0x7d624d6e55a87edbL, + 
0x81bc0bcfbaee4473L,0xf450c5ad9559a1b0L,0xa604379cdf4c738cL, + 0x36b304e210ec0574L,0x948a50d38d8cf3d9L,0x000000000000018eL } }, + /* 0 << 385 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 385 */ + { { 0xc6ecf2f55565eb6eL,0x05e894602b260a3dL,0x459e260432c54616L, + 0xfbf40aab47dd43acL,0x54d5949f9ad0b934L,0x992295a5cea0eef4L, + 0x8a5af63cd5520b7aL,0x536ae69fa7f1ffddL,0x000000000000017bL }, + { 0xddd868a73f0bc9f5L,0xb33a39d1bb82dc6dL,0x878d8aa048953ce8L, + 0xf2a35ce29802f566L,0xa2b9188a07f25b60L,0xd01fed060933ca89L, + 0x89013fce3114f728L,0x8128fdbe87828fadL,0x0000000000000054L } }, + /* 2 << 385 */ + { { 0xf20e17f13f9fa837L,0x0f66494807db1380L,0x5bcdc63187f0c342L, + 0x56c64eccbd3f369bL,0x5e2dd96811c76666L,0x77a37823dc4303ecL, + 0xa88173a174b208cfL,0x1c6268951af41e96L,0x00000000000001c5L }, + { 0x72da351465467b39L,0xa97a3735ce45abdbL,0xa1cb4a2cc98eee0cL, + 0x03dd02d494c63a07L,0xd22b087a3b2689f6L,0x5a50b83c3043678eL, + 0x069adf5f844a417cL,0xa9eb7a19983fcfc1L,0x00000000000000c6L } }, + /* 3 << 385 */ + { { 0x4abf6de480e0c811L,0xfb0db238cf33e5aeL,0x3f65131865638f5aL, + 0x93e5d62e41078c4bL,0x53ec3d90b4b44d9bL,0x9fe564607ddcf3b9L, + 0x4714385079848b05L,0xbd3495a99e60315dL,0x0000000000000144L }, + { 0x90409b61670bedefL,0xf0b1baf0b61f1014L,0xed3ccb5ccc4ad55fL, + 0x391b2125ebc6a449L,0x9f49ab65c399ddd3L,0xf7706ca403eddc81L, + 0xe42d1379579211fdL,0x9452b0d11ede88c3L,0x00000000000001fbL } }, + /* 4 << 385 */ + { { 0x984da95de0ea0baeL,0x7769cffa3ab813baL,0x02fd64a784cc8b68L, + 0x138655f25ea90ed5L,0x71074c7c40b410c4L,0xf3971b004a05f173L, + 0x113d8242d36b4f4dL,0x63bd460f4cdd1b0eL,0x000000000000015bL }, + { 0x26bfaf816db9fe99L,0x001943c40a3c3a72L,0x258eccec8adf321fL, + 0x8bf97f30f50d775cL,0xa7b60597e6899fbfL,0x622bf2cb49eabd50L, + 0x9ff457e9b19e9649L,0x607570fde2dbb651L,0x000000000000010aL } }, + /* 5 << 385 */ + { { 0xf878768dc39ebd2dL,0xb1b035621b3a6b46L,0xe72832d029c2b56dL, + 
0x218d6d25056bd4caL,0x612ca701aec98ff1L,0xe163cfb4087e18faL, + 0x87de2193c2bec7a0L,0xd4021463b488ad62L,0x0000000000000063L }, + { 0x0edd28a4b8a8a68fL,0xf1ffbbca57379793L,0x35148b571e59f460L, + 0x05e012671c86cefcL,0x8a9be0b60125bc67L,0xe7e5a7b6ba39760cL, + 0x6f2c362f681eeb77L,0x6da71b8a21896363L,0x00000000000001f6L } }, + /* 6 << 385 */ + { { 0xeef26ae3c25300f5L,0x8bbffbbfb5cd52d2L,0xc6883fe52008ae5eL, + 0xd9c9a0c8e2b01d57L,0x9acae3b306c31ab5L,0x7b2f19eb43b0c30bL, + 0x95d3152ab1298985L,0x4794f0ec5f12ff03L,0x00000000000000a7L }, + { 0x3bdefa96ef21ba62L,0x3b32556422a57592L,0x573f6bb2555fba4fL, + 0x732ad6b3bae1a0fbL,0xfc41bc29a2761ee0L,0x621784f7aba990f9L, + 0xd4ba1ab2e48a868bL,0x66312db4d1399b20L,0x00000000000001d9L } }, + /* 7 << 385 */ + { { 0x8049507015ff7901L,0x1478179a05613016L,0x59efd46ca8be1593L, + 0xbd1f8e76113933ccL,0x09e6abf36992abb5L,0xc95822eca1d045e8L, + 0x7436d5981f6c515cL,0x8d7c824f1bbfbeb2L,0x00000000000000e4L }, + { 0x7d026a123aa9274aL,0x830bfca50bd883d5L,0xea9b5934fbf1bcd7L, + 0xbf8c71836b06d84bL,0x4c0885bc205fea9cL,0x5fed94ad4cff3b03L, + 0xd9f7b8f98be81d5dL,0x64d908c6893fa1b5L,0x000000000000007cL } }, + /* 8 << 385 */ + { { 0x51318ac974ba879aL,0x23723b6da68b584cL,0xa7872bf002ac441eL, + 0x1e0c8fd665c7b9f4L,0xe8e3bc24b893449dL,0x2e83c6b8f06e02a6L, + 0xba754c0178d50ef5L,0x7c6c06ef2c547c38L,0x0000000000000132L }, + { 0xabbfe4266dee6431L,0x2d51aac612fd88eaL,0xc6f215251c83591eL, + 0x85c550e59ee42180L,0xe1a4297da8cd1c50L,0x28f07cf6b55723cfL, + 0xbede788e4113977bL,0x8c7572fcbedaeeabL,0x00000000000001abL } }, + /* 9 << 385 */ + { { 0x60221dac8c4046d7L,0xb29cba0533e800a3L,0xadcc3238ade5ecb5L, + 0x7c31f4169d44fbb1L,0x404d9db9fb584dceL,0x7fb5b64fb56640c3L, + 0xac38556d5e0e529dL,0xafa97e58214dd3d2L,0x000000000000008dL }, + { 0xa24be8f4a0417228L,0x0b7b8efb5859d256L,0x70a6c4f46d40f4d3L, + 0x02c765814b0aa0daL,0x353a45a5197e7128L,0xf9bd691dbb79a5f7L, + 0xb428835a81ff0839L,0x89031e1bf8d35083L,0x0000000000000062L } }, + /* 10 << 385 */ + { { 
0x92ad8865b1b131e3L,0xa2d8e77b3103a569L,0xd081c2a22d46d978L, + 0x7f1757fe957e5123L,0x258ebe578c2f4a29L,0x67dff774bb023960L, + 0x348ace6f79bb89e6L,0xaa23d41e5297feeeL,0x00000000000000adL }, + { 0xbe8697e4a856e349L,0x2d3107c7e737bc92L,0x19b99e2f3e58c73fL, + 0x11837e89c5062b4dL,0xc2a68bd6f73092e8L,0x80babf047dbc2becL, + 0xfac9c42fe03d29beL,0xb7e929a3aca6ac1dL,0x0000000000000035L } }, + /* 11 << 385 */ + { { 0x8c4765f47229f2cfL,0xc8572c7d29c7e408L,0xf7bcff0b98553b8fL, + 0xbd8e805c1c3e85c1L,0xb2d8dc14369c9dbcL,0x63b17347afd0ed33L, + 0xb3f90b99d2d47136L,0xf1fadf5488ebb89fL,0x0000000000000052L }, + { 0xe04cd6862e48f6b6L,0xf76ec1fa7a543a03L,0x20b50eebf9327d53L, + 0xd9309aa39cce80c1L,0x3997de2cb01adc78L,0xf4a8fcb0aa455bcfL, + 0xae359685f0199eafL,0xc8bc160985a31115L,0x00000000000001e8L } }, + /* 12 << 385 */ + { { 0xcbfda2506cdfcf62L,0x53113f8f15d70806L,0x4cf4b5dce80c79e6L, + 0x14c33f38b5232e2cL,0xce68bf8a96819808L,0xff81633abec13a2dL, + 0x969a055f1f77912aL,0x82aedb5c17822f63L,0x00000000000001bbL }, + { 0x69352a3bdb5017a0L,0xc3ece4982392bd59L,0xdca599bd1b712201L, + 0xed41a64f4b6bb033L,0x99b107acea5c5ac2L,0x504405d0322c3dbfL, + 0x5fbbedce26ed81c2L,0xccbc266dcce08f72L,0x000000000000008fL } }, + /* 13 << 385 */ + { { 0x4dc83ee2d1ae29fdL,0xbaa31bd4bafce02eL,0xfa74ddcf5d8f2ed3L, + 0x14b7d53567047459L,0x66b98c8706389572L,0x8215c3ab0a7617b7L, + 0xd14796ab06956b2bL,0xc80fd323b54dabacL,0x0000000000000130L }, + { 0x7400113068d54d81L,0x11823157d23ea02eL,0xaba5c32f9bb04c0aL, + 0x0f99a803a3c680e4L,0x05264fe5ef58a7d6L,0x67e9f52cd303758dL, + 0x4242203dcd5c1e1fL,0x98bea8cf604d23d4L,0x00000000000001ddL } }, + /* 14 << 385 */ + { { 0xc261b128c9294e78L,0x7e683d7ab56d383cL,0xa2af179eaec454d9L, + 0xb6fa2f8e64f12f29L,0xc0f33050e8ab3d63L,0x333793a2bd5691eaL, + 0x5db823fc3b368830L,0xf546d32a6a84ada1L,0x00000000000001c4L }, + { 0x43664d6eac1380a1L,0x2c04be94f4f0f52eL,0xb9371f7f106b47daL, + 0x110a17dc191734f1L,0x9dc73cb428b143e7L,0xfa1c936f7fd60d8bL, + 
0x7bf9010f2b6073d9L,0x1bb64c7ebece22e1L,0x0000000000000119L } }, + /* 15 << 385 */ + { { 0x25fbd37a93744747L,0x33a9d3b70c1a7fcaL,0x989f96796ae94f7fL, + 0xe2106d35e67bf578L,0x07fc53faf949bd10L,0xa8157d4460c12a97L, + 0x075634bf35cebe07L,0x71e2a9cdb340fcbdL,0x0000000000000121L }, + { 0x1ffb67f0c2e6d92fL,0xbb9c28bc18900dbcL,0x3fe4feedc24b9a66L, + 0x28e9362d04596798L,0x10ba62895c8dc108L,0xe68c61d13a1458d0L, + 0x13d1e0ad364d8338L,0x123d93809d04965bL,0x0000000000000140L } }, + /* 16 << 385 */ + { { 0x72916be74e6df03bL,0xb14241ed090a7b6fL,0xec0532fbcf5e6f43L, + 0x750cd95b680383c6L,0x2892d449c7cd9543L,0xf0550587d5d3ad9bL, + 0x3b330694e9c29c0dL,0x6fa03173bfe6860dL,0x0000000000000111L }, + { 0x6d55b870ab75c854L,0x156f9a4002f049f7L,0x72e2093bfa48732aL, + 0x4ba2d6cb9a3c272dL,0x58102be5d89ba292L,0x4074f320b3c5c3b0L, + 0xb478c05ee462c677L,0xebd4978f97182fe6L,0x0000000000000182L } }, + /* 17 << 385 */ + { { 0xae41de76e2896c2aL,0x1b2602cc3c601237L,0x9ae2e0c9af035deeL, + 0x209a883c726c6673L,0x9a58dd920a725ff4L,0x4f8eceb3832a10e3L, + 0x6c4d809c573317f8L,0x9121d8eac82c2a4cL,0x00000000000000cbL }, + { 0xadff98fb9601c342L,0x2d42a3a3bae085d2L,0x5cf981d50ac42277L, + 0xcc545cb079b74c44L,0x1adec8675675aa3bL,0x78603cb5b8e8ddb4L, + 0xe48875e3002ae8bfL,0x6839bf245e1cb314L,0x00000000000000bbL } }, + /* 18 << 385 */ + { { 0xf8fae13deca4dddaL,0x671c8c158c52a92dL,0x5cb3ecc642bbc3c4L, + 0x2ce7feeb00cf0233L,0xd7c0cb28aa604a63L,0x4a16938b01f87be7L, + 0x17016645164102ecL,0xb8909311c9d1539bL,0x00000000000000b0L }, + { 0x9567501142c22cc8L,0xc848925de932f6a0L,0xe4e700a4cd544b40L, + 0x026bbe7378d519e3L,0x90b7fb394152421aL,0x4e53739aec641e36L, + 0x481b7945122113b7L,0x49f3857d6b608fabL,0x00000000000001f5L } }, + /* 19 << 385 */ + { { 0xacef52d7c680deb3L,0x5387587420059de0L,0x1644e4b1f12a16d1L, + 0xc1fbfe45c593cdcfL,0x4408421f5d780052L,0xbc641892768b8368L, + 0x026a2b172ca3e625L,0x25d8c18e8294dd5eL,0x000000000000004aL }, + { 0xf32800d6f856d7a9L,0x9c00ffb281a1f59fL,0xd4140fa9178c667aL, + 
0x7f1e6c729c9e0df2L,0x5f77c3089371c816L,0x38e375d72850e31cL, + 0xfbf767e334b9cb0cL,0x69d86303ea279d88L,0x00000000000000b8L } }, + /* 20 << 385 */ + { { 0x6d3407e582d09cebL,0xc82381149c2bbfb3L,0x2305405592b91258L, + 0x8c29220ab1b79073L,0x6e2cb5dd0dc5de61L,0xc93055237d359d6fL, + 0x73cd337ac477e64fL,0x86a3f9fd7e8a066bL,0x00000000000001bcL }, + { 0x0da1223f1b1fc70dL,0xc3d2d816b3c73ba0L,0x3a52c249c299cb6cL, + 0xdd532fb9d7dfefa6L,0x2709093b148749ddL,0x920933d71472abd4L, + 0x5cbbb77a1e9c9913L,0x582303e159854987L,0x00000000000001feL } }, + /* 21 << 385 */ + { { 0xd952ed9293f30e54L,0xf2283ddb40dfd445L,0x081f0864238b3390L, + 0xf19b4cdf2a2746daL,0xa3328cb41ad23d9bL,0xdadef9c990130bc0L, + 0xc12002c460008b07L,0xde3d9eb4c595f30eL,0x0000000000000022L }, + { 0xfe6ef607efd5a387L,0x4cf4797c5e1e8932L,0x8f86442f05605d18L, + 0xff1f353769eb95f0L,0x79fb4e69e1868299L,0xb39a34470ce7a27aL, + 0x4c9b1dc040717364L,0xd6b2243f73ee2b9cL,0x0000000000000115L } }, + /* 22 << 385 */ + { { 0x1df547c1006a1b02L,0x75d489d5d0ae81b3L,0xd41b67c625b28229L, + 0x4032ac036d8b9daaL,0x274a3d504296baaaL,0xbbc28a28b538dd5fL, + 0xcf1f5a62d4df39dfL,0x56a0cad5d4f636b0L,0x0000000000000142L }, + { 0x09732bfacd4a1f6fL,0x00b7aeb0011da78cL,0x2c967709103ab2f8L, + 0x47d6805e9f83ec41L,0x67ded88e17c4818bL,0x81d03ea39249a058L, + 0x5fd3a0fa51bce6abL,0xeb113e83c0a0c60eL,0x00000000000000f2L } }, + /* 23 << 385 */ + { { 0xcacc5334dbf7ffafL,0x481e6074e8597feeL,0x22aaa02633a987d6L, + 0x77e8e0fe6243bb61L,0x786b31ce6f7f8afcL,0x438f8b23c3bfd839L, + 0x86b7cfbfb8cfe3c6L,0xf996706fa357ffdbL,0x0000000000000184L }, + { 0x0c3da18e104c5c98L,0x34545c356f3e682eL,0x4f1af92d2894962bL, + 0x009941c8e16a8ea8L,0x6af6deca88a739f4L,0x846dccbc81b41d5dL, + 0x867b168b1c5ffc95L,0x58edec94a80e7d65L,0x0000000000000111L } }, + /* 24 << 385 */ + { { 0x32c0cf8f2b1148ccL,0x881d8c7cc2086aa9L,0x6b25daf57c9fd47bL, + 0xbfbb7226cd5ec48bL,0xf2b79deb9f2307baL,0x03d30de55fcfb278L, + 0x4b2d3a4671ef137bL,0x4e4a24b1cad4495cL,0x0000000000000173L }, + { 
0x363160471ecfc10cL,0x8a4ac3bd75c27aadL,0x6fd1abefca6f6103L, + 0x6ab15369b741c7ccL,0xdb25d3cd86719621L,0xe0d34a98ee85249bL, + 0x25beecea370efc6aL,0x7d8a2ffca2cefbf6L,0x0000000000000057L } }, + /* 25 << 385 */ + { { 0x2b88e77fedb6e29dL,0x900de27858445e1dL,0x522be722d81ee6b2L, + 0xb9847465d06e0e28L,0x395dc5cc82b6650eL,0x3fd0468d0c60d859L, + 0x6aa7a4fff3820f0fL,0xb90a772a88d99f9cL,0x00000000000001dfL }, + { 0xe2befebb447ef8b9L,0x24f508533c891b0eL,0xa8aa0aaa3230c6eeL, + 0xa73632633dd77107L,0x46e85896920f93c1L,0xf72d7453b2104594L, + 0xe5a8f1c909ceb433L,0xd2b8190067e39bb2L,0x0000000000000168L } }, + /* 26 << 385 */ + { { 0x1493570cf66b1750L,0x50fa5ae6f33cd4bfL,0xfbe2a3ecf561f917L, + 0x4b258b77bf898f7fL,0x02d1ec70beac2642L,0x564b6272573b8580L, + 0x8feedbf9ac5126d0L,0xd28fef1ae3a6f0e2L,0x00000000000000c2L }, + { 0x75b483070a7958fbL,0x38a08c5c739a7ebcL,0x2fbd9f56e113feddL, + 0x99aeade0627d745bL,0x333b1f30c79028d5L,0x4d2b246a3ac98d41L, + 0x27bbeec3a17ca8a6L,0xd112ea7715c67437L,0x000000000000006fL } }, + /* 27 << 385 */ + { { 0x0605b9c712740e86L,0xdb6fb159ee416603L,0x1f40031e378c94c8L, + 0x9e927ae7e0c09055L,0xfb48258a00745eeeL,0x17a704069ead3088L, + 0x5e2bde8a82fa07ebL,0xcf976c7ddc554a08L,0x00000000000000e2L }, + { 0x999adaffe0db5952L,0xab1e9977d27441b2L,0x866e1f16e9f1eeb0L, + 0x72c71bd9ddbd25bfL,0xedd79e2a7b597c62L,0xc6f23d50844b5124L, + 0x0709f4d02bd5cd8fL,0x5073c53ac01ec73dL,0x0000000000000196L } }, + /* 28 << 385 */ + { { 0x225e32b66f12962fL,0x65011534fcab5e88L,0x862db9d870ef87d3L, + 0x4d97fd3e79bbd513L,0x142eccbffb7fb6dbL,0x9e775e1506e8b3acL, + 0x6f6d15237bb36a4aL,0xd9b2a0b0c852c087L,0x0000000000000001L }, + { 0x9fbaa9b411ee4180L,0xc87d89aabe044a8eL,0x60d2115a9d92fa69L, + 0xf8e31c6d7ba7b5ebL,0x9932329bb6df0134L,0x6c01970a3bc69b2aL, + 0x6737c958226ddb54L,0x3093ac432e11e8a2L,0x00000000000000acL } }, + /* 29 << 385 */ + { { 0xd55aab0baff63fb4L,0x157c404a98f804d9L,0x9093d56a62ac263cL, + 0x8b2a1a6b7a1d63deL,0xdbb15dd256b07744L,0x53d216cc32950c7dL, + 
0xbd3ba7f922f29864L,0xaee5533c0b98ed47L,0x00000000000000aaL }, + { 0x09e3a1ebc42105f9L,0x1bf3123ae34db19bL,0xed30f15386624ffaL, + 0x591b7778c2514f17L,0xdd3df3dcc880f93eL,0x97d5affb755e1411L, + 0x9465d5cd7316c386L,0xafe559200ce1ed81L,0x0000000000000161L } }, + /* 30 << 385 */ + { { 0xc5fdf2cd62f1c1fcL,0xb0646997d3c5ca15L,0xf1c45742869d0aaaL, + 0x1a9c6f309fad74f5L,0x093dcb0c3c18f5cdL,0x00d14f2fc8ca593dL, + 0x3f70f7e63c4f17a2L,0x39881e1d95d3df06L,0x0000000000000035L }, + { 0x7afff24c258658e9L,0x5a4cacfcbade261fL,0xfe7767120735bba4L, + 0x85ba530a9c82c1f3L,0x5769e18fe973b899L,0x18db2bc75e4fcfcdL, + 0x9333ceae8e06e55aL,0x458767f2aac217feL,0x000000000000018aL } }, + /* 31 << 385 */ + { { 0x7dfd1fd9ca2c4cc8L,0x5f1a04ed9f267faeL,0x444c8449586d471bL, + 0x02f37da060b87610L,0x6eaadf6d90b1267cL,0x6424f0e60c425c19L, + 0xead76c1e5979d19dL,0x97135c570b5654ccL,0x00000000000001cfL }, + { 0xfec92d05477c6636L,0xb5480096c82d5536L,0xe44d98c2c2f86198L, + 0x86c2ec24cc6a078dL,0x7b95096318833141L,0x7dc32105e34fc08bL, + 0xc28892a13971cbb6L,0x463463058d0debb6L,0x000000000000007bL } }, + /* 32 << 385 */ + { { 0x70148a3a1b27f40bL,0x4ce48c61b7c6cc7fL,0xf6ee8026e7beaad3L, + 0x37eaec76d11ab074L,0x060f2ede99c1a334L,0xe9ce92e469150184L, + 0xf90e484852d73bb0L,0x0436a9bf01b5ae4dL,0x00000000000001abL }, + { 0x3f83381640d46948L,0x318236b2fc39fcccL,0x8835b602122bcc3fL, + 0xa430b37b965ac801L,0xf448ea0fbd0585fdL,0x49e52f74252bb5dfL, + 0x810768ae1085ace7L,0xa8ceb3299242e43fL,0x0000000000000189L } }, + /* 33 << 385 */ + { { 0xbb72e5ce86bb6ae3L,0x749128972db25cfeL,0xb7c395b6f4e593d4L, + 0x372cf85ccdd2d6eaL,0x445a6b38d1b19db9L,0xe2f1c5aff02db1e5L, + 0xf40284f827342884L,0xd719e46f7ff61a12L,0x0000000000000193L }, + { 0x85eef4cd0c3e37e9L,0xdaab82657d012a8cL,0x53fd0cdf23c548daL, + 0xc34d267335e40658L,0x2632c0ec6bead888L,0x56b52a6c28040011L, + 0xa61cba5662cd9b3aL,0xd6c1e7a395ba9887L,0x00000000000000caL } }, + /* 34 << 385 */ + { { 0x0f3c881065ae1e9fL,0x799b5a9cf0ffafdaL,0xb34a3084f57f8537L, + 
0x457b247d9712dd17L,0xfd397b692a860504L,0xbb9d2244c382e2faL, + 0x183b9ddbc05020bfL,0x6f191b5ad4db7efbL,0x00000000000000ffL }, + { 0x4939abecf2576b34L,0x6e5798a1eeec4778L,0xc3df13c2b3b7580aL, + 0xdff5b312ac675c29L,0x778a0194555b2cb1L,0xf9cae67a3f92285dL, + 0x97ce17b1d7de701fL,0x8a9fd15db49e90c7L,0x000000000000010dL } }, + /* 35 << 385 */ + { { 0x3a7f3f9d32ef9964L,0xe88455e705b4501eL,0x068b8e3e93d07babL, + 0xdc8519791ecf38a9L,0xa62a62579f9d6f8cL,0x7ac87da256aa5868L, + 0xa40029a323a119deL,0x2e49e7d41bc4b0b8L,0x00000000000001e1L }, + { 0x927eadf7c6307f61L,0x2dfa993d9c5dee17L,0xa5797580174f907eL, + 0xc72b1db10a1a1a63L,0x138d2381a926cd5fL,0xdbe05d6d4c571edaL, + 0x2d7239fa034e513dL,0xf768ed948d94b578L,0x000000000000003cL } }, + /* 36 << 385 */ + { { 0x222bc61f3aa3ae2bL,0x0b4319f85d0a7022L,0x57b1ba9af02709c2L, + 0xc67d4a2175d1c7a2L,0x57f3c2dd4dbde7afL,0xe0540e677f154d4dL, + 0xd326611a854758c8L,0x76d36182953b2828L,0x00000000000001b8L }, + { 0xcaac624b594e7756L,0x29d87d7566978fb3L,0x7db2834f1a86fec9L, + 0xc3fdaa53e1e61a46L,0x3247175b496c598cL,0x2a3665e118def4f0L, + 0x7f16bd2286744f3fL,0x1d857a5588d6bb31L,0x00000000000001cdL } }, + /* 37 << 385 */ + { { 0x18e8c833b3916e3bL,0x50322ed8a25a77a8L,0x24a82c230f33743dL, + 0xd72d2a918dc8941fL,0x97ec76addc5e62c5L,0x37f2aeaee2b1f956L, + 0x0404f85aa5a4cdf7L,0x2731e35189098b68L,0x00000000000001eeL }, + { 0xdcd6fb17be2d5536L,0xb6e4ddfd2a940f5bL,0x8f9be7ace6efc7aaL, + 0x0c3dc0406bc2863fL,0x9e250a533e9db65bL,0x565400b82775b5f2L, + 0xfbc78c75fbf0a926L,0xfd96db6c14630b19L,0x000000000000012dL } }, + /* 38 << 385 */ + { { 0xd1d64c7e70cb0e6fL,0x997708567217b5b6L,0x037f050cb96d710fL, + 0x5c378b693989e449L,0x45f5ae2c0daa4766L,0xf536e14e7cdc17e5L, + 0xc2e3f81a4a1e7c0eL,0x5e72b0196a5b8147L,0x000000000000005dL }, + { 0xe084cdca97933356L,0xf687802de0f5495fL,0x04ef1478fb3f21ddL, + 0x896ba7f3fcd00810L,0x066c5f0935f6ca88L,0x537e6a61c9437172L, + 0x335f7cd8ad6a9893L,0xd94e13ac3666f7abL,0x0000000000000192L } }, + /* 39 << 385 */ + { { 
0x9e5d42e974d23a2dL,0x65600770d82bc0e1L,0x398a0086f26aa51eL, + 0xf9af5ca91cb33414L,0xafade7f67627615bL,0x9b292aa17f2615cfL, + 0x5807fead310e94a6L,0x1e448d4bdd2d85c3L,0x000000000000003cL }, + { 0x4f7f2205a8ed0e39L,0x314a4f3372c1bf70L,0x75555231cf595b09L, + 0x3e7a57821100fbcfL,0x8f5e0ca34ab8ef49L,0x26e8382b2cb0db78L, + 0x0712983ead48e8d2L,0x0b8c06c41c943b28L,0x0000000000000022L } }, + /* 40 << 385 */ + { { 0xe929b729504f8b28L,0xe706657cb887ef71L,0xb4bd5cda0b83db33L, + 0x465187f0e761913cL,0xbd632bd6040b98afL,0xd82bf2af91c37379L, + 0xcc1058b15380aec9L,0x243967a1c506f0bdL,0x00000000000000d1L }, + { 0xb298442687d73c34L,0x5ffa94ea46cbe44aL,0x5814a34f20ddb255L, + 0x8ea5436889b68f69L,0xe21603b0d26b66c6L,0x4695dff2320b6862L, + 0xa5d5332742d53e67L,0xf17df48aa5931fd7L,0x00000000000001bbL } }, + /* 41 << 385 */ + { { 0x04c70f3885087978L,0xcd12cce53c8b44acL,0xfd4f376998f7c502L, + 0x5aff3f2f98e9d246L,0xb3f2d8a82e0cb01eL,0x3f858b53fe4d8812L, + 0x0b389f33abaf289cL,0xc9bcac4d85e783d8L,0x0000000000000158L }, + { 0x93e51ba7c3bdb05fL,0x9043397744508070L,0x52617f035d615383L, + 0xc2c4e23fad138cfaL,0xb701a0c9d06ba79aL,0x48a3165417ef47fcL, + 0x495700fb2da06186L,0x15cef318fdd26d70L,0x00000000000001afL } }, + /* 42 << 385 */ + { { 0x9d3e388e0cd836e5L,0xc974d434ae7e8fa6L,0xceff33a84b7dfae6L, + 0x27a8202db4d988c2L,0xf74ea8b2ce5a2905L,0x28b4c3d397a229e5L, + 0x00c1ef677089a383L,0xf9a5cea3f47e577aL,0x0000000000000186L }, + { 0x44edeb76bec9adcaL,0xbf02c338c5ffafb3L,0xa48a7dad4e0fcb0fL, + 0x73665bd816a050a0L,0x102f101b93a18024L,0x9fbc6810f7ce9939L, + 0x18723d426cef653eL,0x54d4b68723015f50L,0x000000000000007dL } }, + /* 43 << 385 */ + { { 0x645853a6e2304ce7L,0x6d6f5d6885f9b592L,0x3eca27a281bb3a8cL, + 0x6cc8c8de108b3b50L,0x586b66bb96537c56L,0xfa80528910974d25L, + 0xc040c8cf9654df6dL,0xc97f68c367879489L,0x0000000000000080L }, + { 0xd38c57c16d2eec2fL,0x306846d4f6ea3d15L,0x2ff350501539a565L, + 0xb45d32a19a8932e4L,0x23bf0d184fcbdd72L,0x12fd080596cd132aL, + 
0x7ff6b0e56bf400f6L,0x9c7d3dad7052b488L,0x000000000000006cL } }, + /* 44 << 385 */ + { { 0x5ab5af9029ea9024L,0x62586bb3faa25d87L,0xd43ef6de49148521L, + 0xffaab71343df6049L,0xc0aed310f00e3fdbL,0xe028fc70c33b556eL, + 0xf32399e91b8997a2L,0x1194cdd91f8f2427L,0x00000000000001e4L }, + { 0x8d750b3d3951d0bfL,0x85ecbe512ba1a463L,0x46486edb52e64beaL, + 0xb66ae34e7ba482abL,0x6d57cca5622a95faL,0xbec47b1e8417d830L, + 0x740b9a5887cf80ddL,0xa6618207870a8a2eL,0x000000000000015dL } }, + /* 45 << 385 */ + { { 0x8a5916f00aa91c51L,0xa976e99375eff325L,0xa8408bc54ae87723L, + 0x489d2e43f775bf8bL,0xdde2904c56c8ba33L,0x37550da29db2b339L, + 0x1dc50f82a347dce2L,0x9499d24128c84734L,0x0000000000000051L }, + { 0x3167754dddbca30dL,0x8a8577969acb20aeL,0x79980c5bd6bb103aL, + 0x12849fa2f6ad7a1dL,0x84d5a7c0e6a33774L,0xa909f1a10580739bL, + 0x97a40113e7b102dbL,0x070a98179145c800L,0x00000000000000caL } }, + /* 46 << 385 */ + { { 0xbe7c607599d8558bL,0x0822198ba470d7bcL,0x8134516f9b7f5084L, + 0x5ca862b58ca39d9eL,0xfff8c52dc7bd24f6L,0xdcb06ff955cc6708L, + 0x7ed3263cbd8b69ffL,0xa789da086cb08d57L,0x0000000000000017L }, + { 0x7370cbd65877a86dL,0xbef9b4d34365e6e2L,0x73af11d1ff8243faL, + 0x041c0263639a4178L,0x1907fd7305d03c9aL,0xaf28c8a796ecf611L, + 0x073e51bc9586b05aL,0xb601e442e8c2096bL,0x00000000000000f8L } }, + /* 47 << 385 */ + { { 0x1194c0915f1b4c5eL,0x1d9f3b7afb88b9e2L,0x3bbc16c0a0f8b8ffL, + 0x2c750d1b3c524bb5L,0xa463b2158ed71219L,0x3732bfcb948322daL, + 0xe8cd027a3d667adeL,0x13d77bd8189f2eeeL,0x00000000000001e5L }, + { 0x6adeab389ea63cbfL,0x4cc00ec1873d9cfdL,0x91206bd9dde3e73fL, + 0x673213982af25a9bL,0xdd29fbc0c7c128faL,0x07e8deda5044c8fbL, + 0x555612ea3dd8c690L,0xe56de5e753bf04e0L,0x0000000000000199L } }, + /* 48 << 385 */ + { { 0xa63bbec0976e030dL,0x7a81ce08e65327efL,0x0ac6397fa7c90248L, + 0x14ef2887488f9ba1L,0xb2b47a2f497add24L,0xa86a385740274de2L, + 0x25acdb8c12374808L,0x96f4ef892c57a8b1L,0x0000000000000050L }, + { 0x3a8d6bfb85199966L,0x78bf5c36fbf35493L,0x8f1db78b5bd8876bL, + 
0xb8ea47b6b2becda0L,0x5decd86d13992438L,0xc0abe6d250dc68c1L, + 0x85b05da73eaca155L,0xf8095622b1bd9e0bL,0x0000000000000104L } }, + /* 49 << 385 */ + { { 0xf17044b942b59f11L,0x716b5bccf9bc1552L,0xcdb5e87eaa109cdaL, + 0xb4b8bb4a0a21bde7L,0x56914afb00ddf7a2L,0xd9ad9a5b7a0a7463L, + 0x8cee2a803b0b3482L,0x2bed5feebf03e4feL,0x00000000000001c6L }, + { 0x410ab42537a1cb27L,0x9811d04052ac89caL,0x04597789c9d044f3L, + 0x85209250bb67711bL,0xfd886ffc3c39b579L,0xdf3e4df20a8bfdf4L, + 0x6d2e6d431ca33a7bL,0x8e3aa535624713f5L,0x0000000000000140L } }, + /* 50 << 385 */ + { { 0x77f51e0c0442a7c7L,0x500c2b2ca9e3b607L,0x0ee098290847e5deL, + 0x20349adb09354cdbL,0x6b55b08cc2d79e4dL,0xdcb3401e13d8863aL, + 0x7baaec44aaf4e45dL,0xe52e2d8030e3684dL,0x000000000000015dL }, + { 0x400371c05c87ddbfL,0xb6e45175f64b1914L,0x47b78ec27d6647d7L, + 0xd1b8a05f3eff6489L,0xbf7d56859a637fb3L,0x0928b2419e67d5ddL, + 0xdcd099e5146112d0L,0x5835194e38fa6f30L,0x000000000000004aL } }, + /* 51 << 385 */ + { { 0x994334489930200eL,0x57d680aeca0c5cb5L,0x500b59d823868cf4L, + 0x8a0c2ca2b5702fd9L,0x365864638d68cb48L,0x99d533d4fe2bd373L, + 0x76e5f84a8fdccef4L,0xf7a3adeac157857dL,0x00000000000001e1L }, + { 0x9f89da8738e35a2dL,0xad503f99a5791c04L,0xeb04432cd0b1f650L, + 0xbb49f73c9de86969L,0x8029aec67728cdc2L,0x668e0a8ea7b3de2aL, + 0x316e9e0d920d8b5fL,0xd212034d5aafe07bL,0x00000000000000e8L } }, + /* 52 << 385 */ + { { 0x97ad23c88762dc4cL,0x20968d0f2d1c69afL,0xe2632e0a129c000cL, + 0x29f88599dc732336L,0xa44c59d4bac2d80cL,0x0cf8411a987353b2L, + 0xdcc28c77f498d31eL,0x78b5352a88cd769cL,0x00000000000000edL }, + { 0xb0e90a4ebfdb4f02L,0xfe9de895a3128446L,0x5e56391e08215646L, + 0xca6dd28866fa22d0L,0x72eeff81542c7c9bL,0xd6c39a86d59415d4L, + 0x7b25916353a292afL,0x202fe2b6e8975c78L,0x00000000000001c2L } }, + /* 53 << 385 */ + { { 0x9806bd85cbec6480L,0xba6a35c617677c63L,0xfbb6db2a810128dfL, + 0x88738bb1e9051fcbL,0x9e84681e40f5e477L,0xb8c311c61481f032L, + 0x9e9a7bef01715e80L,0x281e49e5f705aa5aL,0x0000000000000004L }, + { 
0x9b309b2de678ef56L,0x3debc468f3faf55bL,0x46746951f0341856L, + 0x001ba8e5718dd586L,0x05ea65aa9144640cL,0x7e7a5e4f9362e2e0L, + 0xc16935252a9bfa2dL,0x8a8a985a2220ef82L,0x0000000000000022L } }, + /* 54 << 385 */ + { { 0x9f3090354dbc1eb3L,0x7c6d6a66748a3ab1L,0x76e43bf83c4a32cdL, + 0x0556dab29283b2c5L,0xbb92ce2099abd7c0L,0x32c8445805611ce6L, + 0x351c209534d866adL,0xb812b2e4c8c7f664L,0x0000000000000080L }, + { 0xdfe378b30e2a92feL,0xe32b3be4077bd06bL,0xa07a0268acec79efL, + 0xbf50bf0d53a8e15fL,0x2d5cc452ad111c55L,0x9c9138f202874227L, + 0x2a5c81a2332546beL,0x3dce449a895ef21aL,0x00000000000001d5L } }, + /* 55 << 385 */ + { { 0xa63fb2af30833554L,0x54e207774ceef45aL,0x28daaccf9a93de1aL, + 0x8b71ff06448451c5L,0xfd21f9ee3deb0b23L,0xcfe45600d9f7168cL, + 0x60de95282235c182L,0xe30cebba4656a139L,0x0000000000000101L }, + { 0xfc656b2211b55f1bL,0xa13bba09c477a553L,0x66fd4c2893274502L, + 0x9b86a1fd1a24b861L,0xb6afd1ffe2a47053L,0x6c0769ff48df1e95L, + 0x2b3b730d1b63f97aL,0xbec1276691aac5a4L,0x0000000000000144L } }, + /* 56 << 385 */ + { { 0x1b50fa628d00da24L,0x98c7ac500a0590bcL,0xc33c9f7f6c101946L, + 0x653848c30c4eaf7dL,0x7ab38c79dc892656L,0x2ce0ae53420316ceL, + 0x9df9a0087012d563L,0x130cd5656d51bd57L,0x00000000000000b7L }, + { 0x21a2c95078ffe5efL,0xbc330a097b991b38L,0xc5151e1b2d4a375bL, + 0x70a29d1080c0f67aL,0x2e75a55f2247b284L,0xc95a638058f332f1L, + 0x0d7a5242a8fe1cc9L,0xc0b1176ca712a8ceL,0x0000000000000035L } }, + /* 57 << 385 */ + { { 0xe534d7e5b1b6b3a6L,0x442cc9a460e15a20L,0xb5b505780fc877d3L, + 0xef21ffca4c062301L,0xd29e142d0a3457fcL,0xdd64c344ceadc8c1L, + 0xb590679cb14c251dL,0x3fe76f10ab9b3856L,0x00000000000001e3L }, + { 0xf136f66e27ca4c20L,0x7146348c84d4b0e8L,0x5d75318419adb36bL, + 0xa7218c7b88ceb93cL,0x2df7eeaa837ee73cL,0x026cdcf3914bf563L, + 0xf3f3d1a4997d9514L,0x16dbbf9d40ba713eL,0x000000000000019bL } }, + /* 58 << 385 */ + { { 0x0e9c00daa1923d11L,0xa4a935ba9e9a0e88L,0x235a9253ca54fc63L, + 0x392920afdbd25679L,0x856d174652a57c44L,0xc3504988fb239f6bL, + 
0xa907f9f71cd72a71L,0x2925f0ced9f5d416L,0x000000000000011cL }, + { 0x208bb07c7c23fe95L,0xb042893810ca84d3L,0xd0b3bf252f00bfa4L, + 0x34cb8007cd3bbac7L,0xc08e0a6ab206c13eL,0x1462ec66991968dfL, + 0x409af523c0694b39L,0x8ee92b86a076f928L,0x0000000000000078L } }, + /* 59 << 385 */ + { { 0xc9d7fc5dbd1a3ebdL,0x231bb307a5f1851fL,0xc749aba7d0d7e150L, + 0xebe287d36a97cf7cL,0xa7bbcbfaa5236547L,0xb20cff6656a38e46L, + 0x9483da01fe570880L,0xc9445f539dade10eL,0x000000000000018bL }, + { 0xabb59e65fbfc603cL,0xe47b710ffaf6b799L,0xfe3b4ef8bb3471eaL, + 0x6b525a9fa2deb2bbL,0x66e384c3b6f775e6L,0x0edcc15e3ae04586L, + 0xb1ece91a0177a763L,0xdf3e41341824cecdL,0x000000000000008dL } }, + /* 60 << 385 */ + { { 0xc0f0454dd75aecf9L,0xdc6c13e10d80a3a1L,0xfcf9387eaa843e19L, + 0x062744c75f9e673fL,0x4930360b8a9a5169L,0xfca298466dfc67caL, + 0x4b177efcff3e9788L,0x40f92ea72d8232d1L,0x000000000000003eL }, + { 0xc9ebe2461dab3098L,0xaee9d337a007be6eL,0x9e3a204cf11e73a6L, + 0x7827ccd7f46edd7cL,0x5c602ab2c4a2c4fbL,0xef03efa28ab96392L, + 0x22ebf4f017be24eeL,0x0b1dd25cd235301dL,0x00000000000001b5L } }, + /* 61 << 385 */ + { { 0x92b6122d364ef40aL,0x0f469c7408803dfcL,0xbb3f660c5008d200L, + 0x2e4d68e12a330a19L,0x276981d7235df9d4L,0x420bc1c3a23c3b5eL, + 0x2a0d705524eb5212L,0x576d86d23001513dL,0x00000000000001deL }, + { 0x6afb220edab35c28L,0x405c936730584725L,0x5a57996203793bc2L, + 0x7d3538be9fd9bfafL,0x416ce5282511c844L,0x846db71687796d79L, + 0xf682f8faa73a6974L,0x0b2913aafdd19bbcL,0x00000000000000d0L } }, + /* 62 << 385 */ + { { 0x70db7f0c58ddd6edL,0x773c028cdbf9b863L,0xa04ef8b00cfdcb7cL, + 0x513684b8a4a65e38L,0xb60cf9c1508347a2L,0x54635f38bde9bcbeL, + 0x0b41f76ee79fc803L,0x6334d72bf373f10dL,0x0000000000000009L }, + { 0x4cb3f53a0d2bc647L,0xa89a0ae94301232aL,0xacc6701961d28592L, + 0x82718dd4fa93ad0dL,0x845908b550eed143L,0x7f66caf46403a023L, + 0x003fdaae32636684L,0x776946a19c27c09fL,0x00000000000000d1L } }, + /* 63 << 385 */ + { { 0x1da47097240362a1L,0x30e9b0ff5b43cb0bL,0x5dd728da4f3db2c0L, + 
0x0027f719ffe4271dL,0xf1adecee13577480L,0xf6bf7133b47c7e12L, + 0x1af855ba8221a9f7L,0x88896c19d53e3940L,0x000000000000004fL }, + { 0xd07fee83edddb456L,0x362604d7f71a3cb9L,0x040220eda3c197fcL, + 0x7abad6759101be2cL,0x618e46cbeaab7424L,0x05c1a27e57e4b841L, + 0x197916805b141498L,0x37ed04403de4c4cdL,0x00000000000000feL } }, + /* 64 << 385 */ + { { 0x7b0373e4fd998a0aL,0x0bddac80c37e7937L,0xb979fdca8eeccc27L, + 0x150c3141e82b945aL,0xcf88a830cebf1156L,0x649c7c1abbef5c8fL, + 0x3199e1c5f10104eaL,0x31e4fd39898b7fd0L,0x000000000000011dL }, + { 0xc24063de449bcd85L,0xb0cc16e1b96fef39L,0xfa391388b2f631b2L, + 0xd39c21b41f4a449bL,0xd036cb3135f5045aL,0x58a0729c367f428dL, + 0x7dde830655c42fb9L,0xd14ab2fef68c64faL,0x0000000000000163L } }, + /* 0 << 392 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 392 */ + { { 0x5663f565a308e123L,0x91a48983157c2290L,0x222f7f93c054408aL, + 0xd8694963b839a520L,0x864b491dd314da50L,0xa315d96ed675cc09L, + 0x32964930390531b6L,0x9387671921e11f6eL,0x00000000000001d0L }, + { 0x20f4598e0c3477faL,0x14c758b4ccf7f114L,0x0c83f642051b875eL, + 0x14ab1b501b290e7dL,0x99f812c7f1b18fa8L,0xaa60f39988ca8e75L, + 0x00de2bd0b9c6c5f7L,0x765da97328703a2aL,0x0000000000000015L } }, + /* 2 << 392 */ + { { 0x05a704cc2934ed82L,0x647089fb989edd8cL,0xe0b239d40ce7c62dL, + 0x4c892ea6105a5effL,0xa519395fd5ed6b04L,0x806c7003509ed794L, + 0x882e9886e70ce5c4L,0x50730ca1ff01f6a9L,0x0000000000000088L }, + { 0x90a78a16dbcc5484L,0xc1ab078cfd454b50L,0x6f488252cb09e525L, + 0xdd663f53e19b2ed7L,0x16b10da1a67bf59cL,0xb47f6b9536bb770aL, + 0x6bdc8428777b2bceL,0xcd02ae3d561553f8L,0x0000000000000017L } }, + /* 3 << 392 */ + { { 0x06ba35c1ee2f1290L,0xec34093fd403d1e7L,0xc1b8ed8fa659075eL, + 0xea322ad8bfe99020L,0x3b7a95d8d5148b86L,0x979d99bec2cd9adeL, + 0x89a6a55285dba94cL,0xa5b7e51be090e6a7L,0x0000000000000120L }, + { 0x033b73ceaf82552aL,0x8c98e0b55ca604c1L,0x177a92e5533addf7L, + 
0x6eab8277ed1ad360L,0x05937e24bfb8e3e1L,0x4193e5cfd061128aL, + 0x4f781b5dfc1326fcL,0x74403eb19d0056bfL,0x00000000000001f1L } }, + /* 4 << 392 */ + { { 0x36da34d11edc1498L,0x7ba9673c18d4c455L,0x9acd94ff724eb3fdL, + 0x5db71c7ad31cfbc2L,0x64357011d49a2214L,0x80b6101f0aae0c21L, + 0xeffec6209853d1f0L,0xf928ed94eda4fb5bL,0x00000000000001bcL }, + { 0x0031c5cce3db108eL,0x2663b3aa5110eb67L,0xc0432c4f3b2d50c5L, + 0xe5b812ab3d94b4fcL,0x031fcbe4e9828d03L,0x048dec907a08492dL, + 0x7420a4edea50d639L,0x0bd4bdd076eef289L,0x000000000000015fL } }, + /* 5 << 392 */ + { { 0xe7718e9bdf3566ecL,0x2acd4c45d1d77e5eL,0xc78694add9787985L, + 0xc7c600c5f645c923L,0xcd1e697ac393f54bL,0x7928ed5b20054d6eL, + 0xd3c839e03cafeabfL,0xbfaafdcec46a4425L,0x0000000000000196L }, + { 0x6baeeca1e1804d34L,0x6996db6c70aa5e48L,0x24e11983b9577c2eL, + 0x3966f9a104335832L,0x5654cbdd50889c2aL,0xee86177f32df0deaL, + 0x7e9c591df8d76ecfL,0x57de93bfddbac362L,0x0000000000000022L } }, + /* 6 << 392 */ + { { 0x78709a5ca92d6561L,0x36b5d1f10880bc8eL,0xf980cebc0af2078aL, + 0x4b5e964a14edaab5L,0xd526f78563afd322L,0xf5a9d4c668e17240L, + 0x0c420e2819fc5026L,0x3327540903459b12L,0x0000000000000157L }, + { 0xae433226c4d6bb5dL,0x5e39057b13ea6565L,0xeb7729af7aaf08e1L, + 0xa74c0ed7012567f2L,0x65b64576b6e89abeL,0x6b025e87f2498699L, + 0xbc92a03cc229c0f1L,0xc54da03b0f1f67c7L,0x000000000000001dL } }, + /* 7 << 392 */ + { { 0xeb525fbb07d2778eL,0x4cd96634c965143eL,0xb2ea802d742e1fcbL, + 0xa2f1ec4c75bc3cd4L,0xe778f1e1125ebaf8L,0xc700e4e4650b81ebL, + 0x66d593a59c841f1bL,0x4da417578d2984c9L,0x00000000000000a1L }, + { 0x6e9ed77d57d4a16eL,0xefb8f94c1c1696aeL,0xf2961d2c8acbc2a6L, + 0x8fc9352de1d5de1fL,0x786158d1a7b8579cL,0x3ab33268fdcbac8aL, + 0xf237b433d583ed20L,0x3edcc184e4c10d57L,0x00000000000000cdL } }, + /* 8 << 392 */ + { { 0x8ec9a4151e96a885L,0xc95ec1ef7e63cb24L,0x8a77bef7c506a04dL, + 0x4e4dd4100ad06b3fL,0xc2eb62e4dc28fd42L,0x7cb2239454040fabL, + 0x74a3429a64595d06L,0x05983dfa26fd26d9L,0x0000000000000151L }, + { 
0x56acb9c1291ba5b5L,0x9967485f11c55994L,0xfc8eeb0bb23cd929L, + 0x0fe8db6b39d78fb1L,0x3d7281e846f04210L,0xe7f29c805d9fe1f9L, + 0x2ea28b7ef78b31a2L,0x594d39ee4dfe2d5fL,0x0000000000000087L } }, + /* 9 << 392 */ + { { 0x67f5679c7c9beac4L,0x985a78336230b567L,0xa250d8de064e4738L, + 0xb8bfbdd3b564700eL,0x087bf9e82e14ad7dL,0x4cf9220aa0317fafL, + 0x6614ae6eb05bc341L,0x1deff5d59e41105fL,0x00000000000000feL }, + { 0x8b47d53a03966619L,0x72d5b26cbff94109L,0xdfc37d1510b76762L, + 0x669e715664b09f94L,0x16e999e2d3fb006bL,0x3e5aa52b6aa4487fL, + 0x8bfd7f5533c1350aL,0x68ff6cd44e72ea36L,0x0000000000000161L } }, + /* 10 << 392 */ + { { 0x450eccbf95b7cf7aL,0xdf3b7da8b44af149L,0x373e6b4f40c25b44L, + 0xf9db1d32e6c86b87L,0x02466d20439af230L,0x17bd88275830bba4L, + 0xf8f5e1805aef36c3L,0x6986ab02b7ed8e89L,0x0000000000000173L }, + { 0x5c05151d51fe4224L,0x824fbd0f5576ccb6L,0xbe14d2f8e1751932L, + 0xc608e4c367fa6561L,0x4fa94021a856141fL,0x752b95bdda4ab1a8L, + 0xf4d032c295439524L,0x408521c03a8e1267L,0x00000000000000faL } }, + /* 11 << 392 */ + { { 0x9b7d1f3bfc0f4a8bL,0x7a8f0b0f5ccb978bL,0x00d61c73d3415ae8L, + 0xf46d7b22c6aede9dL,0x1fc2937114822df1L,0xe3e1d4675f740f97L, + 0x3a7e3ba3b9159596L,0xf99ca1a803b5ec47L,0x0000000000000029L }, + { 0x04243bdfbb5ccd4eL,0x055a49f78802bd75L,0x51f2d40a306ea61eL, + 0xa70666ecda558bdcL,0x92ae69d13e523a23L,0x98055b32c4ef7644L, + 0x3748d45e0e228566L,0x9c4991a170bae7d8L,0x0000000000000106L } }, + /* 12 << 392 */ + { { 0x6502b92cbb5c5c05L,0x85d920e3bfbe8363L,0x7a2aba0d6e2b1eb1L, + 0xc02768187c1e4e5fL,0x8eed0e03e426684eL,0x164e869178e5d42eL, + 0x606a3a5498aa47a7L,0x2350730d0b830942L,0x0000000000000148L }, + { 0x5e1965a01d104b08L,0x7e28dadd373e0f14L,0x77b7c36fac533cceL, + 0x8c2d1096e00217beL,0x016538a1ae057a93L,0x2fb0bf1e5a58f3dfL, + 0x0e6a5e9d9785d24cL,0xb945866bd5a89bfaL,0x00000000000000a7L } }, + /* 13 << 392 */ + { { 0x143510e9fe207454L,0xb0fd9824bbf845f0L,0x39c13279e8bf1a9eL, + 0x75fa89a10feca613L,0x2c66a7eb825a3dd7L,0x9c2b32c8b26368aeL, + 
0xf92e1aaaf619a005L,0x4de668fdaaccfca3L,0x000000000000012bL }, + { 0xa92801b445cfcf06L,0xfbd77f9cf74d70ddL,0x1e47198c6340644dL, + 0xffba0b1f6b4dae22L,0xe48cb9bbc7b899dfL,0xa2f0ece2c71b8b4aL, + 0x5d9815e62a173ebeL,0xb9b11bc50e9906daL,0x000000000000011aL } }, + /* 14 << 392 */ + { { 0x55ac3726fe2a7478L,0xe94f2031e30ff599L,0x469519bd10936c6dL, + 0x878d1fc50515a43dL,0x187f4315806e0b6fL,0xe03dc8bb2772ffa8L, + 0x32b48a444a6b058bL,0xea5822f35164ec93L,0x00000000000000a0L }, + { 0x5c617856737160e6L,0xc9c64cd423c701eaL,0xfbaeb50175437eb9L, + 0x99422e75de99481aL,0x0f222393795537c5L,0xf7a2094923a1c0c7L, + 0x5e14e523f71edde9L,0xe03bfb13c7ec1652L,0x0000000000000099L } }, + /* 15 << 392 */ + { { 0x592528bcb0d0888aL,0x32b7ca4dcabde573L,0xa25e1b62404f7268L, + 0xa3da442d16381bf7L,0x62caeaaf5d6acdc7L,0xc9f64cd37e764a4fL, + 0x297687a84e48f5cdL,0xb315f2345e2d02a9L,0x0000000000000012L }, + { 0x4f91c00520acaebcL,0x61ce683cdb8cbfe4L,0x22a4efd555ee34ceL, + 0xf8d7d3ca64516080L,0x38d087a027af0007L,0xdda0dfb2e0de0ef0L, + 0x7e0601f0f4c91376L,0x23d16e102ebd6270L,0x0000000000000141L } }, + /* 16 << 392 */ + { { 0x1bd8ce7b5a53c22bL,0x78733fcd7cab446aL,0xc44ca4e248acb394L, + 0xa9888b1ea38c790fL,0x36afb6eb15c34237L,0xb913b8a8fb702063L, + 0x34b77cc5917508faL,0xa931d7a7f9e4732bL,0x0000000000000050L }, + { 0xa90a429056d21d18L,0x8266630755b410a1L,0xb4684a8b894a6b05L, + 0x8a1ade63828cf75cL,0x4fb2f85a127702a3L,0x83ff7d05adf7b709L, + 0x1d3f5a92a68d1db6L,0x243ce1dbc093cd5cL,0x00000000000000f5L } }, + /* 17 << 392 */ + { { 0x747050b4dc1f408bL,0xf1aa5f1cf0e53056L,0xfcd32b5db55215c2L, + 0x83e780e2a643d3d0L,0x10c32cd4a683aa1aL,0xb566a42ef274f385L, + 0xa94cdfc13c628a9fL,0x3a4dd494f28cf4d5L,0x00000000000000b4L }, + { 0xd12f1ca48707dd0dL,0xc250d375379846d5L,0xb3d1f3e6cf8f7906L, + 0x828c0769504ef581L,0xd8ddb5fb91ed0a99L,0xb53dad6bfe27e621L, + 0xec6e3cd22a31b15eL,0x402c5dee5cd9ba0cL,0x0000000000000160L } }, + /* 18 << 392 */ + { { 0x39b44df663e47f96L,0x2c7206891f130336L,0x5543679012acc921L, + 
0x2944066a5d875d70L,0xb6ba5ecab637f3dfL,0x69c8cadc79abbfd9L, + 0x359753d5bcef620eL,0x1caf0d9bbd2e9239L,0x0000000000000116L }, + { 0x2412742cd051ae6fL,0xc8cb1cac54d13ad9L,0x19bdf52082ff17ddL, + 0x7310001c46f5ae00L,0x9f6dae8cefcb214cL,0xe3afc3481ac29d5dL, + 0x6162b535843d1a4aL,0xb25ff812e5174c3dL,0x0000000000000015L } }, + /* 19 << 392 */ + { { 0x2f2a4ecf475c5d3fL,0x012f076856a9da6aL,0x2d35ab720eb73946L, + 0x50aefcb5af90f132L,0x8ad439af4c3ab322L,0x70d8e68b3cca3e59L, + 0xff7ca8f5f7498297L,0xbb166b54aa99e5ccL,0x000000000000011cL }, + { 0x08dd12eae174eb28L,0xca75bd49a8a34a97L,0x262fc207883d05e7L, + 0x6372076829e98b07L,0x6434261d442c2457L,0x6cf09b54ec47cd5eL, + 0x6e4d2a8b5ed4b6d3L,0x6950403c677585bfL,0x00000000000000d4L } }, + /* 20 << 392 */ + { { 0xe8bf0d2b9a86c296L,0x56b947a575d4fd5aL,0x7364354793941d37L, + 0x493899e4728c0b10L,0xa0d636a76ca7e1ebL,0x083f811e16ce84a2L, + 0x9602ad8b90bb012eL,0xb1c8808cfd7d4057L,0x00000000000000feL }, + { 0xae262388f65f859aL,0x220164e04c8e2cf4L,0x7e1f59f2639a7befL, + 0x7777613522bd2b24L,0x191b18530b92201dL,0x9ebf0a3d99fdf0d4L, + 0xc250ea2f792cdf64L,0x9da499827937b250L,0x00000000000001dbL } }, + /* 21 << 392 */ + { { 0xe95372547eec5ab8L,0x20854600dc649495L,0xb2c454502060a38fL, + 0x074640c83b17ef7dL,0xc2ba81afeafd02b0L,0x76b5593495c308ccL, + 0x1be30f525d9c01c7L,0x00fb296b0c089ee6L,0x000000000000011eL }, + { 0xcab278f10884bcb4L,0x81ed4c3515bdd541L,0x69cc0e14f5766a2eL, + 0xee19b33ffa305dc2L,0x0e66612c3941336aL,0x2d70aea0df5046faL, + 0x70ede44f5562d06cL,0x92a66c0bebd8019aL,0x000000000000014dL } }, + /* 22 << 392 */ + { { 0x7aef7245579e8701L,0xdddc870d737fd8ceL,0x4e4ad894eadbb996L, + 0x135b46160dfab3d7L,0x0433d57dcd7250b0L,0x4d3f5f9db0292ed0L, + 0x639e7722eceba527L,0xcd5b2c428487b3c5L,0x00000000000000e9L }, + { 0xb1bdb67698d0936aL,0xaefe0fffc847c8efL,0x9f1c085f612bd61dL, + 0x148a96673d232e56L,0x5374bb9deeed1322L,0xb79378900e4302d0L, + 0x0c269a4be256e409L,0xf0c659b70315057cL,0x0000000000000129L } }, + /* 23 << 392 */ + { { 
0x71ee59406a722275L,0x523a7e2bfa97bb09L,0x8cea508a21648596L, + 0x655546cf43160a2eL,0x715fd019b704209fL,0xc906616600197d39L, + 0xe176a99d7419b773L,0x8a8170e25ce68d76L,0x00000000000000a5L }, + { 0xfb511b500b8abc3cL,0x6e8474d977a2cb0aL,0x849c55ee2037f989L, + 0x92e102d9d072d371L,0x90c88a1edd72df80L,0xaffc59588c3725d7L, + 0x92c20b445eb10063L,0x871b18dac2eb294bL,0x0000000000000129L } }, + /* 24 << 392 */ + { { 0xfaf6bd70ac57cb84L,0x32c242073eb61ae3L,0x5986ce0e7c9b61c6L, + 0x9b36f045f1733320L,0xe4dd9d839f8948c0L,0xebe72911a8a90ea5L, + 0xd34cb93e2d84d142L,0x787753d11c55d53fL,0x00000000000001beL }, + { 0x48c0229aa2df0ca9L,0xbc42db5394660967L,0x4b209e4061935318L, + 0xcf7e4981898ae70dL,0xa42c59694dfb5d28L,0x2ccb13a68e4a6d4cL, + 0x77bb58626c36976cL,0x7b44f5ab71676ff4L,0x00000000000001b4L } }, + /* 25 << 392 */ + { { 0x2a3b21f19fd42b97L,0x388249b469e250e6L,0x7226d729f01c879eL, + 0xe588203b9e652254L,0xc6fb3b1a846a5090L,0x2dca87ce9a2a242dL, + 0xaca5c8fa7dc5f4adL,0x65334ff212ea5d25L,0x00000000000000bdL }, + { 0xc1039e12cc6df18bL,0xb13ab3c2d6a89d34L,0x8573f608686ab623L, + 0xb34720f8b0a4c351L,0xb42c5a677841be0aL,0xcd53d55f37b5ce15L, + 0x99d97165e3cc80d3L,0x3c2a9a9ddb06c07eL,0x0000000000000091L } }, + /* 26 << 392 */ + { { 0x67dc211ce9a3ea9cL,0x292af224222b2a93L,0x818fb79cfe0f90b4L, + 0x379d01c415628e38L,0x48a83503ad420127L,0x9ee9b705ea2b1a5eL, + 0x0430fcdeff6e68d2L,0x3045e6fb6b148eafL,0x00000000000001acL }, + { 0x9bba4b09d469ed6eL,0x95ac0bbe3cf66944L,0x109546371c2ed0d1L, + 0x21ece32632403941L,0xc4a8ba03051a64beL,0xb00e5c8aa214703aL, + 0xce818fb8ef7fa7a8L,0xc9b56befadb9488eL,0x0000000000000178L } }, + /* 27 << 392 */ + { { 0x7ad60105c31e7fc6L,0xf361576465d266f6L,0x5608c43e884a8482L, + 0xdfdf8db35cb2ba2aL,0xee76eb6488c0d22bL,0xf9ee20f3c433d719L, + 0x09ab2833bef67b85L,0x632230924b54aee4L,0x000000000000009cL }, + { 0x5d63b8e97bbae00fL,0xb6348d60cfba1a43L,0xe080feba8eace1dbL, + 0xfc3847f6d9166854L,0x0cf910826edf4f5eL,0xb8cd7739571167c9L, + 
0xcbc05dbd10dd80a2L,0x2a0ee01733e69b2aL,0x0000000000000129L } }, + /* 28 << 392 */ + { { 0x64c9253b379d2951L,0xe87333890586cd59L,0xce8eb9b4f6c64aa0L, + 0xd7a88646774ad3b7L,0x36b395a03dabf772L,0xd7d57f5b8f6f0f7cL, + 0x58773e9a09a83d1eL,0xb3abd1a2ee22a9a8L,0x00000000000000bfL }, + { 0xd93010a338b8f550L,0xb2690375f35c0da4L,0x53a4d7c4f484121eL, + 0x11cb900504509d4dL,0x03f191749f6fa703L,0xb0fe84994edbc279L, + 0xd5d6975d38260010L,0xd7dd640ee6be46cdL,0x00000000000000e6L } }, + /* 29 << 392 */ + { { 0x7cc7d5e9bd89d3a9L,0x60d8204cc9dd7b17L,0x50ade1349d0542c6L, + 0xbc5be5f2b686ea56L,0xc092f3447f75b822L,0x352363f1339b6094L, + 0x7e59acc4f04ee90fL,0x4dacc4a31f72da27L,0x000000000000008aL }, + { 0xe176a7be926af1d7L,0x2c9765fdceddaca9L,0x66708097b7d745caL, + 0x58469ef977ac7fdaL,0xff4993d33433d277L,0x6b648ba00477ce72L, + 0xa49cc8d961ca75eaL,0x052cd69af501a8acL,0x0000000000000166L } }, + /* 30 << 392 */ + { { 0x2f13fe842dd5e903L,0x339f67a10e555b76L,0x0d2f153c003c2ee6L, + 0xba0a0aae38f0fb43L,0x11b61a8eef9b72eaL,0xc7f6eca2a5c4597dL, + 0x0e72636cc732b509L,0x939553ecb7247071L,0x0000000000000075L }, + { 0x759b4d18978c6c82L,0x232c43fac9b34e44L,0xacdc159177608f47L, + 0xae6056b93087fbb7L,0x000ca7db5c3883eeL,0x58de6f66140170e2L, + 0x95a29345ae255893L,0x6913fa78e7508854L,0x0000000000000068L } }, + /* 31 << 392 */ + { { 0x2c05d35ab3259931L,0x180318ee63243888L,0xe370c069e4a6d4acL, + 0x9a0e73d8115a7552L,0xb136040f3024b20dL,0x9ec6cce3c9b71921L, + 0x8240506569597e99L,0xad8cb3569f38ae91L,0x000000000000006fL }, + { 0x056452385fc41873L,0xc5444c3e434b2df6L,0xcdaf76b3280c0eafL, + 0xf5e96a0627548836L,0x7642c921768eb175L,0x309ecc1fc140592fL, + 0xb46b3c8a55537f8fL,0x215fd24153c7570cL,0x000000000000015aL } }, + /* 32 << 392 */ + { { 0x8b3dc3d04aae7354L,0x9ccbf6ce4a8f62b0L,0xb3713c0060c73a7eL, + 0xf6f73585349ec6a0L,0xa995a64cfc30467fL,0x877334fefad776d0L, + 0x84588e41b236dcf3L,0xdcfdb088e9ccbd7dL,0x0000000000000181L }, + { 0x188db860fb36e7aeL,0xe7a0df8df45ea4ffL,0x110245ac5ca6f9f7L, + 
0xd4c3509d7a7031afL,0xf4aa648f34df0623L,0x462aa9497e664bd5L, + 0x415012b4e2f0138fL,0xec3dafa01c60732aL,0x00000000000000beL } }, + /* 33 << 392 */ + { { 0x892947c065b8343dL,0x04d4c1f54cc89339L,0xec0a8dadc9f713feL, + 0x49a1bdcd7e8cb36fL,0x8416e0725de9db7cL,0xe9d7856a814f2889L, + 0x00bcd25858d9931fL,0x77622d851e6d5f7aL,0x00000000000001d4L }, + { 0x8d1d7219651ec975L,0x5d20520ede8d13a2L,0x62d8bd2acb73a5e7L, + 0x56596d73d791eeb1L,0xfe714aa9c07a2a30L,0x2829967735f34560L, + 0xc1f5477576599af2L,0x599f6db4c2e97737L,0x00000000000000daL } }, + /* 34 << 392 */ + { { 0xe27b3464824eaeeaL,0x2358f6ad5393879bL,0xa7df34d4710fb8f7L, + 0x833df1e070a35060L,0x0713f8dc37a6da75L,0x5c1f9b86d569678eL, + 0x5388bf51316a4a10L,0x3533b4196499c876L,0x000000000000005aL }, + { 0x072045e7f7b0bb1eL,0xfea6dad18806fff0L,0x145700a9967aeedaL, + 0x7d1daf361c1fd04aL,0xa59fd9c4ba6c47f0L,0x9869b04f2051bc10L, + 0x922a2c62ed765e5cL,0xc45f00694fe8bc2fL,0x00000000000000d5L } }, + /* 35 << 392 */ + { { 0x7d73bb35ae7cd284L,0x95a93265f13cfebeL,0x35311175966ae16bL, + 0xdc7930b274e48e72L,0x81501ccbf7575898L,0x7999582f6a37a35dL, + 0x88d4ab111c496f5dL,0xd25f44e7a36d1b34L,0x0000000000000147L }, + { 0x64ab3239231cfc02L,0x75e653046357d94bL,0xd0f90974d95dd829L, + 0xabf69128f4dfded3L,0x3cca87402a85856eL,0x1c555726610324c7L, + 0x46bba522508ec0bdL,0xbf2610b1479a9c93L,0x000000000000014fL } }, + /* 36 << 392 */ + { { 0xe1299139f1586f70L,0xd5cb2c10053083dfL,0x9e0ce9829ea052f5L, + 0x87de595b0efc851fL,0x4cccf7ae98742defL,0x87cfa3788548d910L, + 0x8ef7bc8e1a6128b7L,0xa605315051d38f20L,0x0000000000000047L }, + { 0xd0797eddc7635909L,0x1eb856cb63113d00L,0xa2a965c95725aedaL, + 0x5eaa6eef217ab755L,0x4ada3e4af55bfed5L,0xe4d945933792f0d8L, + 0x19ea0a3b27ea0947L,0xe90121ad54aa8a99L,0x000000000000007eL } }, + /* 37 << 392 */ + { { 0xf2a647cdb728d429L,0x45a1d210eb7133bcL,0x4c8f934b6b83916fL, + 0x3b98f81e522dd12eL,0x454884a7d6e86fbfL,0x160c65f88689d7e7L, + 0x9069c27d763c2796L,0xf1405b86fc31e096L,0x0000000000000137L }, + { 
0xb0be8cff6fb3901eL,0xa7a916b0da1ac91aL,0x7c097b23b344f479L, + 0x6b3aa54eb469472fL,0x8f10c2209edf3457L,0xe485ee25d6189cf5L, + 0x01c0af56cb45ae2aL,0x16c2ecb7d3a5d9f5L,0x0000000000000004L } }, + /* 38 << 392 */ + { { 0x6caa8e9aab24a11fL,0x918618de7f998abdL,0x51ab05a0fa2efd0cL, + 0xc5db63b47afd5f5fL,0x40ddd9d21671f863L,0xdf21f123e8979929L, + 0xf964fdaa753f5f9aL,0xa6bc6ee1ac869b60L,0x0000000000000036L }, + { 0x9ec7b5223fb9e363L,0x9cc1db429d4a17e5L,0xf6abb7fa2490da7aL, + 0x284023565a5d5231L,0x31c197bad7698b7bL,0x78e3957ee443b4a6L, + 0x6f747898b809dcacL,0x2cb14df9bc50dc28L,0x00000000000000e1L } }, + /* 39 << 392 */ + { { 0x087ae31a39de4487L,0x83eea765449ccd69L,0x4646830a781c8c19L, + 0x9cdece343b0c1627L,0x82837fda35c2820fL,0x942160f96db9709dL, + 0xd5d5c0d1874cbdebL,0xf6985f955a058b33L,0x00000000000001a0L }, + { 0xce8987ae2e31c2f3L,0xf46bc0faeb414481L,0xd6d762899fa9260bL, + 0xbc302bed030e8a9bL,0xa520f637eba41851L,0x51304e88eecb96ffL, + 0x41e7a0102072ad97L,0xf7bd7e56c4794837L,0x000000000000019bL } }, + /* 40 << 392 */ + { { 0xc2587d72e962529eL,0xd3dac5d00c872ab9L,0x44187663c75c725cL, + 0x2e65d5ab3d496338L,0x01ca52de8b23156fL,0xa7ebba10d035e4ccL, + 0xb99dfaa0e871449dL,0x6248c81952b0dd58L,0x00000000000001ddL }, + { 0x304d544909a0b2b6L,0x193afec00bb26682L,0x00425a2bba188995L, + 0xd4fc292d04bee432L,0xa56de92439190f47L,0x184b59fbac7e0841L, + 0xb2462d3ed4c24d7dL,0x6df3cff0827e5144L,0x0000000000000053L } }, + /* 41 << 392 */ + { { 0x750a3237cd83b337L,0xfa69ee5897a42787L,0xf38484d92bcd0f21L, + 0x727e6ceb1ed6eac1L,0x29cd75e4647cca9aL,0xcab25ca99b2e130dL, + 0xd347c8ae544b56afL,0x236188fa97bbcf15L,0x000000000000016aL }, + { 0xd353f1b2c35b0afdL,0x5f81b2112df1ee19L,0x3a17334ebfef60b2L, + 0xf9c53718327923a6L,0xf7fb02d5bba0cddcL,0xaecae9e4533d5c93L, + 0x3e46f95af2acd6b6L,0xd5e4a5b1aff88b82L,0x00000000000000dcL } }, + /* 42 << 392 */ + { { 0x836c879be603487cL,0xc176b372be67aeefL,0xb16c1c093916ed2cL, + 0xa19f57d1b738de42L,0x19fcdaa9b53fd0d0L,0x4503028c6c65898fL, + 
0x068d459d2a6eba9dL,0x230f913208ae7045L,0x000000000000016aL }, + { 0x3e3021c260587627L,0xb845066f0a548829L,0xeb7646754d68e59cL, + 0x7c83af0ba57fd160L,0x41cbc6dd06cfd9b3L,0xa7c211a7c2ac70a7L, + 0xa97550d867a3cebbL,0x805d0a4b3463d8b3L,0x0000000000000146L } }, + /* 43 << 392 */ + { { 0x255df4920aef0552L,0x94df45d226aa07b0L,0xaf376f26633252a5L, + 0x8264ded59ce08ed0L,0x21195ce9e61e9dbdL,0x012afc4b431f8103L, + 0xce45e41bb0d6a41bL,0x16dc53f7e44539f8L,0x0000000000000189L }, + { 0xfe21cebd1fa28378L,0xb1e3d1f305fb6ff7L,0x39b11a3701779f3dL, + 0x6cdeb0844103ef71L,0x63c18cbb4421b84fL,0x1b2b98a84c2217bdL, + 0xa219ed87720f56deL,0xde0857ad049a4d99L,0x0000000000000082L } }, + /* 44 << 392 */ + { { 0x1d09b95369c3d17dL,0xa867a21dd4d01ba2L,0x9871762c3828b992L, + 0xe3cda568f6da70c2L,0x68c327a254227c8bL,0xddd426509c5e4075L, + 0xee64acb06241b455L,0x63ab87f677bf9d01L,0x00000000000001c3L }, + { 0x94f1514cde039bafL,0x0eee405d1d6d7285L,0x221b6b65199243d9L, + 0xe1ef5cd2e3b17599L,0xd2e0a39b3856f606L,0x920a00121b433750L, + 0xd7daeaa074d9b72dL,0xcb1206d333ef5d80L,0x00000000000000e4L } }, + /* 45 << 392 */ + { { 0xd79bfda671b41f80L,0x4e2adbae09635317L,0xdd1e0c89d5137b55L, + 0xac6035369d901bb6L,0xf7d994e2875b6bd0L,0x4493e365ac6726b5L, + 0xcb1b9b1a021fd3bdL,0xa22ba13d4673338aL,0x000000000000018bL }, + { 0x6d613468f89eb756L,0x8c13c6accaca507cL,0xb066bbb54f35674bL, + 0x44f03d0c34fd9ee7L,0x44158ea1798b5c9eL,0x5c340cd7e85265edL, + 0x69ffabc8a643874eL,0x4b84dc1fe1f23e59L,0x0000000000000145L } }, + /* 46 << 392 */ + { { 0xce3e4f6f3537a3b6L,0x2a548acae4be3890L,0x7d257a739c6f309fL, + 0x517b74e62188a544L,0x92dc6544c008b28fL,0xcb56ac9408dd8b68L, + 0xd9f11fe9935a4ae9L,0xad4d23a2a1dcf178L,0x000000000000006eL }, + { 0x1498f1e4e17213f0L,0xd0519b28c758cd28L,0x22057e5f11edd1c3L, + 0x533378fe26560bb4L,0x15a266a251beeb5fL,0x49d6d63b32a25673L, + 0xdd62ccbdf0100cdbL,0x872a3d87c5eadae3L,0x0000000000000007L } }, + /* 47 << 392 */ + { { 0xb529a755894cd820L,0x7ac841f79aef3e4aL,0x2639532a82c12ceaL, + 
0xa107d5768457ba7dL,0x08cc2140624c6ce4L,0xc75a2afd83169cafL, + 0x692f8acb9178032bL,0x8835d7e399fdab3eL,0x00000000000000f2L }, + { 0xcf248e4d21fefeb0L,0x7a5d84be645aab81L,0xa1692e0b81b8aa2cL, + 0x06c5eb427bb653f7L,0xda8e28a27c4d70c9L,0xf13e2010d45a9397L, + 0xab9054ad78f1ab45L,0x80453fc864bc5f43L,0x00000000000001acL } }, + /* 48 << 392 */ + { { 0xf77afc9ec1f9124fL,0x010af17a5d958392L,0xa9aa7d55eeb66cb6L, + 0xb41570ca816dea76L,0xb2138d58a983b39cL,0x0fd404cfcd6cbaa5L, + 0x29c1f2fd40c224ccL,0x1e263cab99c23815L,0x00000000000000e0L }, + { 0x371cd0937649d2ffL,0x7c79d3cc02038ff9L,0xef3261855ca5e1b9L, + 0x1beb030be02c0478L,0x134d0c94f859fd4aL,0xa767faa5ee41b919L, + 0x8d957aacd5d9e3c5L,0x4b1c1deb191fca35L,0x0000000000000175L } }, + /* 49 << 392 */ + { { 0xf5232405e8e50647L,0xb61e0f313bba4ef0L,0x0a5328116f05e31bL, + 0xe3b1d72b683d197fL,0xbd55f76bb72d36f2L,0x2645e034ab6bfa34L, + 0xd94ce3b970f52ddcL,0x83e891a2470c7338L,0x00000000000000ddL }, + { 0xe53e6361797af474L,0x9d94b01e8e09f9f9L,0x8e23416db8099125L, + 0x8f3abc4c8ef378a1L,0x0da6e3fef861c054L,0xe9d3638b203190feL, + 0x5dce7db98c12ac92L,0x8dd8a61cb8335d65L,0x00000000000001aeL } }, + /* 50 << 392 */ + { { 0x0df313e10d732decL,0x0667dd1e5229a7a4L,0x78efd49555c31c0cL, + 0x554c55303735e643L,0x3b29c30758c97feeL,0x7da349bf85e2c6f5L, + 0x982f2be25cb9ce70L,0x39274c84c0441b24L,0x0000000000000100L }, + { 0x77e9356c2307d25bL,0x2e81d63615ed7120L,0xa39ba7a3e3143227L, + 0x153addc96bcece22L,0xdeebf6c01983ffb1L,0xb5f2d60e43d43f1dL, + 0x8340038620d8e424L,0xd127aeddf6f0e03cL,0x0000000000000164L } }, + /* 51 << 392 */ + { { 0xdcf0cd6a73ccae26L,0x4e52859e04a8a411L,0x085570403909f6beL, + 0x1aeb429cdd5f5f1fL,0x83c8410392fd6de9L,0x45a05c6efd051de2L, + 0x334d659ab6a36a9eL,0x9775761ac86254bfL,0x00000000000001d0L }, + { 0xbb498a7e7ef71b31L,0x54320c2a87a1a905L,0x308a31e5b01357e7L, + 0x115312b91ec5af64L,0xe3add38ecaff60f0L,0xdd257d552ac78ec5L, + 0x0e0b29616ac51787L,0xf7537bdd2f9e42fbL,0x0000000000000050L } }, + /* 52 << 392 */ + { { 
0xb6018ff0078f7ff8L,0xc9a3c8112ae4ec18L,0x974c8b824dbc5b54L, + 0x323120b429c8b8b7L,0x04c883d798439a31L,0x1ddaaccbbcd28a10L, + 0xb22b548cb61251c6L,0x2cda48d6c6154ca5L,0x000000000000013aL }, + { 0xe783d50d03857e72L,0x0cb1de54fe7e9d44L,0x608cd3398526e99fL, + 0xb9d4ef4ec00cf265L,0x37c326a39eaef59cL,0x435851a4046d5b54L, + 0xf4813c30147da907L,0xdb37c0b9ecfdd94dL,0x00000000000000f9L } }, + /* 53 << 392 */ + { { 0x9f97d529526acd82L,0xbea14de9698d81a8L,0x1da65dfe087fdd78L, + 0x162991819e331cb6L,0x1770cd3351ed1f90L,0x2241947f48d1fff3L, + 0x8048c4b872e59f7cL,0xd4c7c70884441c30L,0x00000000000000ffL }, + { 0xcc713a4b9a697e4eL,0xa066a6de4c19b3dbL,0xab7c9dad5ca3148bL, + 0x8806b0223699de2aL,0x8d2c17b13c648f2dL,0xc39af0f9a3d21b8cL, + 0x91f94812fa66eebeL,0x74178170791cae62L,0x00000000000001dfL } }, + /* 54 << 392 */ + { { 0x60e7c5987a182f35L,0x60a81db4abc6a786L,0xa067c36017995827L, + 0x4d1b77557880874dL,0x79a8c6235b3e98d0L,0x0301653a9ae9e287L, + 0x32be063279f6a138L,0xf8d8c8ed410d08d5L,0x00000000000001b5L }, + { 0x2b071af2a699c790L,0xa8d0fde78eae615cL,0x76c4cd7340ce53bfL, + 0x7c36a3f742866824L,0x870e41145d8103bfL,0x1804432af7b9f5c6L, + 0x755a6755da7a89f1L,0xed97487081c30af9L,0x00000000000000a1L } }, + /* 55 << 392 */ + { { 0xc713ff085d801301L,0xf291ca368d9b4f05L,0xb3705a12e497df3bL, + 0x375e1513870fa5afL,0x1bedb38c4d5d9664L,0x418429ed114386c5L, + 0x7e3be1187561f279L,0xc7253d1a4481b482L,0x00000000000001b8L }, + { 0xf5ab88cab54466aeL,0x2dae1aa5a2485564L,0x1c5fb396c500fd73L, + 0x056e87292af65306L,0xb7b4568100a94c1cL,0xd6401f60194f2a01L, + 0x526b81492fad156bL,0xa59af828a8705811L,0x0000000000000115L } }, + /* 56 << 392 */ + { { 0x7ff87b1828b89234L,0x575b28532190da3bL,0x6cb04cf22920f908L, + 0xe32b0d0ea76a9361L,0x027f1c53b013a013L,0x77e8e27c2aaf2e7fL, + 0x3893cb0fa11e5652L,0xb166780751a15388L,0x0000000000000185L }, + { 0x25c5deff91ed25b2L,0x08e728a029e55ce0L,0x7b1d2010d66015ceL, + 0x73f667cefeb85d6cL,0x9f9d44e01d9437eeL,0x67a6af17f0ca5a45L, + 
0x187111cc3ef07297L,0xfb461da6e1bf1374L,0x000000000000011cL } }, + /* 57 << 392 */ + { { 0x8bca57126c0c1111L,0x3c2a9c8f3987bdfcL,0x195b1886f65f43a6L, + 0x5edb9cbca9a638d2L,0x1431220ace7b35feL,0x67d81dc8b9c9e116L, + 0xa7ac26bb4fd8d442L,0x36ed9544c1404219L,0x00000000000000cfL }, + { 0xe7d51488f0454b54L,0x116abfb6322e62ddL,0xb1f5e4f6001cdbf0L, + 0x9bd7a0abcd63aeaeL,0x47c74ae592cdc9cbL,0x227b9e3eca320f47L, + 0xfa2f9090db249cf3L,0x5b61134f5b7d7aeeL,0x0000000000000185L } }, + /* 58 << 392 */ + { { 0x43f9e01159567749L,0xa8d74c0c892a23d2L,0xa2a6b93c4bf3e620L, + 0xd89ed75630144479L,0xe5959b544c80c6bcL,0xa347e101c7491d8fL, + 0x4c15de92a7b7e26fL,0x6abeeb069eb0ff29L,0x000000000000012eL }, + { 0x5140f873047e9fd5L,0x95b33ace83629d83L,0xbf2e419db7778fc5L, + 0x6312ed5f27390d18L,0x786b21f5b52a1899L,0x440713779eba3860L, + 0x4a5c36d9bf4ba461L,0xe456c1a900bda888L,0x0000000000000157L } }, + /* 59 << 392 */ + { { 0x1447b2045e0c66c3L,0x29de6f26ef30a205L,0xde1a4c88ce42e500L, + 0x5e3b0fd1142dc812L,0xa09c32a50c08edb8L,0xff98ab9bee677bebL, + 0xfe7f349438e3e89aL,0xc8cdfca112d9eb21L,0x00000000000000a3L }, + { 0x08002f8e769eaac4L,0x4e6619127e091da0L,0x4f251942d82a7ee4L, + 0x04224eebaddda269L,0x52263b35de1b38faL,0xa5bf36c7afeb716eL, + 0x37a49608b7bed189L,0x6adfba3c0e29dfa1L,0x00000000000001bfL } }, + /* 60 << 392 */ + { { 0x40081916318060c8L,0xa156d0f0ba050c4bL,0x8dc9db0e2b261fa1L, + 0x68193be8df24d4d3L,0xbbe73d7801ab2aefL,0x0b5476e951f96cf0L, + 0x596e6ffcb23a6db3L,0x19c3aa46328c3923L,0x0000000000000125L }, + { 0x4dd42ff66a2f1ee0L,0xf18649df63cbc446L,0xcc5e8e48ad36c42dL, + 0xb5d4fa7bbea9ed49L,0xe416534f32b06489L,0x94451bdceeb06919L, + 0x7ce63f1bda8c40e5L,0x7c5eb653de02e8bcL,0x0000000000000166L } }, + /* 61 << 392 */ + { { 0x2b1881b9ddf7ab7fL,0x273489fcd43adb7cL,0x4f3bced12fe87555L, + 0x67ce19731b543d67L,0x7eb827d4fb21a673L,0x2c874f363c90ce65L, + 0x63771809ae3661f0L,0x25e81e55185e86efL,0x00000000000000d4L }, + { 0x75cb5647a9b46abaL,0xe2d43a0498398ae2L,0x082e5804f8448dd1L, + 
0xb6b5fbf01effa351L,0x8b4e95a375ee0d01L,0x06330f8cd78e5cfcL, + 0xbcc0fb586ad576ecL,0xc22b7b8e85b758baL,0x000000000000010bL } }, + /* 62 << 392 */ + { { 0xdc394f862b10c060L,0x7f3635c2f1d68dd6L,0x3b34936258a2aefcL, + 0x588c1a8631e77678L,0x6b1049c678fc729dL,0x45a9ba71868947e0L, + 0xe9942ba2cca46979L,0x3d7cb195075f93c6L,0x0000000000000038L }, + { 0x33d8da16edb3b71fL,0x42573533d151ca7dL,0xb104ef742e4faffdL, + 0x3f9371862391eaa9L,0xa30cd9a391847e30L,0x9f51fc0894616135L, + 0x62f836fed3dfb130L,0x8ded8ab1d7469a7bL,0x00000000000001f1L } }, + /* 63 << 392 */ + { { 0x1d872ee0919b82d0L,0xbb4d27900009b3eeL,0x3b7329e978baf440L, + 0x8263e55cb963f90aL,0xdbc7ce8a45acfd70L,0xbab362ce2b929345L, + 0x9c781d6df934b89dL,0x15e751c458e6ccb7L,0x0000000000000075L }, + { 0x96c9fc5853add358L,0xf7cecd0bdb680749L,0x65ebb93796cf3096L, + 0x87a7f1a067509825L,0x5425deef814d4898L,0x654213dbde0f2f8fL, + 0x958c4db48148c6f8L,0xa910a27703d86f8bL,0x0000000000000141L } }, + /* 64 << 392 */ + { { 0x1379b176e80649c8L,0xaa0c2dc8b7fdb132L,0x3f3050ef1134ffd2L, + 0x28d4e288d4a76f0cL,0x22ca05f7cd9b7f91L,0x9174bee06e7b9ee5L, + 0xc0ddc6356b341cb3L,0x1cd07d05eea9da0eL,0x00000000000000bdL }, + { 0x423ec36bbbaa1df4L,0x33ccc8ade74c741eL,0x598d466249b5cd06L, + 0xd8bfc7e58fa121cbL,0x2cbe86b991bec4baL,0xbf1fe841e8453f40L, + 0xa35ff85d392592b5L,0x8de9f3df9c5807b2L,0x00000000000000fcL } }, + /* 0 << 399 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 399 */ + { { 0x40f66d1915b26e6aL,0xfd654e8a64400d78L,0xe408365366ad9de7L, + 0xa64ca69b686c1b2eL,0x698749ffa80066e2L,0x197528a4b76196c5L, + 0x2bcfcf647bf835dfL,0x1a83fe8460e478bfL,0x0000000000000020L }, + { 0x948f3e0ac46d1075L,0x3ccd3dc1e5c05fcfL,0x11b825b26414eb04L, + 0x2b7a9a5521e3f864L,0x2a24f5706daba159L,0x51900d4ade3eb61eL, + 0xaddc9096a6a6766bL,0x357eabf9e9dc3b6bL,0x000000000000007cL } }, + /* 2 << 399 */ + { { 0xf5c1a4c690185363L,0x15f58fd920c04ed5L,0xa7b46e0ce913a1b2L, + 
0x1e7167c636f1d8c5L,0xf4fe6bfebf5fcfb1L,0xc46faf10a6c2027bL, + 0x45d593004e1f12f4L,0x46bcc87312185485L,0x00000000000000caL }, + { 0x7c5180716432c12bL,0xb7e004ad90ae7556L,0xdf847160d8532693L, + 0x4d8c7b8aa18b3802L,0x90e2504596872af8L,0xe69d4894aecb6fadL, + 0x6ddaa06c17d0fe85L,0x49f1a466340c3528L,0x00000000000000d9L } }, + /* 3 << 399 */ + { { 0xc6994861edb77a32L,0xc0f2710e201b6740L,0x11783a64ccf36bb4L, + 0x1630e8ffe0b5e3f2L,0xd2491b8b26aad4b6L,0x3256836112f7b4b7L, + 0x99a8959b60726313L,0x3cec4f7d4a40d4b4L,0x000000000000011fL }, + { 0x47cbbf961233c171L,0x1ea33d4222a94a13L,0xb5d248d864606917L, + 0x6a7bb04e94261f36L,0x701e50e4b981ece8L,0x8d98d5f9f1f36171L, + 0x13e58d4a2fb6d9e1L,0x9c02d71bdbf4e167L,0x00000000000000a6L } }, + /* 4 << 399 */ + { { 0x64782a476da18f9eL,0xb89e0b654a496e02L,0x437a49f6501c3a88L, + 0xae9ea08748a19f6eL,0xa787948b346d2069L,0xa6b9a3f1532eed3dL, + 0x728ab35e8ad26937L,0x3cf24801041f4f43L,0x00000000000001b0L }, + { 0x5e272327cbbab58eL,0xe4a3c5f9f8705ffbL,0x2edc1c8eedd26ee4L, + 0x1bd46ff5f26d033fL,0xcc566431d87192fcL,0x03c7e40611764c8aL, + 0xebb80e34882a69abL,0x7a7f694361a08d84L,0x000000000000011eL } }, + /* 5 << 399 */ + { { 0xd0861f01b67232e5L,0x3205675cbdf5cb99L,0x2a44e3cf1cfea92bL, + 0x1eb229f3cd5c6e38L,0x81f6ebd18862b582L,0x352be0bf4d434531L, + 0xc8516209d530e407L,0x4f591cd009b02235L,0x00000000000001a0L }, + { 0x8b5483211f7e7b7cL,0x566ba99cc0ef1651L,0x3c01398da82de656L, + 0x91efd970cc222c87L,0x5dbe62201a5c5157L,0x777a9fa3a2c60991L, + 0xe77e3a4db85f488bL,0xdfb3c629f1b3b74dL,0x0000000000000057L } }, + /* 6 << 399 */ + { { 0x68f4a4e058b77137L,0xd8a34e67f12f5bbfL,0x1781240f1e0619c2L, + 0x44495373498328bdL,0x98e9f61a545b7d41L,0x767db401602a0c48L, + 0xe138e5e8806301deL,0x65a3883b4b21aa89L,0x000000000000012dL }, + { 0x647b00780423fe94L,0x424f483ffd276310L,0xaca222fbd6a06303L, + 0x47162acbffa52c17L,0x7ae0c5019de267feL,0x998dc9bb667a30c8L, + 0x988cf192af48c727L,0x0a270986dfa23a3fL,0x00000000000001fcL } }, + /* 7 << 399 */ + { { 
0xc0de0a4647a483d7L,0x0946aa731afeca92L,0x2c67c9fb91e66640L, + 0xf3b518e6ebd0a936L,0x8eb8587c263a1c1dL,0x900d28a75c6581b5L, + 0x50591d7a71723e84L,0x47f2689834a99ff8L,0x000000000000016eL }, + { 0x52fa5964e25d2f04L,0x0e347f610f4fb57eL,0x4bef23dcbdeed9f3L, + 0x212f5df4432fedd3L,0xde891c950211bb33L,0x13ed4284567558bdL, + 0xa113c3552c283e9fL,0xa06260f73e48dd8bL,0x000000000000003cL } }, + /* 8 << 399 */ + { { 0x5c9209c1c4646439L,0xceb6698586c5d6bdL,0xa5f5225829e415b4L, + 0x18fff2c1e95d2a27L,0x21d464f1be40c8d9L,0x11b4696e059828cfL, + 0x5c1f73c570f76aecL,0x6019fadb273e3f5bL,0x000000000000012cL }, + { 0x6d1ff5913eefab44L,0xf913524cba2986b4L,0x96fc0b7a290dcf2aL, + 0xf9e7ab52061df2a2L,0x66401173d47330a6L,0xff2aca188b9f749fL, + 0x8d4ac80f1ab42ac3L,0x10ec329ea1378039L,0x00000000000001aaL } }, + /* 9 << 399 */ + { { 0xc06dc058c4f1cb73L,0x739370439ab6fb16L,0xb663bdf55f7fa374L, + 0x8f5e3b3839fa047cL,0xf517fb9ad860a576L,0xcb414b9bd755537fL, + 0xe86864ea508a12c8L,0x49a663b4cdaab9daL,0x0000000000000029L }, + { 0x9e4da7f330251bfaL,0xb084b69fac60f268L,0x15b981fb93903a9cL, + 0xa6905f206dcadc57L,0x81e961cf5050b9b0L,0xc583b4e4f6646d47L, + 0xe30e25dcfbc96082L,0x8c92e5d8f0b0ffc1L,0x000000000000000dL } }, + /* 10 << 399 */ + { { 0xa5fc51ef9c1d86ccL,0x66a9b8980b3faad3L,0x96c73c2521d124edL, + 0x392440db050626ceL,0x02bfb1ac50a4a12aL,0xc70a30741742ddddL, + 0xf1f9daa647fe811dL,0xd4baa6b1fb3ce6ccL,0x000000000000016eL }, + { 0x09042d7509d96ce7L,0x4c92ce693315f77aL,0x4992b1f9a9c0d9f6L, + 0x7c0ffbacd18ec980L,0x53d205dca008ab38L,0xf0a098f7c908481dL, + 0x2bffd3630ba397beL,0xca36e99f49003f86L,0x00000000000000a2L } }, + /* 11 << 399 */ + { { 0xd239593542d95daaL,0xdcda6a94ea334140L,0x9c27790794a62966L, + 0xc61b5734f86dfd3eL,0xd943aea70796fd19L,0xaca22cf206f55e66L, + 0x55372159c0b75487L,0xc7b6b1d2d1244fdcL,0x00000000000001e3L }, + { 0x13f187feb3b5b131L,0x8620e1362bcd4cccL,0xe68718297f061f4fL, + 0xcb04eeb2f4a85e13L,0x4a93fccba0f352beL,0xe2ecabd9b9d84a8dL, + 
0x7183cdc20b9058d5L,0xbf25cd935d8dda50L,0x0000000000000168L } }, + /* 12 << 399 */ + { { 0xa074fdb7be4badb6L,0x97b054950df5f046L,0x5a3f64e4eac3a8b1L, + 0xafc5e0132971faa1L,0xe6d4ae36568e4a3aL,0xeb0324bb97479a2aL, + 0x2dcaf3d866a2c69bL,0x9f9b5551813f21eeL,0x00000000000000fdL }, + { 0xea8f05c8a13d686cL,0xf27edb763bdbd1deL,0xb2e5fd405c65b0f4L, + 0x5e959c016cbd964fL,0x02ccb301dbc193a1L,0x34ca7ed9013b972dL, + 0x7191cd287ede3f2fL,0x1d07f45798bf4e0cL,0x0000000000000057L } }, + /* 13 << 399 */ + { { 0xe0b1685c5ebfc951L,0xa517e7a232c59513L,0x9a6282865780ed11L, + 0x499d70c775533b55L,0x593ee20b020f6027L,0x63a5ee05f683a38dL, + 0xf672f073150476ecL,0x621c8f9616caa574L,0x0000000000000094L }, + { 0xf0da17c319bf3540L,0x77043ebd6e05c870L,0xf32446b727a4416aL, + 0x2ef699c612d04df5L,0xdd3cfd1f324cbf7eL,0x0171a41f631b06ceL, + 0xd12b01852e95d511L,0x043cb48ffd0c0078L,0x0000000000000174L } }, + /* 14 << 399 */ + { { 0x284e98f320f12986L,0x3112bf24749ea8d5L,0x5fc7a2c42782914dL, + 0xb2f7c229d94786afL,0x4b17bcaec7158280L,0xa139d563e6189a46L, + 0x9c72e9c583cc9173L,0x5cd5a6a0be56b6a8L,0x00000000000001a5L }, + { 0x51f0e802235a8051L,0x153dc987c4c5fb7cL,0xb549e9c79868a706L, + 0x717390089bdf5c1fL,0x0d04f60cccbec99bL,0xde66c9617f4bd294L, + 0xa787d95e51156724L,0x7ce4b4a5ae12b9b3L,0x0000000000000091L } }, + /* 15 << 399 */ + { { 0xbcb9c82f32d7d93bL,0x1bfdfba7dce386e9L,0xc05d039a3f8b5ca9L, + 0x58c78e8e299b0f53L,0x1baa1781e60cbd19L,0x8fb6ae0db0eeb838L, + 0xf26dfec46483aeedL,0x01b7456091303d67L,0x00000000000000ccL }, + { 0x39567f445ceae83dL,0x9a407eacf43d8eabL,0x361a32025ab1a391L, + 0x1c532e29d43e2092L,0x558d4fb789f04a1eL,0x0fe9497d2017914aL, + 0xe96b05380171fd50L,0xc9b0b3ffeff1c1d7L,0x0000000000000044L } }, + /* 16 << 399 */ + { { 0xe7f35808fb6618bdL,0x6abb67e1732ef576L,0xaccabb44cd0bfd8aL, + 0x176ffa9183459746L,0xcc12958585b88b2aL,0x28d94486fff151ceL, + 0xf55fca164c84bb00L,0x07cbc8419c3a42b6L,0x00000000000000c6L }, + { 0x488400d078788c39L,0x15ebb100d8eb1fdcL,0x58decae3bdc05f72L, + 
0x3bfda4a75d56a2cfL,0x7ea3f8ec5ae6701eL,0x652e7e0b3aaacab3L, + 0x267d275228609228L,0x52ddc983912efd7eL,0x0000000000000029L } }, + /* 17 << 399 */ + { { 0x4a33ff03ff5fa227L,0x12b266a45dfb4bb5L,0x18ad0812a77d3789L, + 0x316275f3135e8b73L,0xb8aeba2e1aaaa211L,0x30578d2fb9d962a9L, + 0xf8b00f02cb534bc9L,0xb935149cdf535572L,0x00000000000000a4L }, + { 0xf8d93fe1793528c0L,0xea0a5bcdaa631367L,0x7a221948dfb0e4e5L, + 0xeab2e52a857da8f2L,0x80f72c632657a647L,0x24118aa3a1509961L, + 0x1131400950388917L,0x9ff6d9f6a085a80aL,0x000000000000006cL } }, + /* 18 << 399 */ + { { 0x0ce0f89f0356ef50L,0xd8cdf51e37f24a9aL,0xd0720e9bd237fd55L, + 0x20b75973aeb71292L,0x0d6efb23e42b2758L,0x0fcbdd568721aecaL, + 0x11658ce90fda8ae5L,0x23a4d576700782d9L,0x00000000000001acL }, + { 0x77fc6f2f903bb623L,0x36710ba75b2a0237L,0x4ea518d1d55a12f4L, + 0x0c0509e14c95dee0L,0x67e240784bf6c59fL,0xa9bd12b5c925e26fL, + 0x5865b6c38fabdba8L,0xc6e3433b33b34605L,0x0000000000000101L } }, + /* 19 << 399 */ + { { 0x273d761d557caec5L,0xd8de3470242043c4L,0xfc9eec9d6293eef2L, + 0xa671ef1786203aafL,0xf38b26fe7f44eb34L,0x40c0286ed1ecc573L, + 0x3ebb5cf5dc14b363L,0x5b2c33762e1dfe45L,0x00000000000001b1L }, + { 0x836149c9bd11c412L,0x49c3837653342417L,0x4c93a91757fa625dL, + 0x529ab0965445be81L,0xd172a7c5803a04c3L,0x0c47e6b365873d1bL, + 0xdcb189bda095bddeL,0x9bc935372caf25cfL,0x0000000000000071L } }, + /* 20 << 399 */ + { { 0x8287592e80c0e648L,0x79cfe5b529ce1a05L,0xbc2ed6c732859038L, + 0x076334f7af367139L,0xe13e55593a592211L,0x94d5548847f87368L, + 0x15d99113a2f1b6f2L,0x2bc0ebe9ff79885cL,0x0000000000000171L }, + { 0x68eedbb2ae127d70L,0xadf3485597ae5d0bL,0x02e40ba8d1f6dc0cL, + 0x46cf4b8f3e7a23c4L,0x111958025626c5cdL,0xde32666e1c728ee6L, + 0x2c594c77bf2d8b70L,0x3dcf58bb1469bbd0L,0x00000000000001cbL } }, + /* 21 << 399 */ + { { 0x7b9d09c5567ced39L,0xf2c277bc10de6fecL,0xd48b924b423b4942L, + 0x21f49c8bb147ebacL,0x45ce05f0003eef62L,0x291f77ada2b01ea3L, + 0x754bbb867bf14c9dL,0x2abeb1fedb8f0b77L,0x0000000000000115L }, + { 
0x8de7419c48073c55L,0x540008c26fbb1768L,0x579ae6d9610d5a83L, + 0x7c2f90d43ca9c514L,0xf5bb5df4e078dcfdL,0x02cec780eae4e114L, + 0x5c4d230feb0c66f8L,0xce73ffc6c28bf3d5L,0x000000000000005fL } }, + /* 22 << 399 */ + { { 0x548235d6da309336L,0xdc058555d0ce4b03L,0x8103a260117d9fb1L, + 0x7aac6ea7962fbeaeL,0xdf219bc75f9f7c6fL,0xdb529bd239755a22L, + 0x7b68e00472676f34L,0x99590caac5011f75L,0x0000000000000027L }, + { 0x8b591dde80e9bfe7L,0x510daa29259b4046L,0xa6f4c61f41cfdbd2L, + 0x6934a20ef175b862L,0x2a73244068cd3951L,0xcf3ca559d119730aL, + 0xb8aa2298df77ebfaL,0x8b85cd46f78ece91L,0x000000000000015cL } }, + /* 23 << 399 */ + { { 0x6f71753a499b06afL,0xe9d09e3f7221545bL,0x366a725173e4aa15L, + 0x1cf56688a241d729L,0x45f261b6b7beb74bL,0xb17277cd0d48498fL, + 0x7009afd203d67414L,0x26ebcdc5e9405ce7L,0x000000000000004aL }, + { 0xc72b4a31cf1b5cadL,0x047ec0b68676686cL,0x3485799fc8b8098bL, + 0x62a31effc762e262L,0xf1402ccb07ea3b86L,0x6138f07fd91931f0L, + 0xa1b2834870baec3eL,0xe8735d8406e9de6bL,0x00000000000000d5L } }, + /* 24 << 399 */ + { { 0x5419ed9ae06fb3cfL,0xb454c6fb8e703ea5L,0x9670af86ac4c2649L, + 0x365e43f40db43887L,0x1ada3d8f00c320b4L,0x1d00cbe5aaffe3d9L, + 0x1e99987d211142b4L,0xb98381f79cb8bb86L,0x00000000000001bcL }, + { 0xdd2388378b1718b2L,0xa8cb957e532daa33L,0x20a55f673b041a83L, + 0x973207121b075250L,0x5a70aa65b7ef4b51L,0xa33056613865f77fL, + 0x96bd1a89bd8a4303L,0x507d7779fdc197c0L,0x0000000000000048L } }, + /* 25 << 399 */ + { { 0x1fa5ad1177a5fbecL,0x8c566037b8b5655bL,0x52848f491321baecL, + 0x8305e20227f02ff4L,0xedada23bc276f11dL,0xba94317f8acd1abdL, + 0x8b125970e3edbcfaL,0x75e021d50f5643c8L,0x0000000000000042L }, + { 0x9ed9e1e313be597eL,0xe271c2a6b74be691L,0xe249fdff2e52b57dL, + 0xae84ab19d18031b9L,0x7135760af49f27f3L,0x3c4775cc1cc28c5fL, + 0x49165948fdf0f394L,0xee56522b240d7f13L,0x00000000000000e3L } }, + /* 26 << 399 */ + { { 0x16aad8d873f397e2L,0x8cda09d440bb988bL,0xe351cd2da881cd6dL, + 0xb92fdda9c9a6fd41L,0x2298efad3932267aL,0x76546625a099bb25L, + 
0xdc48a34858230b71L,0xd98ecaccc6ed085bL,0x0000000000000107L }, + { 0xe3bd2de5dc95f5d1L,0xb890da1f2e2c2366L,0x7c6226a1c4f26cc0L, + 0xfc1f77bf00356c62L,0x0c8e11f7d87ee6f9L,0x84cc68aa0af9a4a1L, + 0x70eb714ace01ea76L,0xdc9e3c696ecdbdc4L,0x00000000000001faL } }, + /* 27 << 399 */ + { { 0xd3eba8f103dc76a9L,0x92dcb80109d7c9f2L,0x09a3edfb224325ddL, + 0x78d9b818c87e4b16L,0xbe607e9788d33a3cL,0xdf01263e152e6cf7L, + 0x49fd31ccb308c83cL,0xed94b658d68d6a82L,0x0000000000000161L }, + { 0xbaad071e181669a3L,0x5f2e0a1dd54676d3L,0x96ea8e3e4a9ae061L, + 0x0d2e0ee8ba5bf8faL,0xc1892fe007bb010fL,0xa538bead73632f75L, + 0x6d794924378c2631L,0xa7c065460205723aL,0x0000000000000185L } }, + /* 28 << 399 */ + { { 0xc7f26a2fd5434124L,0x7629d62516380451L,0x8513932307858144L, + 0xfa1d44bd4627a0a0L,0xebd1be26b90a996bL,0x57baf49237f94e07L, + 0xabe9cf16d5ee67b3L,0x5ab3cb065fce9c53L,0x00000000000000c0L }, + { 0x38a153713faee0a0L,0xfc98c0dfa1142d0cL,0x9320556353001436L, + 0x73acbb7da63e615bL,0x8204c993414da94bL,0x2fd26bc2e8622aebL, + 0x86a1eb1826467b86L,0xca554d782bd1a581L,0x0000000000000044L } }, + /* 29 << 399 */ + { { 0xe33762396cdd9921L,0x4398b198c6386361L,0x05002343551040a8L, + 0x0a82fca0be2ad7bbL,0x727e2d5f3c1acb10L,0xfcb0c12d4af46347L, + 0x01782a40ea661ccbL,0x6f1f3e45a49bea1cL,0x0000000000000123L }, + { 0x3ee55592961c0cf4L,0x15cb90c37c19abf7L,0xad4e930d2ee5a6a0L, + 0x75b054aa3fa249ecL,0xcea656d85d005c47L,0xcab95ea1be507dbcL, + 0xda131e43dddfa969L,0xd508e708dfc6e136L,0x0000000000000149L } }, + /* 30 << 399 */ + { { 0x01fd929d48d5f1fbL,0x5fb11462724ad844L,0x29442573707ace9eL, + 0x38b790bcdef7dbd1L,0xb33eef7dd77c1c89L,0x952246e9f8b1403cL, + 0x11ab7ec0e78d9efeL,0x538af4c702c22634L,0x00000000000000aaL }, + { 0x3827660d04c6eb59L,0x62a02de83cd9597bL,0x57fa76f246ece58bL, + 0x03b9fb533c21145eL,0x9e04ec45fa60662dL,0xc28cc184944a4a44L, + 0xf26c1df341ff380bL,0x6d48b57c13c7f613L,0x00000000000000d1L } }, + /* 31 << 399 */ + { { 0x971d5a7b786009e9L,0x846c3469783fdd9aL,0xd9a307cfb335a7b2L, + 
0xd583b59ef86adb9eL,0x17dbcbe05b432839L,0xc67e7b0a35c6d202L, + 0xc617810a0d50e1daL,0x9c291e5b64bfda1aL,0x0000000000000129L }, + { 0xcf6a1382ee444a83L,0x073decbf249c2c46L,0x974ad0d3b59b9474L, + 0xbfc44aa8440132b1L,0x9f959f7d692ea2ceL,0x6653be0fc93ef496L, + 0x8ed05d4c0b087a60L,0xdcc0c8ba573f0e2eL,0x0000000000000080L } }, + /* 32 << 399 */ + { { 0xfed9b067e2eca03cL,0x34ff6f2aeb3230b2L,0xda83b96a31bc82beL, + 0x3b138d2ccc89c862L,0xf4a27aeeebd59505L,0x29df153a8bb49ddaL, + 0x2dffec4650a555baL,0xc899108d0fec5d80L,0x00000000000000bfL }, + { 0xc834e65988da1a0fL,0xb9bfba08eebe47a8L,0xf59d33937b3a2b73L, + 0xffc7cb5d8decf4e4L,0x72477dd583dc5f4eL,0x0a59e11e43ac64ebL, + 0xda8aa16ccb10a6abL,0xe571ec8c3759c37cL,0x0000000000000047L } }, + /* 33 << 399 */ + { { 0x2890f4b93d32bc1cL,0xf4e8332ac455d6e5L,0x7b1523c8c4d7c367L, + 0xd5006acba60d5778L,0xf958872021f34b5dL,0x8c36c23628d1d74fL, + 0xdd2ad5092d0b0f17L,0x1a895017b66a2e02L,0x000000000000014cL }, + { 0xfe2eb1a11d8ec07eL,0x7d755399215e9267L,0x3cf9b914891f82b5L, + 0x0a77198f79aeb59dL,0xd71d06ce0acc8e55L,0x911f149d3b3c4c20L, + 0x86f130115d124fe4L,0x6972ef3ede4f5d98L,0x0000000000000084L } }, + /* 34 << 399 */ + { { 0x57459abcecbf10acL,0x2d399838f2c3c306L,0x6467c2753f9b1181L, + 0x58edac9c040e0722L,0x80cb5d10f23a50c7L,0x6db0315b304aab12L, + 0x7b662c951cbc56faL,0x421b1f244eaa55d6L,0x000000000000019aL }, + { 0x0bd62b3f5c87fd33L,0xf309039dd7b12051L,0x155dee7fff9da505L, + 0x66ed5fe9e35d68fbL,0x5b9829227cd99f55L,0x1de7e5f1cebe48cfL, + 0x44308df3b0e2e114L,0xb2e026d92735bc27L,0x00000000000000d8L } }, + /* 35 << 399 */ + { { 0xa0f27eff25c2b290L,0x5496d39c3c7b110fL,0x96d99f277f278d2fL, + 0xc73da2d2f5de1a83L,0x77ad34919fe0a789L,0xa2a7bdc8f2e23c14L, + 0xbc162cebd10ff276L,0xdea043e2ce028697L,0x0000000000000121L }, + { 0x909a4e1aa11a8d73L,0x985baa0e46b89908L,0x2054d790ac720559L, + 0xb6b4e53a1f8067faL,0x39b49b6b1e143560L,0xe96e6562581d42e0L, + 0x7a540841b920ef30L,0x34edae19ff2eccb2L,0x0000000000000196L } }, + /* 36 << 399 */ + { { 
0xf4c48d376d17d9b5L,0x714b38acd9b7c9e8L,0xb332763c8e59bd23L, + 0x704ec12ed64f9bc6L,0xef738ab2c472c08eL,0x39d182692313abb9L, + 0x64b05426cd80c265L,0x0dc6c228345afb94L,0x00000000000001d2L }, + { 0x7d51410b7a9c096aL,0x3acb41805f7d6f9aL,0x10559f44ecd13fc8L, + 0xfa6f5288446a07cbL,0x2799b503ded0e010L,0xdc865f094fad1e65L, + 0xf1de127e0e77baffL,0x25f090228bad6efcL,0x0000000000000198L } }, + /* 37 << 399 */ + { { 0x6e9dad7a10b10723L,0x4528b97a9561e0a4L,0xf6ce779cef7aa977L, + 0xd0a07355c09f82f6L,0xa519f70c6f5a96aaL,0x09917e5a0aa6d4ceL, + 0x261a96d028994835L,0x28e78eb2ec9a5868L,0x00000000000001e3L }, + { 0x60efc88871882100L,0x795ce82f5fe4b377L,0x8e805c686f587d64L, + 0x3cd8bb97cae577abL,0xdfa751c31d8417c8L,0x940341deec17e1b7L, + 0x3e87a1fc1135e1a0L,0x94f8a8931cbee12dL,0x00000000000001c3L } }, + /* 38 << 399 */ + { { 0xf257b5cd9cec713fL,0x928b215a0356f001L,0xb2c44dd22b0a2d42L, + 0x62055ba191c318deL,0x0e298611dfc58d1eL,0x8840d79c383da28cL, + 0x324e3dcfb18554bfL,0xea84bee97fabced0L,0x0000000000000041L }, + { 0xafef969437baed5eL,0xc04bdf2c2f5a6cfaL,0xf75197c07eb37653L, + 0xe5b011b029cdd976L,0x1f41be962c254ae4L,0xcc771ab718829595L, + 0xd1215bef12f64b06L,0x5e970494117f72fdL,0x000000000000016bL } }, + /* 39 << 399 */ + { { 0x83bb6de40bcda6cbL,0x90f2fbdc1e5fb277L,0x0f6f3261119a1e8dL, + 0x7f6434f8dc73b93aL,0x8a1b958a9158b9c6L,0x43fb60a45ce9133bL, + 0x9bd5bb0284d86dc4L,0x1e3a7ca06872b101L,0x00000000000000bcL }, + { 0x8d2d115f230c41bfL,0x1e7df5d4916b43b7L,0x3815b3c724d94d99L, + 0xb463eb11ef11894dL,0xccf7ca983c63af75L,0x0bffd871dbaac87eL, + 0x267db8678dbe5699L,0xfe97d927bf6cff8bL,0x000000000000000eL } }, + /* 40 << 399 */ + { { 0xe32ddade157a2ae6L,0xcfc8bf7c0e6e08e9L,0x35f750f3bc2e3f23L, + 0xbfa297a200d897acL,0xf3283590b52421c5L,0x1f2b851222bf59afL, + 0x7f63809d62e5d037L,0xc33dc13581ee5e5eL,0x0000000000000022L }, + { 0x04b006db4bff94b5L,0x373fdb9ab2cee9beL,0x39e63eb4cb8d9886L, + 0x4b371662202592c1L,0x1f5f94a4f6935600L,0x6f7103c47eefd53cL, + 
0x0db5f837a02bf79fL,0x29cb566178a72ceaL,0x000000000000004cL } }, + /* 41 << 399 */ + { { 0x34338894e3f3888fL,0xcd26ba945a0ca66bL,0xef170a1c666f902bL, + 0x34f0dd0554012d0fL,0x4c8744603ff21e0dL,0xfdfa53e5f2384339L, + 0xdd11e707557da25aL,0x9cdc67f0075996ebL,0x000000000000009dL }, + { 0xbe8e6f5910ed53a7L,0xcee1132182822051L,0x3bc4f5f9b57cdbe0L, + 0xa83b5947d318ec78L,0xc841ef967a0a6d8bL,0x689ee84e011639c6L, + 0xf07f99d813ee15cfL,0xc49549ad5a0ea35bL,0x00000000000001b8L } }, + /* 42 << 399 */ + { { 0x4f2333e5efc1df8cL,0xbd1683fbcf67e9a5L,0x8c867003532ed940L, + 0x8178b176f39d6717L,0x789e5a65e8865f2fL,0xa288918433099d45L, + 0x136d38ce4d8936faL,0x3920c375950ac39bL,0x000000000000005fL }, + { 0x33bcf75a182da275L,0x3b42095667577d3aL,0xe6dd20f8f6767c3aL, + 0x8dab007359b27062L,0x5186bec68f34af3cL,0xf084043d42d72626L, + 0x50fbd81a90cd4665L,0xb932207e385b29f3L,0x00000000000000ccL } }, + /* 43 << 399 */ + { { 0x3e2481ddcca46795L,0x2ad212182d2f1511L,0xe81633a7aea3821bL, + 0x60ed6123ca636678L,0x2af841042b8c035eL,0xe4c0875e9f7cd940L, + 0xeeb262546d649c9aL,0x9c1dba6f70179ed5L,0x00000000000001c5L }, + { 0x1acf2025e719d7e0L,0x27ac0cb7bca64a98L,0x1a4f58f5f9a54bc0L, + 0x3c2fc7016a743761L,0xa916ab7a0767dd6aL,0x8953314117fdfe8fL, + 0x5eef51dc4a7d6265L,0x1a1890e548920a64L,0x0000000000000177L } }, + /* 44 << 399 */ + { { 0x7772aad1a323089bL,0x1c3eba0a917ff79cL,0x8b9da0870fc8b113L, + 0xbe669dc94e5c0c10L,0x3ad27c13cac54859L,0xdb835b140cfda5f2L, + 0xfca9e3fef20ee2dfL,0x80870761674b7ebeL,0x0000000000000167L }, + { 0x788ed1db4356cc9dL,0xaa557f015b5c9608L,0x62c22482cd869134L, + 0xd0ec45740dff5303L,0x3df51b9f5389ca31L,0x59a6e53145b54873L, + 0xe6052b072df10bc5L,0xcb7107d41f4cd861L,0x000000000000018fL } }, + /* 45 << 399 */ + { { 0x24d270b0a2c5f6cfL,0x97246482f54bdbe0L,0x2b71247a8ae3277bL, + 0x3273c1c9a45552f9L,0x6187912dda706e75L,0xc43665644e2903a3L, + 0x9243d5b29348f72eL,0xfb5aac5ba7b876e9L,0x0000000000000070L }, + { 0xe9b247659aab621cL,0x378e7c849cff96f7L,0x794aea34d6ebe2d4L, + 
0x4eadc9d51661a8d9L,0x48c7c8058a105436L,0xcd852b6dd8d196c5L, + 0x40a4c838d2c05f91L,0x60b4425eab562273L,0x000000000000008fL } }, + /* 46 << 399 */ + { { 0x68143217fc5598cbL,0x55f45fe3ecb8be5bL,0xdc707a143627496fL, + 0x5a60f85e9665cf29L,0xbfa30147a13782ccL,0xcecb4dc9e6496833L, + 0xa712fe79bd9761a5L,0xcbfbb56a078c3e11L,0x0000000000000030L }, + { 0xb0a421291e0066d8L,0x23f10112fdd822f2L,0xb83cbe5147e7d2afL, + 0xf5d65634852e2252L,0x6223702b277cc79bL,0x1bb298814b2433f5L, + 0xf8ba03fcca6ccb00L,0x0859156462e0a22fL,0x000000000000000dL } }, + /* 47 << 399 */ + { { 0x2b6f7947ec51092cL,0xacd21ef95149fc12L,0x5f45cff7fb65ce50L, + 0x1f51d19eed597402L,0x8b5ddce12b9e93a9L,0xd47f7ace6c0ec08bL, + 0x7e2e638e2f64a0d8L,0x734b83b65e71988aL,0x000000000000014bL }, + { 0x4f6e691930b08bf1L,0x7dce78e400390037L,0x22ff88f758d11de7L, + 0x758620d621202d1fL,0xb94a535664939469L,0x17da7341acee471cL, + 0x2d37a341fd2310eeL,0xa8601bc372a3c4eeL,0x0000000000000078L } }, + /* 48 << 399 */ + { { 0xc0c5b4a6fd219d25L,0x52c7f6ed058ff8a7L,0x1fe69b86b6e0acfdL, + 0xd1e3e73b52a447c1L,0x31537c0968054dfbL,0xc312a9a53edbbcf9L, + 0x893237de8b0f13bfL,0x1784bade6ea4ce95L,0x0000000000000191L }, + { 0x112af36812489269L,0x49acfb797f0d7098L,0x94f9883080743844L, + 0x46a890ed1c9b2deeL,0x7874e2973b42178dL,0x026f247f131ffe31L, + 0x9f9e06e01658f803L,0x8beebca7b4c25ec0L,0x00000000000000ddL } }, + /* 49 << 399 */ + { { 0xff25d39a56fcbb0fL,0xf3167cd6767aff46L,0x0d149e1fe37f1498L, + 0x5cc5476e4ef8353dL,0xf483af728a100b6aL,0xdff90a5a5e61a9ccL, + 0xef06ad071b8683c9L,0x1a51a840f21160b5L,0x00000000000000c5L }, + { 0x0cf4a4f250535795L,0x6465217feb140724L,0x39b30eb481336b95L, + 0x4d6808ed2ff959b5L,0xd2cbd262102f0eb9L,0x81a05ea2401e2b5cL, + 0x1c88cf3f90e33b3eL,0x0e5da177bf342b81L,0x00000000000001f0L } }, + /* 50 << 399 */ + { { 0x692d5ad031e4d9f9L,0x404aeda94c975f59L,0x774d5f574567894cL, + 0x9384a6d394c384b5L,0x5640862431f1ade9L,0xc7fec784ec664a1cL, + 0x1855d29dd8daa744L,0x215688b92cb5ac77L,0x00000000000001baL }, + { 
0x941ccf2069ed9766L,0xbe0bc408bb264b33L,0xaf690b685fc53f91L, + 0x2d3d6a0d402b1ca7L,0x15f7858d0d0bbad4L,0xd4107f9a09001d66L, + 0x5d39101058f42888L,0x6dbab76e1957a5e3L,0x00000000000001aeL } }, + /* 51 << 399 */ + { { 0x45213613b7e2c6ecL,0x40eba404231a4131L,0xb6fefe73a732bae9L, + 0xa1e9bbf5dad5d26aL,0xd4292e9b3754d63cL,0xfa2f7d163b4afd0aL, + 0x101c23ab45993a58L,0xef2e6c2e76be32b6L,0x00000000000000a7L }, + { 0x39ab0b2e91c2102dL,0x08c2853a38a0dc87L,0x0bf44cb5fc98a8c2L, + 0x3944bcc04053d1dcL,0xc58245c95a4c6921L,0x727b1481097fd198L, + 0x65aeacda0fd2d648L,0x39f714eab57be657L,0x000000000000010aL } }, + /* 52 << 399 */ + { { 0x63b6bb80bd8a172bL,0xa4fcf17c5dbcddfaL,0x20d9c687abd07afaL, + 0xd2c3d30414609e55L,0x54f385032006d72eL,0x26b914c24df39b12L, + 0x27f45ab386f0c497L,0x3dcc1ec688f50651L,0x0000000000000073L }, + { 0xdeb2636533f06be4L,0xfcf1a35db48f3d92L,0x7fe1fd3af7cc2a73L, + 0x41ca3e11e6aa5884L,0x164881a8cdc21e13L,0x8cad911a03484874L, + 0x767521e9e9c3025dL,0xde97f814218da4baL,0x00000000000000d6L } }, + /* 53 << 399 */ + { { 0xd3ecaad9f421e457L,0xa1343944abcd6bdaL,0x5e3976b4ef498da9L, + 0xb3b59e1df3f12ba3L,0x07bfc03c8e4490c0L,0xd5fa985d5b53f101L, + 0x715822a34bbf7797L,0xbd2d7345fe9453b0L,0x0000000000000147L }, + { 0xee1ec3a72f095506L,0x1fcc2581a4577097L,0x8c650b965b1bb6c7L, + 0xc19686238a67baedL,0x6105e80775b944f9L,0x45b830e0d8ad554cL, + 0x4bebdb941e52e028L,0x5fb560dfd302d75dL,0x00000000000001e1L } }, + /* 54 << 399 */ + { { 0x41cdf0a8090e62c4L,0xc203cf661a389c50L,0x08873cd92fb0e7b4L, + 0xfb7d8fd2849e2c3cL,0xbf0ecf0abf15a0daL,0xcff06a5ba3288614L, + 0x3ea4f678a5a1bd6bL,0xd07f4fea5a0b447eL,0x00000000000001e9L }, + { 0x7bde418b0072eb53L,0xc497b7d7c895370cL,0x360c81eebfb58e0cL, + 0x4db855d7ca0d8993L,0x21da05101cffe5c2L,0x4e6c55e431da01b2L, + 0xab33e450a177a1caL,0x4e024158c8d9d072L,0x00000000000000c7L } }, + /* 55 << 399 */ + { { 0xf01997349c89c37fL,0x422e2013e23346feL,0x872234bc0fa240cfL, + 0x073c9ec935c98044L,0xc5f576844da45c45L,0xd9f227f2e036eaffL, + 
0xd624bcb8753152bdL,0x63adde436cdfed8eL,0x000000000000004fL }, + { 0x6ed39da2e48f14f6L,0x7e044ac48399ed5fL,0x3d32aa1ccadb8a57L, + 0xb17680e165432f64L,0xd1ac87201062afbaL,0x2977e3c2ea64a181L, + 0x3c50ec594f5ebbd5L,0x06d71e4b8c0f88e2L,0x0000000000000116L } }, + /* 56 << 399 */ + { { 0xb3329e3168581a3aL,0x432c15f5e718d540L,0x50fb0773b5405fb7L, + 0xd422287361b9c0e3L,0x3a53d3dbae54be89L,0xb7f9d1e582601b93L, + 0x33d9cabdaefe244dL,0x90c3764d347640a4L,0x00000000000001d1L }, + { 0x8e0fbf2c912d5804L,0x3b9f0ab36fd05d70L,0xdd8c3192b7d09ac8L, + 0xf44cab3e47c16785L,0x7de9fd5b35549cfeL,0x38dfdf50eb615262L, + 0x57e912f224dc69caL,0x69f6970490ea604aL,0x0000000000000199L } }, + /* 57 << 399 */ + { { 0x4687247eabad6418L,0x4f83495ac51d51ffL,0xfce781bec0aef136L, + 0x8abed322580cb513L,0xb85782d6a24589daL,0xb8f7c3ad9c8640c8L, + 0xa669aef661539d62L,0x9379601cb4477f5aL,0x00000000000001b7L }, + { 0x332b9e9b0f7d41a3L,0xe7bcdba572f306caL,0xed16c9e3263c7286L, + 0x5e47df68f8fe3510L,0xf0d18d39e3726bd2L,0xacb28a51274ae3f8L, + 0xd0de1a497de215c6L,0x4b050057a1b3fd99L,0x000000000000014fL } }, + /* 58 << 399 */ + { { 0xc060c814721192ccL,0x1f3185a112876e37L,0x114f15d65821223eL, + 0x391814b09bd49029L,0x21cb9042f69f5c98L,0x9788e3395a9b2f4aL, + 0x7b0dfabe54926a1aL,0xbcfa4d6051cd489cL,0x000000000000014bL }, + { 0x19829a11c90ff321L,0xf6ca593932b21fe7L,0x6d858a7cf4f5aae5L, + 0x0dca676f9e7b2f50L,0x000990c44b9f09d5L,0x5b89da9c85dfbfe2L, + 0x3d5e0e8eabe8b3c1L,0x1346693456230753L,0x0000000000000006L } }, + /* 59 << 399 */ + { { 0x850d86e94937892bL,0x07fee7bb2443a2b3L,0x33ba4ac4e1cb6357L, + 0x9d14b81d36923302L,0xe15ac9c895ada402L,0x567bfcd1ac08b951L, + 0xded4973cab9f0cc8L,0x3b9d665fbe68ffd1L,0x000000000000019aL }, + { 0x58fbfb85882a8c22L,0x1d20051a57a91ae7L,0xa817c3921ffb42d0L, + 0xa547264cbd43f75dL,0x5c2c5689eaf301f5L,0x9a6180df6567fe83L, + 0x3f0b7e47edbc4659L,0x3a74b5825cca8bd2L,0x00000000000000d1L } }, + /* 60 << 399 */ + { { 0x8a8d4ca6d8dd2438L,0xd4f4c7bae21c7f82L,0x2146e9df4456d737L, + 
0x99e44513b7846ee4L,0xa1c33ee7dbe655f3L,0x3f9e32737648952eL, + 0xb2ab63292d2542faL,0xbbd56f29ab7433d8L,0x00000000000000d8L }, + { 0x9324ab77baf53306L,0x41c9e3370caf808eL,0x29ba5468377d0a1eL, + 0x20d49c30208ae032L,0x9ed653f68abe7fa3L,0xf31a73b7496e497bL, + 0x88f1357835532c73L,0x7ae79b0e55591f20L,0x00000000000001f8L } }, + /* 61 << 399 */ + { { 0x830254cde199ec6fL,0x0faa16580c6aad81L,0x37a327a5fcd2e4e5L, + 0xb414910976c46adfL,0x944b4fb9218ba402L,0x4abcc3e5c5df6891L, + 0x578b24b904111a63L,0xf2d5e4003e278bbaL,0x0000000000000032L }, + { 0x857d53702414469bL,0x94d6f875d8b14839L,0x83561fe4c13ba3e5L, + 0x87995be649f39edaL,0x0438cd3e7a4cdaefL,0x56fc49eaa9282dc2L, + 0x1e7e40c6786edabbL,0x6d7f686a469d086cL,0x000000000000006cL } }, + /* 62 << 399 */ + { { 0x73a04d03a28b455fL,0xc84e719cbf57585cL,0xd6d9911fd35e53e1L, + 0x331034c059f94f9cL,0xadb8fdd3875393a0L,0x3be757f899a478c0L, + 0x4a62214fe02466a7L,0x18913f0488f2c28bL,0x00000000000001a2L }, + { 0x952c4e67ff257bf8L,0xf85960a2b4000ba2L,0xb767ebc447e9be29L, + 0xb4cb73f318047110L,0x2f53d6f5cecc7f40L,0x6d4f9956feaae016L, + 0x1e8432279badee98L,0x568ef329a23ddf3aL,0x0000000000000141L } }, + /* 63 << 399 */ + { { 0x3a99f756c26395e6L,0x4bfbc2e0527eb51eL,0x2e37039c79f00878L, + 0x78e08d275e6b1e6dL,0x6f6f335145d74f43L,0xbcc475bfc6a9906cL, + 0x5c60a9a9b8a7301bL,0x0204a0a3d640cb3aL,0x000000000000002aL }, + { 0xd378e362c2ba81fbL,0x110becf374d1d5d9L,0x14fb5720abb529c2L, + 0x923d0d28d9b58c66L,0x59d4651aeab145c9L,0x5a9178553c90bf08L, + 0x5b33d118c24717b3L,0x5142b895dc818c38L,0x0000000000000164L } }, + /* 64 << 399 */ + { { 0xb4c4ac143ffe4858L,0x3d9c1b48baab1296L,0xb28b5f0a5791e378L, + 0x3aa34de82a5d32caL,0x8e90308ae76fb3f0L,0xb74a7ed3c85d002bL, + 0x48a246ab65840cd3L,0xc8494718771d3f2cL,0x00000000000001b8L }, + { 0xb3f57d14a28da023L,0xdbc8b8d91d078f91L,0xfa5f5a7aba67f27fL, + 0x4908adc3fc60df70L,0x4fcdd3e0618486cfL,0x2e725571eb3c5855L, + 0xdafd1ef5c5d9dd01L,0x806554ee19b9cfb7L,0x0000000000000125L } }, + /* 0 << 406 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 406 */ + { { 0xc5f56d18fdb2dd1dL,0xcb31387972b373e7L,0x2fbfa499d85c69fcL, + 0x49ef4e5504211367L,0x307a9a329f9f852bL,0x00be68013d586d9aL, + 0x6ab9c6709bea9584L,0xbcb478e24b06f588L,0x00000000000001faL }, + { 0xb6d095562b1e18c4L,0x2af8b9233a783993L,0xafcf8ae678ca9e44L, + 0x9139a8376cfec34bL,0xce8cd219583ef0dbL,0x8bf49e56abe307edL, + 0x89ba853be83c9968L,0xc5694a20008b52d3L,0x0000000000000063L } }, + /* 2 << 406 */ + { { 0xc3974b7675276d10L,0xb6e1836c07bb3d19L,0x2b1ba75db1619e9eL, + 0x1e89c2bd5c4a2ebfL,0x4c2ef54b7285eb6eL,0xba22a605df72938aL, + 0x51152a2657474591L,0x70c745ff997713f2L,0x000000000000006bL }, + { 0xa7ab63790835db88L,0x2a6d5526753370f0L,0x157e790a4b2e4edcL, + 0x0b56220a911f4047L,0xdedaa43bd7c0fb09L,0xc6ff25e35a31e2daL, + 0x6b0ee7bfa78ab4a8L,0xcbafb78836d00945L,0x0000000000000165L } }, + /* 3 << 406 */ + { { 0x537b5f578e6ee46fL,0xf204223120f5b371L,0xaa0921a6b9156758L, + 0x8e7aa4907ae36aacL,0xdbbfc95da650c738L,0x652cc382eb9c38f2L, + 0x224f35e15f0d2472L,0x7768c4358367e668L,0x00000000000000a6L }, + { 0xe0d7b3cfdaef3c5fL,0x4dad6270a0c5684aL,0x24b2fca0e9360710L, + 0xfd91b89a3e763d1fL,0x822b00bff598fd98L,0xdab869207bca6f89L, + 0x34db8325c7996505L,0x6e88215523f32f05L,0x0000000000000060L } }, + /* 4 << 406 */ + { { 0xbca656eb512e743fL,0xcd94fc24ba4f85ecL,0xf4383ad0048bdf6fL, + 0xdb10e381587f47a6L,0x47f244e7da3ec391L,0x5f337e380c33624bL, + 0x9259425f82a3c375L,0x509e56c3f276bf73L,0x0000000000000014L }, + { 0x2d65148dd94cb899L,0xcac386303b28185eL,0xa36bb7bde102adb5L, + 0xa1e7e0b5ff3fffb6L,0xc842ceace628e1afL,0x8e6dfa7c8647613eL, + 0xeafd758584ade38bL,0x7c4c0b0a00188d5eL,0x0000000000000151L } }, + /* 5 << 406 */ + { { 0xecb8fc22d872682eL,0xcee69ae9a340ad2eL,0x8aab065f725233efL, + 0x3314af1015860444L,0xafea039307d4218cL,0x83860c75b56d7265L, + 0x045d08732f402749L,0xc14d6ebb50676c29L,0x000000000000011cL }, + { 0x7018485cecc303f1L,0x2c88f82b189755b4L,0xa7d21cd59574649bL, 
+ 0xff5ce29da5c77fd2L,0xf06a468a58da370dL,0x50d800934a5fd275L, + 0xcba539d8b69ee483L,0x170868f7f2ee3b38L,0x00000000000001bfL } }, + /* 6 << 406 */ + { { 0x21f9e41cf53fcab5L,0x7bf9f35165adec42L,0x6b26bbd558bb94cfL, + 0x3f786272c6324c08L,0xc4a2eac0f2b7a273L,0xa13f661eac22a668L, + 0x68b9afc47fe9399dL,0x1b92202959664de3L,0x000000000000009cL }, + { 0x17c1b9a418a2fe9dL,0x2ba8744435cbdc79L,0xf4a4e0ce0b5f43b1L, + 0xcf17bf66e7569a01L,0xca184ac9c510d6eaL,0xb90fa4fc1c25e88eL, + 0xde8a6e1935d88156L,0x1546d7b58d8dc62bL,0x0000000000000122L } }, + /* 7 << 406 */ + { { 0xba45c8f36c94382bL,0xc1be84a34e796905L,0xa70a60a281b19d43L, + 0x5a333dd0a47323edL,0x7b28b6af0b27ff7eL,0x35c03b9fdea38355L, + 0xb95af1386ae6bcf1L,0xc9ef2a5a2b8ab4d0L,0x00000000000001adL }, + { 0x324116b7e42a4135L,0xe6f37e6ce3fd98a2L,0x9fd3f19f7c76568eL, + 0x22b20731515dc1b8L,0x9723434fdb8e80ddL,0x60e6c8e242232789L, + 0x9716ad2c04d7423cL,0x66529fe435216302L,0x000000000000010bL } }, + /* 8 << 406 */ + { { 0xe5d4defb5b3e313bL,0xa821692c272ac52dL,0x63e8db0d853d75ebL, + 0xe135b25b774da98dL,0x5019813bc0ea0962L,0xa03d6d8f93d92bddL, + 0x63ad9b320ec90c8fL,0x674e973c21967233L,0x000000000000002eL }, + { 0x6cfdfea798494014L,0xb9f9c0ce4f03d4bcL,0xb30273db8a4cde4eL, + 0x81dfaa1b3c413325L,0xf7bc31e7b3964d85L,0xf73996c8fa47fcd0L, + 0x1d5fe418a94e35b2L,0x7cd3986e9fa4f44bL,0x000000000000017eL } }, + /* 9 << 406 */ + { { 0x1b7980a57834d697L,0x9a3aa459c73206aeL,0xe10df0f1912aafe1L, + 0x3c252699cd81194fL,0x11c78b3f5fa93a50L,0x88c38a165c535f44L, + 0x33b2a6c905dc99e8L,0xf9a8a90d1ea49641L,0x00000000000001a2L }, + { 0x96a8c21569486979L,0x97f6097999b6ccb9L,0xfe3df60d18d3f89eL, + 0x83dfff4ef064bd18L,0x49b4057d54fcb9f0L,0xeab1e88c3f088687L, + 0xa1765be041072587L,0xf99b2779acf46728L,0x000000000000012eL } }, + /* 10 << 406 */ + { { 0x8128204a9a214c96L,0x0e0a82b44882a079L,0x33e5d6b9b27a35b4L, + 0xab4c56c097462902L,0xe54b628e5b19e791L,0xdf29b1f5d64e8065L, + 0x6a2862e6ade36f00L,0xcba66d7ddf61110eL,0x0000000000000107L }, + { 
0x76cc421fda5c2464L,0x0c4f2a6322f3528dL,0x2eeb70f2c54214f3L, + 0x58be1b09eb798dedL,0x2415a2c156bb8dc1L,0xf9c7fd5ca298c0b0L, + 0xdc7d1ae628082c59L,0xc4ec983d05a44844L,0x0000000000000192L } }, + /* 11 << 406 */ + { { 0xa04e47c0c9ed9d3aL,0xed1a5bfaec3716ccL,0xe70c47c1a2e4d7f0L, + 0x05931685553e5305L,0xa90e6d16c73eb2d4L,0x17058be63459f194L, + 0xcf1b93fa0e76af5aL,0xbe38213c93f26b0dL,0x0000000000000009L }, + { 0xa56996c75a0f2a52L,0x9fe2e1846badad6fL,0x64a6ae0d70ff96cbL, + 0x835954857b77aa01L,0x9910099ceeda6f38L,0x64b3d7f42c333e54L, + 0x6b13aeb23721114fL,0x791d073c90ca8602L,0x000000000000018bL } }, + /* 12 << 406 */ + { { 0x72b023245e3f9a6fL,0xc889374508defdb5L,0xab7235362acd0af4L, + 0x7ce916a85d02baceL,0xb2abe9dd96314ae6L,0x6b664d88cc74749eL, + 0x59f417bea5595e97L,0xe77f2e3b52ef341aL,0x000000000000003cL }, + { 0x1f57cebbe3aa5b6dL,0x870522a68901cd97L,0x926ce7d957cf5e3aL, + 0x2d8fc69331e15a34L,0xc3a756ad009b6274L,0x498748f3f82857a8L, + 0x7b27d095b5531159L,0xe2783284c7e359a7L,0x0000000000000082L } }, + /* 13 << 406 */ + { { 0xc09185da7e0f6976L,0xd8a1091ec4a3a6c3L,0x7f7a1bdf331817e9L, + 0x95893105a21b71e6L,0x529b76e0031bc2c0L,0x899c9c15f486e501L, + 0xa1c9b18f67318b39L,0x2c05bb6aadc3ac81L,0x0000000000000087L }, + { 0xf051133eb2a8d83bL,0x8b7c0690cecad4bfL,0xcdc32d5c3836511cL, + 0xf17df7ec1fde03acL,0x65dae2a3603ce6cdL,0x7711a540f1ccc60aL, + 0x391a3d1b3d8a3950L,0x640a6d5ab3cf9141L,0x0000000000000148L } }, + /* 14 << 406 */ + { { 0xcfe4526c7deb11f7L,0x67b27a2787647f71L,0x07eca87da3dfbbb1L, + 0x6173cd9ab9440d12L,0x0ca8bb2b40f2e74dL,0x3d83a719d740cbe1L, + 0x0fc562e81d080dcdL,0x16cb8f6f65d8140bL,0x0000000000000117L }, + { 0x505508d0e8ddf98aL,0xe0d08e494e83c1ccL,0x24215a4d2db487d7L, + 0x5b91a9b5a9d15e5eL,0xb4d85a71e56eb130L,0x0e03cbc65d7390a2L, + 0xb43f2613963f58cfL,0x1cb92acaf93a024eL,0x00000000000001d8L } }, + /* 15 << 406 */ + { { 0x0b93b131dbe1d1eeL,0xef971fbcfe0effe4L,0xaf4b85c708d49697L, + 0x497ffb799eb46f2aL,0xb35cd96addc0ccebL,0xd5a55094303e4c0cL, + 
0x3af23c7d86e7b58eL,0xaaed08ec9efb4691L,0x0000000000000099L }, + { 0xe9dd554da84a6620L,0x87891bcd36fa1b27L,0x8f0d90c7e8344fa9L, + 0x8342943413b112a7L,0xbc8d11749c77efcfL,0x8a33437af15a8f25L, + 0x664c7908683b532eL,0x374dc6569c90903fL,0x0000000000000129L } }, + /* 16 << 406 */ + { { 0x78d9e7f446045461L,0x841c793e7430d61eL,0xf77b63b8dd245666L, + 0x4b39bdd5844f837fL,0xce7bb287ab70f0f5L,0xef4b6aa274b7cbc6L, + 0x9821f978981fdf5cL,0xb63e67a8e506a31cL,0x000000000000012eL }, + { 0x1d2296041fc1da54L,0xa5b7e873d0c4b6bdL,0xeb72b87bf59127b7L, + 0x8db0f3859fbc54b7L,0x534ebf9461462a53L,0x97099e7580b9d20bL, + 0x0ed2d96a88aaa712L,0x415ca08a6a1b85d7L,0x0000000000000091L } }, + /* 17 << 406 */ + { { 0xb379144aeb469a20L,0xc5437f8f1cbea72aL,0x7728e8b42b9ec8acL, + 0x606e8adf8a15a0e9L,0x8d8e962ac004f3a3L,0xd5df086da85ebf2cL, + 0xd33bcdaafb6dcbb0L,0x0fdb0c9f07f2cc3eL,0x00000000000001e6L }, + { 0xce44c6c293c2da62L,0x378521fae38bba04L,0x82a8e500db763d85L, + 0xe78e38955ed8824fL,0x8fbaa40d1538bad9L,0x868f1eba0248ad8aL, + 0xa6c1a139cd968348L,0xd2778c4b0a2b9761L,0x000000000000002eL } }, + /* 18 << 406 */ + { { 0x48e4c0f3361ae7cdL,0x6b7984948046cd5cL,0x12f157df00f16183L, + 0x3597a186c382bdb3L,0xcd0b5a6b021194ebL,0xc207ed009af8a87aL, + 0xe4393bc184dc124aL,0x11f71411e91afc32L,0x000000000000015fL }, + { 0x579a5046c1866867L,0xd866aa1655c785e3L,0x2f76a8d43cf758adL, + 0x32ac9a89b0d31de5L,0x4962fdef2a973d93L,0xd7187c2b4ac6ff65L, + 0x7b1c92bfee83ecebL,0xa56863b534638002L,0x0000000000000053L } }, + /* 19 << 406 */ + { { 0x25efb568dfc25ec5L,0xd1189d0e1b0e7079L,0x5072903088a9eb91L, + 0xd590fca710d3bcabL,0xf9c0a5d7df6bea0bL,0x0bf708746efeb1beL, + 0x42747b384fb6a9a7L,0x2f14f557f56a702eL,0x0000000000000156L }, + { 0xb9090e1e348c7f6bL,0x922e69d7d7496d2dL,0x6349d2ca92bf15e0L, + 0x94de29b2129013e6L,0x6e89ee99ec7bb7b1L,0x68ee23481ac1ffb8L, + 0x60be6017a20aa6baL,0x91a224fc79b16d91L,0x00000000000001b3L } }, + /* 20 << 406 */ + { { 0x039c81f5b6501dc3L,0xbeaed737d507075bL,0x080ba34a5c367a46L, + 
0x06a583c5ecd54633L,0x2085119b20eff4cbL,0x26f27b7aa193a015L, + 0xa2dd99c937b34fb1L,0x99227938fff66c7fL,0x0000000000000107L }, + { 0xee87c18e1cec40f8L,0x760e880d65532da1L,0x78d00f69be9ed489L, + 0xf61114948cc9be51L,0x79d826abaafd71e2L,0x831bc3059f39cd9fL, + 0xfffebc7ec214629cL,0xab14a5cdc0a202e7L,0x0000000000000083L } }, + /* 21 << 406 */ + { { 0x85252463481afc74L,0x7b71451828c701eaL,0xc439fd69861bf548L, + 0x071f8693e3d11f33L,0x6225ebd572e2305dL,0x4ef519ad0d1f6093L, + 0x1b0453e06ca1c580L,0xb7319f2fb740aabeL,0x00000000000001c8L }, + { 0xf21d026d84604fd4L,0x1b1b306011b08033L,0x649ad1d36fd4ad38L, + 0x33f1d68714abb62aL,0x0e7f376997abee33L,0x530e818370a2952bL, + 0x98a0a545fc905c8aL,0xe6ec2b5559af7273L,0x000000000000007aL } }, + /* 22 << 406 */ + { { 0x6cf8f7069c217921L,0xc42e41d7b075c03bL,0x6375246a68982f40L, + 0x8669580151954ce8L,0xf5310da9a3a60bedL,0x0abca274b68cc2a2L, + 0xc0ec033b6851665eL,0xb783a62aa463f268L,0x000000000000008aL }, + { 0x262a839eacc800edL,0x23a464a8b7a47436L,0x48a5d2ba302d2f36L, + 0xdcc0964d853ccb63L,0x326373ba856a8728L,0x135e2dfce81f2a09L, + 0x75387e9ce2d61167L,0x2d4026dd77466495L,0x000000000000018bL } }, + /* 23 << 406 */ + { { 0x15f1ae548997be68L,0x90aa75ea279fae38L,0x8e6704c0bf52607eL, + 0xdbdd9e471515aebbL,0x3f13153cfb9d9d6dL,0x6783f0c749f2b830L, + 0x933ddbd70938f0f2L,0x8c4b834cd7669424L,0x00000000000000a9L }, + { 0x195637299d946809L,0x9946ba9bbc29db9cL,0x6f9e27fb6cb8b988L, + 0x3294b03efccfc99eL,0x33374610f7141a54L,0x28f868ffc7673c30L, + 0x43dc3427bb2c289bL,0x029e076fb96395c0L,0x00000000000001e5L } }, + /* 24 << 406 */ + { { 0xc1b7a8f704de7536L,0x4b9e69438570da46L,0x7724c0c2337106bdL, + 0x6f1367d3054b2f07L,0xd7d30e2004200225L,0xd722d2ff8573567cL, + 0x3b1a8a31741d03aeL,0x41474e1b878ff79aL,0x0000000000000059L }, + { 0x3ce983c9fcf8a2fbL,0xe3c71ed67940d19eL,0x7347af8b434df786L, + 0xa187aa1d08d0762aL,0x618fc81d48cc16f9L,0xfecc62573b0e7f5dL, + 0xcb1f9e26fe7d47e5L,0x9ddb1566ce3918a5L,0x0000000000000047L } }, + /* 25 << 406 */ + { { 
0xa7bf65565cdd5de1L,0xd91a3b5ad0299c7dL,0x34ad344fb621c69bL, + 0x0eaa0119ff537f66L,0xd06eaa83437158b3L,0xc8ef03821b5a588bL, + 0xc42cdd179a13f8c3L,0xb8bc70a25e7488b7L,0x0000000000000152L }, + { 0xb571425067a283dfL,0x71221b8349a9b640L,0x84b743d5e5009d45L, + 0xc7150aa8fc207d0cL,0x5146ec3777f5ee74L,0x64856c0e9cd38d8eL, + 0x3c6544b4ada269a4L,0x3321ddf10a505d47L,0x0000000000000083L } }, + /* 26 << 406 */ + { { 0x679fec081ac21f3aL,0x8e69d49664fd0330L,0x4d520cf622381900L, + 0xfe8843f23cdada22L,0x9ae1a7245fc715c4L,0x2551beb61c16d5d7L, + 0x6feb8481d00fcf9cL,0xcaa2b375c91e52b7L,0x0000000000000184L }, + { 0x24be593ab64e1524L,0x4e1cee9d7332b856L,0x131c5e6a797daf76L, + 0xc663eb2e75ce3e6cL,0x03ef6a7909834ac9L,0x7d7aa414c52b3350L, + 0xd66cb98f1430ff4dL,0x8aa042bdd5ceed07L,0x0000000000000084L } }, + /* 27 << 406 */ + { { 0x5c157e7b6deb0864L,0xf9d82ac58bbf2f15L,0x8078defe946d3ad0L, + 0x0202531b76b4ef19L,0x91b204164639bde2L,0x266d4ddbabeb6f8aL, + 0x099e2d193c697956L,0x622d9fc056634b3cL,0x00000000000000fdL }, + { 0xb40778e6a1d7b58eL,0xe7e73d837e8f2b8aL,0x1079ead85b307ff5L, + 0x6edaef3415ec5733L,0xf0af7d8875239c59L,0x2c12598569c78449L, + 0x0ddb52fddbdb0499L,0x3659ab877726d11bL,0x00000000000000a9L } }, + /* 28 << 406 */ + { { 0x810305ab4f244783L,0x99471c5a0e2c1a83L,0xbb3beeb39bd2ba8dL, + 0xe7fe7b929c189a01L,0x0f5a31c9667f8683L,0x28ee1a59fb7bfbb0L, + 0x4cb86e098e14240eL,0x47039ec29bab970cL,0x0000000000000183L }, + { 0x09b761f8f4ea7190L,0x5b109e93637f19c3L,0xe40aa435edd588d7L, + 0x6b25737881a3b4aeL,0xf5b5cc60591f5caeL,0x2e787d34811b80feL, + 0x28be1db7b0d84075L,0x2579d623a4db6982L,0x00000000000000b8L } }, + /* 29 << 406 */ + { { 0xebfd47a0f3c9663cL,0x5bd6c124fcc1ebacL,0x7a72f2629e4fbc0cL, + 0x8e12014b04da3327L,0x1644293ff16aee24L,0x51c99ce93d9cbff4L, + 0x3e202a2905ac2f77L,0xdc053a471a547c46L,0x00000000000000b5L }, + { 0x33432716d62c57c5L,0xb6a806d7fd1c2525L,0x8fad574efa0879bdL, + 0xb155c4592f8f0fb0L,0x1164655e58d81213L,0xe11727e694a07b4fL, + 
0xff4b7575c7036122L,0xcb353944c198f11aL,0x00000000000000a8L } }, + /* 30 << 406 */ + { { 0x0aba20f753d66edaL,0xa3aeae3b7e525dfeL,0x7b988a6199e675dbL, + 0x944ca69614798f05L,0x42277a4775fddd0fL,0x281cc5692e466789L, + 0x3292cdab72d85db1L,0xdfff0593e12591ecL,0x0000000000000079L }, + { 0x1c2f7a8c0b87e726L,0xc9c4e2867c08d0f1L,0x7244fb70add49ccfL, + 0x2f9a917876dce245L,0x60d895eb97836a0fL,0x97fdb433fbcd00e2L, + 0x83017090f2aacd14L,0x292faa800e939a1dL,0x00000000000001dbL } }, + /* 31 << 406 */ + { { 0x5a7a6424e3f6f82cL,0xbd371425726aed2fL,0x2f56a6db5da92b20L, + 0x40dea40847e9f8e5L,0xd214c17ba447c6d6L,0xfc2fc25418e56c61L, + 0xf21df2b42ba5ffabL,0x38ed181e0c4b74d7L,0x0000000000000116L }, + { 0xcc7b32aa60988f29L,0x86934d213fb6f043L,0x2f3259e3a7e17064L, + 0xdd355633aae0132bL,0x6b351d3f58a51724L,0x174aa7de5d3513b9L, + 0x5ffb74678e5b7018L,0xcfc563cd0b563426L,0x000000000000012dL } }, + /* 32 << 406 */ + { { 0x48a6060b5244adf3L,0x196c42f99f9c1646L,0x1dea9fac3946d4dbL, + 0x30563642183c46daL,0xe5c58915c9e4a634L,0x3e71b542cce36f63L, + 0x4d91d55c39eaada5L,0xd3f5af0f1c43907fL,0x00000000000000c4L }, + { 0x1152f1413080db73L,0x3d5031a2b4bda7abL,0x0df9c2bcd0f0daf0L, + 0xaf4aa1f30d01895aL,0xfe8281302b137acdL,0xfcdbfefff072eda7L, + 0x8f3e313911d578dfL,0x37d9ce5b92662c8eL,0x0000000000000086L } }, + /* 33 << 406 */ + { { 0x87e7f88cc5bed943L,0xfd2e6b03aad8bc5eL,0xf24d94389e249c9aL, + 0xe47161cb7c008e8cL,0x19d22f2831d8c40cL,0x0671ba709fffd96aL, + 0x31a6f3ad74d609e7L,0xf90ddf84b6905c30L,0x000000000000009eL }, + { 0x404c5574e9f17126L,0xafd37819604fb843L,0x4d1d2b92b00db9eaL, + 0x4a242643a7ecb8e1L,0x23c70b6e8b66b49dL,0xcbd1e9f75995098dL, + 0x62ee997d643f5b38L,0x2c7285da63549975L,0x000000000000007bL } }, + /* 34 << 406 */ + { { 0x70bd76a983f34ba3L,0x372227ca7eca3199L,0x4cc896ce4e57aed0L, + 0x142d290f0ddd2f05L,0x867a1f6fd6612f3eL,0xcf2e8ca14ebc7530L, + 0x1d15b0252bdf257dL,0x1b6ea850fcb53380L,0x0000000000000110L }, + { 0x38c505c3e561bf5eL,0xb5c5b048ae44566cL,0x902051d06c45aaa1L, + 
0x619fec981ea9caacL,0xec4ca3f013e5e007L,0x899d203208bc5341L, + 0x5869d8e09361ca4bL,0x1850a711fe357cd8L,0x000000000000014cL } }, + /* 35 << 406 */ + { { 0xe9d78d73c5fe4cb1L,0x65d4973e63669b5bL,0xf8f26e6758db5d06L, + 0x1fe46740a5bea178L,0x22e30d273d9d53efL,0xf9c3e1132d740fdeL, + 0x0eaf2c3782ad4253L,0x1c0a8f57afffe82fL,0x0000000000000083L }, + { 0xe04ad02fea374c3bL,0xbfebd3637e1ae7deL,0x2f496f966970176fL, + 0x10eb48976106472bL,0x9ab8cc2922481fc5L,0x806a32fb795e47dfL, + 0x3920421e8a79c1aeL,0x17c4253868d45930L,0x000000000000009aL } }, + /* 36 << 406 */ + { { 0x164a9e29d07b0d60L,0x7d9325bc4a4ced37L,0x389846410a22ea31L, + 0xc33e1fc1659c12aeL,0x7609475678cc3ad9L,0x3c30033a4cf14874L, + 0x6c7ed92d23f58493L,0x0040c6d99a41b806L,0x00000000000000c8L }, + { 0xe9652d66a4f89d26L,0x97aca7ae1e56daa6L,0xa1650f15b8b76045L, + 0x2f2338b3fc159585L,0x9af48930f243cf7fL,0xdd333c3d923de549L, + 0x415a035f3083991dL,0x0257bcf5b8f179d8L,0x00000000000001b8L } }, + /* 37 << 406 */ + { { 0x785ceb0e60a23046L,0x06693923da26f777L,0x4cfd41e2f6347cc4L, + 0x375228c909f95926L,0x5f3dff336d32813bL,0x3becb638dd9b1ce3L, + 0x7e9b3e1160c02110L,0xc46e496d72e500fdL,0x0000000000000003L }, + { 0x15d186e9c39eede8L,0x00d45c82e2cbe492L,0xcb0430b0e656d2edL, + 0x52592855cbf91059L,0xed3f498d7ebbb6c0L,0xf537912d8c448093L, + 0xd02e28094a50080aL,0xde023ed27507fb99L,0x00000000000001c9L } }, + /* 38 << 406 */ + { { 0x9189e6492b66dcb3L,0xbd9d89446e0a16c1L,0x7183787c8fbf21b9L, + 0x48b9a986c84a07e0L,0xc7951767bd3f75ceL,0xfd74f40c709029ebL, + 0x8e0d48faee163d0fL,0x583247acea13c411L,0x0000000000000029L }, + { 0x544713627200e61fL,0x43a5caa69151f221L,0x88e1d04480dfe2d7L, + 0xe166419a814a432fL,0x47d7a7324998873eL,0x06abfc47d649229aL, + 0x38759e7b790a1bb9L,0x2aade3423d87e983L,0x0000000000000083L } }, + /* 39 << 406 */ + { { 0x814f5c1eed4cb90eL,0xc27974eb37f64e46L,0x1ab1171566cd615eL, + 0x27e7e0d98a254731L,0x1756745a1520a956L,0x4a8d601258af427dL, + 0x430761eb1c0eee1bL,0x6bbfb93438874a9fL,0x00000000000000d7L }, + { 
0x9f48d4ca3a8603b8L,0xdedd50492e5be7c0L,0xe088e3bdf54289d5L, + 0x0a3ee65da44290e6L,0x8ab9a318bb9114a0L,0xbbb8fc213ba90e93L, + 0xbd87016b28aa8d97L,0xf467d852c050a231L,0x00000000000000d7L } }, + /* 40 << 406 */ + { { 0xfe306ca855a073ebL,0x35543d282dcab16bL,0xd66bc457a45d0b9cL, + 0x3fdc1cc24be414daL,0x781093ded7d9bb1aL,0x6b5f2c375d2031a6L, + 0x9b6b4ba38a23513fL,0x6fe788858f5a91f0L,0x0000000000000188L }, + { 0x0f952fdeaa340993L,0x860ad393381382b9L,0xf0f1878aa611333cL, + 0x9d53e1f9fe29f16fL,0xc371ba5fa0d105e9L,0xedf4adf826128fd8L, + 0x4f4e22ffdaeb10c8L,0x0d27b149d9544e7aL,0x000000000000009aL } }, + /* 41 << 406 */ + { { 0x384e080421facdb7L,0x6c8b7244e2b7b131L,0x05f1dd47ad8a4fe3L, + 0xeb2e048dd4649216L,0x9f1bc2b7ec853458L,0x386f3d5798115589L, + 0x8c02c32e6fd93252L,0xd1a9526bbd686fe7L,0x00000000000000cdL }, + { 0x0a4bc5e2a6384682L,0x8281608a042b55caL,0xa41505532a528bb0L, + 0x96b9f2e72217abd8L,0x9f019996e3251a18L,0x7aa51a6a79a88c6fL, + 0x90d8c8ad98fa880fL,0x2dd166cf45076ac6L,0x00000000000000e6L } }, + /* 42 << 406 */ + { { 0x7ba7b06a5826d6caL,0x42257d9c430621c6L,0xe5dc2c8f02dcf96cL, + 0x1b63d8ce16550605L,0x9e6341345cde8df3L,0xf9781f21809d18a4L, + 0x2de5477650c12248L,0x915e036c94f60a0aL,0x0000000000000149L }, + { 0xe8a91619a2d29974L,0x67a70872b420db3aL,0xb24abe92e50beb4aL, + 0x760b7119dc0a98c1L,0x1519d8aea70e0dc1L,0x33f102a2679c49e2L, + 0x5123d8c2c1c358a8L,0xc928fda8bb7899a4L,0x00000000000001b9L } }, + /* 43 << 406 */ + { { 0xd2601167ad233a91L,0xc8d9884dcdc5862fL,0x01a22fe0a648ba05L, + 0xbb3b9e3c515c1511L,0x18c06c97ea8c7464L,0xf20adb09be2c8395L, + 0xb2f3dd6d371f7631L,0x2e4fe2c91fa77646L,0x0000000000000128L }, + { 0x9732c4edfed3264cL,0x81acaedb4d287a1cL,0x736e2ee565d0497dL, + 0xe8b4f6fa9ca05e5fL,0x76f8565567f63d25L,0x3622850f9569a822L, + 0x4213d66b43a8a078L,0xc601ed61e323d282L,0x0000000000000185L } }, + /* 44 << 406 */ + { { 0xc0135e27beb6745bL,0x1fa3e9d57ac01e8bL,0xa713ccd34efaa6adL, + 0x6b82414232492fa1L,0x1e3dc6b3c5a1550aL,0xe3c7ea75429f42c3L, + 
0x93f0cadbd1d97acaL,0x7b23ee53f7091927L,0x00000000000001afL }, + { 0x708dc83aecb37a50L,0x802e69b38f9edcaaL,0x02abcd3cc66d3a68L, + 0x814ec38b1101afaaL,0x20e5d950a1b52623L,0x0ac6ea5fd4dd624eL, + 0xab26008227a0c3b4L,0x577f5ca09368736bL,0x000000000000001fL } }, + /* 45 << 406 */ + { { 0x6d42774c21bf0842L,0x2a4da71bd4d1ea75L,0xc2cedd0cf4d653b0L, + 0x7d94db7211d82f3dL,0xfaf1ca20a8c32913L,0x9b0c6b3563841938L, + 0x1ddacce9cf9b706cL,0xb204884772b2c899L,0x00000000000000b9L }, + { 0x4e7adf267aedf83bL,0x8ded9b05be0cdec7L,0xe5f546d3b5c0649dL, + 0x270504c8e19891ddL,0xa92e7b14a7261c10L,0xd14be6d3b8f9ebccL, + 0x7a89d993eccfecebL,0x4ce0afa5ae754f25L,0x0000000000000074L } }, + /* 46 << 406 */ + { { 0xcb677986904f5312L,0xe7d62adb29dfabc6L,0x93e4e87b3e4e42beL, + 0x19bca6264069b407L,0x328eab93ad8142e1L,0x2e7c1a5d52672bedL, + 0xa5fd1db730aef66bL,0x12ac39b7fb19bf54L,0x00000000000001beL }, + { 0x01c76a76b5b5ec2cL,0x64de1b6a0861dab1L,0x3258296d637140a4L, + 0xbda2cb733b3679c1L,0x0041da437024d813L,0x4303794de9a0ce74L, + 0xf8ee8193c0529c15L,0x53feedc587074b7fL,0x0000000000000115L } }, + /* 47 << 406 */ + { { 0xa6ba5b160c617119L,0x204e68fb438beefeL,0x7a20a3972ada8708L, + 0xdedf232d508368bbL,0xe096d0bc104f521bL,0x7a6b16d711269c40L, + 0x55d4ed46914143d9L,0xba5c71c427d395ecL,0x00000000000000caL }, + { 0xa821bc5010cb008aL,0x48052c133adc6d77L,0xd0408c0f53ba3178L, + 0xd60edcdef9fbd6c5L,0x3cfa4536eeebb802L,0xcababb7b03709682L, + 0x13ee6a48bce9c097L,0xbd0ef6510d1b0bebL,0x00000000000001d8L } }, + /* 48 << 406 */ + { { 0x57b087c3722f09b6L,0x1702ccf501659998L,0xf3e88554769ff3b4L, + 0x0ceaa311b9b58b5fL,0x3200928faf05be70L,0xd7254305405c2f75L, + 0xd48bc6c28cff9a05L,0xc951c65d71aaf653L,0x00000000000001e0L }, + { 0xb1c60c62e06a762bL,0x95d42294d121d094L,0x16b88a5dd0a2ec1bL, + 0xe5baf8f2f27ab833L,0x81982eb5a7742d30L,0x0244c5595e8d4a52L, + 0xae2e0d639ff0933bL,0xcac9c143c0641fdcL,0x00000000000000dfL } }, + /* 49 << 406 */ + { { 0x0729d23aaf7d4710L,0xcc26cbbed18d4ee5L,0xbbad9a780031ac64L, + 
0x4cf6f0857d9c26d5L,0x4f7a82d40d1a7532L,0xbe268e66d13e2a2dL, + 0x14bae8a771bdd2ceL,0x3c3d7fae6b5df00bL,0x000000000000008bL }, + { 0xee205513e5ea848eL,0x3b6a3e27dd421986L,0x21485c6e461b7f1bL, + 0xa62bc7b5b14b5037L,0xd6372abf560b7bafL,0xaa6228c9c962a69cL, + 0xdf2b172bb089d815L,0x7d1775058d97fc0fL,0x0000000000000167L } }, + /* 50 << 406 */ + { { 0x1b2752ec9c586918L,0xb3fb67c153a201efL,0x105199bdd73f0eb0L, + 0xae01712810aeafdfL,0x1426b2b52a519a78L,0x68e86c5a1ea4f48cL, + 0x98699970a6f22d9cL,0xc80564d51a9de939L,0x0000000000000000L }, + { 0x41f8c7fc85bb70b5L,0x4bdf510c78874a6cL,0x836e36477192332fL, + 0xe71580260d3877cbL,0x76eab30d9db5fe34L,0x5cf92e6aacafab64L, + 0x9dfe39041fedad0eL,0xa9be6419c83bcb00L,0x00000000000001d2L } }, + /* 51 << 406 */ + { { 0xef6dd59eb9490407L,0x40b3ffc7cce5657cL,0x303b86892997c881L, + 0x10ba288bac9e88b3L,0x0598dc047e06cbddL,0x6b6070f9b8f3fdc5L, + 0x1b6e0bf0bf167eb0L,0x832f256f61823b3aL,0x0000000000000066L }, + { 0xcaa38eb16ba2d1a7L,0x0e9eb2b5e34c102fL,0x45d0a0412f3f191fL, + 0x1883b65b78e42f37L,0x106790f4b3d340d5L,0x5bbca96e950faf87L, + 0xa5b7401125d0e75bL,0xbf6a2e4aa9a5b37eL,0x000000000000004aL } }, + /* 52 << 406 */ + { { 0x66cb0db47c4f8f4eL,0x81ae6d75fc21f5f6L,0x19e8fb8714951a87L, + 0x52ecee54602c5224L,0x50a823a8e408b25eL,0x32aad7d9e372c922L, + 0x80a117c8e03a5ce6L,0x9b62059c2ca7f6b5L,0x00000000000001b2L }, + { 0xc50397739d676975L,0x6959c6b5becfc0f0L,0x7b73318a43d30b32L, + 0x0c0aac5b6048cafdL,0x77f3201dba7d3c59L,0xa5a05d3e09004fddL, + 0x711b1b17ada3810bL,0x0b9f7679a6eb6d1eL,0x000000000000017bL } }, + /* 53 << 406 */ + { { 0xb111d5049113f1d0L,0xaca7880f301363d8L,0xd1a3a3a0715d0296L, + 0x7c7131450ef65ed6L,0x66eaa22b454a687dL,0x3818b9757349a9fbL, + 0x5062a36e0d2022d4L,0x528e46da2ff88497L,0x000000000000010fL }, + { 0x32a84b8a5de50ae0L,0x76263a196884f659L,0xfd8bb04c315d19aaL, + 0x949d5c4b8c484043L,0xa6b46b769af8ac6dL,0x30ceb799b23ae2b3L, + 0x0ea36685daf44f51L,0xebf5489e57d1f2cbL,0x0000000000000179L } }, + /* 54 << 406 */ + { { 
0x72ba03aae5c32db9L,0x674c55738d0d3eecL,0x80208af9ccce2df7L, + 0xa35d4b811c853818L,0x9c9e525e4027990fL,0x1d4307391f2d304fL, + 0x557a34c0e56780b1L,0xd88631e7384f416dL,0x00000000000001dcL }, + { 0xe13dbdcc305ed92cL,0x7344f3ebfb1445b9L,0xe84a2541d10d4b7bL, + 0xc0063b826bc03673L,0xa46a733bf4f6b339L,0xa6c0138431320032L, + 0x3b6f5b9634212c42L,0x445752ddb9601168L,0x00000000000000b5L } }, + /* 55 << 406 */ + { { 0xebcab29ffe8e565bL,0x7b04357f43480e76L,0x153ea7357ce43f36L, + 0x04ce126da3cf5166L,0x3eb43a6e0f412fffL,0x4ede4ce058733496L, + 0xe90b4291aec2250bL,0x35811220bc4b63d6L,0x000000000000005fL }, + { 0x2a1d458659ff9b9aL,0x61d60d77a684082cL,0x3bcdaeb53ca06170L, + 0xe5793487801582c8L,0x73754dacbdc327b7L,0x4e9f7a1aa91091d6L, + 0xb1dcf63b688c3076L,0xf781dda2a455387dL,0x00000000000001a7L } }, + /* 56 << 406 */ + { { 0x3f28171bd3179895L,0x91ec9bdd6470fbe0L,0xd6713024aa1ab0a3L, + 0xbdb9e60388393838L,0x29c6d976170ffe51L,0xb415ce2c20c85e4eL, + 0xc816b042aa9a7f5aL,0x0f7257df41dcb044L,0x0000000000000139L }, + { 0x16044c1463dbe97cL,0xc36a41e8dca02de3L,0x7c9403766849e580L, + 0x77b372d1d1bf053bL,0x539e249d175c2a6aL,0xebd056bbf0e17332L, + 0xf7b71bf6e62c02a3L,0x8e8636d754ee354cL,0x000000000000014aL } }, + /* 57 << 406 */ + { { 0x23975ca006ea445dL,0xc8c0af23792afff3L,0x648b22a1089f7960L, + 0x38be1485789e86a1L,0x09c7f82c8e403792L,0x00e6a9750da77f58L, + 0x89ff11bfbbabdde6L,0xdd065345ccf6e66aL,0x0000000000000017L }, + { 0x8c47de427496b1c2L,0x4c7e85508ad8e562L,0x27bf92564ec11ed0L, + 0x0764d014f7290f77L,0x02a78a70c49b97deL,0xa88a1ccf1ddbbd5bL, + 0xe42270462e9e274aL,0xba580882466c1d4fL,0x0000000000000159L } }, + /* 58 << 406 */ + { { 0xcc1838a04d0dec13L,0x2d4bdfbc97bd2484L,0x3f6defdb8e61f25eL, + 0x7f6f45104ea8dedaL,0x013f14a40425fa58L,0x956254689d7d619bL, + 0x5115c63b5bda4f78L,0xea1900e7fa6e1bf0L,0x00000000000001d0L }, + { 0xa17989e0363fe2daL,0x2cfd22dfca53cb24L,0x45f91d59881a474eL, + 0x0e4ab11250329824L,0x8ac28370b72739d1L,0x1b310d913b6d9b75L, + 
0x6ec4da965c56e9e9L,0x0671aae0ffc656cbL,0x0000000000000042L } }, + /* 59 << 406 */ + { { 0xb3ab40eb790b8e7eL,0x8eb63013d4788dceL,0xbccc0ef6934a5525L, + 0x680e70c725f88ec4L,0xd485afdbdb65760bL,0x5d426a75efc322d2L, + 0x0f83571f2e4bf148L,0x91ccb5dabbec6361L,0x0000000000000169L }, + { 0xd8887e8d1c57d73cL,0x236b0f805f5fd2d4L,0xb63c2775875a5803L, + 0x414fc01a4a8803ecL,0x52d5e443d98a3657L,0x18a2f004211e0a47L, + 0x692f1862d077952dL,0xebb3821e3543f3b3L,0x000000000000002dL } }, + /* 60 << 406 */ + { { 0xc543163d23d30fa4L,0x45cc53a2b32b27c9L,0x894b9ca07953c4b4L, + 0xaabba057bba613a7L,0xc50371ae6ccbb39dL,0x610cdcf53fd4f3c3L, + 0xad08443a19a9d357L,0xabf74787d519c9cdL,0x0000000000000076L }, + { 0x4ee3a817e5502ae4L,0x7ab2462d8b39a71dL,0xd6736eabfcfe642bL, + 0x77479bae25c914e3L,0x95fb92af2a14e2c0L,0xcd33464758bd112cL, + 0x3165b9a35bc0ed8bL,0x980f3176d89bf952L,0x000000000000006fL } }, + /* 61 << 406 */ + { { 0xcce789470c205347L,0x2cdc9548317b0bb3L,0x91761877eb18287eL, + 0x5da4b78d6fe20d46L,0xaa5a0e4c56abc91cL,0x20d5e538f880504fL, + 0xc8b49ff54cf760dcL,0x84ce19efc16bc484L,0x0000000000000055L }, + { 0x26ddf4fc8277a9a9L,0x1084e9e795284106L,0x4ae96058d6f31a98L, + 0x8768d94c4ca76957L,0xabe0c7dc4291c752L,0x742e8a5c902ed035L, + 0xb19d63427e0da040L,0x8340b9aae19b885eL,0x0000000000000089L } }, + /* 62 << 406 */ + { { 0x1db573165a1c76bfL,0xff18d5ec4afc3851L,0x2c1949939579a536L, + 0x18b565e4bbffa98dL,0x446f6b2146c634e5L,0x7c8e557e54f8c188L, + 0x0bd678caf2a2547fL,0x3898e9617c679a91L,0x0000000000000195L }, + { 0xc9629fbf5b94cca3L,0x4f2c7c0bc3a4b9fbL,0xb497be19d7c9194fL, + 0x7342f77aef265586L,0xaacd585fdb0e0ce7L,0xa4cc7b4bf17cef46L, + 0x72e0c651d630df1bL,0x39514c9d4e7fd042L,0x0000000000000163L } }, + /* 63 << 406 */ + { { 0xf81da77e080bdb19L,0x42e0d16218eff3daL,0xaecdeb9ed5246df2L, + 0x4bb5a16667f90950L,0x9eea36f3914df6afL,0x92f0cc9b6bd50255L, + 0xe1756a88acf4b34aL,0xa30cabc40a3069a1L,0x00000000000000ddL }, + { 0xc407225c59098bcbL,0x957308996d9a4295L,0xb4e208abe995c4f4L, + 
0x42560e5040f907e8L,0x0e6c38dbc174276cL,0x2360c05488cff7dfL, + 0x31e7aa6539f892a2L,0x4ffde5e9b9592227L,0x000000000000009bL } }, + /* 64 << 406 */ + { { 0xf7593760e63885d6L,0xce5054bb86e546caL,0x5910ad526e9567b2L, + 0xe812cf7be420aff3L,0x266049fd904e9676L,0x42d0e2ab2c8b34adL, + 0xf79f8f91a4382f29L,0xf8cde3ed41d531deL,0x0000000000000035L }, + { 0xe514497fb16cc1f3L,0xbc044f39627db2f6L,0x76fa5148a1fa2bdcL, + 0xcf7eb72e5fe3fd7aL,0x83236c11ee3d45e6L,0xa02d7e92c4eddb54L, + 0xcfb5bb894d4982a5L,0xee6ac2a27c217ea5L,0x00000000000000bdL } }, + /* 0 << 413 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 413 */ + { { 0x1e5e42932f3cdb19L,0x9920316787214e3bL,0xba80fbfe93d885a3L, + 0x58f3ecacb25a2700L,0x2c4a14d369f5d172L,0x478313d4816ebaf0L, + 0x41018d31c93f7738L,0x5db7131e06ac6002L,0x00000000000001b4L }, + { 0xa5d3687295b8edbcL,0x47931810945b9473L,0xea5b3ffea82c5f49L, + 0x5ed84f08f491bc9bL,0x1492e5349b3b994dL,0x4788372dda6c1225L, + 0xfda4521440022d06L,0xfb0bcf49fbca3d03L,0x00000000000000a5L } }, + /* 2 << 413 */ + { { 0x8a4af8ae25144121L,0xd89467b7ec215026L,0x3ab6cb7fe013fba2L, + 0xc15a056935f18f1eL,0x8bd48a1d70933beaL,0x6a40474a5b8c61f6L, + 0xad967b15b5158afcL,0x119a0ee96d7f6e5fL,0x000000000000010bL }, + { 0x895e3e94a548551eL,0xfcfe2882eb65fa7eL,0xafa1800f3582920bL, + 0x5cda388ebf58e02eL,0xe7aad6f7fb270f11L,0x95a2636fb7e27667L, + 0xdec591d7a3be4195L,0xde0e7440808c1315L,0x00000000000001b5L } }, + /* 3 << 413 */ + { { 0x57b9da073a771403L,0xde6ee702ab21fe58L,0x60d32de6843aacabL, + 0xe522b9569064dfbbL,0xb4b429207ea0dcadL,0x4222697aa3f46a45L, + 0x1557d30b3be7a95bL,0x712ca7479fa83859L,0x0000000000000153L }, + { 0x162516d415fb1ac0L,0x44b19cb0ee7be87aL,0xce8815eb3d9d86b1L, + 0xce8f13f957331615L,0x78e8539ef71faac1L,0x29154b098e5ca8bcL, + 0x45b36262312e4c96L,0x2e03ee8680940f73L,0x00000000000001c2L } }, + /* 4 << 413 */ + { { 0xe95ab9e7ef05ef2dL,0x29ec067ffd5cc417L,0x67c10bc739402b0aL, + 
0x109f7d511e9ca8acL,0x0b10550bf8efe529L,0x0bf3624cf18c8decL, + 0x42f686f33746474aL,0x0f8a831140052446L,0x00000000000001f4L }, + { 0x56303452329ed806L,0x1d80c752654f22d9L,0xf41fe1729f61f3ecL, + 0x7d4ad71e0cd5fe42L,0xffda94addd164da0L,0x260a2bdf7f19c583L, + 0x8e8673245d6229daL,0x1d13d05c2fc87ae8L,0x00000000000000baL } }, + /* 5 << 413 */ + { { 0x5750d281b1734753L,0x0146bc94d4a13019L,0x44c6f69714e0f6a7L, + 0xbe893e5aed528e03L,0x1e980b908750b50aL,0x7edaf7fca9de6cc1L, + 0x093cd32faee636cfL,0x293d801a61a0b0deL,0x00000000000000d6L }, + { 0x154f8d1810a54f0eL,0x8a0e70ea4a976afaL,0xb2fe0d041d2e4d9aL, + 0xc6c5afbc6229b256L,0x8d0ca0e734bc3bc8L,0x93d5a4cf47a7a58fL, + 0x5413ec8faa6e0b8fL,0x1df389c48d831011L,0x0000000000000099L } }, + /* 6 << 413 */ + { { 0xe4f52ec5ba7dcf59L,0x345c1385d5dba9c9L,0x67084813d49a6201L, + 0x0c8c4fc84ee78000L,0xbe78c6a4e17b9868L,0x6698156608377738L, + 0x1afdbf526012a726L,0xa884b8f7d2127133L,0x00000000000001a0L }, + { 0xa4aa47b66590bde1L,0x32398ff9823ee7cdL,0x3f1f7e49967c860fL, + 0x0ac197d796a0e601L,0xfc6a28c6e61a3207L,0xbe5cb223fbe09f61L, + 0xe83db350d54bd1b9L,0x44334c2baca898f2L,0x0000000000000024L } }, + /* 7 << 413 */ + { { 0xdc7ed009b224af29L,0xc73de9d40e9acb64L,0x81fba2d377629ec2L, + 0x2f41756aa44ae9b4L,0x7c839101311c713aL,0x608ec3d486c4c015L, + 0x6a8379c0ee544e59L,0xecd19a437a67b442L,0x0000000000000062L }, + { 0x05112f63cad5be69L,0x9b97a7f150c4e5d0L,0x8744e98ae7e2d407L, + 0x9359564016ab7f96L,0x87dcf03cdb24a32bL,0x2f9216bbabf80ae0L, + 0x38dc2b86701a21e2L,0x60a15b35e64ec8ddL,0x000000000000005cL } }, + /* 8 << 413 */ + { { 0x141838bf8fe6f511L,0x8e9ff343ee745e2eL,0x71da9fd502325306L, + 0xa527b9baf0d0d68eL,0xce19b22c31144649L,0xca9b778b0bfaa46cL, + 0x66e689a76ce31145L,0xed43460dd8d1da21L,0x00000000000000bfL }, + { 0x241323f7c1669168L,0x9934b3de79b94b5aL,0xd332c1a2d9f9a406L, + 0xbe3ebe21e4a49eacL,0x4a00648e678be3edL,0xc9f534eaff09db31L, + 0x2de774065efdd859L,0x2c218cd800f5c7a5L,0x0000000000000152L } }, + /* 9 << 413 */ + { { 
0xb1f44e10e8216c49L,0x8242ce59391c90c6L,0xbd3ec80b1e6415dfL, + 0x8055d00e364ed350L,0xc0499dd8eecef27cL,0x6fd2d8c71658459dL, + 0xaff8a01800a6c15aL,0xa2f028d8406e8a9aL,0x00000000000001a0L }, + { 0xc752503f42bd3f43L,0x87e708599710bbddL,0xcda119c090e6eed2L, + 0xf1a7a1b2596ec879L,0x479f3fec24fd1933L,0x717a613abb82f79dL, + 0xaae73616faf9c1b5L,0x865e91b7cf129fbfL,0x00000000000001c2L } }, + /* 10 << 413 */ + { { 0x9a111bb32120bf4eL,0x18c2fc772458882eL,0x0c3480d8e24ebc99L, + 0x1832974652cc401eL,0xb424bd884fd6a487L,0x5838dc6a822adf42L, + 0xf5a46a3cdb196a29L,0xce7baaade6640b4dL,0x00000000000000e0L }, + { 0x231ac4cdc62a4ac7L,0xa25cea601851842aL,0x5ad82748d7a79770L, + 0x8fbffa6ebb164240L,0xb64ecaea85db52c0L,0x4ed71ef3b05a83a0L, + 0xb1a5c366f87d6991L,0x52f4a0043ccb2067L,0x00000000000001b1L } }, + /* 11 << 413 */ + { { 0x4d6fd5b3a11c9d5bL,0x423db5f508656d72L,0xb0f9e57833051a59L, + 0xa6e49ec8797e73aaL,0xd2c1bf6eccc0d215L,0x4b3dd3e2d320576eL, + 0x317f7ca5d773e88dL,0xc37a7f0f5f88be68L,0x0000000000000114L }, + { 0x033fd47e1b2e6090L,0x297dbc0e211a3ba1L,0x8808924481671ba6L, + 0xd2f3670ba568cc06L,0xf41dff3d58ff526cL,0xd9ce52f41a33b324L, + 0x2e0d362c12adc150L,0x8da85adff5f36b5fL,0x000000000000000cL } }, + /* 12 << 413 */ + { { 0x876382e3b6899299L,0x0b4845d3c420faf5L,0x6e09254c04b364c6L, + 0xdbf5ee2fc3ee3781L,0x518c5aaff1a2f07bL,0x48e496f4e09ca02bL, + 0x325878764d599f03L,0xedf96e075a7941bfL,0x000000000000005bL }, + { 0x38dd2793981bdbc6L,0x9b21020129395b8aL,0x924ded1bd1678774L, + 0x7a77150735ded04eL,0x2275228145c78fe2L,0x8e500fde9ce86720L, + 0x5998350640caf83eL,0x38ff8869b773ada9L,0x0000000000000173L } }, + /* 13 << 413 */ + { { 0xe8ff5ac6ed7cf572L,0x7f686a590f1e28a2L,0x62427eac20faa4deL, + 0xb6209fe31ad4d13dL,0xd2e05eb22ffba92bL,0x2c9345eae68093a7L, + 0x281b99996eec4207L,0x7ea83de8e452091aL,0x0000000000000123L }, + { 0x923bd3311639d5d4L,0x68c713339a8bf71eL,0x4d9d8d20c6a51254L, + 0x81954588c3eeaf0cL,0x5cab2bcdb7fc166cL,0x0375c8b7c95eddfbL, + 
0xaa864a1f451c9fc1L,0x0c658f1f7b744928L,0x0000000000000058L } }, + /* 14 << 413 */ + { { 0xef5c1e91a73432dbL,0xdf98b1e4b4f36c7eL,0x68b70bc8d61583a3L, + 0x0c8264e2ed0b2c62L,0x2b2c0770ea80cdbaL,0xf057643161bb4804L, + 0x66e3ebbbc240ceceL,0xf34eada0354bd699L,0x00000000000001c0L }, + { 0x6134da4fd4514eabL,0xff414ddaca338f1fL,0x588506f10fc1b9aaL, + 0x5bb66b45bb21daf2L,0x093354525824998dL,0x2c9c4959a8e47c78L, + 0xf7e6339c6269c6c0L,0x5551260b237e9835L,0x00000000000000c5L } }, + /* 15 << 413 */ + { { 0xf4f12f1396123090L,0x471d20baf841a4f6L,0x2086017985371d0fL, + 0xe655cf99bb77de97L,0x246595b0761430d0L,0x4361b05f54503716L, + 0xebdb356a33210d4fL,0xc429076db67e1961L,0x0000000000000072L }, + { 0x3a611d84c57d8e8fL,0xc0bc94ea1b749a8dL,0xd8ce81683a51b900L, + 0xd595034f38d06f25L,0xc649def4bfc48667L,0x755726c0ee42d04eL, + 0x100a8ceff26eff8aL,0x6d43ef9ff86d056bL,0x0000000000000112L } }, + /* 16 << 413 */ + { { 0xfe1b039aefd5cf86L,0xc7fbca26c0945b5aL,0x6b30e2e0887a27a1L, + 0x0883a38be7cc98d9L,0xa646c35bfb2138f5L,0x735769df74f0c200L, + 0x9fcdf60a2ab20e1dL,0x4e0acc801f2e11dcL,0x000000000000013aL }, + { 0x00caf22515abd8eaL,0xe5f47384cb2e6e92L,0xa9186f93be14ff19L, + 0x4620d2c916ca5bb6L,0x22d4fc0ebddde27bL,0xd6f04a09a1c81269L, + 0x25d65811fac277d4L,0x803293db322ab775L,0x00000000000001e4L } }, + /* 17 << 413 */ + { { 0xb3ebf1380ba381bcL,0x7289d26e159f83acL,0xd45884b4642425deL, + 0x644daeecebfd5e89L,0x22c6df37f5f65270L,0x376b8127b6bcf3aaL, + 0xbda049d4f4ec524fL,0xfd412add737fba08L,0x00000000000001ecL }, + { 0x060c42cd0ffa17fcL,0xf6e37fbf8b4e9a97L,0x7681530396b17356L, + 0x92c4519627ef9df3L,0xd048c276a0fb6660L,0xb69785e8b4f971c5L, + 0x6f2a394cfc90d573L,0x2ff151e18d0558cbL,0x000000000000011cL } }, + /* 18 << 413 */ + { { 0x7a118320596008e4L,0x7f469890b45b8849L,0xd05018829fd8988dL, + 0xdba0b02ec8fc3c87L,0xc10a7d57333e0ac7L,0x63c960014e554169L, + 0xbbde212e773150ebL,0xb654d3d86d7967aaL,0x000000000000017bL }, + { 0x8e3087977a2e5d76L,0x8e58379ea352086cL,0x8f9c8c8c4797c0a5L, + 
0x329c35097c491506L,0xe43fc3699312118fL,0x0d96ba0101989af1L, + 0x60b3e701d8dd5e74L,0xe2954b8011364c68L,0x000000000000018bL } }, + /* 19 << 413 */ + { { 0x89318598abb6095eL,0xf838078254f1abd2L,0xbcc578dd4150e804L, + 0x751c7bdf9c394104L,0x563688d2b10220b8L,0x8f371472362c10faL, + 0x28e0a341ff5248b0L,0xe79232df6b59cfb1L,0x0000000000000025L }, + { 0xc7fae1d695e6f9bfL,0xfd2bc6d21b4cb12dL,0x1a67033c7a9fcb6cL, + 0xe2741449e7f5f237L,0x7bf33fb4c5e0cd46L,0x2bb0a260011b6c96L, + 0x55b14f8b11c51582L,0x861b24259938fe56L,0x000000000000000eL } }, + /* 20 << 413 */ + { { 0xbb34d7849d0d6a8eL,0x181372ae5249d4aaL,0x70542a6f2a73950fL, + 0x74b04f0ccdf2e96dL,0xfd9cae5b3fecace5L,0x22fb4d8d19fb6b62L, + 0x3252d045e69c55fdL,0x7290f7fac2bc4211L,0x0000000000000148L }, + { 0x10d8e574ec7e23eeL,0x8d6c5a23c7a555e7L,0xd583c8aa140a4f49L, + 0x0de3c0d4f10191c9L,0x985e0201d4d1e937L,0xb5dee6abc3a7e70fL, + 0xc212b74b44de1f4dL,0x1f745d35ef35b040L,0x00000000000001fcL } }, + /* 21 << 413 */ + { { 0x622c60244c78f076L,0x6aeae9383019aef8L,0xdbb4d128ff3e8a56L, + 0xeedc83ec8eff26c1L,0x3cfe10dfe4beda18L,0xeb555398b2972b48L, + 0xb79caa949ebce9fcL,0xb0425e6716bc49b2L,0x000000000000007dL }, + { 0xe7d34bbe0e8a5400L,0x5f50f6422d2e374cL,0xaaa6e26d82100013L, + 0xa79dfb31cb6bd0d0L,0x81573d70aca0982bL,0xeedac6643d0964ceL, + 0xf1b12fb1a8f4ea97L,0xf9885103b636c8e3L,0x00000000000001e6L } }, + /* 22 << 413 */ + { { 0x6a3f3b0cf066b354L,0x58cc7cfecdf5d33fL,0x299206fb3b7bbe62L, + 0x590ee606197b5384L,0x09ca781aa8bf219fL,0xd2bb936cc54c6d12L, + 0x3053151291abf037L,0x5d04db05244e2825L,0x00000000000001faL }, + { 0x31e28fef6017bd41L,0x44fc27841c446006L,0x6265b3097dfb20a1L, + 0xb460776dd6aebeb7L,0x3c2abf087b9a1601L,0xac3464126c7f692fL, + 0x8fef9006b988eb33L,0xad7ea2997832d27dL,0x0000000000000053L } }, + /* 23 << 413 */ + { { 0xd5d2fa1109089d81L,0xb4399d888abd7968L,0xc1716638eb869300L, + 0xd993cdb9e1cd6275L,0x241d4c2ab8310dd0L,0x031e500abe369875L, + 0x5a6ed301297c9a3bL,0xf2435b9230b41f99L,0x00000000000000c2L }, + { 
0x08100d2cdf2c3657L,0xa6ac7c13b29b6405L,0xc3a7b5461f343164L, + 0x8fd10a6b9665ca07L,0xfbfeed84171e0f50L,0xa7d5c01d4f409d69L, + 0xdb8be8f366209239L,0x68e69ee888fa3ee3L,0x00000000000001f4L } }, + /* 24 << 413 */ + { { 0x323e1b7f0a823516L,0x8f4e9152673254f7L,0x5e5d719041599b9cL, + 0x96bfba24a1198accL,0xb1b6e75bbbfcd72cL,0xfd5955f2f4894626L, + 0x109edfea898bbbfdL,0xd36929041f468074L,0x000000000000016aL }, + { 0x089d12a99bb7d749L,0x893ebf254ffc33d9L,0x6ed72c609097ff49L, + 0x9291e1eeb4e1d91dL,0x0dbfe58272a07882L,0xac41065fbe0636ecL, + 0x43d74f8bf4234bd5L,0x01bfb0850a28fdefL,0x00000000000000b5L } }, + /* 25 << 413 */ + { { 0x01eebda491b42f54L,0x891ca7438f8e24d8L,0x98ff0a26d2362d60L, + 0x82147d21085b187bL,0x469ab10225f97fb1L,0xc404bd2bd0280e20L, + 0x292592698ecdb68eL,0x570b7a92d5f50c44L,0x000000000000010fL }, + { 0x381f68256c856631L,0x4c3d2172fb2c1f40L,0xc1beda5173da2dfdL, + 0x442095377d9d1ad6L,0xe94807503a33e01dL,0x697d86895fcfeaa8L, + 0x5d933887a8b9155aL,0x2582c294d2a8f48bL,0x0000000000000064L } }, + /* 26 << 413 */ + { { 0x03ba5094920b66a3L,0x65239fc4ebe42bebL,0x68c63f3dc4ce1efeL, + 0x33084ea54ad8b52aL,0x28873c93b9e33e89L,0x00d6e65e1410a1ecL, + 0x21cc87d07e79c97fL,0x32bb9986b1277db9L,0x00000000000000daL }, + { 0x659619b4c1ae88c5L,0xef1b0ea112d24497L,0xc0646672983fec9aL, + 0xb105138fb42b7079L,0xe0a6eccec2900a1eL,0x5e2eac870b094635L, + 0x8750a30ab8659fc0L,0xc3aa9a0f9b73a020L,0x0000000000000024L } }, + /* 27 << 413 */ + { { 0xf6cd90c7b7e01593L,0x3c0e6245c0096352L,0xb55ed8331cf5a335L, + 0x480e68de18962d59L,0x83d09d6992f059beL,0xcbd11b027f28d3deL, + 0xdc5b2e864e26e34cL,0x66e2f6e9eac14dddL,0x00000000000000bfL }, + { 0x1c0928fba612de24L,0x407a55a5d1db9f9bL,0xa64ea668b6f50f0aL, + 0x7d1c14e9ede7c9d9L,0xef7fb4b775353ab5L,0x7cc7e66146f1bc70L, + 0x73dc29babf7e44d5L,0x26b2e00643e6f0beL,0x0000000000000146L } }, + /* 28 << 413 */ + { { 0x3463165d755e8efdL,0x96650fb2ac359a78L,0x1059dde3f00748afL, + 0x0e20212f58e38d2fL,0xda345fe4debdfa10L,0xdf920bc8667e54c1L, + 
0x03953f8ba4dfa4edL,0xc7638154ebb27882L,0x0000000000000110L }, + { 0x950085ee132434eaL,0x8a2bdab57120a652L,0x28ded0be42ab161bL, + 0x9b55dc39bf1c6faaL,0x71e94dc9261ea91bL,0xe52990b0c6996959L, + 0x62bd7c41582570a0L,0x61d8fc99be597471L,0x00000000000001deL } }, + /* 29 << 413 */ + { { 0x6dab19378a42408dL,0xc005cc1ff3a80b5aL,0x0a1030f0b8a2cb36L, + 0x171576f06fc3169aL,0x0b7dc150952294e9L,0xf506b4c3053cf79cL, + 0x417ca443c70ed2aeL,0x80bbc72684c09424L,0x0000000000000113L }, + { 0x6ecc2c85cfe818a1L,0x9610f7d24e78801bL,0x8c33fe9336b49aa4L, + 0x10eda040e5b58e18L,0xd58d1b7b2a8213fcL,0x4db6dc12d932602dL, + 0xb3a161343b1df67eL,0x734f2ef4b1746724L,0x00000000000000a0L } }, + /* 30 << 413 */ + { { 0x37ab4d79ce269559L,0x80ffc5068ba6574dL,0xbcddc7871b2a6d07L, + 0x8d2f43928c76de3aL,0xfd6347a2f6c3616fL,0x2c47372d5d8a2a7dL, + 0x246d8873d86e2015L,0x265dc733e339dc8cL,0x0000000000000172L }, + { 0xa0c9d81159538718L,0x0f7fb3fc26e1da06L,0xa5aaacf82c28577cL, + 0xfd956a4de59804bfL,0x2046deb77de0e903L,0x7388e7374774c173L, + 0xcce82ee06107d1aaL,0x2c3d2f51b2eca2deL,0x000000000000012cL } }, + /* 31 << 413 */ + { { 0x4294d5f39a9fc27fL,0xfca710b946c678aaL,0x34fbc512549d8927L, + 0x918de41068015dd0L,0x7c2cf8de5260a6f3L,0x4b248ddb0523096aL, + 0x7b8493dcb1a42b11L,0x5fe79c0650631110L,0x000000000000016cL }, + { 0x3dd89707a7ed1ec3L,0xfcc8bb190042f593L,0xedfd3f0813a4e462L, + 0x33ea61fa56ec2b76L,0x56cd7669dd665c93L,0x62ea3ebdcd19b6eeL, + 0xa312ca02426ceb4fL,0x7b3645d0137b2440L,0x00000000000001a6L } }, + /* 32 << 413 */ + { { 0x9d0dbabb1ec238d2L,0x3f6c4af337e67e7eL,0xf7a6b09464dec6e9L, + 0x1f53fd502c2724ccL,0x5f333d4751bcaa0cL,0x63cc89baebae3589L, + 0x2b90af90f0632831L,0x8f6af6cd4d18cfd3L,0x00000000000001ceL }, + { 0x8b34e44576940e65L,0x264a0d8fc1c34c11L,0xd433303f4120afc2L, + 0x240a12d6f7e738b8L,0x574fa7156e921883L,0x6b2bb33c02d74e8cL, + 0x7b193e240fe8e9b8L,0xbe954bad506cc6d4L,0x000000000000011eL } }, + /* 33 << 413 */ + { { 0x63299f8ad1979cd4L,0x76ce11cd53a8f2ceL,0x8f5247314756f0e4L, + 
0xb360ef86d6312ae0L,0x26b6aa23146945f7L,0xfc5e0b3453c2b57eL, + 0x41cf74a99e4fa20eL,0xbc94194f872c03ecL,0x0000000000000179L }, + { 0x30c171bd96d3ac18L,0x6a93906b7a26ef90L,0x60f1820bf5a6d675L, + 0x2abec262860a7536L,0x82a44ef7a356d631L,0xa00527c8dd13a5b5L, + 0x009955617328996bL,0x8e004ab9a0e8f5ddL,0x00000000000000faL } }, + /* 34 << 413 */ + { { 0x591f47c9c2db5c7bL,0x83799ff638afcd10L,0x02aca0a07a4fbfe6L, + 0xe2cf1215d818414dL,0x46330ef3f9419a89L,0xe57b3b7dfcacb608L, + 0xe2af0b7111de170cL,0xb3dae14ca9f7cb22L,0x0000000000000001L }, + { 0xf518e5500ae03740L,0xaf9575680d43e335L,0xf341260f5fda61ddL, + 0xf7fabc68d815ff6bL,0x446f516f11bba091L,0x7b6a7a028c2cbe3aL, + 0x3ef55ec98ead682fL,0xfa11057b08472496L,0x0000000000000105L } }, + /* 35 << 413 */ + { { 0x2ef8dcb991d78c71L,0xea57b096b79b7989L,0xd9cf5cb49b8be524L, + 0x692d270c0fa151a8L,0xfdd28e1b32b69968L,0xcd3323ae8d8d1e47L, + 0xfa44d78f4c086337L,0x756f8b1c42359ab6L,0x00000000000000a9L }, + { 0x5d2941884498e4d3L,0x486cc3fb4a2e8a97L,0xb3629ceb4420f08bL, + 0x23cb3922a710733cL,0x17ad6e1a321478b0L,0xfe521732df1ff995L, + 0xdc11dffc4dffeff1L,0x5ae4b2e2957afd54L,0x00000000000001f1L } }, + /* 36 << 413 */ + { { 0x4b597ec0dba12c57L,0xb3b2dc7de1fc4b5dL,0x4752e40f15050e90L, + 0x1fd86c1acda9bff2L,0x5a398964f73d82ccL,0x1f59263b323ec620L, + 0x350c881919bf5ae6L,0x6736c9f029ecb6c0L,0x000000000000003fL }, + { 0x4c7f061a627720abL,0xc5011823b7be99bcL,0x5ad5f70496a77f9bL, + 0x9843b728a8f8d2d5L,0x82d03acee3a8b30cL,0xd03952fe2b786acbL, + 0xe01b07d58d17f32aL,0x21a6c2baa479b6f1L,0x0000000000000156L } }, + /* 37 << 413 */ + { { 0xbd4996a6408964b4L,0xd39f56b289c25effL,0x4b58c175a1935189L, + 0xeeeb51b457351b32L,0xbad2424c58c15022L,0x88532ce5e3ac5df4L, + 0x297db1951a613b42L,0xa9826d4bc9b4b0f6L,0x00000000000001faL }, + { 0xf487960c57881c0fL,0x1b67319734f10432L,0x484f2a40f6a2141fL, + 0xfb9e8fd2449bdb38L,0x088d6e6184f4e1adL,0x93b7cb1a2e4ac094L, + 0x855999084b9fb3bbL,0x6f601e992f3be966L,0x00000000000000e6L } }, + /* 38 << 413 */ + { { 
0x24993c953bb81c62L,0x93e513dfff3eb22eL,0xf23a18b7b756ddd6L, + 0xa6cfc668247dea49L,0xc2aa1665cd3a8b9dL,0xb5a556189fa99e24L, + 0x615682b309f23beaL,0x1d669c5bc44b4017L,0x00000000000000a8L }, + { 0x605339e218b6f759L,0x624202ff67b89ad5L,0x679698559145a96dL, + 0x6ddb181c7e1ba76aL,0x437631e06eb22bbfL,0x41c8c70078e93567L, + 0xee67a9ae5e3003f0L,0xeadb9626981a58dfL,0x00000000000001a2L } }, + /* 39 << 413 */ + { { 0x47340a8dff6e5e58L,0x597caab368c8ad6dL,0xde8c32ae6b628a7cL, + 0x694f8cf92168a60bL,0xe41264649434c688L,0x4849f57ba8be6f19L, + 0x5ed6e3517879dbf8L,0xeaabde98d33e7390L,0x00000000000000b5L }, + { 0xde51fff7393a3029L,0x40930f72b285d593L,0x6cd65dc0ee41a1bfL, + 0x77c5681d80dfeed0L,0x8f33ecee8afd229dL,0x137a4d675cf8d15cL, + 0xa7855f8aa67e504aL,0xb9a6700bb52b6c48L,0x000000000000012fL } }, + /* 40 << 413 */ + { { 0x4054b365d1a4a65eL,0xfa395e2aa04b2f59L,0x1c6c3b41bf63d17aL, + 0xa639fb26abe2aad0L,0x008f81600c9a565dL,0xf5efa9e691061e61L, + 0x7731c2f76e881f7fL,0x3623cd7c9074b4aaL,0x0000000000000016L }, + { 0x20ba924b1037873eL,0x7b7107acabcfd34eL,0xb492ebb905e13406L, + 0x6dcc3342f08fd2faL,0x08f3a1d364126080L,0x8fb54869fb5bf009L, + 0x5e66163282442dcdL,0xbab2d8dd5c21d62aL,0x000000000000003dL } }, + /* 41 << 413 */ + { { 0x526bab49e03531dbL,0x6c2eb8722e649aacL,0x96751e97a364a699L, + 0xcfffdf6e00d01fc2L,0x40861a4b3d572fa7L,0xfccb951f606b2b3fL, + 0xf8659daa1950c2bbL,0xb436a4638af98008L,0x00000000000000e9L }, + { 0x35d0267b9b0b76eaL,0xc4c6961226bd292cL,0x098afee2f060dd93L, + 0xc7cbd9c79843bd5eL,0x137f8b62152e8229L,0x81825a8462142fffL, + 0x5b25f6e808d3a5a4L,0xdaab67c3384476afL,0x0000000000000058L } }, + /* 42 << 413 */ + { { 0x989bda89821ccd3eL,0xeddc91644cdc3d8dL,0x6a06cd546cf20b6fL, + 0xf53d75fcb49b3f50L,0x18e2b09f8e214907L,0xff1c4a578857d58fL, + 0xd2ea3b54af88ce82L,0xc40ea7f424289152L,0x000000000000010cL }, + { 0x6e9760861ae764a2L,0x79aa8d11fad90855L,0x4e04c7c51ac4067dL, + 0x4e48d0714b54abd3L,0xc15e12f487c57ef4L,0xcf0dc49e73dea84aL, + 
0x87d2022055e54611L,0x20aa68d70b68bb26L,0x000000000000017dL } }, + /* 43 << 413 */ + { { 0x90ddc3205b3c6e1cL,0xaa2d087cbff4a8c1L,0xbdb8dcf2062349a6L, + 0x06b762825d602038L,0x9f0411cf6739dce2L,0x603ecc513feeaa4fL, + 0x8b86e3803aba1b86L,0xfdbeff61e0da26dbL,0x0000000000000050L }, + { 0xd403b2587366a1ebL,0xfdd4afc3fe7e6844L,0xc55a20ed2e8391dfL, + 0x31d85dd7c875e460L,0x50afaf2183d6c41fL,0x0ddb52a859e9a2b0L, + 0x5481ec9d971ed465L,0xdd22f0619de7c8d1L,0x000000000000011dL } }, + /* 44 << 413 */ + { { 0xa62823964d464d88L,0x4723b946caeab8f1L,0xe68a25c31a3f1da7L, + 0xb8c1bae54696044bL,0x7cdc5fa6c27e2468L,0xd54c65dc55d7b1c5L, + 0x9fe5872486b67022L,0xe4e5bf99be39934fL,0x0000000000000069L }, + { 0x1e79773b464a3c68L,0xbbdccc3899fcf4caL,0xa3023f20e69d9927L, + 0x39de0edb203fce2aL,0xebfd0e18fab9a998L,0x14b7b1e1bc60d079L, + 0xfa9a20add10760c5L,0x5050fd3e820f2a0eL,0x00000000000000a8L } }, + /* 45 << 413 */ + { { 0x4d4028e3962d3e22L,0x221795bb8df85019L,0x3e93eba3b7f3dda7L, + 0x18d2aa25ba5f7d48L,0x5811dbcf5b36ec49L,0x74878d24bec9449bL, + 0xb331d19575d88654L,0x68d14252ee93f20fL,0x00000000000001f6L }, + { 0x4c3947ef517f4d8fL,0xac81ff08fd781b82L,0xa8183a60afd02499L, + 0xf11fcac98cbf1552L,0x639c9edd2464822eL,0xa6a0974551617891L, + 0xf475d74536c25dc9L,0x0e2df3059e3de853L,0x0000000000000133L } }, + /* 46 << 413 */ + { { 0xc412881ffe8e6e27L,0xad72506e12423bbaL,0x557e822a887695eeL, + 0x42741b0e91b1f6c8L,0xcd108283790a1959L,0x526c066463986f8eL, + 0x7b3c480388675baaL,0x7c7a94618d2b8f28L,0x000000000000012bL }, + { 0xf3b31c681f34e4e2L,0xe27f6de0d22cec8eL,0x3cb7d2770a843a7dL, + 0xab143cbe02ae4431L,0x9b786935cce27ca6L,0x63c73cde9c515282L, + 0x0b6854a1f1ef32f1L,0x843fc37eb5d08cddL,0x00000000000001e8L } }, + /* 47 << 413 */ + { { 0x81d165c841a1e954L,0xba30ab7ad97a9a90L,0xebe7c890998f67d2L, + 0x972185aa63216872L,0xf9859ed7eb4ff20dL,0x13dcc62399e6f6a8L, + 0x5d2cf8ce47a6b661L,0x2260e1445fa93eaeL,0x000000000000004dL }, + { 0x062cb12721186595L,0x15bb3dad9cdbfe1aL,0xad2bae7b944c54b3L, + 
0x0dfa4da0ba724aacL,0x6b342cc6bfe51025L,0x4d8bf909d9c2630bL, + 0xb374c6c6131de964L,0x475e6872835f73c1L,0x00000000000000e1L } }, + /* 48 << 413 */ + { { 0xc296b1f684c1985aL,0x971547c5aa4f7132L,0xe3705e1183e168cfL, + 0x8d00e588c935a3deL,0x853154235262789bL,0x46f2c2ca005f7c35L, + 0xfbfbadd217ea733eL,0xb32a38b6c9e496a6L,0x00000000000000d8L }, + { 0xa82e95e5cec8d3f9L,0x8a1efca3e378be9dL,0x6e114d078080d874L, + 0x514dc754fb9c056aL,0x5781be8966ef6c48L,0x0586bd3834b1ebe3L, + 0x0e31b44f8a8b161eL,0x20c2b78ad4d62dd5L,0x00000000000001dbL } }, + /* 49 << 413 */ + { { 0x97a8c5a9870c8cf3L,0xe01acc6f904f3673L,0x42ee78a16905580cL, + 0x237eb3fb4e2f3c90L,0xab1806eabe2deba1L,0xf2b6643f66c68eb3L, + 0x75e0a1d4a509adf3L,0x7a7dfcb64779af07L,0x000000000000000eL }, + { 0x951ef5e34263b572L,0xe203f49b636a5067L,0x8d229b333377c5f1L, + 0xd3427aa4c030994eL,0xdc7861d91a84d51cL,0x6454562f195e282cL, + 0x74a7c351f6882e78L,0x4eaefb0f0798f8c2L,0x000000000000018eL } }, + /* 50 << 413 */ + { { 0x2b2188b680f1f0b3L,0xefed65ae394debd7L,0xbeda0fe75d7c51b5L, + 0x3989a2b6cd0db074L,0x0f5406253d9437f6L,0xf58203c70c0169a0L, + 0x400e35e9bbc38332L,0x3ea9b7a961baeca4L,0x00000000000001b1L }, + { 0x58cc97c45059b23bL,0x8f5e67fccf7bd0e2L,0x6baec2aedfc1882fL, + 0xa0f796283aa69ec4L,0xed7986f2a6d520cfL,0x5fa28f4ac454e929L, + 0x5835f87541151c50L,0xa3d1fdce3143de1eL,0x000000000000002fL } }, + /* 51 << 413 */ + { { 0xea6629b82b899daeL,0xb372b86257edbb50L,0x053f9c62cd45f9f8L, + 0x395c0f8529604bb2L,0x24100041a1e9e16aL,0xd89d20d72b979b9cL, + 0x4ff39f96845213e8L,0x077d286e70ca9f63L,0x00000000000001d5L }, + { 0xa250b87c56f1bcacL,0x9d71c979886fd8b6L,0x079a52682c052432L, + 0x80d965ccbce043bdL,0xb7590694806ee3e3L,0xa829475e77f75c78L, + 0x6311b404667c2bffL,0x88343a1c8aa15256L,0x000000000000012fL } }, + /* 52 << 413 */ + { { 0x10e4b5a2ad1c282eL,0x6f768a2376e6a2a4L,0xd92b73cdcb93da72L, + 0x477c839eecc382a3L,0x761d18d5a70b8e5cL,0xd09779d2270ba0e8L, + 0xc2c8eedfb2ea6bbfL,0xeadbb724599593b9L,0x0000000000000026L }, + { 
0x6eb0aa1c8ec01fdfL,0x3dc88cd0b3ded71eL,0x8c4bcd807bc58ea5L, + 0xea9404def976a709L,0x97e33a53d73a2a75L,0x8c20e2ea165b3e6fL, + 0x4193a051ef0c6a5bL,0x1edf6f8c3fb398d6L,0x0000000000000016L } }, + /* 53 << 413 */ + { { 0xa1718bfdb4a3de01L,0xe962902ea830bb59L,0xf25d70441a38302dL, + 0x285917cd4c16cb5aL,0x0b86ce611be8e2e7L,0xb1f2900c6e96d268L, + 0x18d5185e834190f9L,0x96c4db4bdacbe228L,0x00000000000000afL }, + { 0x7b57f2eeec9822c2L,0x36ceebfd9b46f49dL,0xb11fa678043a7394L, + 0x75a9f067e86cb17dL,0xd13a22746e1185b4L,0xa7137a6a24b92829L, + 0x30f74cfc2edb7707L,0xaa14984f56fca5beL,0x00000000000001eeL } }, + /* 54 << 413 */ + { { 0x85d2856b0231dafaL,0xd7fabb5108028fbbL,0xca0066e8602a5eb9L, + 0xc6d7512e885466e4L,0xe5d1fea841615e58L,0xa2de9d5f996b2a88L, + 0x0cbd9831554a3c32L,0xabb8d9cbd675917eL,0x000000000000003dL }, + { 0x10fd5eb68255009fL,0x47fd9bb733d24399L,0x4a4d9629ac5cd8b4L, + 0xae5aa75cbb1b9747L,0xa6f7516e98397c51L,0x38d011e92ed7e028L, + 0x3e2e3a3357c86547L,0x13f713fa701e9db9L,0x0000000000000122L } }, + /* 55 << 413 */ + { { 0x2662f1810115f051L,0x7e2fe420aa76e71aL,0xe198883adf6a9851L, + 0x9e4698fb250b8ce0L,0xa9c27caec73858a0L,0xba746550df3344aeL, + 0xc7c7a7a7a6e81567L,0x942c6f66e35727b5L,0x0000000000000187L }, + { 0x08a018d6e8a4fbddL,0xfc08d2fd6798242aL,0x093b8894b21316baL, + 0x4320f88aea4ef1adL,0x28fe97c8e6141abbL,0x50ad5c65d21bc6d8L, + 0x0840a2a4473c5901L,0x20297e1baf604adcL,0x00000000000000a7L } }, + /* 56 << 413 */ + { { 0xea6e467aaeb0d0f0L,0x9fa030d9eb5ee813L,0x9be99873515cdbf8L, + 0xe608db63a805d16aL,0x5e6255dd9ce2f64bL,0x2d88fb427418bb31L, + 0x147c74fee7ac60bfL,0x34df0171672b0f90L,0x000000000000004dL }, + { 0x6c1f267177f1f4feL,0xf64d4242138fa188L,0x55ab5ddd4c0097caL, + 0x354bb5f3bdbdb465L,0xad6142d579060e68L,0xd9adc1101017b5fbL, + 0x656ffc3a9f510299L,0x568945bd741ebe49L,0x000000000000009fL } }, + /* 57 << 413 */ + { { 0x3b3b40ee0d593a59L,0xd8655c25b704b4d9L,0x58d47bae4ce690aeL, + 0xe99ebb1f44bfd996L,0x5039cb719e993a47L,0x0e30fe78f47ed3a0L, + 
0xf73f450b5bd344f2L,0x35a41fc7ca7179a1L,0x0000000000000053L }, + { 0xc9b89a7d4b94e4a7L,0xf84e3eae978f1d58L,0x18ac261b1ea29601L, + 0x004424d32d91713eL,0x61bfe2f4e594f2ecL,0x444d234da62b8403L, + 0x77c35ae6dc6b4b34L,0x75fbae457da07a2aL,0x00000000000000deL } }, + /* 58 << 413 */ + { { 0x42c072b856afffc1L,0xecf5aac3405b1205L,0xe64a6af0f8f4c098L, + 0xb29e488cc9cdf82fL,0xa859c42dabc487e7L,0xfe6bd7e9464bdd74L, + 0xdee72074eafd8d10L,0xa9da876d40f4074fL,0x0000000000000020L }, + { 0x733dfb9a970d4e05L,0x1560161d4e08df26L,0xadf0d94e3675567fL, + 0x0e063d520703fd74L,0x91f7dfdb32500e7dL,0x4d20c3881456db62L, + 0x49311bdb8b5ddfe7L,0x6e844794f76e2067L,0x0000000000000071L } }, + /* 59 << 413 */ + { { 0x31c81bd38d139215L,0xb2e03ba0aa31ea08L,0xe8e0824db68f165aL, + 0xfea377926694a8d8L,0x3f967e438274eb91L,0xe4bd0c82cca1557aL, + 0x1daa85547bcd3067L,0xd3334fc48baaaa35L,0x0000000000000141L }, + { 0x9281ae374fe8c1f5L,0x91fe949cdc284160L,0xb50a91b15fc70688L, + 0xbffd0e40b7ec7809L,0x2ec95feacf809d19L,0xbfa250a6f70c5c78L, + 0x3c3111505dccc3e7L,0xd2d9107cbddae7f0L,0x0000000000000136L } }, + /* 60 << 413 */ + { { 0x30a7aebd85a53d34L,0xf42e79a34ffe774dL,0x60e6bbffd567b09cL, + 0x30c427bee46a553eL,0x3083088d3ef4ab24L,0xa2959e2b8411405aL, + 0x72732346f6ec7452L,0x260a48eb44d2b722L,0x00000000000001c9L }, + { 0xb5f775e0b1160f42L,0xd7b0ebb4266a2d91L,0xf66d1fec455c9e75L, + 0xbb3adc9e66d89a69L,0x0c49158dd66a3216L,0x5eab18da51e0df4bL, + 0xc008165e7ca542dbL,0x6007ee56065967f6L,0x00000000000001afL } }, + /* 61 << 413 */ + { { 0x3e43fea8d59d042dL,0x826f5eccd135dcddL,0x937fac11e783cd85L, + 0xe514de033495a28fL,0x16da07dc0217f127L,0xdd4edb0f9f1049e6L, + 0xf4bab49015317cc4L,0x8a4d5834510edce9L,0x0000000000000048L }, + { 0x3daca528a6ab3760L,0x4a210108aa22cdb0L,0x8f7d2289cd53851dL, + 0x494ff277eaf66a0dL,0x406301d01b33a9d5L,0x1993932f8d87f67eL, + 0x20ae660e73829af3L,0x3bb045bf23729097L,0x00000000000000eeL } }, + /* 62 << 413 */ + { { 0x3131547bdda780aaL,0xec4bbfd7618a818bL,0x06a9305b376c9580L, + 
0x7af0133e4068efa0L,0x089725c5c0dd9815L,0x41620e90380fc678L, + 0x3e93df46704179fdL,0x350b0a9f498b064aL,0x00000000000001c4L }, + { 0x46d40dfd908a6651L,0xe872529e45405cb1L,0x71f9883ba68ee627L, + 0x77732ecd56e0c803L,0xcab27f558c00c613L,0xd74f55bc6d431b96L, + 0x84426040a332bb37L,0x9f1aca8ac247e7adL,0x00000000000001f4L } }, + /* 63 << 413 */ + { { 0xd491491efd080c64L,0xfb48d7df52544552L,0x2dc3bca98e495dc7L, + 0x4a82f1464dd3d735L,0xcd7034f325427421L,0xf25d1072fc1e5323L, + 0x1d90639574c48551L,0xd3383ab8d11bc0e8L,0x00000000000001eaL }, + { 0x9b2bcccbe3d16f54L,0xa1f4b6eb85354b8cL,0x4c6a0456950886a1L, + 0xa36a9009f5394ac9L,0xcfd0a2a221950a10L,0xf4ca275ede4cff54L, + 0xe755c08c693839a3L,0x4f04cd6185190443L,0x0000000000000190L } }, + /* 64 << 413 */ + { { 0x61d252da63d7540bL,0x5977687dba87605eL,0x53be7d04f8f3558eL, + 0x76c4c49c99f1aaf0L,0x129e0bffce8bf731L,0xcdde32a166a1555bL, + 0xedb9e9e5d4bc5b6aL,0x675b5d1252bc3c6fL,0x0000000000000130L }, + { 0xfb3dabae91b491f4L,0xef1967c39ea0b6d1L,0xb68a0e75e6370d8eL, + 0x6dd017fce2ffb7b5L,0x7e3dc82606d66169L,0x802b269056271658L, + 0x117a4ee3c3da3c9fL,0x80bb9fcf167d09afL,0x00000000000000d0L } }, + /* 0 << 420 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 420 */ + { { 0x73a6ba3810a8c4fbL,0x5153d959ecc93e5dL,0x7ca58012b59e9871L, + 0xedc0dbefafd442f1L,0xb9050a22b9cf7691L,0x3d1e96fe464d017dL, + 0x541781a482074dcaL,0xedce0db38b355413L,0x000000000000006bL }, + { 0x1a13e3eeae2b39c2L,0xc431081d3c218179L,0x5cbc14c1ae68b7c6L, + 0xcf2559bb9005a304L,0x14d7c1e92ec7aed5L,0x5c379bfe1e2e2f0dL, + 0xfc33e4d2886f0cf9L,0x2f14e7d0ac4e1d17L,0x0000000000000071L } }, + /* 2 << 420 */ + { { 0x27e639c92c82d5c0L,0x035527b8104b9d44L,0xd0a7f3c88a17be05L, + 0x3c529b91813cb246L,0x296074b2b8e54b08L,0x660118578dca703cL, + 0xfa30e9de41606a8dL,0x2652c08c9ad1d94aL,0x000000000000006eL }, + { 0x09724ba6d42d26b2L,0xc0a02c2e25eecab7L,0xb004174845e7fb83L, + 
0x24e80b604503354eL,0x4f3045f9da55516dL,0x97d76335cc7b2627L, + 0xc8bfd79bb380f10fL,0x987396522becbc27L,0x0000000000000146L } }, + /* 3 << 420 */ + { { 0x9394910f0093fd88L,0xff7760d2547fc3e1L,0x7b81fcf5a904c24eL, + 0x120a558dfd80644cL,0xfad07009a715d6fcL,0x24bd3ad8abf6246fL, + 0x7c5b04c779c968a7L,0xc7df9266108ad586L,0x00000000000000d3L }, + { 0xe37db2e0b655b6e1L,0x0a029277568c6373L,0x0bd0526f428a6b69L, + 0x54bd7b9b3d4f3847L,0x42caca2fcb6bd60fL,0x657b0f1f6d10bfa6L, + 0x97f9f99d0fef238dL,0x93171b4dcc3795a8L,0x000000000000009dL } }, + /* 4 << 420 */ + { { 0x83fa275b0ddd2ba9L,0x71f5b43dbaddbdeeL,0xb67814112244290bL, + 0x075580601244c56cL,0xfdd85cce2ce4fc6bL,0x7fc52dace53b1502L, + 0x3243b95255a68467L,0x06ef6a6540900cefL,0x000000000000009fL }, + { 0x562d06ac65457f82L,0x9dbd63519a9e8e58L,0x0d218265496d9349L, + 0x940bf3f4a218ec30L,0x1e9c0aa3d605883dL,0x2cb9bcc6427f8a5cL, + 0xba3119b23685376cL,0xfc2c2f0df0c5783fL,0x00000000000000dcL } }, + /* 5 << 420 */ + { { 0xb7b63d0fa73a1aa4L,0xfadba5955532ec0fL,0x19a61405ff9e47d6L, + 0xd9a44ef1aa210afcL,0xcab8bd2c3c3e2b07L,0xdd8f8d154e1428a8L, + 0xa5eabcc591f63ff2L,0x7dd9635666f368c9L,0x000000000000018aL }, + { 0xb35c9e17d84c056dL,0x0f27cd01eddff5deL,0x855c10a92c0d1066L, + 0xc7aee7feb4bfb648L,0x9f9d978d5803338bL,0xfca5a25f4f4f2b95L, + 0x9d5d5fbfadf37f8aL,0x13482b1e5059c65cL,0x00000000000001a4L } }, + /* 6 << 420 */ + { { 0x6e95a3aefda1ffdfL,0xc4014ebcea5aed3bL,0x915719f862b64816L, + 0x2ea08a71de90b2f6L,0xd2b45d301b4836dfL,0x5647580021840212L, + 0x169110f6ffca7e1aL,0xb32ee8a9f5f43dfdL,0x00000000000001bbL }, + { 0x7ba4dcd71962c792L,0x539fabac5fddda2dL,0x48913d37cfc7b6c2L, + 0xaa2e0fb009d93b9fL,0xa625455b63f79f1eL,0x0f26f80b7c45060aL, + 0xdba77c6793ee96fbL,0x54ba37408711afcbL,0x00000000000001eeL } }, + /* 7 << 420 */ + { { 0x95da54447500f10aL,0x6935ca2aa6b14d2aL,0x58f4555c30dc9660L, + 0x949ec82aac954b30L,0x94027576e7fa5c4dL,0x035d525a21960c7fL, + 0xe3ed1e66c2092c76L,0xbd151aa3df9779a1L,0x000000000000012cL }, + { 
0xda7d7c3de59368feL,0xb4c240648ddce4fcL,0x05360bf847eb7a34L, + 0xe0889804700d30fdL,0x99e1add207f447f6L,0x30e5b7574a4e8762L, + 0xee9f06953329b2fdL,0xbb6fd0fc6894a597L,0x00000000000001a6L } }, + /* 8 << 420 */ + { { 0xb70f255cee12880eL,0x0fcff02402831c27L,0xd92706fe87241978L, + 0x75c7a550fb475daaL,0x128971bb889e3be2L,0x032c8d2651f95444L, + 0xabe4d7dd7251b1c7L,0x8c903fae332e627cL,0x00000000000001faL }, + { 0xd00f7bfc32b6fc85L,0x33ebf5c256e1cd16L,0x0cb224fd42b24457L, + 0xfd0d2136a0ec189cL,0x4718b1736cce225dL,0x403528f6e04d9e2dL, + 0xc7312b00e6e6e81dL,0x3d146f6002202a72L,0x00000000000001b1L } }, + /* 9 << 420 */ + { { 0x1a20556e1034bea2L,0xd717d9172be51df5L,0xe95f091286ac317dL, + 0xf3923fbec1304ee3L,0x3f763629e240f768L,0xcbf933bcc8c2af96L, + 0x0e7cbef612c667f3L,0x6f01599295d25e69L,0x000000000000012fL }, + { 0xf941627f4b56bdafL,0x06ee3b59eec4ed47L,0xb2c0882e4d951b00L, + 0xf5304c7c8652dbcdL,0xf59e2884a6245127L,0x2f9752c14e4444a4L, + 0x91d5fa7af8ded8f5L,0xe5bde5985c3a5632L,0x000000000000003dL } }, + /* 10 << 420 */ + { { 0xd9e55a11a0d1be9cL,0x4c3bfaf24c1c1b10L,0xb0d8ba7d2e8bdca5L, + 0xd67631c3e2b2f07eL,0xa7bc810f085dc2acL,0x0df53d51ea54ed35L, + 0x09cf707235ba17f4L,0xd6bd518ebe70853dL,0x000000000000004aL }, + { 0xa7cd3940b78c4776L,0x9ba0f564e9be7ca5L,0xc40944c61c45432eL, + 0x44fff573472b1abcL,0x80ebafa1c1ec6836L,0xff7eb86e0c55f71fL, + 0x20c8e2695b805b7cL,0xdf0e571b16aa5eefL,0x0000000000000199L } }, + /* 11 << 420 */ + { { 0xb765a24b2af340d9L,0x958178050b487cdeL,0xf2b88c2cf47f322aL, + 0x7fa33ce51ee6fa6cL,0x98002c9a11a0096eL,0x5507eb0bbb6e514dL, + 0x7b3dd65c1a52bdceL,0x80a29cc867bdeb7fL,0x0000000000000069L }, + { 0x0cdc241d6d7c8921L,0xb1506bfc039a455fL,0x14ecf2f7ee18c2bfL, + 0x3317ba71027f00feL,0x2c12d6706662fd91L,0xe9cedbfd87d5376fL, + 0x3e0f51b5b3bc972bL,0x2450c347b2c1a068L,0x00000000000001e7L } }, + /* 12 << 420 */ + { { 0x484732401f8815a7L,0x46168826199e1b61L,0xbb9a52bc07807f97L, + 0x7b3824d9ba31351cL,0x67cd58b460a2da3cL,0x0d99d6d621957903L, + 
0x7c3380d37b459b99L,0xdd0a602353b07588L,0x000000000000010cL }, + { 0xebc49bc0ab2213b8L,0x540f6fc558615ccaL,0xe873ee56d8bc64cfL, + 0xaed30c80ffe085b5L,0x03d39214d0de645dL,0xc6ec44b959f64e2bL, + 0xffafef87c7f37e31L,0xef7985a21ea39c70L,0x0000000000000159L } }, + /* 13 << 420 */ + { { 0xcddb65c37f55b79aL,0x6e26c45f5d85b33eL,0x111919ceb1470acaL, + 0x7aa294e0e6c06007L,0x35207de63c009509L,0xcf119448ddd0c472L, + 0xc01bf29c3c05546dL,0xadf8465854126fe9L,0x0000000000000054L }, + { 0x2c13d69dfef83427L,0xe539c2da0960af28L,0xbe374a8297ccc699L, + 0x8020d224523d7f91L,0xdab7f7d6531b6154L,0x37ae51127cfaaf1bL, + 0xb5e3e2036f69e8a9L,0x4291b0454b708b58L,0x00000000000000e9L } }, + /* 14 << 420 */ + { { 0x4c085162a78092beL,0xcf80206180d90634L,0x95c122b69f81ff39L, + 0x5c936cacaf784312L,0x9f4796ae350bde15L,0x0fd2eacad138a2f3L, + 0x5764f1cec825ae43L,0xc9b9d5ec9b4f1332L,0x00000000000001adL }, + { 0x3049e55456c15c19L,0x61c0dd81e3fcab36L,0x2132d9b2376e573bL, + 0x875184b1c05ee74cL,0xc37f8e51a427b9e5L,0x6a2fb427e6c5b9d5L, + 0xcf13c1ef7b754328L,0x146447f8d1749292L,0x00000000000001e3L } }, + /* 15 << 420 */ + { { 0xeb3388479418ced8L,0x68f55b32506ed21cL,0xccdb0bd335ec2dfdL, + 0x63b075e8b65f515eL,0xd3d07af8f6905524L,0xad5a170ad1589131L, + 0x97ffc56d61582a3cL,0xf15add9a34c2d2e4L,0x00000000000000e0L }, + { 0x79d0330ef5d91b62L,0xf3e8f69ce46eb580L,0x1b5ea4a8067e7599L, + 0x425084336b335559L,0x85863c2eb4ad41bdL,0x9572cd16aa1db8fdL, + 0x55756be84f205835L,0x82ed485f53de8f6aL,0x0000000000000049L } }, + /* 16 << 420 */ + { { 0x7d6e42404e1080a4L,0x5cf6c427e1adb078L,0xe74ec6033d594e28L, + 0xc0185404bdf853f2L,0x287d94dec3335717L,0xf735656ac3179807L, + 0xd356f2bf604e93f7L,0xb1fdc2f2cbe27815L,0x0000000000000151L }, + { 0x7b17e356da1d3ea1L,0x0ed57d7ea7af9610L,0x4a6ac3e53ae89c0eL, + 0xc8f722b85f82b4a3L,0x453d5a4a7f0850faL,0xd6f79d237b3ac4a3L, + 0xe1c9dfc20dbae800L,0x56e4f0f5c4b9258eL,0x00000000000001caL } }, + /* 17 << 420 */ + { { 0xe80852c1ddc395faL,0xe58da639cf6e9427L,0x2d80ef4ccf9f6f4fL, + 
0xfc2bc3b9469f9ec9L,0x90dd2047b5059098L,0xbc7dc5b8a38a135dL, + 0xba7a504e14d5187dL,0x1e5cea620bf39092L,0x000000000000016dL }, + { 0x71afc93587e487e0L,0x5ecc86b7ef01d44fL,0x0987add313d2d4e4L, + 0xf64716e189b1c583L,0x2ef8a366b9927a7fL,0x83bf2fb6a43e9f9cL, + 0x16f2b1856dfd62d8L,0x0720e52aeae049a7L,0x0000000000000027L } }, + /* 18 << 420 */ + { { 0xb372ca6459f57bf1L,0xd71dd5777941180cL,0x71685edf4e9ff4caL, + 0xe587c1c10ea50561L,0xa8807733b4de6fa7L,0x37fd0baaf54d261bL, + 0xbd41723b63d60c25L,0x1db8a7d40ee723b6L,0x00000000000001d2L }, + { 0x4ed434f57731688eL,0xcd0b204990872443L,0xf1cf19d6f08bcd59L, + 0x0a2421868d0e3d48L,0x05071d83631b9d75L,0xe6c824d71f5ea438L, + 0xee72e77f733eab36L,0xa2edb32480653b67L,0x00000000000000efL } }, + /* 19 << 420 */ + { { 0x3c66f086179844eaL,0xa6a8ea1bd3130343L,0x392cdad799fc9f0bL, + 0x3394632e63b0b646L,0x29bf4836f7748943L,0xff18e250c24214deL, + 0x6758a91d0c636fd6L,0xd2cc47e168fee4d3L,0x000000000000013bL }, + { 0xd6d1f413ef6d4af9L,0x0092046fc87c16e4L,0x93d8aa9f3ce6f5d9L, + 0x8f7642089eeb6b0dL,0xb40ff54c3cf6b39eL,0xafa404ed994e9d85L, + 0xe0aaa8228a7a8b6fL,0x74e6a937fd01cbb6L,0x00000000000000c6L } }, + /* 20 << 420 */ + { { 0xf52e68cc04a4640fL,0x469ce7167a073c4bL,0x3ef46bb0bba28a24L, + 0xb963bc66d966a270L,0x5195fa9dcac49906L,0x3265912718c2f3bbL, + 0x5bb5b809a43a8d57L,0x654653503806ba70L,0x0000000000000163L }, + { 0xb3ddf2d683ef27eaL,0xc490dbad6a698a1fL,0xa13676175dbef692L, + 0x2a41f99f4a8d7af3L,0x3a6ed906f7cd77e5L,0xc842b863533e5cfcL, + 0xc098708ee1bec806L,0x9c21e280e32088cfL,0x0000000000000000L } }, + /* 21 << 420 */ + { { 0x8404d82c0f06d9bcL,0x547289a5294acf16L,0xb468ae64c502621dL, + 0x985c267fb3b353a2L,0x4e11dd3e5721f0b2L,0x2f5cd568ce1a24f1L, + 0x1dedf5ebb5fb7148L,0x42a82389c1626fbcL,0x0000000000000110L }, + { 0x8543730601bfc317L,0x7f7784a2e5cae3adL,0xf3dacc6469be54d6L, + 0x038528a3c9d9e32fL,0xf98309683a6f6357L,0xf325307d3f837cfeL, + 0xe805afa33af3d0c6L,0x275272065252e357L,0x0000000000000078L } }, + /* 22 << 420 */ + { { 
0x0aaa344b99371256L,0x0a066fe2d37c69aaL,0xdf78a35d25e09230L, + 0x53c1bd11d8dc7960L,0x02a2444bc9fce3efL,0xad94f6321b02c0f9L, + 0x827e0e363ab86bb1L,0x378fd466c7c1a0a4L,0x00000000000000ffL }, + { 0x94780574032e1eeaL,0xc220064001cb9b20L,0xa27c53bbaa195a72L, + 0x41cfa39c1fd255aeL,0x0727c31a99f3b693L,0xea608501ae941816L, + 0xf6f564d91ffecb1bL,0x78a17e5e5f980bddL,0x0000000000000188L } }, + /* 23 << 420 */ + { { 0x8bb946e2dfa393a4L,0x08fb2f81f615c758L,0x1db09d68b2e74865L, + 0x470d784d3fb6b0f4L,0xd7e9135501b6735bL,0x8b7d570a051cba9dL, + 0xfc3e0b2c70b205c5L,0x06869d5e1e7cbb3aL,0x00000000000001adL }, + { 0x55ca4fd936ef4edbL,0x69d852beecc47c2cL,0x1bb04f285c5b235aL, + 0xbc4ab04d098a7ba8L,0xcc8ac9027b5f540eL,0xf538468155d8047bL, + 0xe494d2587ade6e54L,0x7f6619105cd2e315L,0x0000000000000134L } }, + /* 24 << 420 */ + { { 0x9f89d5bb56aa4115L,0x63fbfdcac2d35218L,0x595276045879b205L, + 0x546e9805863191fdL,0x01d6bb2b492bbed9L,0x837d7e9c73daa6dcL, + 0x3d3a90f2363a7a49L,0xb50a603d703ec617L,0x0000000000000160L }, + { 0xc7d60aa51d767edeL,0x78b275545178ee0eL,0x45d33bfbc598f46aL, + 0xdb05f976bf59b320L,0x4d4d3206f704d3a5L,0x883fada582590e40L, + 0xe9369fe09ef9a17eL,0xc1d11e068661cad5L,0x000000000000009dL } }, + /* 25 << 420 */ + { { 0x76b3e52bd80b2b0eL,0x1ddeab08d57b3e26L,0xadeee95e05d4095cL, + 0x8a441ed876b470c5L,0xb40ea63b598bb942L,0x7a69955764e9311fL, + 0xf41e2dbda3b34c65L,0x36041d213038b04cL,0x00000000000001a0L }, + { 0xa8630639aff87e50L,0xcf330929a9050224L,0xe1343f8f9555f24fL, + 0x7b2b57fd4486e5beL,0x901a1fa3892f8985L,0x48ecf1c0ba0578f9L, + 0x062036a458c5803bL,0x64db8ad3d495e812L,0x00000000000000b5L } }, + /* 26 << 420 */ + { { 0xd828a1eea7cd3d7cL,0xa39918741c46e505L,0x2907f22b977a89dfL, + 0x1a97284ea8bda785L,0x3be00be53e2c7eb8L,0xa09b9df661392237L, + 0xad6d2e582734a180L,0x9c8fad43f305fea8L,0x00000000000001b1L }, + { 0xdf8354b4d3157befL,0x1bbbb2ea3fff9736L,0xd875650ab541b7d2L, + 0xa219b4ff11f808dfL,0x8fddc92292b811e6L,0x8ae8e5e556e34e2fL, + 
0x9c90ed9d431f2ef4L,0xb83c8e8d6e5aeb06L,0x0000000000000033L } }, + /* 27 << 420 */ + { { 0xab542a3bec6d5a30L,0xf3a89c0cbcedbe21L,0x06d93f10f42b171fL, + 0xc05c30b19b2f005bL,0x4402de5b488c69b1L,0xc4fb5dac03bff7ceL, + 0x8cd55dad7ab06022L,0x52af902df9b2cfafL,0x0000000000000193L }, + { 0x16f1519f32e721a4L,0x868f20628f21c68dL,0xd0af3d8f1d1b6568L, + 0x476cabdc0655b4e2L,0x180bc7c910912bd7L,0xccbe77e248758e83L, + 0x5248fe84c984174fL,0xe69a6bcc64f9eeb5L,0x0000000000000073L } }, + /* 28 << 420 */ + { { 0xf736440000feca1dL,0x037b3d49852c31b8L,0xea78fd256c6687b5L, + 0x2ebe2e7bf447b783L,0x654f16b156d7f1a6L,0x8f71c03583b9bec7L, + 0x26211de5f46e3a73L,0x64c26d1d9d823907L,0x00000000000000d4L }, + { 0xf055c6a0dd7faf2eL,0x0f98d13a2ca825c0L,0x539a62a67bc8d8dbL, + 0xadc25a49b69842dcL,0x8a5f304557914947L,0x9c8541cfdc1ddb47L, + 0x9e133348f4ec01faL,0xd0fe103be3eba2fdL,0x00000000000000c1L } }, + /* 29 << 420 */ + { { 0x5b1d13ec418a83dfL,0xe9288a19dd578685L,0x8c90adfff1199fd2L, + 0x535f7ff5c9dfea76L,0x2bc4a3dbf0846631L,0x36a1bd74a6b94ae2L, + 0x5c61d3a32c51a655L,0xf03d31519da1a40bL,0x0000000000000118L }, + { 0x4deee9fda7c44cecL,0x4d3cba4911e4cca1L,0xeb066f59b33266dfL, + 0x45b3856fb3d19c6eL,0x3ac917f0cc142930L,0x8d067ea4a5cc182aL, + 0x1dbbb1c652ee009cL,0xd5769d6027ff54c7L,0x0000000000000110L } }, + /* 30 << 420 */ + { { 0xa15a7bbf8eefad48L,0xe6e89e742651197fL,0xa61a3e84bd94ed51L, + 0x0d244f552b588bf5L,0x65a05bcd083d4e00L,0x439c2e1c135d813fL, + 0x48e05e558b3ffcb7L,0x215bb215254e73dfL,0x000000000000010cL }, + { 0x240758f78096e07dL,0xd60bf24f18137989L,0x0b8be39738a3a7f1L, + 0xad8e978490bed9d6L,0x292131c39b75ec5aL,0x1159fdc879cc4fdeL, + 0x42fa8c1f64454a70L,0xc8c4d18461a34ddbL,0x0000000000000053L } }, + /* 31 << 420 */ + { { 0xf64f3d95dfb33552L,0x4f52fde0ae6e4644L,0x43ae88ccd887707cL, + 0x0873f0578ffac488L,0x39fae3481d00f41eL,0x9f0f2187f47c974aL, + 0x5e63a28114aa2ad1L,0xe12e811cc6985285L,0x0000000000000196L }, + { 0xab5ae9ccac35731eL,0x8967bfb5bed4d30cL,0x92ae61f5c2ed4d32L, + 
0xde3ec6960ad912a8L,0xc931c7b5cf649a14L,0x5588d02492043e7bL, + 0x19c4fe955cc40a0fL,0x9709c5ab8e76d659L,0x000000000000018aL } }, + /* 32 << 420 */ + { { 0xc3c84fd2b5f54682L,0x3488244e32c3a3d6L,0x08c89d9ec1f6e2a1L, + 0x9a8aadac957ef7deL,0x0a54f7fa07cca3e8L,0x42ad7295ba97bb36L, + 0xd82cae54871ae976L,0x0eb9357204bf46d8L,0x000000000000009cL }, + { 0x2a3aa55f45ae741dL,0x9b2168b79034b489L,0x66c66f0921f91264L, + 0x6823b0fb70703195L,0x5e43efad96e215b9L,0xe1940d5a4ca42901L, + 0x792cdc63294ccde6L,0x5be5af8a4446ebb1L,0x00000000000000b8L } }, + /* 33 << 420 */ + { { 0x34fc69cbeb2f5277L,0xc7e5637326425593L,0xb0a2de928f05fc88L, + 0x143963814468bb18L,0x926b5160a3b0a9d2L,0x8af3d2af44487fc9L, + 0xf93e45cc3bea6d12L,0x6a8eaa6a97905e78L,0x0000000000000184L }, + { 0xe3d982841f13bdd8L,0x8a93c07ad628c0c2L,0x12ff398667d10ab3L, + 0x9de71f1a25b30544L,0x4193e07914788cedL,0xa297bf3b8979e06fL, + 0xaaf8e7c8a5c33e4fL,0x996bad366c986dbbL,0x0000000000000192L } }, + /* 34 << 420 */ + { { 0x3cc8f593e5967354L,0x6526ed4871aed083L,0x6ed8306e8209df3fL, + 0x3b6811db26d0cc40L,0xfecb44243ef4bfc4L,0x7b1d0c977c380292L, + 0x5f8d1053f3155334L,0xd5a6671d57606f5fL,0x000000000000018aL }, + { 0x91a333f78ca15d85L,0xe401daa9ccaff56bL,0xecfc424a3642272bL, + 0x293bac7dc265dabaL,0x3728937d35176032L,0x32095630b8dc5e90L, + 0xa7e40a33fa3238b2L,0x800e51aed54ae7b0L,0x0000000000000112L } }, + /* 35 << 420 */ + { { 0x5e9ddc946cfe150dL,0xe80cdf7de931ae6dL,0xd653e42269e75084L, + 0x5f2457112d80bb7eL,0xdf34a45bbaa1ff31L,0x4aa1544db4555a76L, + 0xbe131a26420ba11cL,0xeef42ea9df8e840cL,0x0000000000000075L }, + { 0x0954b8900447d3b2L,0xb56748258273dd87L,0x4e475be49554e8abL, + 0x9565d985123d94dbL,0x5c2d1bcec2abae12L,0xe74826e441d39436L, + 0x0a61e8d6e5c430daL,0xa6db3e630f131767L,0x000000000000012bL } }, + /* 36 << 420 */ + { { 0x48ac5636f221fa1aL,0x8b75f29afa87732aL,0xa56becb1ba0db0ceL, + 0x372230897daf8fc9L,0xe2569a163d2f2365L,0x52509c3fe5a4b5a5L, + 0x78158611cfa993eaL,0x9935db074f0642b2L,0x0000000000000099L }, + { 
0xbef1547fa883def9L,0x5ea9d1b0c3ffcd28L,0x4aa992f9e8cd0500L, + 0xc259eb625a46f7beL,0x242f5516e7d4212bL,0x7abf19ecd6fbd446L, + 0xc95987380d54d6a5L,0x5622a0476a7b5e9bL,0x0000000000000190L } }, + /* 37 << 420 */ + { { 0x0f55ed468bf89a5eL,0x1cfc2ac48952261fL,0x9c3c347ee4cba9c5L, + 0xc308a0f46c3adde6L,0x01054a9145571f72L,0x1cd4829193982030L, + 0x74fc84ff51306753L,0xdab8cd4e8cfdaa2fL,0x00000000000001a3L }, + { 0xa224dad5cded7237L,0x49d51a129bdf65aaL,0x09a36334a4f5586cL, + 0xba0faddf2ae77a6bL,0x07b748b72f29e747L,0x9623f46e3478ff8eL, + 0x518b3bbe8b3af3d8L,0xc4fee8ad78840f15L,0x000000000000017aL } }, + /* 38 << 420 */ + { { 0x5650b564bd61ce2bL,0x7eaef69224f299e2L,0x2a0df96fe6d4489cL, + 0x812d43518ce9d28dL,0x0815a72b59e9646fL,0xd7c2973b017708e9L, + 0xbb2ff9abd4d799b6L,0x2801293e04228a85L,0x0000000000000120L }, + { 0x509c20ae3c5f9553L,0x0b59b77c393ba611L,0x2635f780da2c8feeL, + 0x8f2a267b41f93f7aL,0xff6659a7537ae88bL,0x9a224d7a7fa76913L, + 0x191113ed42cad3dcL,0x0fc807e43f06db46L,0x000000000000015eL } }, + /* 39 << 420 */ + { { 0xc1ce03c9b0f7ddc4L,0xe9c3a1f6b5545247L,0x45e0feb9c1c782f7L, + 0xe2c1170d5fa69f36L,0x155bf55e15407d53L,0x54d33d0902d578d2L, + 0xf566ec081442a6ddL,0x4a144739f67fe9abL,0x00000000000001edL }, + { 0xfb449e2032d83f99L,0x3efebd58fbbaf71fL,0xbeae9abcf8b67d53L, + 0x64c6c39a442e271aL,0xed0b3ffbd8510f0aL,0x778a250b882c28cbL, + 0x8924bca243ed493eL,0xffbe451e7e1b7b11L,0x00000000000001bdL } }, + /* 40 << 420 */ + { { 0x75b27730fe4c5bdbL,0x00613beabd678c1eL,0xb12a8fe817d6e608L, + 0x5c6ce1ae0158785fL,0x72f9e5dc1ed9f069L,0xde5caa848a05f3d6L, + 0xccfb76716536f0b2L,0xe7378e81b634be60L,0x0000000000000045L }, + { 0x61d85c77f724dd8aL,0x91b6e75dcbe10dadL,0x382bab75adeb3740L, + 0xafa638046b3b344cL,0x136d755fe8d2f35cL,0x45416ba496ee3d72L, + 0x92533b2be175a63dL,0x51facb6560cbc9c8L,0x000000000000001fL } }, + /* 41 << 420 */ + { { 0xa9020bb2bf0ca667L,0x2433b9c5d9660317L,0xde04b705f3de0b9aL, + 0x46fd25b653ec2cd6L,0x0448e5dd4960b51aL,0x4c06d6a16721a33fL, + 
0x19f91ed6655171b5L,0xe5974b3f78df179aL,0x000000000000002eL }, + { 0x77b2230d27199e71L,0x3a655385f30abd35L,0xf5e630e34fcdf51dL, + 0xd65f90c494535658L,0x01b3c3083f9ebddcL,0x209f2e378f886a9fL, + 0xc6145c12d24b1178L,0x1431a5087dc07344L,0x0000000000000119L } }, + /* 42 << 420 */ + { { 0xee3406d1e638a4aaL,0xd688467a2fffd8d4L,0xe56cbf66141afb81L, + 0xa3b39278d2093407L,0x4e0e475b63f0a6a5L,0x0cde9be59e82370bL, + 0xb20476e7f6a08824L,0xc9193506ccbbb74cL,0x00000000000000caL }, + { 0xe4d9074c6fa10c37L,0x4519eecae5095ad7L,0xfc5f4e21a6b9417dL, + 0x14bafee71076f557L,0xf6999f8b3b0d7e6cL,0x0fff28293d147610L, + 0x9db346f7c1dde148L,0xc3648d0cbb06be0cL,0x00000000000000adL } }, + /* 43 << 420 */ + { { 0xd9b2900c41e98124L,0x535389b81e5e8d0eL,0x1eafd301052df92aL, + 0x6fb1af8781c04ce8L,0xbf53896454faf2a5L,0xa6d771c9c33d4cd5L, + 0x3cbc0b99b7985721L,0xa8fc80fb4bffec76L,0x000000000000001bL }, + { 0x669d9031eb22ffc9L,0xff1d9af297694f7eL,0xa9583293f7b30304L, + 0x8c0ec270de3e2e04L,0x6e6490c9ac3051afL,0xe554e8fdb77d165cL, + 0x2fe899dbd7547c9fL,0xbcd900fc5d034decL,0x00000000000001ccL } }, + /* 44 << 420 */ + { { 0xcd1ed5190f67298eL,0xbd6ec57814d77ae9L,0x6d8b2ab4f38a5da3L, + 0x55d9555be9a51909L,0xc4988f443f290779L,0x403297f645c00586L, + 0x1df14102017a5468L,0x81aba58618d98daeL,0x00000000000000e8L }, + { 0x8fccd75d3d78fc68L,0x727adeed26fb1966L,0x1e7cd41598a17cb8L, + 0xba026b83de88062cL,0xc11e4bfe933e04c7L,0xece90a097abd303aL, + 0xf09a827b0b73aa6bL,0x31e7471f9fe9d6deL,0x000000000000005eL } }, + /* 45 << 420 */ + { { 0x65d2e86156ac524bL,0xa0f398c44dbd7a05L,0xb51e471f247e0ebaL, + 0x6fb7515e1cd2b3c5L,0x8d4129cc4019d508L,0x1523c52fe21dad33L, + 0xe929492f2319f582L,0x76c5862899d946d3L,0x0000000000000160L }, + { 0x4cc4e9553641774aL,0x469e1d4c39518dd0L,0x679e1cf2dbb4dd35L, + 0x5a34378a7e756f2cL,0x09fe4915534df7d3L,0xbf6a2c47eb2dd58eL, + 0x073d3889e26761b2L,0xa6edc4da1ba2ad04L,0x0000000000000133L } }, + /* 46 << 420 */ + { { 0xa43aa7fb067722b4L,0xc7bd872e2d19638eL,0x04d8838d6683f1acL, + 
0x6df5e4441167542eL,0x7aa48da719d4b02fL,0x81f9fd58f6237d71L, + 0x91f5815c17ba998eL,0xb9613d178d8d72faL,0x00000000000000a7L }, + { 0xf529d3986a6a407fL,0x6d22899caccae345L,0xd126146a563863dfL, + 0xec3241f617f1f2efL,0xe09a57205e69f591L,0x8ef992d3c00ffad8L, + 0x872822af82dd79dcL,0xa8cc65a6adf02b46L,0x00000000000001e3L } }, + /* 47 << 420 */ + { { 0x67777d056c680d36L,0xcee0e9eb765a8da6L,0x8e18422e2410bd23L, + 0x087b9b2bd73057baL,0xf6ed0c4373a435b8L,0xbae2117c0aa2620fL, + 0xb97d1cb5d9ba7028L,0x417aa6bec3f20528L,0x00000000000001d8L }, + { 0xa6daa33f114871ffL,0x889c853e69e3c506L,0xfb5cef1de42b8ed3L, + 0x249b0fe7e2ec351fL,0xbb94de883fcae45dL,0xfbfa5c9f3a94cc20L, + 0xb80b5bba253c1736L,0xb23924460770b8ccL,0x0000000000000171L } }, + /* 48 << 420 */ + { { 0xc7d454186a52747dL,0x034cb6d1523d74d6L,0x073a404e0e2b859bL, + 0x9a38e6d89345da73L,0x4c4771d5c7fdcce3L,0xaee8c00c025c1423L, + 0x10c94e72d7203e0cL,0xea9f58bb4547289cL,0x000000000000000aL }, + { 0x030c1df531b952a7L,0x8177a6e11d47e37eL,0x9e2222b0de6f3c0eL, + 0x1ed75c871399560fL,0x9b818c5d829af1e8L,0x75c7751945db80a1L, + 0xfac13445d73c446aL,0x91a2741362844292L,0x0000000000000027L } }, + /* 49 << 420 */ + { { 0xcb7db4ccc19e2d86L,0x829ccd00df87335cL,0x8d2d73e940ed4392L, + 0x926cdff5b78c8289L,0x1c76f094e7cc750eL,0xadeb089fa3a6923dL, + 0x1d1946c3167dc508L,0xca796f6852a57472L,0x00000000000000baL }, + { 0x94ba42d7a1526724L,0xee549d5ca5fd1158L,0x9e5da9d47645a4d0L, + 0x05d4d6efcdd6a638L,0x5619dcf729ddb41fL,0x34ae766d98d04e6bL, + 0x125b901956b51686L,0x5127a0b08ce76994L,0x00000000000000fcL } }, + /* 50 << 420 */ + { { 0x8040cbe83e0a2ad9L,0x8be1ff91e6cf93d6L,0x03b887834d371ee5L, + 0xd70eb00752a3f93aL,0xc3cdb6cfd333a1a4L,0x5edbb599b8f27078L, + 0x339236a4e1faa8f0L,0x70c8fb9a461463d7L,0x000000000000014fL }, + { 0xd55dbd41033fc6e4L,0xb53ee32b3a7e3f55L,0x2274a2d905b7e980L, + 0x9b037c48b27fa57bL,0xb0584718ce3683cbL,0xfe230ee62cfae448L, + 0xe3f0c7eb7fdef3bfL,0x5c3af24ba17e64d1L,0x000000000000016fL } }, + /* 51 << 420 */ + { { 
0xe08921a1d2295f2dL,0x7b16da7bc2d62e51L,0xab35c918b7fa9ea8L, + 0x809eb392dc39f49cL,0xf094f0be00be5695L,0x1a7d60ef782f29ebL, + 0xf0a9dd07968759d0L,0xdff0d6565ada0b06L,0x000000000000014fL }, + { 0x0875da554d4c798fL,0xbe726982435ee34eL,0x1e880746a069c9e9L, + 0x0ea957f7eb120213L,0x5f88c63e19f064ebL,0xe07a2047c8a7d994L, + 0xafd33fef4aeb2130L,0x4b7ad49154e880eaL,0x00000000000000ceL } }, + /* 52 << 420 */ + { { 0xf090e674c02453a8L,0x95c5135eb672a218L,0x5cfa269b7ad4f0b4L, + 0x35da68b2c36c7952L,0xa8fbb5981f48b8f3L,0xbe2cf8255897269aL, + 0x503c997f3d216a5dL,0x365f223401fba58dL,0x000000000000008aL }, + { 0xac2cc265454cbfadL,0x36c3406228c1232aL,0x55d6f9f6d9c91dc5L, + 0x7ee8f96e911d7488L,0x7f9b42d548f93862L,0xd6e242b2add639dcL, + 0x03131280d2d1eba6L,0xaa3096d2db5b01c7L,0x000000000000009dL } }, + /* 53 << 420 */ + { { 0xce23ccbc11c799f0L,0xb09e370e6d8105a7L,0x05002372a187616aL, + 0xc34d46e4f41c6585L,0x365fdffff1641b50L,0xd78b6bd5d5283ee4L, + 0xd5855eced0d84ea5L,0xf3490e30a484c1d3L,0x0000000000000093L }, + { 0x03d85dfff30302e3L,0x800f89f9fbcd54feL,0x75a431e4a8a7bb3aL, + 0x46a785f87b30b1ecL,0xe90e2b361de5152cL,0xb5b65fcde4ed0df0L, + 0xd8682ed77fd1e92cL,0x805419c72392b876L,0x0000000000000177L } }, + /* 54 << 420 */ + { { 0x036072e3602365d6L,0x0eb6814b499d5b3aL,0x02dabe21585e81b9L, + 0xe58f4a6353272f40L,0x442c5511aed0d19aL,0x8c880c028d85cbd2L, + 0x187265ff24a978b1L,0xcdd62d82452fcf0bL,0x000000000000014eL }, + { 0x8142a2909cd1eeedL,0xeb03b362317c3443L,0x7814dac5d2cf66e1L, + 0x3d24c15dcdd1a642L,0x1579027d89e46eb8L,0x25bc682cc0c6830fL, + 0x952370e314712b5dL,0x579a2c0fa50221c1L,0x0000000000000071L } }, + /* 55 << 420 */ + { { 0x3a88a023b129f90aL,0xaf0c62ef2525b31aL,0x89e75e13756fc51dL, + 0xb3ae0d9d7e69c4ffL,0xf8cd4b08756c7379L,0xbfb1d6468d905b47L, + 0x35b759f3abfd39c6L,0x67ec7a6a8607a1fcL,0x0000000000000053L }, + { 0x524ec8171b34aa5eL,0xcf1f37cbcce1b38dL,0xdab23e703413c644L, + 0x4463a1e986c8f8fcL,0xfbb14a202b600686L,0x98b964a57c7bbd38L, + 
0x1fe6c9397c1dce81L,0x5ae65ee42a9950afL,0x00000000000000bdL } }, + /* 56 << 420 */ + { { 0xcf77ab4e7c68e159L,0x941498eb23a412baL,0xddd4b9272ac19495L, + 0x516bfa1b32e1f686L,0xcd8eb8a8702d0649L,0xf0563debe4f963a0L, + 0x1bf5e540fcd5127bL,0x0d588b6454eaa78fL,0x00000000000001f0L }, + { 0xfa0dcab27de22d97L,0xc5760a0f5f48ea48L,0x0f8b46db3d33e0e8L, + 0xa7a1f86fac66a28bL,0xbf23e7cb64923dfaL,0x8a7bdc180ee51801L, + 0xd4d0d78ef5ffe76dL,0x54edfe409408a217L,0x00000000000000aeL } }, + /* 57 << 420 */ + { { 0x55f79f5cb58dd212L,0xf4801c93dc1260adL,0x658b2bf530175013L, + 0x5d2a2e5fb90c6463L,0x8adcc6414c2f1448L,0xf4fc68fc1898dc30L, + 0x04b471f888c1b9f0L,0xe70fe4ba798487fdL,0x00000000000000fdL }, + { 0x24ccae8d0d4d95d4L,0x3be4976338fc4b49L,0x9586e6997912aedcL, + 0xb3aa52bfb74cc6b2L,0xd96d0f460cc9b267L,0xf02f4f5a27bfd74aL, + 0xb23ab22c46682e37L,0x9fefdfba01157990L,0x00000000000001fbL } }, + /* 58 << 420 */ + { { 0xc90121b8f156217bL,0xaaf825757939b674L,0xf60bf6a8dda00f0aL, + 0x5d5e16d8eafba2edL,0xe920fa52f2b9d2b9L,0x4128025e7b9fd078L, + 0x9abb0d5d331b9fdbL,0xaecee7f1b6dad1b1L,0x000000000000003fL }, + { 0x4441c32ff77c8fe4L,0x3896a2fa0b254727L,0x2fa20f175bf54dabL, + 0xaef65731d543e3feL,0x2fd6c568ba9dd36bL,0xcf4d161a1fa960f2L, + 0xdb09ef161aa90dd4L,0xcec37c14a86b9292L,0x00000000000000f1L } }, + /* 59 << 420 */ + { { 0xd9f6ad7dad254f14L,0xf94afff19d0754d7L,0xc7fcf1aa7390a63fL, + 0x3e1401b4dafaca98L,0x23119a5fb1b00455L,0x2748906273ba24dfL, + 0x1786deafa8263814L,0x147175799fc6cbbbL,0x000000000000017fL }, + { 0x79696e9e66467ce5L,0xbf7b7b6a3df74e9aL,0x9f13032837bfea44L, + 0x63fac8d5afcbe8dfL,0x0b3aea515eca7aa6L,0x77a62c62deeb47b2L, + 0x0afe2fa8ed28a210L,0x1a7e4b557e34cfc2L,0x0000000000000146L } }, + /* 60 << 420 */ + { { 0x2d1c3c48a6c00b46L,0x8e2190ddd2e6f87aL,0x3a98a1d1a272ae2eL, + 0x38ee3366d3fe5d39L,0x57fc9b50c56eaf4aL,0x8593e95fc6fe6f1cL, + 0xae51c967e1d59d7eL,0x2a5dbf23f599aa76L,0x00000000000001deL }, + { 0xaedd9ccf7307c238L,0xb26cc95d943b18afL,0x58338caf644bef84L, + 
0xdae311b916f04018L,0x6ba2c14073b20692L,0x7d1e2f7269fcd5dfL, + 0xf9a0763ce750776cL,0x17496ac5f959c158L,0x00000000000001b5L } }, + /* 61 << 420 */ + { { 0x6cd44056369b1126L,0x76163c1b5053300bL,0x9ea62d1ee93928b5L, + 0xb19ad078adeb0c1eL,0x0df14acad93446b9L,0xbeb6ed1725011b4eL, + 0xb1cb0790181ae9ebL,0x39777187d2012ffdL,0x0000000000000084L }, + { 0x7017109513039577L,0xf2401d4a070d136dL,0xe313554aa0841a4eL, + 0x0fa389e5e8d0c631L,0x2ddd35cd89da61a3L,0x0baa99b9c6d8d4b8L, + 0xc10864df82dbd5cfL,0x45d1a07927e73c87L,0x000000000000014fL } }, + /* 62 << 420 */ + { { 0x2b23cfa513f0114fL,0x150fd33dbc4f0b34L,0x12c3170a9ad79bb6L, + 0x46d0cb95b1beb85bL,0xfb7ffa77416e67a0L,0x342c023411484398L, + 0x7a141e512f2b0cbfL,0xb78b3cfa27ab4f9bL,0x000000000000010bL }, + { 0xb5e185cacd2372daL,0x6c10c435389aafc8L,0xf00d3d16deebb0a3L, + 0x15de86422e5f7c40L,0xe3855c8935e337bbL,0x400e20877cfc7e5aL, + 0x8ee2550c2e37e8b1L,0xd0621db2323e0fb9L,0x0000000000000083L } }, + /* 63 << 420 */ + { { 0xa8116eb9db8f861cL,0xf130b16a474aad84L,0xaa22dbec029b6996L, + 0xd71690ff74108051L,0xc18b20fbc8203260L,0x4768ee8e652d96b6L, + 0x6533a2229ca212b8L,0x282f0d5a448c7ce1L,0x0000000000000155L }, + { 0x8dcbf8c7a62c88b9L,0x899d99fa49028cc9L,0x6b585197d1d43367L, + 0xf977bac51bc4e9beL,0x9a829b1ce9a20a04L,0x2ae79e6892febd86L, + 0x688bf2ee19feb821L,0x3e7f2232c81dfa25L,0x00000000000001f1L } }, + /* 64 << 420 */ + { { 0x3d239c6b6a81c377L,0xbf4d092e7bcf552fL,0x3d2af909abd2cae8L, + 0x3d5ae04cebef1533L,0x9c57ddd8c7d656ceL,0xa345ac44c552693eL, + 0xc66e8710b604ff42L,0x5743b43318cb68b4L,0x0000000000000170L }, + { 0x2ae332c9427e80c7L,0xc3c529d4dcaed647L,0x1d1a183473f44043L, + 0x4f5155d63b71f27dL,0x4239074473ee7e4cL,0xb23433b84e09e697L, + 0x9a4264daa60c57c2L,0xa7f72ff8486992ebL,0x00000000000000b7L } }, + /* 0 << 427 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 427 */ + { { 0x1352b764a2839e80L,0x61a50d9716058b22L,0xbd02c6879d6ae08dL, + 
0x1349ee8b6679516fL,0x3d7e69c9bc4c2fa2L,0x4c2e12a10d24e1fcL, + 0xc69535a147436039L,0xa477f86b0ca25c70L,0x000000000000011dL }, + { 0x756e379c2a4c1871L,0x4d0b08b35943df83L,0xd8a9b302cd8f5abbL, + 0x53865d1e8c240617L,0xd422a9b114a2e093L,0xbdbc3d7d03014fdfL, + 0xa4c8b35a11578dd8L,0xe4427ef87a3afbc1L,0x00000000000001c1L } }, + /* 2 << 427 */ + { { 0x8af346c57d874400L,0xd9e98cb571098f45L,0xdb40081079565740L, + 0x0f7625ebed9a7dcfL,0x5d1415d0c93d2542L,0xf87f74b4688d0692L, + 0x9fa2e31b9f648523L,0x4e505024cb387129L,0x0000000000000006L }, + { 0xe2087f9c446e7206L,0x60f69447b48bb585L,0xda3658622020bbf2L, + 0xc8a263673cd51256L,0x8cc663b1c62f7c5eL,0xc86d4165e72bbd20L, + 0xffce9039aac68be3L,0x6c6c747cc3c360a8L,0x00000000000001d4L } }, + /* 3 << 427 */ + { { 0xa74637fe39b2b1cbL,0xe81e51e9f280efdeL,0x450ba9ae6356c12aL, + 0x0d23bf43d9768f5cL,0x686e344b1c4fa118L,0x3b6f0edf9ecf4874L, + 0xf51830635a22fdd0L,0x6ef7d57de9ae039aL,0x0000000000000046L }, + { 0xa6a7f6f54fa66238L,0xd61028909787026dL,0x12e10e29801cc4bfL, + 0x660a4c82e98a16b5L,0xc22e4d53415ec2e6L,0x4c0ac2c10e64c2faL, + 0x2d2d2bc87c576994L,0x843f23adfaafdf3cL,0x000000000000009fL } }, + /* 4 << 427 */ + { { 0x290e4ff1781f48a0L,0x020d9e3d66d6f7ebL,0x1c0da98e084d19d4L, + 0x42c64660030dd190L,0x996602dd29aa0eb8L,0x7719fb105d0d86e5L, + 0xe20032629145140aL,0xd2dc210c97e687a2L,0x00000000000001e3L }, + { 0xe704ecdb18c76449L,0x30a4377044131380L,0xaec3cf4b372cf9a8L, + 0x4033c8c03df882a5L,0xa5e23683969af81fL,0xf42c5b9580263b3aL, + 0xbff4ed559323c9c4L,0xa1d992b800ef132eL,0x0000000000000100L } }, + /* 5 << 427 */ + { { 0xc9701a1591849a47L,0x05a00e8ceb572ad1L,0x0d5062393cf6f9dcL, + 0x6434131bb421b5ceL,0x1142248aa0c289e7L,0x9b5a89d75735da7fL, + 0xbf8377d6e3ff625eL,0x076cc0af36a7fa2eL,0x0000000000000173L }, + { 0x8e8aab15aa837baeL,0x96c61e2b389fb0b3L,0xe4f00866954a8633L, + 0x9db8fd3f8311855fL,0x317810367e76eae0L,0x4633933d1aeb3b62L, + 0xc47a2dc1cec925d4L,0xae1c392369aa084bL,0x0000000000000002L } }, + /* 6 << 427 */ + { { 
0xad854bde1842eb75L,0x52d44a39fd913403L,0xde2d5fdd964bf528L, + 0x00fc48b0be2d138fL,0x497778287968e858L,0x326f8a6379b5f778L, + 0xb093b6d8026ee753L,0x36bd9300067c336fL,0x00000000000000b7L }, + { 0xf63d269d3d3b0373L,0x7fd4555e8901ce7aL,0xfeb8a5abf0c00754L, + 0x2b855524d1e4f38aL,0xeb245c9a10593ea9L,0x8d0428915303ae29L, + 0xcc86c4617be4ec45L,0xd05d843bb1a35c03L,0x00000000000001a0L } }, + /* 7 << 427 */ + { { 0x2c5034d201ceebebL,0xe55562ee8a5f5149L,0x72de18472b04dec6L, + 0x59f580cc12742363L,0xefc95b49fe90504cL,0x1f7355ee93d13fb9L, + 0x2ea3bb73ec8811e6L,0x8f5c272a92208e52L,0x0000000000000068L }, + { 0xeba0c66dc7f8b064L,0xda096db86441a58eL,0x345dce44b56dd9d4L, + 0x4adfaf2326fd92a4L,0x35c0f9a74a8d2df2L,0xa2a0f0cf8a6afa89L, + 0x05a14a6862097318L,0xb10a3b7dd8a0bf21L,0x00000000000000dcL } }, + /* 8 << 427 */ + { { 0x4a3ba7d35a80eea6L,0xafabc45f138ed8a4L,0x8500a36b08955eb2L, + 0x3d6b2ec617a6c2afL,0x23470f5958d5e66dL,0x5db86555a1df0eacL, + 0x3a2e16179974c9bcL,0x08480d1eb4dfe2e6L,0x00000000000001d8L }, + { 0x89ec8a91c3dca584L,0x6c06d419f2aac2beL,0x427cea3ca57e58c9L, + 0x54ae1a697b0b1b12L,0x40b5c77e3965ce2bL,0x3223c7e53ce07598L, + 0xc2df893146948ff6L,0xfd997c8655d12af0L,0x0000000000000155L } }, + /* 9 << 427 */ + { { 0xcdba58e2ed6d1a95L,0x345e6a00b4ca1bdaL,0x1c65f4f8b08907d1L, + 0x99506f0ad15a285cL,0x602479cf8c31df19L,0x935e607397b6aa05L, + 0x57d3627e1d814bf3L,0x4161619e26d070f0L,0x0000000000000085L }, + { 0x5ef502f18fefd13fL,0xc0a551ba2241012fL,0x655978a86c235318L, + 0xf708895cca82f195L,0xdf32de1a7c5dc00aL,0x53014f9711d92fa8L, + 0xcae56947517ce815L,0x58e0469551904244L,0x0000000000000014L } }, + /* 10 << 427 */ + { { 0x615a13d91bf64aabL,0xad0c56a0ee125e48L,0x5622df0d2f1ab618L, + 0x7331c9b97e465f36L,0xe9f79f235e69238fL,0xe71d8ade22200715L, + 0x0f2c7539c1111751L,0x0a3e1894b9287b5eL,0x0000000000000172L }, + { 0x20e9e6d2311c0623L,0x2d2ea9d91cc824f0L,0x4f188539344e1b73L, + 0x468983b4a32ce555L,0x5a610f7ab185efc3L,0xa65ed115dff9f69bL, + 
0x61f20b69f79d8be4L,0xa496802121d50124L,0x0000000000000036L } }, + /* 11 << 427 */ + { { 0x6e2e5357dec7c016L,0x4331a55e19d6b3bdL,0x03e618cbb8047f97L, + 0xcacb5933d8cd8c09L,0x0121db5bae62c20dL,0x7347fc4d471bd47bL, + 0x1dbed250b2369abcL,0x1385da8b0b503a86L,0x0000000000000184L }, + { 0x05b1de38c3dbed55L,0xb9e877fc80818aecL,0x28143964ac119fb7L, + 0xce773a4ccc648a43L,0x1f71b5d4d0138213L,0x0029fab77e42530eL, + 0x789aefc3fa18d353L,0xd6a441614d6b5903L,0x000000000000007aL } }, + /* 12 << 427 */ + { { 0xf393bdb41c4e903aL,0x7c7be529356e8b65L,0x17cb86bb5b85085bL, + 0xda6493819224b373L,0x53b88eb810832b05L,0xfc02cb4798c0250aL, + 0xf46c49940f0221ecL,0x28376e37eac2ffc9L,0x000000000000002dL }, + { 0x2f6c2ff9afc8827cL,0x12ff0f5917ef8723L,0xac1aa2d22455917dL, + 0xad3764b0f8910f17L,0x5d4a355c3bdf893dL,0x7387c7a3eeee0b8fL, + 0x9b3e2f884356038eL,0xf4affafdc75003b5L,0x00000000000001aaL } }, + /* 13 << 427 */ + { { 0xc1e3b4605aaf5b29L,0x81e82a8aae180dafL,0xf37efa0d445e87c0L, + 0x599a537e16e6330cL,0x85bc6d9706c410f0L,0x28d5a15fb362500aL, + 0x2ff92fb621b87c85L,0xebba633facf33979L,0x0000000000000105L }, + { 0x7fc199a9dec8dbeeL,0xef2e49ebc489b71aL,0xd7f6fce89a528946L, + 0x169d0a2ef358dc52L,0x6a61d2c891922c6cL,0xd2aef478e59327faL, + 0x6def24948ef04d83L,0x253711d72f66173aL,0x0000000000000092L } }, + /* 14 << 427 */ + { { 0xd7de8cd0fc9be442L,0x0a52e40fc9dde6a1L,0x05a4ff641c7149bcL, + 0xcc966d0d5f2f3aa5L,0x5ece6a196e2d25cdL,0xcd0a1086f6441a77L, + 0x1c27360085904d79L,0x44a59f2ecf8c85acL,0x00000000000000aaL }, + { 0x266fa397db6bfcdeL,0xf68d7a368bf6394fL,0x0d19162214a079d4L, + 0x3fc59bd33f93fde7L,0x9edbb13e691c417fL,0x50a5045a4b6d9aa9L, + 0xf5f738d8a35b9642L,0xd0fab4f3471d4f3cL,0x000000000000013aL } }, + /* 15 << 427 */ + { { 0xdf87750b40973d15L,0xbaf02e703fa8caecL,0x7020957bc8144da8L, + 0xaf4275cbd2578871L,0xe669df8d0500056dL,0xa432b1d84391f122L, + 0x576be73ea4c4e08dL,0x63de36a3db2e9164L,0x0000000000000025L }, + { 0x5e3a636c0af7f448L,0x7abe9a07bd788577L,0x6a39bb248b212db0L, + 
0x1d99e0a5aa72098cL,0x75df398dc0ed5bccL,0x730e175316ac1e36L, + 0x5029aa6376079634L,0x30538a42216528faL,0x000000000000004fL } }, + /* 16 << 427 */ + { { 0x85c4bd23ad5b7e60L,0x9a21b0b0f41a63b8L,0x368399f11991539dL, + 0x67f2c9217b2b3b1aL,0x7dc0513338b4a267L,0x3ebee98741104a8fL, + 0xe73da9a9f8687167L,0x04b9f294155f4eb5L,0x000000000000007fL }, + { 0x1adb7a1453dcbf7eL,0xfbc526ae08b7789eL,0x004fbf8fa1f9c5cdL, + 0x7ca3ff9b48b46608L,0x08a90d3083829a47L,0x90a442adfe3c66daL, + 0x36d2d6bd4c69b3fbL,0xd5307e6b67675cc3L,0x000000000000000aL } }, + /* 17 << 427 */ + { { 0x8523cc86f38cc458L,0x6f95b125c82eb162L,0xde7e9372e4770eb9L, + 0x20f67d244ffcff31L,0x88c19b6d857e009eL,0xa6e94060846c385bL, + 0x9b4a3fcbf5f6e292L,0xfd730d62d494452dL,0x0000000000000053L }, + { 0xf1d11d02d9c01d92L,0xbee1f672d81f0bacL,0x43333ad7d8fa5d6cL, + 0x3ea8f6fe8014dbc4L,0xe69afa5323c7799fL,0x1a60df1e437b3c02L, + 0x3449cba2fdfeb094L,0x79636fbbe1d7af5aL,0x00000000000001bdL } }, + /* 18 << 427 */ + { { 0x38d50ae2b1340c26L,0x98d9a11bbd6c48bdL,0x71342c9d2a770fb2L, + 0x641b25aad42c16b6L,0x9b9caa6900a7b958L,0xb2e0f3d21c6f21faL, + 0xcaa3c0854ebdff9eL,0x907443377ff33500L,0x00000000000000a5L }, + { 0x680bfb3c9295497eL,0xcd4c1cc25be8f97dL,0xfe7dee3bcb911f56L, + 0xabbdfd216980d877L,0xc134efd566109f84L,0x83200bc03adbce88L, + 0x1ae3359ddecbf40aL,0x4f63adbe2135d328L,0x00000000000000aaL } }, + /* 19 << 427 */ + { { 0x0afeb995ee429693L,0x1a1793aed33c2875L,0x3011c348aff376dcL, + 0xe14d4d40fa291a4dL,0x2bc85ccc328ce212L,0x51afc390ca16ac0dL, + 0x7f1f2524cea3c7a8L,0xfad2721775fcaef7L,0x00000000000001f3L }, + { 0x23419bb74f5163b4L,0xe4520607ab546931L,0xb6f171a4246b5851L, + 0xf6f00f0a4c8e6602L,0x4a84dc07f25bba45L,0x9f1eff155959d040L, + 0x686a15867d49ceadL,0xec9e5a8da8990689L,0x0000000000000173L } }, + /* 20 << 427 */ + { { 0xd35b9e3138ccdc36L,0x193d19235b283364L,0x3f11086716ac2730L, + 0xc858771ef63f67b4L,0xfa465ba5426ad999L,0x2f23aeb634208c28L, + 0x1804c3f488e3b573L,0xd40adabed5865046L,0x000000000000018fL }, + { 
0x43f3fab933b8a100L,0x854d92b7b1bdaae0L,0x71b54f79564bdc47L, + 0x45a21a1a1a5ba2b9L,0x34e2636ab2f40276L,0x30a97302e43c1e12L, + 0x1c8b46c36422eed8L,0x788d649283623652L,0x0000000000000055L } }, + /* 21 << 427 */ + { { 0x9fe3e597bad87407L,0xbcc2af4e9450ec40L,0x45a510949f71d8dfL, + 0x5209eee4afaf3e95L,0x029c59ab77b5c348L,0x16f2acc25e10ee90L, + 0x640a7d135b710171L,0x51eff75a282ebe98L,0x00000000000001f0L }, + { 0x38626ea764fe09f8L,0x8503a9d745408d36L,0xeff27b96d1a3ecf8L, + 0x549fbb5cd78b52d2L,0x95215d10884f4653L,0x2d1fdb98b1ae33ebL, + 0x0e0cd5f91008a6a2L,0x43c9181326d87e9cL,0x0000000000000031L } }, + /* 22 << 427 */ + { { 0x13cc8a3798a740faL,0xc99aa3ed2632ece5L,0xeaaeecd6b9e1c72bL, + 0x3aec5c4956487374L,0x89a403cc14674797L,0xca0ea19a6e0c69ecL, + 0x86b528a68ed9f435L,0x06be957dbce563b4L,0x000000000000005dL }, + { 0x35d9c838ca34abfbL,0x0dd8cb6d4f813c5eL,0xaac57a0356dfecfdL, + 0x0f11909337223e93L,0x5f8781c95b447b8eL,0xab2f96c0ed13b66bL, + 0xd38636ebe3af9074L,0x296c578268da2b8aL,0x00000000000000e1L } }, + /* 23 << 427 */ + { { 0x4a1ed69265e97671L,0xcda466e2890bce4fL,0x3dd5210c31b4d676L, + 0x7061885e97026fe6L,0x5f50b6f7541cdf81L,0x1c1c3fd8fcdec849L, + 0x77e19a9781cedc49L,0x35c05a1359b27262L,0x000000000000016cL }, + { 0xe74e401b05aba6e3L,0x1da6d83aaca696c5L,0x6e9d07b3549455e5L, + 0x5a121ee1c7dcb5d0L,0x34346781d46f00c7L,0x534b1136ecacb619L, + 0xf196cbf3b24e6e3eL,0x7a31c822aecc0b23L,0x000000000000011eL } }, + /* 24 << 427 */ + { { 0xdfd43ebda9f23142L,0x707183f61ace42feL,0x2db54733954f2170L, + 0x1a10e785e3a33da2L,0xef315020b848d1cdL,0x179454ce5ecdc048L, + 0x845369cf13e1aee8L,0xb924faa0b536990dL,0x0000000000000012L }, + { 0xecf9a0ef7d22a7b1L,0xc8dc42e20e95f354L,0x2db31a2ff194a767L, + 0x31063e42b67924efL,0xebc3ca6c48537ba1L,0xc59746eba8363616L, + 0x6c45f60203d2c783L,0xefc327471e59fd7eL,0x00000000000001a0L } }, + /* 25 << 427 */ + { { 0x38709b4b8eb4707eL,0xa07de12525ca12f8L,0x0cd83aa26c2d4220L, + 0xb916ea38f1853536L,0x8d2d7f602bcb2378L,0xc7f353c265863a3bL, + 
0xef8293aab05aa95dL,0xc99ce91fdb80d1c3L,0x0000000000000077L }, + { 0x66feb65f5751344eL,0x89587ddf80016cccL,0x9732106be14e338fL, + 0xa0cd0f26164039f5L,0x3a4ac3f1c656cfdbL,0xe91d606cef332647L, + 0x7c8dfb2328e19b43L,0x6d94937494d26529L,0x000000000000006eL } }, + /* 26 << 427 */ + { { 0x393a7dcbd3c3aa79L,0x8a3a2240a23c9656L,0xdbc3a5f8cf3eefeeL, + 0x128e5df243801f53L,0x2915bd679f3b1e81L,0x9c317104741c8852L, + 0xdad2c02867107eb1L,0x17c5c24994300eacL,0x00000000000000ddL }, + { 0x8e33383090087c9cL,0x3b477580533f2a2cL,0xb6395d04a0b8fc27L, + 0xe16f86d626ac5091L,0x376968721370dd63L,0x17277a7f0dbda08bL, + 0x7cc056e800825ec2L,0xa37c952239724e7bL,0x000000000000002dL } }, + /* 27 << 427 */ + { { 0x16df84f88cdcb3d8L,0x08e2203eaa095d0eL,0x60356444b912565dL, + 0xdb9fa32bdf68d347L,0x37a7a24fddb99999L,0xbec968d30327527dL, + 0x9ced58e7e0d299c0L,0x3ec26b36eb40f891L,0x00000000000001a4L }, + { 0x51fe548e3cbcb1dbL,0x9c7b645212019828L,0xafece3855d8853f8L, + 0xa5b7e31c373f3ce3L,0xfe574de1a56827e6L,0x6b717d575b124f35L, + 0x3478142156048b07L,0x2d0f9a599d2a86c8L,0x0000000000000131L } }, + /* 28 << 427 */ + { { 0xa8d55ba1f6f83320L,0xe7493496c4ee60d0L,0x55a0e10fe16bb4e8L, + 0x7da768acaf161711L,0xeea132c0be96a3b5L,0x4a09ffff7fb41dcaL, + 0xbd69c04713f6bf90L,0x4aeb854a82509b35L,0x0000000000000156L }, + { 0xcb9c060d70f0c260L,0x91fab0dbf52f971dL,0xd2fac4c89dbcd806L, + 0x92f23fa3266b3438L,0x01bb7d4b56ad0784L,0x0216d0acc864ea06L, + 0x8c126be50b55df14L,0x9541a18baa2a1f85L,0x00000000000000fbL } }, + /* 29 << 427 */ + { { 0x10ed68cf51480510L,0xfa750fc26cccef7eL,0xc0a95e54120622e6L, + 0xa7b8a3eb98cb5878L,0x67b9482d47b7a85aL,0x7ccc6eaafec17f20L, + 0x0556723dce3e75baL,0x3e047342181e55baL,0x000000000000006fL }, + { 0x58094d0329e6c93fL,0x3fc1c1fa454c358aL,0x303c9624fe9d019bL, + 0xfec90ff16d018035L,0xf15f05418266f9bdL,0xf3fdb456cacd7be8L, + 0xdf7d1fc03dd66192L,0x2b9617b8c3f9acddL,0x0000000000000027L } }, + /* 30 << 427 */ + { { 0x3f8344d51426b278L,0x8f7ddf8a9a9b479bL,0x1a964eb3c669901cL, + 
0x6fa4cd8fcaccb2c3L,0x65facc3706e7ad1fL,0xf29ffbb490a7f25aL, + 0x3d1ad490e68a505eL,0x5a5cd3fa88d6e4e6L,0x00000000000000baL }, + { 0x9ca4b1fbb503a9c8L,0x94938bf8ad8685f3L,0xe9c8fb4af178c339L, + 0xbfc000d25ed5928fL,0x7b8b0f6ee7298bf4L,0x46c7096a7fba0571L, + 0xde65dc553dd656a8L,0xee6b622341a31a9dL,0x0000000000000196L } }, + /* 31 << 427 */ + { { 0x17deaa271ffd501eL,0x2376a41b0d4ff4daL,0x35928095d8cf506bL, + 0x1b4ccb2aa9312ac8L,0xd971fc980fc0131bL,0xff85e073c4e30e91L, + 0x82420dfdf59f79d9L,0x81961c5f5d12f268L,0x00000000000000bfL }, + { 0xa3167cadbc31557aL,0x5e3000588089f9cbL,0xf3b49fcaf6ada736L, + 0xcf3a28d0502a7f0fL,0x547068bd5e020aa5L,0x9a8d88ed5537e4fbL, + 0xfe8a3d5693f0d6adL,0xf8e02f75d71aa5a6L,0x00000000000000c6L } }, + /* 32 << 427 */ + { { 0xa66cccc8d6e1bd72L,0x1ea4e9214ae2fa5cL,0x66775af22d021fa7L, + 0x6d711dfb2a4c4677L,0xddaae8bb5ae380b3L,0x53158994600f11fcL, + 0xfeb122dbda94d443L,0x7cbf1afadffc21ddL,0x0000000000000006L }, + { 0x03cfa731834bf891L,0x4e016085630b8032L,0x90cf9b5949db4d96L, + 0x96fd2614335c9dddL,0xc02145108e3b98fbL,0x6ded06331da2ea2fL, + 0x2bda6d769d0ead76L,0x4ba00e99f3e4fbf2L,0x0000000000000169L } }, + /* 33 << 427 */ + { { 0x92de07fbfbf8e53fL,0x73ac4f82b63807fcL,0xcc4f442b27affccbL, + 0x2d62daf18bb57cefL,0xa7d49c8d70519e9aL,0x0c4b3d051b5b5b71L, + 0xa157cc8f70d64bd3L,0xd127d42ce74f2b2aL,0x00000000000001cfL }, + { 0x79eb99c7cb079cc9L,0xbdc25d7fb011154dL,0x872b357c399ce92cL, + 0x3e6cddea714f21cdL,0x9f6164964b06671bL,0xb1830b11fb8072d4L, + 0xf2fa236813044f12L,0x1ac80049e95ad608L,0x000000000000010dL } }, + /* 34 << 427 */ + { { 0x9269dc702740d740L,0xff34da75c999e65bL,0xff47227b44d3a050L, + 0xa32b5a8c6930d914L,0x06a3fa67b194ccf2L,0x21f0ff632045ff06L, + 0x76f08dc2f25f41baL,0x0effecd5071e60bfL,0x00000000000001ccL }, + { 0x509f29ba52771829L,0x9c992d40f2b740d5L,0xd62799bdfa7f8244L, + 0xb403785ce16a20e6L,0xe1a98c8c9e156027L,0x0d397dcdc18b859fL, + 0xbac3cb83c9d70388L,0xc5316eadfb7d5568L,0x00000000000000f3L } }, + /* 35 << 427 */ + { { 
0xc856f24cf4a0af77L,0x43161834b585a74aL,0xaa1b75e173f7cb2dL, + 0x8349aa98061aefbcL,0x2e5847d2c610cfd9L,0xf1e0b1d393bd106bL, + 0x6d239a5e14efc60fL,0xe7a6418f1e30dbfbL,0x00000000000001cbL }, + { 0x8258a5df6955a0b2L,0x832126776e90817eL,0x63a78ff5110e989eL, + 0x84dbefcc96ab15e3L,0x7d0a08c003ac922dL,0xbba5ae39a50d0bbcL, + 0xe480626e9c7466f2L,0x278bc8daaa73d0feL,0x000000000000005fL } }, + /* 36 << 427 */ + { { 0x2df7a47e6a625873L,0x30bf6394d0cc5aedL,0x44f942da51af3189L, + 0x2b700239ea2b12f4L,0xb5aea5e8e9af1742L,0x948b6b30dd4d5386L, + 0x5eca1a7bab06b047L,0x19ab42c24f3f2baeL,0x000000000000013bL }, + { 0x903986b0f22488a3L,0x9ccce9ec066ff5deL,0x8149450d08e6babbL, + 0x176b2286cd8884f1L,0xa328bdd3c05b78d6L,0xdbaf8ea4d0b14a6cL, + 0x139734eca2e92cecL,0x453c8e96dfbee5b5L,0x0000000000000049L } }, + /* 37 << 427 */ + { { 0x3e36dae1b3de3ea9L,0x4d67dc22b1048176L,0x9c3f39b27ca60a49L, + 0xd4a75e25f7600352L,0x03dc181cd298be2fL,0x18ae60f43016d93dL, + 0x871f677ca3a7aeaaL,0x1977f85af92c5d38L,0x0000000000000055L }, + { 0xa51c1634692bb41eL,0xca455e9dc37c90ddL,0xf48be721c7a8731dL, + 0xaa030d88b9da10cdL,0x35029fd9ed039c9eL,0xb3d2e1e89423f362L, + 0x555246ed4ee59557L,0x292a8be6905a0decL,0x0000000000000088L } }, + /* 38 << 427 */ + { { 0xbfe67c557a8d10f9L,0x1e8167b2f54e78dbL,0x01cdca1f4234bfe6L, + 0xbbd9a3acd655fafdL,0x02bf164c7854f6f2L,0x8241f9ff52ce9fc5L, + 0x2fc147122689e156L,0x527855adbd0e10cbL,0x0000000000000143L }, + { 0x8adcba30f57d2022L,0x95093ae15fc652deL,0x4d830641ad841830L, + 0x702148f87da2878eL,0xf47d81009531f234L,0xd6391216ee253ee4L, + 0x99e201a00732fabbL,0x693327a7a11d3ef4L,0x0000000000000037L } }, + /* 39 << 427 */ + { { 0x257b2c08a87a1545L,0xe1e118548a4a4ccbL,0x099d563158109e59L, + 0xe395c06560d8cf6dL,0x2da7e6dbdac91c22L,0x113bd02ee0632099L, + 0xcc7111b98d953366L,0xb76e6af3a5983204L,0x0000000000000047L }, + { 0x0c44689c7bbce3a7L,0x26bc71a8fcb6c961L,0xf34027d8f31cdad5L, + 0x1fddc33cd929cc3cL,0xeb5682a0eb37a1ccL,0x4755ba0df7ec039dL, + 
0x0a94d4aaf33d0836L,0x19e00d7947a72a04L,0x000000000000016dL } }, + /* 40 << 427 */ + { { 0x2cf3b1fa322864adL,0xe2206b9b9e3f46a3L,0x76348b0f41d0ce6aL, + 0xd41b3ed8a4325cccL,0x9d9ccbb21d3405a8L,0x65abee1b7f11ee03L, + 0xd977c7ab0aa890a0L,0x220e26661a890787L,0x0000000000000052L }, + { 0x136e9c9769519db4L,0x2e563a4757f5ccccL,0xad3a43887feb68ecL, + 0x5e71644ebca01b85L,0x45a4cd156a97b3f3L,0x3c8779307e5e79cdL, + 0xf104f21a4ff32d62L,0x656dca67ee2e827dL,0x00000000000001c5L } }, + /* 41 << 427 */ + { { 0x538e141005d2dce4L,0xc517290ecb65b292L,0x171cde90f26b5fcaL, + 0x0f58cb2f09e7a385L,0xe0c681b758c62021L,0x0f7f0891acc7ceeaL, + 0xe8edc977a5872183L,0x073a1069d7756934L,0x00000000000000a5L }, + { 0xbc2a3fc512d27ad9L,0x0207f142076be131L,0xa72fea2658f61ae5L, + 0xb74a4667c5878639L,0x08b02b75d3027808L,0x06cc19d64340d00fL, + 0x957bc371e56573d7L,0x97f805cca8bb03c9L,0x00000000000001f6L } }, + /* 42 << 427 */ + { { 0x942815d76ea3d821L,0x8881dd1046468a84L,0x30aeb26b67e28444L, + 0xcc1f79d53d0d7f04L,0xe66ba1e36db5fadbL,0x307dc24b821415f6L, + 0x3ca98ae0f143a003L,0x461e517a9e4c0596L,0x000000000000010cL }, + { 0xf6873034c32523baL,0xacf66d608fc9f270L,0xa7bbe4df9e29d4b8L, + 0x06382f3fe4866269L,0xd02b5fe55bd0878cL,0x313d61815536ac79L, + 0x4ef5bc9b92c66a64L,0xdf2cb0b4c6507408L,0x0000000000000108L } }, + /* 43 << 427 */ + { { 0xe64eab7050f2ad0aL,0xb0836a25989d3480L,0x646266df62a0e974L, + 0xe31a0fcd0786a00bL,0xeb3ce16c8d094173L,0xe027c0361b383372L, + 0x4b98279927e50720L,0xe1be59ddd0db5c1cL,0x0000000000000191L }, + { 0x251b4415aa8b2696L,0x7c76e719cd241ccfL,0xa927921cd20b8c2cL, + 0x3bab44460e5395caL,0x085f53b8b4f6fd5eL,0xfdc7d362585747a4L, + 0xe01aa6f8b49df12eL,0x98b64a06cd96102eL,0x000000000000013cL } }, + /* 44 << 427 */ + { { 0x97cb33b1a4555f7dL,0x43df1ab01b9e2de5L,0x786a7bb7783d728dL, + 0xcecfdcdc0ed18d15L,0x1d7d0ef878175e9fL,0x688a6a93ae2d03e3L, + 0x86370b065f8b6e9eL,0x430bbf48dbd9c4faL,0x00000000000000f7L }, + { 0x2edf29360f7eb5a7L,0xf5420a13cf6ad480L,0xda6ab6be65d089f6L, + 
0x596de3a24a8d9ab7L,0xae767579b2c581aaL,0xaab09d434572bb0bL, + 0x112a1f2b4928b1f5L,0x38e069e2f93ad7a1L,0x0000000000000021L } }, + /* 45 << 427 */ + { { 0x941ca2415ca5585bL,0xd923343df644f5f8L,0x71a7d718b18c3e17L, + 0xed8dd0de99436ddbL,0xe1fac7baba2f9c39L,0x34358aaef116190eL, + 0x403bac20e41c6b1dL,0xabc6637b3b26b4f4L,0x000000000000007eL }, + { 0xd117748197fb706fL,0xe41b34db815019bfL,0xb5228a6772199b3aL, + 0xf022c6474d89f217L,0x00696885d7150cf3L,0x1f655091377a6789L, + 0x6cf689158649e629L,0x3219372d4b28e7efL,0x0000000000000171L } }, + /* 46 << 427 */ + { { 0x2fb27dbb68b4cf14L,0x83cf08a47542d4c2L,0xedea4b048059f2e3L, + 0x1222f64a6f67d4a5L,0x1d790b02e6c04450L,0xcfe52ec5cf4dc563L, + 0xb42d56e9a5fa0b1aL,0x2ef31effc3d864f0L,0x00000000000001c4L }, + { 0xd62492da6114eaf0L,0x04f4c8d4d40afeddL,0xfaca6c0a08af05adL, + 0x7171982cce26c849L,0xa7bc880548e683a7L,0xc150763774d06896L, + 0x0f49ea72c3fcef58L,0x8c32076913cf0f49L,0x00000000000001f5L } }, + /* 47 << 427 */ + { { 0xaf181e82985582d4L,0xcdcd966e4e992b86L,0xfb5402b6a35ca1b2L, + 0xf74d80512add6ba5L,0xb7895c4f41091b8cL,0xde51f53e3a6355afL, + 0x050164b1ed0a60ebL,0xcd62660cbd2047c7L,0x000000000000015fL }, + { 0x38f3d8571cbd7da3L,0x24586de2b9f659bdL,0xd8110d99860eb4a7L, + 0x44464b4562cf6fbbL,0x7eed4e2a0144bba3L,0x3e54c13af303cbfbL, + 0x30ac8c5f15d1fa60L,0x64450110a92fa2aeL,0x0000000000000035L } }, + /* 48 << 427 */ + { { 0xb24539ba81c69120L,0x04505ebccfb6ad04L,0xc834374bfb3dab0bL, + 0x1387d634c6993dd4L,0xc2e664886b2f12dbL,0x96ddf76e8b1f1c7aL, + 0x0af8836d0c73112eL,0x60fc8d5c87471da5L,0x0000000000000160L }, + { 0xbb469810eaf8f9a1L,0x578f6ab463f8ee9dL,0xc94a65ed62b399f4L, + 0x7dbe97209ed6026dL,0x33fb692d4dd7fec0L,0x1809935aec02996dL, + 0xeb564aa5b4362311L,0x7721c318a6eaa32cL,0x00000000000000f4L } }, + /* 49 << 427 */ + { { 0x6fe34646132b6d6cL,0xe8ad45578b5df74bL,0x5edbe10544181a9cL, + 0x876ad5acade27e8eL,0x709d9c087a2bc939L,0x8e3fc5dcaf2cc6faL, + 0x69f278e6f034d895L,0x76f09ec5c27e1c76L,0x00000000000001bfL }, + { 
0x4cc16808db60bb50L,0x8783bcabc0049677L,0x26987cfac55833d9L, + 0x68a244380dcdb1c2L,0x7c83c14a42bb8497L,0x6b582da63e47412aL, + 0x3b7e4b7eceb07167L,0x6b8a30351c1bb2e8L,0x00000000000000a9L } }, + /* 50 << 427 */ + { { 0x7fc67685987b0150L,0xa8abf2f762b4cd90L,0x115c21725726f489L, + 0x861ab367948a78f9L,0xbfb4d3a1804286c7L,0xa4512b172771cb85L, + 0x48d255811beb89b0L,0xbe78f826daa51a26L,0x000000000000014cL }, + { 0xe2829f61644031e8L,0x80de3e3475877fdeL,0xbcb6bf9527c7a40cL, + 0xc823751734abfa0cL,0xc476ed04d4a33668L,0x715393764f9017b7L, + 0x085bb25f90ecddf7L,0xe63273c0a6f3dbd0L,0x00000000000000feL } }, + /* 51 << 427 */ + { { 0x06a48d0bbf507ffeL,0x59caa8e90b047e91L,0xa10f715478b708dfL, + 0xee72692a5cb625c7L,0x1fec6d20c83e6aaaL,0xcfba2c7a3532329eL, + 0x83970bffa919b5eeL,0xbe15513c6a2a46c1L,0x00000000000001d6L }, + { 0xfa14892670cb9e0dL,0xa56e8b43ad374337L,0x7d18381ec20e39c8L, + 0xc7022ca35b739c55L,0xe8d0c9d08ec3667cL,0xf7ee5edaf4244d4aL, + 0x501af624a2c20c8aL,0x55699815ee5cccc3L,0x000000000000011dL } }, + /* 52 << 427 */ + { { 0xa2d0161263e5d445L,0x318c60fb81a2b810L,0x4564edf6ab744edfL, + 0x21cbb72b7129260aL,0x7ab39feca9d01ee0L,0x9b4d0991aad2d43aL, + 0x5bf41341e8c8f5dbL,0x61002ceb83ee50daL,0x000000000000017dL }, + { 0x94b575db1c891b7aL,0x348538dcb7f8b85dL,0x91503a10c1877296L, + 0x9ff573b2503b002bL,0x4886698ecbd46aa7L,0x6564266de31a6020L, + 0xfa1da81ee61a1eb7L,0x2df2b1604e03554dL,0x00000000000001e0L } }, + /* 53 << 427 */ + { { 0xf8a6c112818062f9L,0x8d08a4ddb301afb9L,0x6d7305c20b074bc5L, + 0x279f7e4c3470f4bdL,0x59f3f94b6e8c4a0dL,0x680a11913e917609L, + 0xdd6758459621dd8aL,0x1ab73dc00bc5e395L,0x00000000000000d8L }, + { 0x2dd9e1385f223a32L,0x4d87bf99ccdf9a1fL,0x8a67b8908a2f1b38L, + 0x329c08b5969b592fL,0x9f15de53e70135edL,0x537ad5329408ce34L, + 0x00f468c6d91dd253L,0x07514043d47da37eL,0x0000000000000138L } }, + /* 54 << 427 */ + { { 0x26144feeeb6e7d44L,0xa3d0e47b419d7a27L,0xc97f088e22e55c60L, + 0x611ff17696306002L,0x5be12ad9e06949d3L,0xa044ae3d199da892L, + 
0x3e45377b9337f515L,0x6c3d1a82b9c008f3L,0x00000000000000ceL }, + { 0x7576e2901950a782L,0x0f137065091e8675L,0x32b05e3c59703452L, + 0x0ea85fa33e8a80aaL,0xad2e35276f9bff8dL,0x50cbed34efae1927L, + 0xb7dc26ee13ac433eL,0x42b282472925f0ddL,0x000000000000011cL } }, + /* 55 << 427 */ + { { 0x2e49ba37ff3d2c8eL,0x8b7eb6db17f31cdaL,0x81fe046de11d97c9L, + 0xed15756eda082707L,0x685e44031195a75aL,0x56e7aab54492062eL, + 0x5ca5797e3a978f5fL,0x4591a8bdeca382e2L,0x0000000000000097L }, + { 0x4fb2fb61ee14d996L,0x4723a61f5803e9a0L,0xed2c32c01740f872L, + 0x9cfb2e942c46e342L,0xf664be63842b362aL,0x4ab1e9d06d396a61L, + 0xb0930e94a2fe10a7L,0xdc17d952a8fe6f70L,0x0000000000000110L } }, + /* 56 << 427 */ + { { 0x586c6846413b14edL,0x936bb2927edcbc73L,0x893e795583cf2054L, + 0x386a770bab5def06L,0xe753fc8a28830523L,0x3e651f83a9e5952cL, + 0x42c2d31ebddb4b3eL,0xb9ebb8c23d9485e4L,0x0000000000000130L }, + { 0xd0f7512dfdd9c07eL,0xdb9b61d37c17370bL,0x1d5542e40e023630L, + 0xd7367ec1e751a8b2L,0x3ee2c75913e4ec75L,0x331c85fa9cb027a4L, + 0x9b1d0f016fc9c60bL,0x7e6a19cf34080c71L,0x0000000000000142L } }, + /* 57 << 427 */ + { { 0xe6bb0584e75480b4L,0xb3a154d2cc7fb34aL,0xb297a7260d5ac90bL, + 0x46cd0c4f19075d40L,0xc3375c6733a6825cL,0x64904deac9512c02L, + 0x4f17c760ce09f4e9L,0xde401d3cf301d829L,0x00000000000000adL }, + { 0x89155ce8d39f2376L,0x39a8df112371e03fL,0x65d702536fee325bL, + 0x54ff235708ed7dfbL,0xc19258ca932476b6L,0x13cc49b26dfeddf1L, + 0xa2b26c3c850f96daL,0xde83c98d86e4d2fcL,0x000000000000002dL } }, + /* 58 << 427 */ + { { 0xc4f1693f0af0d7d1L,0x04df4ffe075feb82L,0x4f818a9543f8745bL, + 0x3f8bdedd8c94b232L,0xa4af5aa35314fd6eL,0xf4911b6bc3ca1b45L, + 0x9924adf319e2f569L,0xa7794358dd09e72dL,0x000000000000006dL }, + { 0x4f87908e973ff83fL,0xcac68ed2bbcb94e6L,0x99c8c1d8689acda0L, + 0xb0d9d2a9e581b37dL,0x413d1edfe1ec4002L,0x1585f1cce9fabb62L, + 0xbbeff8aca026d8f9L,0xfee19657d13ced0fL,0x00000000000001faL } }, + /* 59 << 427 */ + { { 0x2b24b3c019ce885aL,0xb53de0cbb854a17fL,0x9a0df03ecf26c32dL, + 
0xe6e276ff18141773L,0x29033b53711c8b3cL,0x367c7c6827b85921L, + 0x8a0f8999c6f4439eL,0x7dabbad4a3d38307L,0x00000000000001a1L }, + { 0x88b4790caaaabfb9L,0x39726e9aee90a6c2L,0xbc324a8c2a08e052L, + 0x5044c686f2835310L,0xb4b2ee401a62e2a6L,0xb5bb8ea4715a09faL, + 0x4ba0b1caa2406da1L,0xee2a9ea4066a2a26L,0x00000000000000b7L } }, + /* 60 << 427 */ + { { 0x244f7e0c14c92774L,0x0e07d01007243141L,0xc07fc4185d862fe2L, + 0x052eb9b39eff52e1L,0xa39dcf3e79cb7730L,0xd4bc1e0ada1a0cd7L, + 0xcbcbed4ed398b073L,0x466686ba732f9530L,0x0000000000000110L }, + { 0x67d348f78f15b8cdL,0xa681bab6d8cca9bfL,0x5dea910aba84678aL, + 0xb89d861524525de1L,0x7536a8e711faeef7L,0xcb049dfaaeeb1f92L, + 0x647de8ded21f0b55L,0xc2e8852ffb9fd11aL,0x00000000000000b8L } }, + /* 61 << 427 */ + { { 0x53e4c63ab7849f04L,0x065e74bdc84a5960L,0x4c437df833b4638aL, + 0x3e8046b785eb9a9aL,0xbf96115d5136388aL,0x7682edd57d6ea4beL, + 0x4939f243b3c638fcL,0x71d0774fb46c9fadL,0x000000000000012eL }, + { 0xe57fa566f6f9843fL,0xfc6102e7f9b4659eL,0x51279da3ee351e88L, + 0x1913f351622fb01dL,0x5a1ada08ec899057L,0xca36809acf2110feL, + 0x58afbe7f68df1b01L,0x025adf57f30e4d6eL,0x00000000000000b7L } }, + /* 62 << 427 */ + { { 0x66de14604a32d769L,0x7085c561b00b7baaL,0xf3781f27ca887f70L, + 0x067b88390f609588L,0x4dde43f9de1aa960L,0xdf0d5c73246e1756L, + 0xfb7e6cf0e32b6b74L,0x832383085785f432L,0x000000000000017eL }, + { 0x85244d7e30556b4fL,0x7d7ae5c25f2b01dfL,0x6d217088afca87aaL, + 0xcce2eb33c6746f35L,0xa214db754c48ee16L,0x7075d0059af9c236L, + 0x05acfa551dcb066cL,0x51808254399e0cadL,0x000000000000002bL } }, + /* 63 << 427 */ + { { 0x37140db02c17ece8L,0x67c86862a2f14a31L,0x8661ca8b7f2ebba1L, + 0x44ea992a1592fbe6L,0x23c0aa41dbb650d2L,0x7e3b31be84e08d74L, + 0xdc2437d958374fe6L,0x7080a4802a00d6dfL,0x0000000000000084L }, + { 0xcf036b4f784a660cL,0x368783d64202a5eaL,0x70f9ea976ab86497L, + 0x10e443598d9be8d3L,0x804e0b878e0a9738L,0x7251be8c013faba3L, + 0xbf2df4fc39e0f147L,0x0b31ef1115f25948L,0x000000000000000cL } }, + /* 64 << 427 */ + { { 
0x5fbf73adee70e781L,0x29a81e4f67793cf9L,0xd7e9ccdf80bf503eL, + 0x6c06d49cd6edda9dL,0xc33c7ee6d96e70f7L,0x30d7d644ef6114b1L, + 0x0615b576ab81af49L,0x56b8af445a09a31fL,0x0000000000000076L }, + { 0x32e0b33475cac764L,0x0b123da0dc2d7e31L,0x67d72e7ad11b58eaL, + 0x93165a21961098edL,0x4ae95e5add1d2b8cL,0xc26794a07ee3715fL, + 0x6887f532cd333782L,0x67e608d5af37aedeL,0x0000000000000179L } }, + /* 0 << 434 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 434 */ + { { 0xac333e4aecfa8831L,0x9d08f6ef8a6d6e93L,0xf2eed835ea97fb6aL, + 0xddf44417070c46f2L,0x1e3159bf37694ea3L,0xb0c4319c8b7d6a05L, + 0x0abc757261f314fbL,0x0057f6c701a6047eL,0x00000000000001e7L }, + { 0x204cb3dd6af8283bL,0x4eedfa1644ab9ea7L,0x79f1569993b7d9e5L, + 0xaac8f0c7d29dcc8aL,0xe293fd274800f5e5L,0x6ec0a739e22da004L, + 0x8cbee62fb903094cL,0x86dcb1fda88c3bceL,0x00000000000001ccL } }, + /* 2 << 434 */ + { { 0x3a44dde085f45eddL,0xc543bb6498309156L,0x82d1ce827597d0d0L, + 0xb2dc0f228106ca3eL,0x66f394472c3a313aL,0xd8f6fcdaa6cf2e03L, + 0xed6e0e279e6bf40bL,0x7dcd3a305b9a41afL,0x0000000000000095L }, + { 0x0a870cec12be5b78L,0x912c21c51406e140L,0x977417fdd5e13ca1L, + 0x9edbae8ab9e21e28L,0x9fce45ea13b2f069L,0x25bd760b84f3cbf2L, + 0x29e0c3ce2489fd01L,0xe16d31c1ec62a766L,0x0000000000000061L } }, + /* 3 << 434 */ + { { 0x9badc953ab98f2bfL,0x867b5f5e6f45ec55L,0x36c30111539ab291L, + 0xc663fbe74feeef3cL,0xb2da1de355213dd8L,0x2c3e4a54a8f58884L, + 0x2c2794c713f166adL,0x1b31c3a69bb7b65bL,0x000000000000018eL }, + { 0x1a866f47b06e4f37L,0xbbe067a17a609c0dL,0x83073920151f64fbL, + 0x899421eebba549b1L,0xa2145adf91f21d16L,0xbd5f420952c37c74L, + 0xbf18214c7c7afc51L,0x917e6547b69ba831L,0x000000000000013eL } }, + /* 4 << 434 */ + { { 0x6985bbd6ee32e2ffL,0xe69f0f223c35a00bL,0xb1abb4ae0ede3c1bL, + 0xa3d7ecde93d96532L,0x69eae2594f693cabL,0x52e38b65fe3665f9L, + 0x17664e93f2fa7a85L,0xe1a2febfa5300907L,0x0000000000000000L }, + { 
0xbb35ef0ca538774aL,0xac9aa3ce2b28e6fbL,0x57e02d060a801edbL, + 0x6977544394847324L,0xb526bb1929dd0a3bL,0xaed93900c468815fL, + 0x3eb1b9b2221df814L,0xe421e27ad9472dafL,0x00000000000000b9L } }, + /* 5 << 434 */ + { { 0x86c72d693a4d1314L,0x8a0b4b5a5c7db07eL,0xb77d353706fde68fL, + 0xcb5446077f5e412aL,0xb399cf67eebefcd0L,0x8b1f0e0a274fa0ddL, + 0xbf0e6dc875a8fa65L,0x1c405959c78432ebL,0x00000000000000cdL }, + { 0x50aef83472b78101L,0xf4477ba98ad40428L,0xffafa24a11d9b90aL, + 0x41b8927e81c56505L,0x0f9920639e742b07L,0x1e7d380631dbf256L, + 0xd4c41bf666ba635aL,0x895bd3de14246d32L,0x0000000000000179L } }, + /* 6 << 434 */ + { { 0x4ca733dff28aebc3L,0x408d980ed8f0c8feL,0x64cff1fd113fe1beL, + 0x3ecc3e9706c2bdc4L,0xfa7cd864c6e6802eL,0x728c7528d9975387L, + 0x96ae9f3bd6abcb5fL,0x24a5251a89ee857bL,0x000000000000009fL }, + { 0xe49c9cc6c5a6f9b0L,0xdd026901c60fb1c2L,0xc0c58dacc212b65bL, + 0xf2ea35783677c2b2L,0x7d2c7e6de6ca86aeL,0x8ca90cffdc0561a1L, + 0xa4e849704061a2ebL,0x852086dafe948ce5L,0x0000000000000135L } }, + /* 7 << 434 */ + { { 0x64d982d29e4a102cL,0x4526200619d68225L,0xce9edcab495a7a30L, + 0x49255fd1405baf0eL,0xd9e98392b5055fa9L,0x5deb038d3910964fL, + 0x3c53120d8c7d1e1aL,0x4690a6be39ee8dbcL,0x000000000000003dL }, + { 0xcbd6c1035f941144L,0x0cc8a7b3d72dba80L,0x09d89abc286785b6L, + 0x7f545c4c4d52dde7L,0x1f0b173d3212b0c7L,0xc82280472dacdc27L, + 0xd9a295be16301255L,0x75cf7419ed5f40ecL,0x000000000000005dL } }, + /* 8 << 434 */ + { { 0x823a3093dcc0c55dL,0x48e4f42c8667bd82L,0x6483b8acdd036949L, + 0x0d3f6e52154b38e9L,0x713a6450cb96db0aL,0x791efd97ac907250L, + 0xe491a5192130c99cL,0x333277e57df593fdL,0x00000000000000ceL }, + { 0x4deb4d72d7d90a84L,0x7f39203ea13778c5L,0x2308cd8a7bb79168L, + 0x470cef35a710bd28L,0x6db6b1a213ba5c45L,0xe35b95dde9e55ec1L, + 0x68f023ab7819af5bL,0x50d449896770441fL,0x0000000000000187L } }, + /* 9 << 434 */ + { { 0x4710bb318de4fd4eL,0x169135edf00d87afL,0x36bd504900a94292L, + 0x90c1c9c2ee73e959L,0x604cdf4c627461f3L,0xf123980795d64fa6L, + 
0xa60a3fb61f64ccc9L,0xdff7fc6e358d52d3L,0x00000000000001d6L }, + { 0xa97592eec296e9a0L,0xcfda4984fbc367ffL,0xa22077102957a911L, + 0x596f1f66ca8ec572L,0x7bb35c4f4723310bL,0xb8f39027336b3010L, + 0x4c2ad1f39d1bfbccL,0x99f721ababd1e48aL,0x0000000000000103L } }, + /* 10 << 434 */ + { { 0x761d77943135f08aL,0xc8ea69f3c9ff4b0bL,0x3a427a226d6f567cL, + 0x4f8fbd235866181cL,0x4ab492868b5a14d5L,0x2cf7fb42ad46afe0L, + 0xfe411657b6759dc7L,0x9610138134a7b11bL,0x00000000000001b4L }, + { 0x1c306fff2d0bc15bL,0x23830bab33793532L,0x959e8e6406ebcd49L, + 0x8ed9b284f60bceb3L,0x7d9c5d2acba1b607L,0x8aa698e5ea1a280eL, + 0x75025662502aa440L,0xe1b6a557d8462948L,0x000000000000004eL } }, + /* 11 << 434 */ + { { 0x1bcd8a7db6ef3dafL,0x12c9d413f4253275L,0xba8941d2eff8e698L, + 0x3c29be9700772aabL,0x4d3932bc466ddc96L,0x9d8f7ea612b8dbceL, + 0x54b26ab2e90a0e8bL,0x28ecc5262ab562e3L,0x00000000000001d0L }, + { 0xef47706ebc52c73fL,0x88ac550f31ca0b88L,0xefc0b9993f371074L, + 0xf757ea265a09d4dbL,0x4f614d7d4eca4514L,0x3b597b46bc70e805L, + 0x5f65ff72861a04c5L,0xa2fa0cd6a07d93a0L,0x00000000000000cdL } }, + /* 12 << 434 */ + { { 0xfd2c6ba8c03c8a93L,0x6287baae255a51a6L,0x6cde095efc336858L, + 0x668c6f54acb5fb8cL,0x26b65582568a5789L,0xd51684effcfeb7baL, + 0xb5d91d3d8dfe63fdL,0xd366cae7560188b9L,0x00000000000001c6L }, + { 0x587d06cc9844367eL,0x527a075f589d9acaL,0xd581ad9f4f88ccdbL, + 0x3499e8073a56ca59L,0x50b8b14dd6328d96L,0x9b06ce845936a744L, + 0xab46f398e693da36L,0x4c5e0d9abc55f498L,0x00000000000001e0L } }, + /* 13 << 434 */ + { { 0x35e6f3aeedd8cdd5L,0xe7d1f84e3c70eda1L,0x2fa8a57c99601b6bL, + 0xf4cd235a499f2353L,0x9200190c848ec0f4L,0xd8e57c44524a27b7L, + 0x5a32ea3b935a4f48L,0xd31839aa14181ffaL,0x00000000000001ffL }, + { 0xe60c02142be2e8d7L,0x4149c75ebc31b48bL,0x87917bef8716d826L, + 0x70259495ce66f29bL,0x457f892b3868150dL,0x8f604f018c143012L, + 0x802b15781b1338e7L,0xb8e25cdbe0d1d962L,0x0000000000000116L } }, + /* 14 << 434 */ + { { 0x8f89563d4f5a170bL,0x9b4bedca2a08933eL,0x270d697123bf430bL, + 
0x589177b13db77dcdL,0x2c7fe43654015de9L,0xa190fe6f76847478L, + 0x161fb872744696e7L,0xa272a78c692f59d0L,0x00000000000000deL }, + { 0xdd80e1522a95538aL,0x2d9da36b70fe17caL,0xfe5bfe5823144f73L, + 0xd3239a7c02c3a576L,0x01efa88bd359a33dL,0xe6af55f7269d3166L, + 0x15e3f06e0b135041L,0x20be0cf9492369fdL,0x00000000000000bfL } }, + /* 15 << 434 */ + { { 0x76c2738f4e339d8bL,0x2f6527a787c02e91L,0xc39a95b131e5a030L, + 0x8a733cc6c748a0f8L,0xd9ddf3065851fca7L,0x0cc640146e2fa015L, + 0x39d03d51fd0aa84fL,0xaaaa83861c8f6581L,0x0000000000000138L }, + { 0x750194db2640d9c0L,0x3415c97add9e522bL,0x002ae6be920d5398L, + 0xf544445d80534cabL,0x3c62a5dc6f30a7c8L,0x26b9e329a45929c2L, + 0x7675e5d566978e76L,0x905599fbb8d851c6L,0x0000000000000198L } }, + /* 16 << 434 */ + { { 0x6cdf7285e8cf496bL,0xa27becaee1460283L,0x0291e5e3a8331004L, + 0x436d8dea957e58fcL,0x52ade5768f6a37b3L,0x4c31e9eb08ca0bf7L, + 0xd918c8a0547bb960L,0x85ceb1004394f6d3L,0x000000000000007bL }, + { 0xf796100862db1e32L,0x875fa000c20f8b1eL,0xbd4bb865e621e53eL, + 0x9346c7c57235cf42L,0x8ae88a5692cd8c70L,0x06b82e3ac68ab6d4L, + 0xd0dea99a736a6b71L,0xa54ceaa48c6a1235L,0x000000000000012dL } }, + /* 17 << 434 */ + { { 0x0fa6807fc5da532bL,0x55d9934a4ff8c1faL,0x2eb4a07eebdcf033L, + 0x4b02434624189b33L,0x2ac6561b755eb9fbL,0xaea6cd0ad3eb3456L, + 0xa36e3823c15ba5acL,0x94b73998c83d3098L,0x00000000000001e0L }, + { 0x7fc341f11f8b078cL,0x713203356df305b0L,0x37a2030430ec9e60L, + 0x7999cd74c660bafaL,0x7521e720ec45a6f7L,0x6fa8a84d4fb05d84L, + 0x16e0315c1f4f5dfcL,0xdd5cfcc90cd47592L,0x000000000000008fL } }, + /* 18 << 434 */ + { { 0x7ca1ffa7541e8082L,0x00f359827383faaaL,0x04a27995ddbea281L, + 0x11cb61e87b84e2aeL,0xb22cabebc2411c38L,0x684f6d0266eb6691L, + 0x712d6e1f4d7d1723L,0x5717734104bda106L,0x00000000000000e4L }, + { 0x72616b593feb0af8L,0x6e76d929924cd70aL,0xf6d917784239138dL, + 0xfa7bd85f84918a4eL,0xb90ba79df7073d27L,0x4edf2a217e42f694L, + 0xb4840c74e1064c54L,0xdbcefd0b133a8f82L,0x00000000000000d9L } }, + /* 19 << 434 */ + { { 
0x74207ef458e13084L,0x64cd272b775520dcL,0xfd6c279b16d75480L, + 0x0de0dfa321b58672L,0xd156b0bdaa4fe111L,0xa006bb34bdc41a5bL, + 0x586bb1e3f7423cb5L,0x39ffa18d5c61fd49L,0x000000000000011eL }, + { 0xe3f73412ec60c0b7L,0x7a6bc1434ab8bd9aL,0x935d3e7ac11d433eL, + 0x1559ebf9dc6e4b07L,0x6a4fbf74d1ff2ce8L,0x58526dc6fa40bacbL, + 0x36abb094c5a9d599L,0x4ce17b30e5b1452bL,0x00000000000000cfL } }, + /* 20 << 434 */ + { { 0x14de106989b694f7L,0xfa202a91453ed74eL,0x19344093b7c462b0L, + 0xddb9f1159d5f7733L,0x6110f1cd6e71efcbL,0xe9dbc336d27d9f95L, + 0xd9b29e011ba3bb66L,0x7b218536a9677cf4L,0x0000000000000178L }, + { 0x7fe687840eafdb07L,0xde335a451c0d642fL,0x11b4b98a208d5237L, + 0x53f2c20236b0801dL,0xf07f20935cde08e1L,0x5e0e5d3a0f34078dL, + 0xf1fc7f3480286b22L,0x4c305a8bc48e9216L,0x00000000000000f4L } }, + /* 21 << 434 */ + { { 0xd6df42236705ed72L,0x370df16b66596b1dL,0xcfd44ffd24fd4817L, + 0x3d6143cddc924183L,0x26b486833c93f868L,0x54926c39c19a83d5L, + 0x3d07a6ae95119375L,0xb882bdf8d83bcc99L,0x0000000000000034L }, + { 0x3cffe30d64e14848L,0x0dec1cdaa7c2b18aL,0x22757d5836be59cdL, + 0x7ddacf11d39f6627L,0x83237713e4a0e59fL,0x755523683e890a59L, + 0x6473baf3ff8e3c4aL,0xf8a58f623318c5f3L,0x000000000000013aL } }, + /* 22 << 434 */ + { { 0x3fb7ab81dd8b0441L,0x9b12b4f4080198a6L,0x765ab538611543c5L, + 0xd1b6ca116043b1e1L,0xe8e0c15a1e579bbcL,0xc9005a7fb00fd558L, + 0xb0ee75168c91ddfcL,0x85c607eb8506ec71L,0x00000000000000daL }, + { 0xf9016a2cc0ae0796L,0x1cc194908bddc9bfL,0xb49f46aff31a5da5L, + 0x172441592beef5bfL,0xd8596e0bb4e4021aL,0x272d8334936dbec1L, + 0x6b819cc236552640L,0xf6bf179cdf801f6aL,0x000000000000018dL } }, + /* 23 << 434 */ + { { 0x2ae917945ba71ac1L,0x8812d26b5fb18507L,0x50d7c5af4e8f8ab4L, + 0xcbab980b252f505bL,0x53ef95460538a252L,0xbaa38f3b2c7e3061L, + 0x6227e7032ced0186L,0x3eff1b0b81e073ccL,0x0000000000000184L }, + { 0xcb8042e53e110191L,0x8b718b8d844874b5L,0xec43894e2c55ef49L, + 0x089d49b33c1b5249L,0x431cfb0951502463L,0xbd443d7152d52366L, + 
0x6d21c0b3b0e9dce1L,0xca17c72c4b785911L,0x00000000000000ceL } }, + /* 24 << 434 */ + { { 0xdb8a02d18294cc29L,0x6aca02ec9a9a5709L,0xee2b10e6d554c17cL, + 0x29aa181881c661ccL,0xbe522af71fd10eaeL,0x3c4bb5242b5589f7L, + 0x5a5600b1681236a9L,0xf7ffd0917f235a88L,0x0000000000000007L }, + { 0xfdfb09268ad17511L,0xc8964821ba2b0a60L,0xa6edc26433608d96L, + 0xd98937de18d1b857L,0x0feecaad85a5a3b9L,0x567bbb021f102192L, + 0x89147837441b4a0aL,0xaa4716104e0f3d86L,0x00000000000001b9L } }, + /* 25 << 434 */ + { { 0x93f9cccf7863976aL,0xfdbab5361c91781eL,0x6774ae57904d212eL, + 0x6cbfac24eb9ead07L,0xbd90380ba53968deL,0x0fde4f3a0a3b43b9L, + 0x4c738cf2096efa0cL,0x1a3a02b223c171f1L,0x00000000000001d6L }, + { 0xecd4893c71151a16L,0xb6a710f6c733301aL,0x28758b01587eec91L, + 0xb131f8b73bcdc933L,0xd5f86b40871cd739L,0x2ee24f351b13cde0L, + 0x743cf511dc7a4ae4L,0x88f47ad7177a75e7L,0x00000000000000c1L } }, + /* 26 << 434 */ + { { 0xfd8ed032796edb60L,0x24d59d721ecab1efL,0x34dfd046f4ff8f0cL, + 0x8606b5075706fce9L,0xc97ed07d35c5b36bL,0x7f4f2338c412e11bL, + 0xd071191ded311016L,0x2df826748840bf8bL,0x0000000000000192L }, + { 0x9225258095905d12L,0xae0da381a714a1f6L,0x59c89c85914bcea5L, + 0x625501930a6928b0L,0xdeaa1009b4cc872cL,0x1c66457791b860e9L, + 0xe52c89f4cd323335L,0x1b163d294211ab43L,0x0000000000000082L } }, + /* 27 << 434 */ + { { 0xc9e03c6c0eb94372L,0x8a373721b3f7a044L,0xb81c3b953e48abc0L, + 0xb4a8a4b733c5a6f5L,0x5e4f35752c6ffb15L,0x369c3d68d3cd8567L, + 0xad684b69cd922456L,0xfc195cb8ca0961abL,0x0000000000000083L }, + { 0xb54609c5e604a834L,0x028b402f975cb6a1L,0xd61cec8482493224L, + 0xc0d930d5d339794aL,0xb86c38891b9a3b61L,0x7d2eff3793827f00L, + 0x7ff8c2bf9ab658a2L,0xe7f3aea8cfdd8fc3L,0x0000000000000156L } }, + /* 28 << 434 */ + { { 0xe030feb9814abc1cL,0xaa7ef7427387fcfdL,0x134e4ea847e5ce93L, + 0xce9f3cb52949ffb0L,0xa83252a83e11a1c5L,0x2b308229ba0f2644L, + 0x1eb76fb7ea75a292L,0x0d9bd36b73cf1481L,0x0000000000000021L }, + { 0x45bdd68dc50e223bL,0xc6a83635a6c0be76L,0xaa61ea3fc0e1f910L, + 
0x7ad38095e8fd27fbL,0x2deaa3b2796c8efeL,0x9ad0a578d099bcdaL, + 0x5819856a2a953ea1L,0x2b7b7793593d3e2dL,0x00000000000000ddL } }, + /* 29 << 434 */ + { { 0x5d863e60fd21235eL,0x1737c3f3325a1732L,0x6d933847ccc5bf11L, + 0x00a192d493e00d81L,0x89a45bfac2c643ceL,0x02a13ad00765649dL, + 0x26458f12fa509f47L,0x74c8aa2d16f09353L,0x000000000000013dL }, + { 0x3fb91bb9f034a301L,0xc374e95b48dd0af7L,0xe1b0cc306d83c081L, + 0xbbd70743e19f9a52L,0xd036958d37f45c9aL,0x02e6c68263326db0L, + 0x65b7b79941872949L,0xa2e9e2dc4a5cd039L,0x000000000000016bL } }, + /* 30 << 434 */ + { { 0x68dc9f921bdf35d8L,0x037282cfa1a62dc9L,0xdca2ce576cb7d912L, + 0xd565f7f06dd74592L,0x33e5cd32c2cf6a12L,0x86e50fbbd1ce584eL, + 0xdec9cac5a91b3424L,0xb0c5e080b9b711e9L,0x0000000000000170L }, + { 0x76d0834ade8dc323L,0xc7c54d2e20e86b82L,0x262b43488d8a41f7L, + 0x35d84018b7751aebL,0xbf41d90a5e4cb30cL,0x3561f5f7c0cd08eeL, + 0x0da722077b161b52L,0x6dae1a62cd604660L,0x0000000000000169L } }, + /* 31 << 434 */ + { { 0x8a4f3392d8eac947L,0x6c5c77fb762e311fL,0x29873696050f18eeL, + 0x1736f47d8bf20331L,0x6a7e4e5256b5a106L,0xabdc39f8b502e6deL, + 0x92d66f5e192b6aaeL,0x81266111e45b2f7bL,0x0000000000000038L }, + { 0x12c3d68218222288L,0x2619aee789444667L,0x15c4af7dde3e55b6L, + 0x3ea45634f3cf8dd1L,0xa4ec538f4dea2c6aL,0x8a5698ec39ce9315L, + 0x3748d917aeb0b056L,0x1698cc83705362a5L,0x0000000000000007L } }, + /* 32 << 434 */ + { { 0x88fbc549470d7e36L,0xf091232ae079762fL,0x8e84ce7e027a4477L, + 0x21e570dfb704f424L,0x51e89ca1d36fc205L,0x52451d98cb04c0f3L, + 0x4233a3569a256b13L,0x42f33c2422636d07L,0x0000000000000186L }, + { 0x88d6f4d60445345aL,0x8b03b9f60c2faa71L,0xa45ab219c3baf6d7L, + 0x67c7eba1b9beb6d6L,0x3eb7beb99c8fcf22L,0xb35dfc4371d9a1aeL, + 0xc1204af558436e2fL,0x543e4f318dd710f6L,0x0000000000000096L } }, + /* 33 << 434 */ + { { 0x4e2ab19ccecc8f96L,0x4b5b98ba2dc2c44cL,0x0b69cf0a14cf2ddcL, + 0x109772e225a59fbdL,0x14e18ca6dc1ce60cL,0x8d542628d9b88c07L, + 0x088494bb355c86c7L,0x6ead45471c0ca228L,0x00000000000001bdL }, + { 
0xcac21f389be7431dL,0xd238795012a91cbdL,0xa719bcba717b6555L, + 0x5df1906c4b27ec0aL,0x489f2341704016b9L,0x7a6b8295439665b3L, + 0xd48a672f6bd0e2d9L,0x7c109dfc85d4da53L,0x0000000000000062L } }, + /* 34 << 434 */ + { { 0x95923905c4e86d59L,0x03d679ea822fe184L,0xd620a19613200a7eL, + 0x9e83b378747e1eb6L,0x39596aa1709c1988L,0xcc66a5db1570d652L, + 0x3e97c888759f7faaL,0x7a97227391133073L,0x000000000000009fL }, + { 0x044c93fdf39e6260L,0x152909941bb2cfa6L,0xcb1274af09a76fc4L, + 0x430e3da16de5dad0L,0xc91bf14c1fb5f32fL,0xc2b6692c0587e533L, + 0xd318416f41343775L,0x55ab5e96b0acf93dL,0x0000000000000098L } }, + /* 35 << 434 */ + { { 0xb847386fa177d3dcL,0x06d3c04864062f50L,0xe818655887c6c7f0L, + 0xc72485a20bbe2d45L,0x36e906749ebc1de9L,0x02ea3f0a95c940b8L, + 0x32502e0f9de384a7L,0x412d534093bc0c9dL,0x00000000000000a3L }, + { 0x234b35877cbee5d8L,0xc8b3c8b7715bd276L,0x64efc64eede0417aL, + 0x76fd0880130a4b05L,0x7629e3ac5d8dc2f6L,0x3d1bb123377d965cL, + 0x672bac542c2bd073L,0xd6de18f0763028d5L,0x00000000000000deL } }, + /* 36 << 434 */ + { { 0x8c0fbbd4799716a6L,0xcc051818aa53a2afL,0xbca333e86044b4d4L, + 0x119f4ff585c795f0L,0x4b480cb57ea3cadeL,0xced7fb1dc4c1c28fL, + 0xcb53655dccc703ceL,0x1fe0b7538c5540d0L,0x00000000000001ccL }, + { 0xd937f3957c5bc459L,0xb6756328ad67f0a0L,0x3a5bc8d0fe7c0533L, + 0x796ac0b0e4565f1eL,0x8377d5884913c2c0L,0x2faa11ac7d65704aL, + 0xe207ffd25e625bdeL,0xbf3d689068bd440aL,0x0000000000000147L } }, + /* 37 << 434 */ + { { 0x36917a0fc5d50ee8L,0xf67584a91182034cL,0x6e3f6816ddab541fL, + 0x252c3846fb4383f3L,0xd486c56a79135f2bL,0x5cf7ae823a7b6256L, + 0xc903396746135cd4L,0xfa78d00cc9fdfe0dL,0x0000000000000007L }, + { 0xaa198a27acef8834L,0xbc5b9d6f97b2f05fL,0x9f0aba712c3f6799L, + 0x11990d450c5baaabL,0xee42d800d468664cL,0x5d20517f8cd348edL, + 0x4e5cd17de02cc9e5L,0x2cc4892c040e5585L,0x00000000000000b4L } }, + /* 38 << 434 */ + { { 0xd9bda3f36d9412caL,0x759172bde7d5a033L,0x9162350fe29748cdL, + 0xea7e260f7ce78177L,0x6630e96465a27eb5L,0x2745ab185575a078L, + 
0x197118fac60ec2b2L,0xcfdc1b2b7b511ae9L,0x00000000000001e4L }, + { 0x3f09c640ee1c385eL,0x64473a006dbb4205L,0x6bed5856526fa99bL, + 0xfd8ca51fdce3dcc1L,0x5f5bf3dfc76817deL,0xc19d1b50e480b4b7L, + 0x1028a8009198a639L,0xd505d500a57b69ecL,0x000000000000009bL } }, + /* 39 << 434 */ + { { 0xf9a3263685b14170L,0xfaaea08426db6ffbL,0x84a477d90e115823L, + 0x922ce22b3f333890L,0xc0386be7cd7d2d74L,0x10ea11cc30d0eb44L, + 0x85207d417dbb318dL,0x7194658ad2411ca4L,0x0000000000000039L }, + { 0xdb898a4506da169eL,0x0aa090c2467b64c3L,0x2cd2e8360970b9fdL, + 0xaa6c8478f176f199L,0x176a0fc397e78d6fL,0x70e4e4b0f93e0f7dL, + 0x184642658f26159aL,0xc7cbb454e455ffd3L,0x0000000000000058L } }, + /* 40 << 434 */ + { { 0x2f7b339b2b037a05L,0xe42f3e95c499d10aL,0x0160d72e07be03b4L, + 0x153548a37a4601d1L,0xb0fc5c52ea960842L,0x275739870fe92d67L, + 0x4c0862b5dea7eac0L,0xcd613cde55b71eb2L,0x0000000000000023L }, + { 0x28530abf98a98d28L,0x85a1af53cb349226L,0xdf816addeea21f09L, + 0xa9b60f768c0d127eL,0x508a88e61a151e50L,0x25ac746c3d750f33L, + 0x5011a102a225271aL,0x3b5029b548113215L,0x0000000000000037L } }, + /* 41 << 434 */ + { { 0x38c9b635309d5353L,0xcb77df72ff53eabfL,0xe116962d62804686L, + 0x6949fbe02faf8d17L,0x7899699b97f3100aL,0xcf94c761b8ef13fbL, + 0xbca74d760416373dL,0x4ca1c8e9b9562432L,0x0000000000000068L }, + { 0xed281e63efb5a925L,0x67e0fa020dd70085L,0x01ca8b2cc32e60e8L, + 0xa78c502dcacbb1a5L,0x8cf30c5145222199L,0x306de139ace8b1b5L, + 0x3614b36b961f8addL,0xe3ee448a204661cfL,0x00000000000000f4L } }, + /* 42 << 434 */ + { { 0xed6f4f24b9102544L,0xf209b09d85d70bd4L,0xd7e7b9b1c727863bL, + 0xa52ecaaf946895adL,0x4128d7380b92132aL,0x02b9b836f0afd5ebL, + 0xbff72d5e25e0c7ddL,0x67eabf0bae65295cL,0x00000000000001aaL }, + { 0x210820f484bb95b5L,0xd5e78b484e691cc3L,0x6ac799475179f470L, + 0xf1bafa8f493c2266L,0xd772f2b402acc5caL,0x25c600fff28c1f86L, + 0xe57413f85ba4a90bL,0x1040f65dc9237b57L,0x000000000000008bL } }, + /* 43 << 434 */ + { { 0x721a88c5f4a96867L,0x1a6f08e0c7e3964aL,0x0c702afe5874917cL, + 
0x3660b192488af1a6L,0x4cb5255f1d7d5ca9L,0x1f31efeb4121bb44L, + 0x6853d58c5cc7c475L,0xe222fcb5a50c5a86L,0x0000000000000104L }, + { 0xa10d5a0167ca5d9eL,0xa5000c01b40801afL,0x7621eed07bad8270L, + 0x36a455c869afadccL,0xe7659c2588f3d64aL,0x2e81486da9d2e3f5L, + 0x4a64f4cf9dd9834aL,0x32a3821eeb0c3985L,0x0000000000000176L } }, + /* 44 << 434 */ + { { 0x32269c1bdc9fc49bL,0xad26e06e9fc27f50L,0x40d2a97d6b4e84a9L, + 0x736b98d4910ef791L,0xc48559b078702918L,0xb38b860861be6975L, + 0xcc86b5ac35b00e9cL,0xf06c1fb5335d2dcfL,0x0000000000000177L }, + { 0x24bfe015658a4588L,0xbcf3af27ab32678aL,0x55e6b2e160abc5b8L, + 0x55fb213b917c8360L,0x9e981ed52f427c30L,0x43665885c4fab94cL, + 0xbf8864fac0afabe4L,0xcabd2939cee3a355L,0x00000000000000fcL } }, + /* 45 << 434 */ + { { 0x3501e6801439c78bL,0x45a176bc14dee0d4L,0x5d33c48ecdc93ed4L, + 0xca8d913e0f11febeL,0xf9553c9ae595b009L,0xc546040165fd9c88L, + 0x1d88fd1c067148d3L,0x05d5e73168c32d38L,0x00000000000000ebL }, + { 0x57df5f2ac01721c8L,0x46eea36794fc35b6L,0x1a80221d3532fe8dL, + 0x58a569de2fe6110bL,0xd58d99cb9d006486L,0x01a70beac7d58257L, + 0x0f7676b2d56eb44eL,0xb503c4212bb311c9L,0x00000000000001dbL } }, + /* 46 << 434 */ + { { 0x48b2eb5bb8f016ccL,0xe1f7acad2889fcbfL,0xe2fbd9c195de450fL, + 0xa2ae397601759f50L,0x03e182f9966abb7bL,0x6b87622f15f8b8a8L, + 0x8234b5a6559f8478L,0xc3eac3da12e79b2aL,0x000000000000014eL }, + { 0x98d557a4bf09ce09L,0xfe2e1a6e9b890fd4L,0x773b8fee056880b1L, + 0xb7df5bfa733d34d7L,0x9ae1466b7dc667edL,0x1f3c57ef1db24478L, + 0xcf7a87b31c706e65L,0x52bf0e2a08204e04L,0x0000000000000108L } }, + /* 47 << 434 */ + { { 0x12dcc5ed7de6a415L,0xc2d54417adf91247L,0xa5b6b79c9cd65815L, + 0xc89a400c59c0db1bL,0xb11b7522837951e2L,0xf50d64cba967c6d2L, + 0xed1de5b667ad5dbdL,0xb0188dfbdfdf950fL,0x000000000000006cL }, + { 0xe590422636ce4973L,0x62d15372ee9e7e8eL,0xe03a02160bc5bf10L, + 0x594b360ec424638cL,0x9631644a46e4980eL,0xb4c6d0935ce887f0L, + 0x34e073072af0a673L,0xd1c705e7cd22b648L,0x0000000000000076L } }, + /* 48 << 434 */ + { { 
0x83fe2ac97051ea31L,0xa76b3ac1b39476f3L,0xee45d2e03e2433edL, + 0x606db8a30d999a0bL,0x12be94ba99c5c5b2L,0xe2231745695b061fL, + 0x0aff87140392ef4cL,0xbdc82de6e03641b7L,0x0000000000000172L }, + { 0xc9a067a61e4ddb23L,0x924466e6c5fe49e1L,0x56c25e66498a09a8L, + 0x2ff93349603dd109L,0x6173e4ef0cfe1653L,0x486c08b8861e5694L, + 0x675b2d4e4460ca71L,0x9ac71cb2dd05403bL,0x00000000000001f0L } }, + /* 49 << 434 */ + { { 0x594bee43add1ffa3L,0x81b3f78384ba86a8L,0x4d4d314504753e0cL, + 0x86918b199618ea5dL,0x4342ac93de51e92cL,0xedc80f689d28bd5eL, + 0x4d3cffa667558269L,0x8d61908adbd3cef6L,0x000000000000011bL }, + { 0x4b24dc12ddfdca3dL,0x27f4baafe5dcba84L,0xf6a5bb67d4f24194L, + 0x704e87a577187a98L,0xd69baff3c81d6027L,0xf0695b948d86bf10L, + 0xb60137a1a2c69accL,0x0854af7a4dd76e60L,0x000000000000007dL } }, + /* 50 << 434 */ + { { 0x28c546ab91960c2fL,0x6c1382d2d67d35b5L,0xf1fd4ece5c68d954L, + 0x2dadd781a99cf73fL,0xa97a79671519360eL,0x87392a02f36e5f39L, + 0xd3ba240d4d75be7dL,0x1ec49cacecaf25eaL,0x0000000000000087L }, + { 0x8aa2388cf7daf8baL,0xf19bb702c9b04b01L,0xacc01044bc682431L, + 0x3d7bfffca43a6b52L,0x73468005d108c945L,0x6ca56166f402b740L, + 0xbda45ddecd486b57L,0x9f26b50036456623L,0x000000000000005aL } }, + /* 51 << 434 */ + { { 0x5248a930a805fa39L,0xafa9a623ee9049b6L,0x32d8272bf57b62caL, + 0xc57c6742ac33b6adL,0xe67e374781628ffbL,0x12efe0414055d283L, + 0x85c2388a6afc546bL,0x3e7454dd2325904fL,0x00000000000000dfL }, + { 0x27113ea3bad0b4a3L,0x23c3a6b6c7bb4233L,0xeaebb97bacb43960L, + 0x751bf53c12765e44L,0x5a5843830c3d1a27L,0xc6e8bc1baa75a625L, + 0x1cc8684bb84e4b2fL,0xcca40425c687f410L,0x000000000000017aL } }, + /* 52 << 434 */ + { { 0xeefc28baa0c707cbL,0x610a0bf5b5e8a4b1L,0xd74559f39b53d25eL, + 0xb987896d504c5e8bL,0xae65dd137b25a6b9L,0x967698fecf659983L, + 0xa10cf8fc302cb07bL,0xc116f1c94447f110L,0x000000000000019aL }, + { 0xca833c3f74b590eeL,0x6099ad10a90703dbL,0x1b05eb478dc0a094L, + 0x8e611a9ffd61f58bL,0x220f8e3cb6376b63L,0xd3ef40d06276c206L, + 
0xb496ed2c80be240cL,0xcc1e4fee4c62d68fL,0x00000000000001e9L } }, + /* 53 << 434 */ + { { 0x0aa9cafcfb53fdb6L,0x9bc4068864ad41f1L,0x89f2ed95361e5281L, + 0x93721ee2d3ebc5c5L,0x099676e1059cbaf0L,0x32034ec33ba1925fL, + 0xa372ccabe58c5408L,0x4198c3ba3a1925acL,0x00000000000000eaL }, + { 0xd573dd0472e39b81L,0xbd53680227fb43eeL,0xb4a9b13773ac68b2L, + 0xecc58184afc54cd5L,0x0f4ef62140216af3L,0x37a4d9e63ab7f116L, + 0x7c3b8e87fc8e3ff0L,0x7860d91b2112304fL,0x00000000000000f9L } }, + /* 54 << 434 */ + { { 0x8b8684e1d9f5da59L,0xc52ddfcb3bbccec8L,0xf37f477d0614edf9L, + 0xa6f89b1dfbf73c9bL,0x65e2b9fdbd39f9f3L,0x9b027ba9bd91c4ceL, + 0x4178ae818da1ecd5L,0xe926bc1bf8108614L,0x000000000000003dL }, + { 0x9b622c1d398d43a0L,0x56774237f050f68aL,0x599aa2b9c376824aL, + 0x8e56adb3e4f31fbeL,0x62af9ad61d87cfb6L,0x2340d1df8baee620L, + 0x378bbe281fd803a0L,0xfb8fee939543c614L,0x000000000000005aL } }, + /* 55 << 434 */ + { { 0x771a68f7d2aa178aL,0xb95ab9c4b8c86d0dL,0xc305c165d1780b7dL, + 0x495d129eebede15fL,0xf84a588398216869L,0xf5bf03808faecf27L, + 0x3a6f8a73c8709373L,0x49cc5d0f53b3905dL,0x0000000000000046L }, + { 0x170add44ffbb3294L,0x723db04c457403f2L,0x0f6c3750bc22011dL, + 0x56e0e96571df11c9L,0x9a9d1508fa898187L,0x35699de6bde2d196L, + 0x5c63eef632409db2L,0x4041e690bdd73fd7L,0x00000000000000ddL } }, + /* 56 << 434 */ + { { 0xbae333cec37cc3eeL,0xf3c270f736721328L,0x42ca9e1c3e762919L, + 0xe58208bda33ea0c4L,0x659c9e0588a736a2L,0xe4aafabff2ce2fd5L, + 0x6c42985d2fc82799L,0xa83e18ea7bcaca5dL,0x0000000000000074L }, + { 0xa94f77548e8fd835L,0x2d730134babc4dd1L,0xdf79c84dd9fd0a30L, + 0x642756577b1061cdL,0xa8a2a2dcdfb545c5L,0xbd5248e2b32e562cL, + 0x09c33229db76e132L,0x70f63a074ff616ffL,0x000000000000005fL } }, + /* 57 << 434 */ + { { 0xea1693d10689fb01L,0xeef647104d87521dL,0x382fc26cd4fe6b97L, + 0xc179b949f19e86b8L,0xe3084066a88d4985L,0x9e590678bf8727baL, + 0x23cf020a5de7c97aL,0xca99cdefd686a87bL,0x0000000000000183L }, + { 0xd268f2d96478339bL,0x54a2403255ea7d66L,0x85f864bdc95560c2L, + 
0x86abfd934597c282L,0x1279888901f25eebL,0x1a9d8482c2eb4653L, + 0xb1312836217a63faL,0xc33f92ee8f733178L,0x0000000000000196L } }, + /* 58 << 434 */ + { { 0xa0d7c4edaded1585L,0x8d4ea04dbf7fb3faL,0xb25ba60333e42c20L, + 0x660a0b5a73d754b5L,0xb84c5639c6a35d0fL,0x7efc0030cf59e3c0L, + 0x32417038868c3823L,0xd0e71cc934586091L,0x00000000000001bcL }, + { 0x9b28a972202e4521L,0x5846906ce6d90c80L,0xd3d25ef2b38d46cfL, + 0xcebe3581c209c445L,0xb36008a1d1a0e99fL,0x21f0df426da9da35L, + 0x5f5f088dcf6b4935L,0xc6eeb3a8447a39cbL,0x0000000000000187L } }, + /* 59 << 434 */ + { { 0x85ef589cda7fff0cL,0xbb7d2d187aaac2c9L,0x62da1c79861a01c1L, + 0x20c960dfa6a290c3L,0xe3e5fa87b841bb53L,0x56f776ed030b234cL, + 0xe3ea771922aee8c3L,0x562e9d08df60618eL,0x0000000000000003L }, + { 0xaa56fbb1be6ba82cL,0x052d23638f908eb6L,0xd0f896afd5cfa9deL, + 0x1c90749369f1787eL,0x634393c8e73e153eL,0x6d1a88b10383a7f4L, + 0x9e56e70c59aeb927L,0x43f825ccce34cb9fL,0x0000000000000096L } }, + /* 60 << 434 */ + { { 0x6a2d32b0f3338f86L,0x4a5c3db2c9710433L,0xd298f3b0a5f68f1aL, + 0xae78b804732d40d5L,0x860d74d8dc109412L,0x0f3d42ce2c898732L, + 0x63e5cb193d321599L,0x796c17cf1462a77eL,0x000000000000003cL }, + { 0xbb71e1b917837e4bL,0x9711dff889cf36deL,0xdf6c0a2ab1fd56b7L, + 0x6f07fdd1d7e8e866L,0xecfc1ba5b4f1bd81L,0x4ac6b88a29db023bL, + 0x2bb7f22a6de37b7cL,0x86e37d902c35fa00L,0x0000000000000113L } }, + /* 61 << 434 */ + { { 0x5126634734a859fcL,0xf9f2b2e478f47a8aL,0xd8391cff0044a94bL, + 0x6f2eb81f2357b297L,0x1c4f2bbbc7ef458bL,0x3a1d92225e46dc98L, + 0xf97b3e71727a61f8L,0x8edeab095546e3caL,0x000000000000014dL }, + { 0x8cc6143e4b4de1e6L,0x78fcfad7fc1524c6L,0xc805ef2343fca983L, + 0x37753cf32b7fb591L,0x8269ad972df94bfdL,0x2768ac2617382c88L, + 0x28767bc5589bd8f7L,0x7d96d75f12aecb9aL,0x00000000000000f6L } }, + /* 62 << 434 */ + { { 0x8460354fb970b064L,0x4da9baa59ad28208L,0x81f9b7fb546876e0L, + 0xee7368f787933e9fL,0x95f61bae3d91b713L,0x29ecf884ab29d102L, + 0x61a68fc2ba02eeb7L,0x2a62ecc38eecd5a5L,0x000000000000017cL }, + { 
0xcca8a64d65fc05daL,0x04e1b84433befb34L,0xc799d13c1aaaa85dL, + 0x431d1495bb40bffdL,0xfb991dce7dd23e02L,0x9944de58a4ce9e1dL, + 0xf5ca8246b3b4700eL,0xa2307905364d87f4L,0x000000000000015cL } }, + /* 63 << 434 */ + { { 0xfad3776ab2c1d16aL,0xd0885428474617b3L,0x09f73d4268c33a2dL, + 0x0a00131a4f77e376L,0x92cb3f114947663fL,0xbae96a84fed01f57L, + 0xb5b55e2bba697677L,0x6f1a684cc671ff81L,0x0000000000000088L }, + { 0x1f41fe2e1931053bL,0x9745971032a2249eL,0x180bcd1c3c403728L, + 0x422195462474f7fbL,0x7917cd1812f67c1cL,0xfab12214316ab5b2L, + 0x6415634ac6d0c5bdL,0x0ecd28b49e499fc3L,0x000000000000011eL } }, + /* 64 << 434 */ + { { 0xa0f946b34c7f31f1L,0x122b97ec004b810dL,0x98f876d1831a9cb2L, + 0x6690030aaebd922bL,0x12a235d1a21b0412L,0x5ced8104e097208fL, + 0x049b33ed49d32fb6L,0x2e88d76287b187ebL,0x000000000000007fL }, + { 0x08801d4eaeaaa0d2L,0x3b052b0a6fc2830aL,0x4e02318f6fef40c1L, + 0x5a24478a06f38abbL,0xc90c53926dedd4f3L,0x402e4a9eff07397bL, + 0xde13d7bff1bdec37L,0x8a363ccb727eb837L,0x000000000000014dL } }, + /* 0 << 441 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 441 */ + { { 0xa749f2661accb5d0L,0x8ee12c7ebcc9460eL,0xf3a52fdf3d25e73cL, + 0x187bbe7100c5540fL,0xcdcc3cc1561beb89L,0xe54df2b48eacfcabL, + 0xbe6d1874cda02530L,0xbb717f8fd072a684L,0x00000000000000bbL }, + { 0x79de2d6504bd7407L,0xd8ae0921ccea6b74L,0x358cf798695de295L, + 0x004e73b2b81c02a2L,0xd252441e9e6caea2L,0x557f228caaaf568bL, + 0x6f92ec1d357d9e37L,0x31efd9bfe91fd306L,0x00000000000000dfL } }, + /* 2 << 441 */ + { { 0x650e3a2694a7efd8L,0x0e18083dc739b54fL,0x4ef3232e9367e306L, + 0x2de3d70cf0838edbL,0x1364e843ca8715d2L,0x854dca361c25b1c8L, + 0x73c011bcbc117070L,0xe83bd36f051286e9L,0x000000000000019bL }, + { 0xf1226611c0f59bfdL,0xb910f5b64287520cL,0xa408790bffcdbb02L, + 0x9d56f34d1a0bc553L,0x1530c726ce9a67a7L,0x96f5fa25dce794f5L, + 0xd5a224c8077377aaL,0x2e9d8e98d016e05dL,0x0000000000000093L } }, + /* 3 << 441 */ + { { 
0x3e884116e71bedebL,0xc43114f0d8e683d2L,0xc65277b4709db235L, + 0x135e623363401611L,0x5e6cc4ca4ceccd3cL,0x9d481fa0f73e6e64L, + 0xd52236235c253aecL,0xd60d849991f70b82L,0x00000000000001c4L }, + { 0x5db01dd6632f8eb2L,0xc4695bbfa948669bL,0x143ae7567fef1ac0L, + 0x3958cd3f21487ff3L,0x00c79f337da2ec59L,0xce54451e07f9a8bdL, + 0xfc1017fb62f7eea0L,0x6bb256b462f0f204L,0x0000000000000008L } }, + /* 4 << 441 */ + { { 0x3e9a969a777a2dd1L,0xa928fca6f0b88aa3L,0x7282de355f56eec8L, + 0x6fc69cd1076833b3L,0xb082ae0411751566L,0x09a31956ceee94fcL, + 0x6d3a6221158a2cb5L,0xb29c6ae7788752efL,0x0000000000000105L }, + { 0x44ee879abd125523L,0x8297eaa04e8990b5L,0x9deee7c4d7723740L, + 0x3c6037179a8bf178L,0xa851306b1207191fL,0x13eee334bebcb461L, + 0xc13d695eb8cce34cL,0xf3f594a50ff4755bL,0x0000000000000196L } }, + /* 5 << 441 */ + { { 0xd370ca154611d4ddL,0xdca296e58f40e4ecL,0xc2c0a54fbb7413e0L, + 0x99064624c90f6194L,0xfa357ce2798e03acL,0xf782d7099c6ba76fL, + 0xc3718575129fbaf7L,0x9275ddcf09ad3f74L,0x000000000000016aL }, + { 0x6ee77b4cc4919031L,0xb09eec783947c6c5L,0xbd8f2eb86a83f5bcL, + 0x2584dfcffbfd128eL,0x11f97bd08c68c3adL,0x157420d05f2133d1L, + 0x8f3c1815d1dd4835L,0x11e09e8f326d9859L,0x000000000000015dL } }, + /* 6 << 441 */ + { { 0xbe0ae36cd038af41L,0xc3e663659074c082L,0xf3a754972111b39fL, + 0x853840556be5e224L,0xe6a841a8fdc29253L,0x6f242df76886082cL, + 0x4de2664606880770L,0xa8d7ab3e57695441L,0x0000000000000105L }, + { 0x452fa47042fb45c1L,0xcd54e43102d65857L,0xf6439594c6aa1fcaL, + 0x910ff5bfca2c8838L,0x5db4b1df093db021L,0xc6c529725d06ce9fL, + 0xed40a0482981de06L,0x394308e642e75708L,0x00000000000000a5L } }, + /* 7 << 441 */ + { { 0x1140d1b13f7e1b85L,0xe5d9542e558f5a89L,0xf01590081347e2eeL, + 0xf4140adaa83cd3eeL,0x1388e29d1f9958dfL,0x177f13dff1d926eaL, + 0x5aac4c2037209003L,0x46bed0a6cb9e0456L,0x0000000000000165L }, + { 0x635535b494312d7aL,0x662149bfa396a95dL,0xb23c59dd4348ab81L, + 0xa059d555cf24fb7dL,0x3dc39dbfbe9011a6L,0x7e11b9d98d1ed5a6L, + 
0xde503b26a1fb4b12L,0x04d12ec729aa2854L,0x00000000000000ceL } }, + /* 8 << 441 */ + { { 0xbf07e90d8114885dL,0x76d513b29f39a562L,0xd98ae4f77e5446a1L, + 0x7e3314e96264b6c0L,0xe634dee76f3a7560L,0x1dfa2392a08d539eL, + 0x45764812bdae3c53L,0x858b836fd824e9acL,0x000000000000003eL }, + { 0x9bdd0365e324ea11L,0x9625d01b1117cdc2L,0x491ea6427f6c259cL, + 0x61b79486125826f0L,0x573debc818a56486L,0xe224dd3b0d31132aL, + 0x075c3bf780be8263L,0x0eced67ab84a07faL,0x000000000000010eL } }, + /* 9 << 441 */ + { { 0xe937531c77ee5e92L,0x2a248ed6cde38019L,0x9d56715bf16bcb1bL, + 0xb232a0b13682fee6L,0x50adcfb76062ba66L,0x853ecaafc15a8617L, + 0xe2ae82eafbf86cb2L,0x2387b9e996e5d5d9L,0x0000000000000126L }, + { 0x2c949e78d1ad4cedL,0x2dbb570f7e1df51aL,0xb3b00e4a1f09cb9dL, + 0xd50d45499ddc5774L,0x22bc724199ed1407L,0x0c6b49fea2462565L, + 0xb7ea7261ad2bb7a2L,0x14edf03146fa95baL,0x00000000000000aaL } }, + /* 10 << 441 */ + { { 0x0c0d310cf9039efdL,0x954127e0be623269L,0x2c8de379ad37d5f1L, + 0xc88058c24ae1e101L,0x953cced393612558L,0x6f795799562ff195L, + 0x04b43800f75dc91fL,0xc388110878a305a3L,0x0000000000000023L }, + { 0x3a910a199d0e636bL,0x8fd231dea9233a03L,0x61c5a3511799791bL, + 0x0f6e33cf24e1aa4aL,0x99fcbe6d24c1749fL,0x6d6dc4ff29e84e10L, + 0xdfd3919445a30b2cL,0x551811cf3d438976L,0x0000000000000042L } }, + /* 11 << 441 */ + { { 0x19fa754692c76a06L,0x09b44b739cdf4192L,0xdd474e2429a471ffL, + 0xb33d4b633c1f2efbL,0x352bb3c87b6ba5bbL,0x33be999f7a25c837L, + 0x455849c9708753a5L,0x4bebf7228c356712L,0x0000000000000082L }, + { 0x491c083d1c932272L,0x53e741ac7902ce78L,0x3e913b13cc553480L, + 0x28d36b5162b46cafL,0xdaa8df3ec5c92702L,0x1b8ce0face5b521eL, + 0xc3a12ef4346a9c86L,0xa88531c1af543e95L,0x00000000000001b4L } }, + /* 12 << 441 */ + { { 0x225c4d89218083c9L,0xc8ca7c0bd951e863L,0xc48c23d1ae8d2aedL, + 0x2e5a185d12f3fdbfL,0xd459188cb12bf51fL,0x42707a3376a84f56L, + 0xe647d5ba0c5646f1L,0xa18986803140e000L,0x0000000000000132L }, + { 0xdf3b68773acdacf7L,0x2e2ce1ead81dab2fL,0xd3a3397dffa2aa06L, + 
0xcc9192cb2a580c69L,0xc171c2ba6291b080L,0xa886917af908ea1eL, + 0xd49b41cab0ee690eL,0x53d75ee28a4699a0L,0x0000000000000055L } }, + /* 13 << 441 */ + { { 0xad0120df46dc9f05L,0x9dece9ccd80e1f06L,0xb55d9ea96661d61aL, + 0x3bcc804d06e86dc4L,0x711a39a0a1fd8a7fL,0x5dc5752e514bb596L, + 0xbd07dc9cdc01996bL,0x745d1b95166bb5c5L,0x000000000000009cL }, + { 0x896d26ee45568558L,0x995524f1c875e781L,0xdd27fb7f85ae1606L, + 0x6d2a995ab6ce93a4L,0xfdde5c2a6bfab47aL,0x6cf168956a8f8625L, + 0xf36f40b9c9697605L,0xd85528d7a2ac13adL,0x0000000000000123L } }, + /* 14 << 441 */ + { { 0x245c6974e996e859L,0x18140823357fffc4L,0xbbeaf2d9268c0e61L, + 0x84c491bf1ec3a60aL,0x92519cd3932c2143L,0x2f8026a50e9c23bbL, + 0xb3ea1321fad88c26L,0x253e9873ca612741L,0x00000000000001afL }, + { 0x96ac89af04eb167cL,0x1d51ede280839911L,0x0ea85b4126be1ea8L, + 0x2d788d0d3e5c6fd0L,0xc0565fec84fef7ffL,0x605449368a5337f4L, + 0x8eebabc69bfcd3e4L,0x0e0fb4da4a66bbf1L,0x0000000000000059L } }, + /* 15 << 441 */ + { { 0xd3bf69da0970051dL,0x93f43d8c436c6222L,0xe07658e68ef8f7aaL, + 0x0644fc85e0da692dL,0x5b21003afa44ed0cL,0x7a63520b55fd30cdL, + 0x2e494a11c200c9f1L,0xa3c6951d20e8ddaaL,0x000000000000007bL }, + { 0x4af75dab2fc74e40L,0x1413b04a35e18492L,0x3a5ca7ae646c012aL, + 0xdf45e095444c9da3L,0x28ad4ce37723586cL,0xe7d27e128baf9d42L, + 0x41f7f06f8ec93610L,0x0636c501c7c2f47bL,0x000000000000000aL } }, + /* 16 << 441 */ + { { 0x486d3c489c4cd8ddL,0x2ceed6f8f7e8160bL,0xfb38315da76fc3c5L, + 0x521ac7bb7863c459L,0x17252331576f783dL,0xf0fcaaf23e6d4bcaL, + 0x8db1cffce6f8e385L,0x2e2ef7f478c49eebL,0x000000000000001aL }, + { 0xa3c3c341f690d5aaL,0xc14cffb041e05923L,0x2c7e4bcba2c02f22L, + 0x79352516c38aafe2L,0x67bf3709daf3e5f9L,0xfd7c30a974d2aad4L, + 0xb46886878b971591L,0x7adf565ab5f34fd8L,0x0000000000000132L } }, + /* 17 << 441 */ + { { 0xc6f3ddbf3ee1b1b6L,0x3ae199de367775f7L,0x869e3f277b5c2062L, + 0x01adf55de699e103L,0x6fa9b9ca3efcf30eL,0x60babac333616bd0L, + 0x619cb20628f783fbL,0x0d5cc6775ed241ffL,0x0000000000000124L }, + { 
0x7906ac2d14876366L,0x5182e7f2bc304f89L,0x2252436f2139416bL, + 0x80955cfe62bb6596L,0xa097c5da2ccc76c8L,0xbe48d244b6a4de18L, + 0x4a6ab624ffe47914L,0xca10c376f974a0a3L,0x00000000000001f9L } }, + /* 18 << 441 */ + { { 0x8fbb3cd7496cb13eL,0x107cdb9a258b584bL,0x5d0e6918a848ca32L, + 0x73891c7cbcd21ce5L,0x3ea57b2c791d65dcL,0x6c5e0cd6d9b5669dL, + 0xd81229f8f1321da6L,0x8b9972dfb0b17706L,0x00000000000001e3L }, + { 0x8939d7a7d6e10f59L,0xe160bd16cbbe5271L,0x3348b4f04fbb8a8eL, + 0x147e36393fd01b59L,0xbcc1210d7ee93c4fL,0x949ce71dfc4dd8e6L, + 0x1436c0f549890833L,0x4cdcee717273ac5fL,0x000000000000019bL } }, + /* 19 << 441 */ + { { 0x512a10934e9a039aL,0xb6470fb2e0c95392L,0x7dba72fa82400a70L, + 0xd3dd821930685212L,0xcbfb1de9ac7a38d6L,0x4146c087c670cf2cL, + 0xee1e0ca179e513c0L,0x1b5015d74d53c35eL,0x00000000000001fdL }, + { 0x601cc1e63cf7333cL,0xf1f4c134f785b4caL,0xbd1aa34463f3fdacL, + 0x221f0a2573056a6aL,0x197d6ab8e6aba28aL,0xbc0019e0a3d17004L, + 0xd18a3b87825d6d76L,0x5178300e5f4e4796L,0x0000000000000072L } }, + /* 20 << 441 */ + { { 0xd01fff9dcc045310L,0x937dcc6f352c1840L,0xb17d2b6f355c78afL, + 0x42e3d467a1b4c507L,0x9007d976d1ec1c6cL,0x04e44bd51c604ac7L, + 0xac5f6d35f3c28e52L,0x6f9382f57c78a5e7L,0x0000000000000197L }, + { 0x8f6894774e717f2cL,0xc59589f0279c36e0L,0xe4b11ec19dac55aaL, + 0x37ecce34b2301ce6L,0x396745a27ab894dcL,0xbae3de55fcf34e70L, + 0x3f670dd62158968bL,0x9b8ef31efc1f9ab2L,0x00000000000001b0L } }, + /* 21 << 441 */ + { { 0x89402e7a36636861L,0x0e121ba28da879c7L,0x824ec11d9caa72c4L, + 0xb03d625b90ee0713L,0x16b477525af76e3dL,0x314d2daffc116c0dL, + 0x00e42bbcca591142L,0xebd124bc2a28d332L,0x00000000000000c7L }, + { 0xfbb0938305f3a77bL,0x5478a7d2fa1f96c8L,0x2f04b86dcbab30deL, + 0xaf1c81f41d38bcaeL,0x873794acb0e3391eL,0xe5234d10d1ba629aL, + 0x7854ab94fdb4901fL,0x447d05a5f48ba9e2L,0x00000000000001bdL } }, + /* 22 << 441 */ + { { 0xb3dfccf01d967d35L,0xa68315ea3f71108fL,0x928e15c2837a8c3dL, + 0x6f2114c3085d00efL,0x77d86e5e1a86a583L,0x3d97e64935f597cbL, + 
0x9c729cbcfb5a4d92L,0x7c99562d7d946695L,0x000000000000011eL }, + { 0x41632d38aabbfae3L,0x48793078c835493cL,0x8e90be311d5f8e61L, + 0xa09b5add33bda788L,0x3dd23a2e80c7b31fL,0x9a46ee40f7657031L, + 0x7b4d15e30b070becL,0xf2071f44878a2010L,0x00000000000001eaL } }, + /* 23 << 441 */ + { { 0x06cfb330d10147efL,0xe756da6b749d4ad2L,0xf6fcb9200f956530L, + 0x7c3bdafe04cdc530L,0xc89aae334e3f46f9L,0x0dce363962eaae49L, + 0x267909518d9133a2L,0x0f27bbe5e46de833L,0x0000000000000087L }, + { 0x8cf40e37bfe09ceaL,0x09cc65b1e1f4df2dL,0xf964911777cb793aL, + 0x78e249ee3fbc5ea1L,0x305049579084b235L,0xc008de498a39b17fL, + 0x6e0117bc4ac4f445L,0x34fa696658aac50fL,0x000000000000001bL } }, + /* 24 << 441 */ + { { 0x82e9010daffbd971L,0xab6fea0bc7ac3199L,0x8d38894050383d27L, + 0xc37ab3b3be775398L,0x8ef10af154bf74aaL,0xa1c39ed452aeaf9bL, + 0xcd2b6ee670f42d7aL,0xcb57643738a53a34L,0x0000000000000087L }, + { 0x5a1416996f80e03fL,0x0d7a7ab1408678e3L,0x33db44d87d279aa8L, + 0x25e82c9032a33193L,0xc68f43737fa96201L,0x4e2fbe8a1c15c8c4L, + 0x5b5ce8055429a403L,0x1dc2f304f6543e19L,0x00000000000000d0L } }, + /* 25 << 441 */ + { { 0xfd27036f6249d4a0L,0x80732c4c05aaaf64L,0x13a02ccb3aa41caeL, + 0xd2d117204bac8ecfL,0x33b4f7e4e0b47969L,0xe9719c3634ffcd7eL, + 0x0b5584bef041a2bcL,0xad35638388240cd1L,0x000000000000008cL }, + { 0x4597cab65d253821L,0x136b4eea8621290eL,0x1a053fee74cfbd39L, + 0x38390a30e8ce29f9L,0x85008d8de17589d6L,0xc9a9b0bf3a55618cL, + 0x21247a63adcf68f9L,0x6353a40ac8eaa840L,0x0000000000000072L } }, + /* 26 << 441 */ + { { 0x2a84edeb611dbd82L,0x928512d7250d7b4cL,0x26101f84bfe2aa8bL, + 0x2e79ff22920044b1L,0x2e23d451e99667cbL,0xfef41352f63db866L, + 0x17db59799385090bL,0xee24e159c43db739L,0x00000000000000feL }, + { 0x3a23eb35c2fc1ff6L,0x2ecd2057897f8b11L,0x0b94433b2cfc90d8L, + 0x625b69a8345d638fL,0x04830925115fbab8L,0xc73ec42631db6ec8L, + 0x7bff020d32aa0c57L,0xf097aa60d3ae8c0cL,0x0000000000000001L } }, + /* 27 << 441 */ + { { 0x8261b41492e7787cL,0x46a920b300919d6eL,0x0a5ac766ca0174d7L, + 
0x2dd5584380d01c6dL,0x27104845253c74dbL,0x2e86fc89511c661cL, + 0xc24fe84d2d4623ccL,0xfed49394f8d720f0L,0x0000000000000084L }, + { 0xe3246ec04a711e6dL,0x5aaec97fcb25754fL,0x33571e67eb2598fcL, + 0x90f4667029ad653eL,0x3885ac625aacd507L,0x9c7106c4000bf4aeL, + 0xdfd9bec142352fbeL,0x0cd94630dcd3b524L,0x000000000000003eL } }, + /* 28 << 441 */ + { { 0x265e7e144a61c8cfL,0x7108ac8e9c01168dL,0x8c00c591c1db0c2fL, + 0x4fd76164c7eb2880L,0xee207ae8072c9afeL,0xf291ac662af4f872L, + 0xd556354e38e6ceb5L,0xd1ff944a901b9c88L,0x00000000000001a2L }, + { 0xf058a1e5ea374b44L,0xcdd1f0b7d282af39L,0x5095378c3ba681b3L, + 0x2da2db904e8befbfL,0xd73223a361edb289L,0x305ec380221d18bcL, + 0x5b191c1ca45ed6b2L,0xf28be70a0e63d2a1L,0x000000000000005eL } }, + /* 29 << 441 */ + { { 0x293d2e8a6dd376faL,0x400a186e9918b3c5L,0x693265b7a7483fc8L, + 0x0f38d5e1dba14394L,0xfcad07b3d554e653L,0x9536e6a909d37398L, + 0x0c20469422b5fb0cL,0x4d22c2e77593be0cL,0x000000000000016cL }, + { 0x320a27ff4e8770f9L,0x341f5628e9b27b2dL,0x13c713b8d7797f73L, + 0xac4636d2bd4289a6L,0xc6cd28cf096621efL,0xe866b2e9f1dddce5L, + 0x36872e001add8506L,0x7a82493c1835b90fL,0x00000000000001a0L } }, + /* 30 << 441 */ + { { 0x0efb34abc520c268L,0x2ef017b00b3ae7a8L,0xd8ba2baef1f916ceL, + 0x495b8a2801a2e8bbL,0x76c86a31e37fe26bL,0xbdddbdcd2787c00eL, + 0xc2d98c35f431de88L,0x7642a3382953f309L,0x0000000000000112L }, + { 0x10436513df59f330L,0x2a63f6ca21d133ffL,0x01f2b7e8d8e16c98L, + 0xacab87a5b3b1c312L,0xff9c344c37aec499L,0xabe46f7b4afed453L, + 0x826c9894c1feed8cL,0xb89d0a04aa543610L,0x00000000000000fdL } }, + /* 31 << 441 */ + { { 0x85d18171c9e4a7cbL,0x54d525b7d2a22758L,0x1abc342ccd696c36L, + 0x812a42651c781fa1L,0xbcbc450635f8c1b1L,0x97c302369fb7c9d6L, + 0x705db3f328bb1e6fL,0x24bdcfb4a6df36e9L,0x0000000000000107L }, + { 0x09eaef27e74ec8e7L,0x367427963e1028f1L,0x6e525404023c3e4cL, + 0x0c8eca526ede11eeL,0x502e86b6184a8d28L,0xcf8ba9d9125dce7fL, + 0xd4e534c610b11eccL,0xc9a33890effe448bL,0x0000000000000116L } }, + /* 32 << 441 */ + { { 
0x074092b0518c2b4eL,0x4efd0358226849ecL,0x384ec468c8582d0fL, + 0x6348e93a1b6530ecL,0x752f051586723e53L,0xc0124a0a18b6d228L, + 0x8e61db8af16144b5L,0xa3b3bcdbc57ad828L,0x0000000000000096L }, + { 0x7b5eaac0d5d72677L,0xc6a4fd2fc826acdcL,0x06f1269583835c84L, + 0x287eb11866c70474L,0x824e4c576fb32f64L,0x6aab90483326b6d2L, + 0xefef96e49dddf339L,0x8e113990df13705dL,0x00000000000000feL } }, + /* 33 << 441 */ + { { 0xe2accf1d679c7a0bL,0xd91d0623370058d5L,0x838dfaf51c44effbL, + 0x478d6317c891c192L,0x9a1d69833bcdbdf2L,0xa2adcb29206a78daL, + 0xf032d6439ecf90b5L,0x998eeb68b6f4b66aL,0x00000000000000ffL }, + { 0x3ce5d445246d19c5L,0xafeef35bf2be8053L,0x1cd93d149c3354f6L, + 0xab8d758584c1d532L,0x67887e7fd06af98bL,0xdae0dd587ecc066eL, + 0x37f0c7cd24990908L,0x2a9a78c654a855f5L,0x000000000000017aL } }, + /* 34 << 441 */ + { { 0x14518e8d2e7f1be7L,0xddf58460b39604b7L,0xb9b7abcc97a83cffL, + 0xc23ed4b77e595bfcL,0xb83200b68301e5caL,0x593a1e9dc5460472L, + 0x344bb417a4aa1ee2L,0x7e005a7b53932656L,0x00000000000000c0L }, + { 0x0d7d26803f56e1f2L,0x22f567b107fff8aeL,0x99b49f32be054a82L, + 0xa94832d6275528d9L,0x997fdac4842ed9cdL,0x476353ac85fd9ec5L, + 0x515f0503ca503c68L,0x9571eec431a59bf2L,0x000000000000001cL } }, + /* 35 << 441 */ + { { 0xf5b648deca4167daL,0x3f3046dd2c8f0e2eL,0xc4e0c6b4038d24c8L, + 0x3c16985789b7da6bL,0x4d3649863cfe2320L,0x2fe23040c895aad8L, + 0x786f1a119f4272abL,0xf91aa91a9fb42f8bL,0x00000000000001e9L }, + { 0x93e6c70ea114a55bL,0xa6a07aaaf46f5649L,0x3ac845724b626a02L, + 0x63d936001650bd78L,0x80ef98cb58064535L,0x3994f55051c8d7b8L, + 0xdac64abc01b4971bL,0x1165fb7be97d3193L,0x0000000000000051L } }, + /* 36 << 441 */ + { { 0x79f135f005771675L,0x398910e27008ba42L,0x72f9a899e2cfb304L, + 0x0e7448ed51bf359cL,0x814a9f6453d82f6dL,0x02cc1df8cc0bd057L, + 0x03a77c0a754becd3L,0xde0385b37427d79eL,0x000000000000013fL }, + { 0xc3aedfb3b5cf64bcL,0xba344d912f11967eL,0xd269443db9dfc1a2L, + 0x1ab57001a5a70140L,0x414cf58ef408fe92L,0x7b9adfb6d890280eL, + 
0x705bb16539242a3bL,0x66db67486bad4596L,0x0000000000000150L } }, + /* 37 << 441 */ + { { 0x2c3cd5a0108c9893L,0x8a240794bac6cc23L,0xaeeb36df823fe6fbL, + 0x9c2476d068aab363L,0x8b9d9703c0cc02fdL,0xa8e483fdb8d2a4ccL, + 0x0e3cb987fb774e3bL,0xedd76b875f1f80c4L,0x0000000000000036L }, + { 0xd7ee3e4719615126L,0x2afa7e8df5547692L,0xe7d92c2f2133f245L, + 0x3005e6e925c73cdbL,0x3efc8169ab362edcL,0xdb546c5d06b3dc8fL, + 0xcb75f0d88067855aL,0x03189f39698a4373L,0x000000000000010aL } }, + /* 38 << 441 */ + { { 0x77f6a34facbfb019L,0x0bd1100e4518a2ccL,0x96b927d7ca8c27e2L, + 0x5ada2880718c432bL,0x09765ed620641c90L,0x6e516e8b3f7090c3L, + 0xb27c6c5f5bbe4811L,0x0268577d3b1eab5aL,0x0000000000000187L }, + { 0x8da5cea7c1571bf7L,0x6c28d100cb30dfe8L,0x94f462ac40839b28L, + 0x84e174469893e5e6L,0x632d3f321f4e932cL,0xa791cf199cb57b35L, + 0x17cfc8606d58252aL,0x8b93b8cf95d09a97L,0x00000000000001b4L } }, + /* 39 << 441 */ + { { 0x3406f11d524f9da8L,0x162475082c9d9f83L,0xcd6306049ad35aecL, + 0x4a06fd2a9377e247L,0xaedf8263836a2ea5L,0x2e23b143be771b59L, + 0x53a8aebc67330056L,0x6a01eb9620b8fbd7L,0x0000000000000164L }, + { 0xac3ffdedaea4e0b8L,0x15c59c69ad153673L,0xcee2f459856f7af1L, + 0x654725f8f84e2d29L,0x36ce53ad68875b2fL,0x5fecc9d05e8dd247L, + 0x9fd2d8b73100f3caL,0xcd28d36c030be688L,0x000000000000001fL } }, + /* 40 << 441 */ + { { 0x75605b034319c263L,0x887fdfbb9e4740e2L,0xde488a31d43f9b89L, + 0xb0e691668e84f79cL,0x8ad2cf6570a03015L,0x20c0c8a49151cb55L, + 0x9c4d58100538315fL,0x18b7eb2712204fa8L,0x00000000000001c0L }, + { 0x2701f40ccd3d6255L,0xb6302374684db0fbL,0x1934bd385986c5a4L, + 0x277ca623ac3eb927L,0x1f8e83f33c9260caL,0x1d42b0b1cdd488c6L, + 0x03dd506202069bedL,0x1edf5be80713fb4cL,0x000000000000012dL } }, + /* 41 << 441 */ + { { 0xbd340554f68cd488L,0x5ba629fe4c4e9de7L,0xbf0904b5598a592aL, + 0x090d3b7797f22215L,0x35271f6f32f10ce9L,0x5ba1143f4ee5ff39L, + 0xeca98e4948f5e741L,0x312c71324d4fc387L,0x000000000000005dL }, + { 0x9a42c0a19f83c1ceL,0x070c9ff00c8a3828L,0x8c0ec42d1a0dfae4L, + 
0xeb0739df69b7ea10L,0x19e3841e3fda056eL,0x967531161e991eabL, + 0xf304dfd98ce9c7bcL,0x3ef71431a70d2876L,0x0000000000000002L } }, + /* 42 << 441 */ + { { 0x1b85459bf86e2a23L,0xd7b46e148126f294L,0x547e1e6613227f84L, + 0xe520b291a8600d75L,0x8836425dca9aefb9L,0xfb12993c3089b09eL, + 0x38e644f9963654d8L,0x115bfcb733b309adL,0x00000000000000e1L }, + { 0xed33a090771004f4L,0xc97174545f405e28L,0x055b8e0e983ff84eL, + 0x43b21455945a54afL,0xf5ece9981da15a4cL,0x0135baaddbb0512bL, + 0x09832d2c97dbecc2L,0x142102fad396c4e1L,0x0000000000000022L } }, + /* 43 << 441 */ + { { 0x56c6e12ff218d75eL,0x82ae876717040ba5L,0xde953898ec9ae18dL, + 0x74801498c973dc1aL,0xb8fe18b74ddfbe5eL,0xcc2fabfc221a12f0L, + 0xdf3d3ce3095717b2L,0x063315b301674069L,0x0000000000000166L }, + { 0xbb0b8502a5d6015fL,0x0319da6b3267e7eeL,0x8a3f055b330a7d42L, + 0x24633fbb578d3e0fL,0x42b3d57732d001e9L,0x6b4bd31841c3de6fL, + 0xf67e14de7674f280L,0x872c31500945d653L,0x0000000000000113L } }, + /* 44 << 441 */ + { { 0x79dc032fd1ddcb83L,0x9502a00f09f7b95bL,0xcea6cdc8719ad453L, + 0xe631629a13c95a17L,0x6e0bd6783e242b36L,0xd5a4ce6070adda24L, + 0xeb16526ecf9c3e3bL,0x6b56885de376134dL,0x0000000000000046L }, + { 0xcefbb6e0ee1a7e20L,0x55931c877bbba16fL,0xbef858780527496bL, + 0xefe470f2fc66b0ebL,0x10170d9f357e9f6aL,0xdc427c1534bcabf8L, + 0xa2fafe51543ad6c4L,0x9a3ca5f80bac368fL,0x000000000000003aL } }, + /* 45 << 441 */ + { { 0x6f7e38cb4a64ba29L,0x0274530761def10bL,0x4a7875c1777d5a43L, + 0xe8288a8a30046b61L,0xcd56398ef4e306c4L,0x13546b98f0619a51L, + 0x7d5b477d91c8e36eL,0xad040ea782ee4243L,0x00000000000001b4L }, + { 0x3bd14037cebf5073L,0xc9278b5deb3e67f6L,0x0a6920c3c29f3e6eL, + 0x98866c9e93420540L,0xfd3ad7f76b32119dL,0x1a6a91e1261778f9L, + 0xa97267d7a571a92dL,0x8b2ba189dc9a6d11L,0x00000000000000a7L } }, + /* 46 << 441 */ + { { 0x8d51e510e5ca5d70L,0x2d5791140c52c1b5L,0x82eb4358c0cb0f63L, + 0x8f2f681264a008a8L,0x58a8e17648b6cb17L,0xa9471be2b8a072d7L, + 0xbacadbb5268eb661L,0xd461695702720623L,0x000000000000006aL }, + { 
0x1087e596bf609130L,0xcfdadd188556e863L,0x5352546b03f39394L, + 0xb427f549781817f6L,0x67901c47b7016c1eL,0xfa4bdbad16c8cb78L, + 0x17210e07e949f687L,0xfa9d5907476f0c79L,0x0000000000000035L } }, + /* 47 << 441 */ + { { 0x328e727f9b8c2326L,0x7012c2200d0a3acbL,0x282ccb9b33f31c09L, + 0x90c088dfe8610f6aL,0xda4af662c6723acaL,0x60a4df7eb50061c2L, + 0xc523af09bf77cd76L,0x58c1d1d6440105b3L,0x0000000000000048L }, + { 0x608a3ccb2ebf4a7cL,0xdc6f70652e90faefL,0x5af50c8330229489L, + 0x2ae1f05a455c5d97L,0x04e6c953ef78a185L,0x64bfebe30e773105L, + 0xb4e9e049b897d690L,0x324bccaf6141dd22L,0x00000000000001edL } }, + /* 48 << 441 */ + { { 0x3fe0adaca4c2a3f0L,0xe8679478929e1f33L,0x4779b883705b8214L, + 0x00776735e23c0cffL,0xf663f8158f766bd9L,0xaab22b6bc30b548aL, + 0x089032b0d56597daL,0xd72918f251d92e66L,0x000000000000004cL }, + { 0x152af75d026ccd7fL,0x9e12b11ddc3e849eL,0x5d6f7b8664244b05L, + 0xbc9c468b1d80629bL,0xa14ed1a1f2f109c9L,0x49e6d42cab8403f8L, + 0xbfe90229e34a6f68L,0xecec84484234d819L,0x0000000000000101L } }, + /* 49 << 441 */ + { { 0x35fe58b55f58fe63L,0xd274d4931f30f4f7L,0xc54580b42d20dcbbL, + 0x8a45c948af35249fL,0x7b282280b07c25e0L,0xae3d9c8104da51d4L, + 0xf0c972129f58356aL,0xaf7b07f069e61c0fL,0x00000000000001b3L }, + { 0x194629861f68b78cL,0x6f8829503105a795L,0xfbd30e3501f044eaL, + 0x6df976b2f89de7f1L,0x83d9fcee5548ff7aL,0x45975517820607e9L, + 0xd30cfc72254a1342L,0x9886898289c44b4fL,0x00000000000000a5L } }, + /* 50 << 441 */ + { { 0x500057ba7e564993L,0xd7526cfb59e53d40L,0xb633fe53c5af2433L, + 0x18e8adb2dc8a9738L,0x6f81b8dc6b666052L,0x1d2f08b8451c8fa7L, + 0x7bfdc1f3d8f33d9fL,0xd07b2ed6d86f41b4L,0x0000000000000005L }, + { 0xc652298baaf2a31cL,0x9bae8c281463ddaeL,0x54a4159ae1af0d62L, + 0x61b847dae074c303L,0xde70d48e4f1a52bbL,0x0b208a372feb1c68L, + 0xc2b2b3a4963d5fd7L,0x05ca95d478706fffL,0x000000000000003fL } }, + /* 51 << 441 */ + { { 0x7a722f7f0095970cL,0xc5f0befac60f55dbL,0xc5afa461df29f5ecL, + 0xf01bbf1161ac1f57L,0xdc6ec1e80bc3b86dL,0x321a34a437f04963L, + 
0xdb0512a25c6ed011L,0xa104cdabac1a345bL,0x000000000000003aL }, + { 0xe9a1eca87437f0aeL,0x54ef25424d949bf6L,0xca3300f0039a9770L, + 0x648c20a304bdae20L,0x3655df9fbeb0d46eL,0xfcf7a10c95f888e5L, + 0x1e19fef16e669132L,0xced02e38218c93bdL,0x00000000000001d8L } }, + /* 52 << 441 */ + { { 0x366152e3aa65ddbfL,0xfc53f8af6a5fe671L,0xc7cad59195c7f6abL, + 0xe9c3b6d552052ed7L,0x31c94766e4b302ccL,0x2bcb7ddf7477506eL, + 0x0cfae42e86ee3dd5L,0x1de11cf355923fa9L,0x00000000000001f3L }, + { 0x43018d98df6d337aL,0x1f679cebc484673bL,0x341c38cc6acce244L, + 0xa732e5782edf01b3L,0x8d0b29601551ae1bL,0x0dbaec30509d1fcfL, + 0x5655b3dc5c9679abL,0x45b3c31b63161444L,0x00000000000000f3L } }, + /* 53 << 441 */ + { { 0xc4ec1f1acc4d57ecL,0x7c006c3aa3f8581eL,0x94f683110ccebe4fL, + 0xfece2dbd68be3d0aL,0x51affa5406d27f4bL,0x3bb8b836fbec32aaL, + 0x908bbbeab2a0e297L,0xcb7ebbf201196d89L,0x0000000000000078L }, + { 0x0e3185e9ce461e7dL,0xd6931227013e20abL,0x64313c538c85e000L, + 0x00fea6dbff7cbb26L,0xb8bed8fa5b26b6e2L,0x91bf6292dba94d45L, + 0x6816ab5998d8c01aL,0x01a52687eab4002bL,0x0000000000000033L } }, + /* 54 << 441 */ + { { 0xf6a61a467f854608L,0x5e7417c80bcf85b0L,0x9000efbc1cde2bf0L, + 0x2ef0c39edf86fd2eL,0xf494697a18a12510L,0x425c1fd09ed7398cL, + 0x0bdcf04f27afee20L,0xbc353a89282a8863L,0x0000000000000161L }, + { 0x7f643929a58a4e6bL,0xe899530b7ce61fa8L,0x8d07cf175ffa47f4L, + 0x572269977e217444L,0x789a6625526bf412L,0x656ad4358df93bc2L, + 0xc6d0fb0b46868e2cL,0xccfaf97090b50ea3L,0x00000000000000dfL } }, + /* 55 << 441 */ + { { 0x1dad0dade6860303L,0xd9126836c7e91c97L,0x9855fb600feec65fL, + 0x5ad336976c9564e7L,0x81a2f362d8548916L,0x642b4014ee4a88fdL, + 0x9c23de5ee3e3e988L,0x19c510415a0e4881L,0x000000000000015aL }, + { 0x27f5a80c00aaf668L,0x296827e796728192L,0x165a649e4e124c4eL, + 0x3268bb940f8e937dL,0x69f171678b9d4987L,0x3ce36b905e06f20eL, + 0x97287b68cbbd791bL,0xc1a6fd4c18564d91L,0x0000000000000181L } }, + /* 56 << 441 */ + { { 0x8c466ec6afa6274eL,0x935ac8e6e57d9338L,0x3ffe008e7b5ca188L, + 
0xffd8ba41dfbb530aL,0x9504dbc067d0b3e8L,0x95ebde8c622f0b00L, + 0x984bc63c209c6309L,0xb2bdaf4a6b23d9dbL,0x00000000000001f8L }, + { 0x248363240e2bb6e9L,0xcef90f5c65cc3116L,0x4c71c80816bbf8ddL, + 0x0d6ccbc38de8d626L,0x6e51fc2960c000d7L,0x1e3116e3dfff70d3L, + 0xf3d91182dd570c6eL,0xf02fbb1b6827d006L,0x0000000000000191L } }, + /* 57 << 441 */ + { { 0xc81bd2b8decb7c02L,0x956e27a4b894def6L,0x0311fa9403356805L, + 0x0e13ccd322aae58cL,0x0196f20f3ac70b3eL,0x7d4bb4e1a7b2c93cL, + 0x10fb01386510c50dL,0xd031b898e8f926c1L,0x000000000000003fL }, + { 0xf46543b5f8f5dc8aL,0x4233ee03e330d2b5L,0x1c13f707c81f39f7L, + 0xb5894eec28d508e9L,0x0ce78fe7eaaa453cL,0x4ee0ea5a0b2c7d8cL, + 0xc96edb248034a086L,0xd935778349cd9be2L,0x00000000000000a0L } }, + /* 58 << 441 */ + { { 0x871955d28b93f569L,0xf05605295a48b689L,0x0820a8aa467e5891L, + 0x7a4f5cb3f58b25d1L,0xc94ed47f8587a84eL,0xa99cd056fe982ec2L, + 0x371cbf3fd1d06d5fL,0x491d9acaf90440a1L,0x0000000000000179L }, + { 0x9df7fa8740a5fe24L,0x942fe6b1d96d3e57L,0x0f21d3127c7497f8L, + 0x7a28f02bb1685a0bL,0x45264d84fdb0e456L,0x08b20dbf95f14f94L, + 0xf36dbdd57b299fa9L,0x644f73a96dd0a74aL,0x0000000000000136L } }, + /* 59 << 441 */ + { { 0x4e0aa1cac34f2a7dL,0xd00ee2d1c4678255L,0xc74603ad187fc9f6L, + 0x6aab1d2ece9d3267L,0xb74dd371f511fcb5L,0x8b822e5c8836ab61L, + 0xc504820cb338d04fL,0x247e13889d12238fL,0x000000000000012fL }, + { 0x421d4c327f6e0a6bL,0x82a8854ba04228dfL,0x6003515d12b7fe81L, + 0xabe47e71d0a397feL,0x59feb1eb6aaca148L,0x45bd603375a8831dL, + 0xd4612d5917d1ccb3L,0x1b89ba9c74ee1027L,0x0000000000000042L } }, + /* 60 << 441 */ + { { 0x03e7026b3fc38c51L,0x43eb0760581918a8L,0x2229030dfb2b26dbL, + 0x178bb4bca8973dfeL,0x79204ab49902d625L,0x9dfd5966e57d8747L, + 0xcae096fd8b664963L,0xc3b1fc25fb1dc64cL,0x00000000000001d6L }, + { 0x7f136d66e577916bL,0x11834be442bf8511L,0xbdc623e83428bb9bL, + 0x14f10cad49c2d303L,0xd2e47e509dd1bcbbL,0x79bb7eb356290215L, + 0x7a2500b36d356e99L,0x6d01df1e1f30e17dL,0x0000000000000158L } }, + /* 61 << 441 */ + { { 
0xf99cd5725ba74517L,0x93baaf6e166dcce8L,0xac938b88601599d6L, + 0x30f247f47404a032L,0xeb3d46c03b10dbb2L,0x414e0218371dd7faL, + 0xf1164440ef4849faL,0x86420604faf4e6d6L,0x000000000000005aL }, + { 0x3376a08ebc96ecfeL,0xec288c773697c3ddL,0x6f6c6e24447a27a4L, + 0x31711e8947104345L,0x21a2e7cb8188d79cL,0x4e0ea8809c2385b1L, + 0x59d2c5af1e7a9f39L,0x644fe4e08e884dcfL,0x00000000000000a7L } }, + /* 62 << 441 */ + { { 0x7c8c7f684dd64f0cL,0xc7820e341855ce9dL,0x1544117d323f6ce5L, + 0xcb768820e373e48aL,0x464428b73c3c4c90L,0x4cd0e1ceaae35437L, + 0xd25046831ceb2a4cL,0x0e3338ccdf439915L,0x000000000000017bL }, + { 0x865ba7db6618c759L,0xca0a323710e77d05L,0x9d13842b2b28085fL, + 0xf941b5fa9fea876fL,0x041d0845e6d70255L,0x4c1a7d64428b57ccL, + 0x33f6bd2429ebd1a4L,0xe80522479a17c0d0L,0x00000000000000e4L } }, + /* 63 << 441 */ + { { 0x267820c97c2cf8a4L,0xaf23c49e7d43b5b6L,0xe83af4497452297dL, + 0x406618b2ddbb37c2L,0xfa481044f0cfb99bL,0x237d923c37e98319L, + 0xdbdc034a9ab1956fL,0x30ec502ebd6f3826L,0x0000000000000176L }, + { 0xe63ad325815972a5L,0x1f1cd2b8cfd1b1e1L,0xcc91e37e0c0b11acL, + 0xbc62347cbcc8f659L,0xb6a838e80fc52227L,0x1975db004cfa70bdL, + 0x7c4bd8bca73d6fe5L,0xef91ced9ad2e5c83L,0x0000000000000029L } }, + /* 64 << 441 */ + { { 0x560ed5e263840645L,0xac6c9d02b99ffe18L,0x0510a7b7bf7b7fe0L, + 0xe1dc108356aec190L,0x29b4ad9527581115L,0xa8021a6026a12461L, + 0xfbbccee845144aa0L,0x2c93ced8ded40b1cL,0x00000000000001e3L }, + { 0x2c841a6ef3fb0d14L,0xe8b8b8ecb1f67b7cL,0xbd4c9a2219e26083L, + 0xece8dc33c1c6a093L,0xfb47210f948aeae7L,0x98a8b0211569c5fcL, + 0xc92cb9c7399c9d0eL,0xdaa97144b0f6f23aL,0x0000000000000145L } }, + /* 0 << 448 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 448 */ + { { 0x6d7eedc48f69079eL,0xf02e8e404f03b92fL,0xdf751116cb76b181L, + 0xc255eb2c234c1925L,0xaf997d5ea94af9b2L,0x797f3df0d1374a41L, + 0x357fd20b23986cdaL,0xf1caccda1540adb8L,0x000000000000007dL }, + { 
0x2a04363cc01605a9L,0x0d7cc0d87d7d1e54L,0x0515eb70a0f9fd2bL, + 0x725d12a9bb4ec5e8L,0xd1b5b8a33a6eb09aL,0xb6c836b187027d7bL, + 0xf4773322aaf9cd03L,0x2fb3354e2500c647L,0x0000000000000081L } }, + /* 2 << 448 */ + { { 0x83da529b41750b74L,0x9e431a5c76e28bf5L,0x225e2eb97fde2491L, + 0x951e2cede7c469bcL,0x35f384caf0e10c32L,0xb8ed4a312b1aadbcL, + 0xd39393ba7b797492L,0xc82685f847597f4bL,0x00000000000000c9L }, + { 0xb04e1261436e2563L,0xdc3e83052de3e4e7L,0xa32102bdee3dbbf4L, + 0xdfb5378945f15efbL,0x34904b0b6246965eL,0xcd645c46aac06ca8L, + 0xd4e3f322358398f3L,0xaebfe7c7a54f3311L,0x00000000000001f3L } }, + /* 3 << 448 */ + { { 0xff2fa49cf2afae47L,0xa2c86068f9f2b257L,0x73dd56a446cb76eeL, + 0xbffb9f017dc0a64cL,0x0ee2b60d44b4a4eeL,0x49939f0837c5959fL, + 0xb302fcd178603de3L,0x03c2a2204017c122L,0x00000000000000a7L }, + { 0x48acda9757413107L,0xbd8f584d4c01caf8L,0x38f448a71ac29755L, + 0xcc3884836d29bf3eL,0x15c4baf974401fedL,0x7826e5cd9023bc05L, + 0xc7a70d094f459413L,0x1de803c631896429L,0x00000000000000b8L } }, + /* 4 << 448 */ + { { 0xabb0d91dcef34144L,0xbc5903fa42b2a8bbL,0xe279ef5638e4c5faL, + 0xb3416e9e6d5b04ebL,0x447819187bb923ceL,0x58fefa212914a3a4L, + 0xb0456b2ee2c19f36L,0x176361bbe92c1e50L,0x00000000000000b3L }, + { 0xbc56a1af824fde34L,0x33bfbfb3098a50fcL,0x6e720f55ab9cfa73L, + 0xf1e104b908e4dfa2L,0xb96b8abd77865b2fL,0x69eb257aa36740d1L, + 0xd73b946d26a50f43L,0x588e4a1470cd91e7L,0x000000000000007aL } }, + /* 5 << 448 */ + { { 0xd1da41a113dc8b45L,0xf4886d248a639b8dL,0xf3431ccedef9c5c3L, + 0xae07317d8c204a2fL,0xcbbd49a1f3d6383aL,0x43d3593229bbe61eL, + 0x6bfa06099643a1a8L,0x391499b22005a71fL,0x0000000000000188L }, + { 0x40774a7e79d0771bL,0x31987151595038a1L,0xff6df557c48b5063L, + 0x26c03269c3751769L,0x660c36ced2d0ce99L,0x71378e21d3d79ea0L, + 0x7b2ec7ea30be88eaL,0x1b342d41eaed0f73L,0x000000000000016dL } }, + /* 6 << 448 */ + { { 0x0f1b2a0ee2c08642L,0xc110227519fbfff6L,0x3e49142104574825L, + 0x058232d599552fdfL,0xa83f1a2f0e083797L,0x7c387d90860acbcdL, + 
0x7d8440d7d59d4ffeL,0x45111fc778f12d73L,0x00000000000000a5L }, + { 0x77acf98e28e9b450L,0x0823b735a8129a6eL,0x1abaa0f98d1f0ef3L, + 0xc2fb1108dec36172L,0x4d486fd4855585e0L,0x27d002549d440b52L, + 0xc2b556135b964bacL,0x251ad413a29f6fdeL,0x0000000000000142L } }, + /* 7 << 448 */ + { { 0x47de9d47e138d37eL,0x7387f3693e7e1c87L,0x9c00813e0a48af71L, + 0x9df47c63e8829fcdL,0x7dc7c04819e24a68L,0x3d1823e16942e130L, + 0x2f4440ffa2cd6199L,0xebf60be098168aa8L,0x0000000000000049L }, + { 0x989690073b5f346cL,0x00a0ecf9b79ae683L,0x1b6f0ddbf8fb676fL, + 0xd536d3717998c836L,0xe1dbda549c4bc7cdL,0x2b752ca371e12239L, + 0xd633d0eec296c03dL,0x548b37c59f6a0015L,0x000000000000003fL } }, + /* 8 << 448 */ + { { 0x6ed1bbb76ed0892bL,0x4921306bb788bef7L,0x7200d473bf3cdbdbL, + 0xf1cb448feefdce6cL,0x199b07a2836591b0L,0x804ff00948fc6659L, + 0x697600593b8f405aL,0x14755be59ef63732L,0x0000000000000103L }, + { 0xd0a6d1b323383b1aL,0x50c7e33bae42ca1eL,0x564b46552563bd12L, + 0x48640d21fe7baf1eL,0x6a0b495ef7717825L,0x9e4ad9aec7df9fc0L, + 0x4ceea6a6e038e19eL,0xea0e3bf8010887deL,0x00000000000001ddL } }, + /* 9 << 448 */ + { { 0x4da845a0a4851434L,0x75176aa6ec7c655cL,0xcb64f9e9f093c7daL, + 0xf80504e644bf8c6eL,0x13958864001ac4a3L,0x067e1167de28fbb9L, + 0x0ee319d6708782ddL,0x90e661f3f0a8a799L,0x00000000000001c5L }, + { 0xa161caa15db20757L,0x2adaf4e05350825aL,0x5e2bd35abcc582c0L, + 0x9989c8fe71a3bc28L,0x454827e228898a24L,0xa7108d022fd729feL, + 0xeb2b76ad8c310f5cL,0xeafd847e634c3d7eL,0x00000000000001aeL } }, + /* 10 << 448 */ + { { 0x46e47ecff0136b5cL,0xf17ab7f6344965a0L,0xf3faf3b354e24a42L, + 0xee7c7b5b6e7035e7L,0x78529226908afe39L,0x440aefb22e64eef4L, + 0x08ac260a1849e45bL,0x7203a8f76fc7bc4fL,0x00000000000000a9L }, + { 0xf36d34eb171f3c98L,0x4a2e2716e65bea70L,0xa7387fbbf8f31760L, + 0x9e33161eb9cc24f3L,0x808d8d2957715f35L,0x5bca0410150ba9fbL, + 0xe8823a72d019f4a9L,0x5e14abe756165147L,0x00000000000001d7L } }, + /* 11 << 448 */ + { { 0xd7f60f17d77089e3L,0xb68b3c61a10cb23fL,0x72a83575094e8bd2L, + 
0xd9a28d29915a575dL,0xb30e811f211c2f4bL,0x3a62fcf658119ebfL, + 0xb692e3bb679eaa2aL,0x2375e13c3fe29c8aL,0x0000000000000069L }, + { 0xd613b9ea2b8b53c8L,0x6ad8d6eeaefe8d7cL,0x25163969b1d6fb5bL, + 0x6b37519e76a6f0d4L,0x8fc9208359eaa8eaL,0xd2a15a1d560539cfL, + 0x35d7a7b70f7e555cL,0x590426c042a30fbbL,0x000000000000015aL } }, + /* 12 << 448 */ + { { 0xdc6fd0b27ff5fdd9L,0x663c5481fab729baL,0xf70938531fde9e82L, + 0x293d27fa71f684c1L,0xc05cb3d07fcf66fcL,0x709d86ba11cd0445L, + 0x1cd6d595497fa0b1L,0x5eabd8a0d47408ffL,0x00000000000000c6L }, + { 0x001ea477b6b64713L,0x148f2b5049fa00dcL,0x00106f97efce2ec6L, + 0xaca11586e205cc9cL,0x60d70881a6d874acL,0x37dd5c14d29f29c1L, + 0x586ac6c8e11191b1L,0x96470f229677b2d0L,0x00000000000001eaL } }, + /* 13 << 448 */ + { { 0x179058303a7f3608L,0x172de35e26f0fc61L,0x323d2d7334d7c1a1L, + 0x464cd498aec45adfL,0xc4f38d5062ae054fL,0x3f016202a66711e2L, + 0xd105f907e8d33e40L,0x3a102b6a75225100L,0x00000000000000fdL }, + { 0xc754a811dd02719aL,0xe4a5ea1f4bcc6cefL,0x3d64cba45fcc954dL, + 0xefe41b2c12cf1ec0L,0xa8e5b613969c79ceL,0x42e6fb9b288a3a37L, + 0x1c9e65a6aac4dca8L,0x160703b36bd7bc54L,0x000000000000013fL } }, + /* 14 << 448 */ + { { 0x4888c71de2f66bcaL,0x548a0a8f8ade3531L,0x0162233c6638686fL, + 0x4d2c6af83c30e52cL,0x62124a0fc19a0df8L,0xfce8188dcc7c56f3L, + 0x6da68920b8b77752L,0x370dc630b622bf61L,0x0000000000000044L }, + { 0xe3072dc65b51fb0dL,0xa06bd9c232cc673bL,0xe374dd3e991c7eb1L, + 0x1052bbee887d8d76L,0x9b43d6dd897746d0L,0xfb7797ed6cc50b4fL, + 0x83d30a082bf11803L,0x24d36df97d78f93fL,0x00000000000000efL } }, + /* 15 << 448 */ + { { 0x0158728662e9c602L,0xe3f31aa9c7803839L,0xecd16d810cea9177L, + 0xe203943e82b139b5L,0xcdae675da59ea3aeL,0xacafc1fd79b0b7afL, + 0x1f3ff6b96d0e87efL,0x3c52993bd84b4b55L,0x0000000000000172L }, + { 0xc8fcf451415e906cL,0xf25cc44fcc223afcL,0xa0fb3f66f80c4a7dL, + 0x6c936445ebeec08eL,0x35215495a50739c8L,0x9c197b15d8322e65L, + 0xe263f119ecc81fb0L,0xeeea2e6f37bd4d5aL,0x0000000000000148L } }, + /* 16 << 448 */ + { { 
0xd7c0efa36ea09a93L,0x95f69e09fb01c2b3L,0x021b1721d43150e2L, + 0x39ff07e05879bc51L,0x0dd73c326cf50ce3L,0x9d69d32a5758870fL, + 0x5df4cc8c1c0e91d9L,0x0033508815df54abL,0x0000000000000182L }, + { 0xbb656a89f2dc12f5L,0x6c4a32bb5c46fcf4L,0xf324462c70e7a152L, + 0x4b1d509a1f7eaad2L,0x3da87fa776e51583L,0x9a435e2019fc86ccL, + 0xb97d3f449eab8de3L,0xc56c7c988f92304bL,0x00000000000000a8L } }, + /* 17 << 448 */ + { { 0x46beca92c9464906L,0x620775e18f2f81e0L,0x767f4266c5771682L, + 0x6c6dde810bbc65ecL,0xfd8fabfbc405cc39L,0x5d9b8d18643e0d72L, + 0x30303e19109ea8dfL,0xefea4ab30929cd4bL,0x00000000000000a6L }, + { 0x993ca142b25badb6L,0xb647dc6d2880eb4eL,0x2d6e2faa551f13fcL, + 0x444f2ea3ba3e00d7L,0x3c89c8af62c6bf24L,0x1e2b1b9993ff5675L, + 0x9f2809305dbdf81cL,0xa0c0b2c11618ab5eL,0x0000000000000076L } }, + /* 18 << 448 */ + { { 0x8814ee08f2ff8ee7L,0xed9db8fd0c8aca2bL,0x36967aed6b8d81d3L, + 0x1a937d3f5cf2ff0aL,0xbedf97c9461382b2L,0x29bf2b30aa5a5388L, + 0xa5b6918fa8796dfbL,0x76662a2b2df5c39bL,0x0000000000000050L }, + { 0xb8157fbbb44fc79fL,0x17c3ad0b40212b26L,0x06bfa618e49dee6eL, + 0x81a338f1422b2017L,0x200f6dcd203fb523L,0xa904931782a544bcL, + 0x9d12fa1489795453L,0x92997d54910f9258L,0x00000000000000d4L } }, + /* 19 << 448 */ + { { 0x958450c54ec281fcL,0x5cf3bd6193c614acL,0x99919178a2759a65L, + 0x256015282dc88c30L,0xf3a2766a8f15f90eL,0x12362084c8a6e56cL, + 0x4359fa34ee82faf9L,0x492b73c91c03100fL,0x0000000000000000L }, + { 0x1467c71469f444baL,0xe10379562d904cd4L,0xabc81c9a648ab04eL, + 0x4c0ad4f7910123e9L,0x7bfb6a0943b8ac8eL,0xb51f3de73934362dL, + 0x70d304aba030cab7L,0x577cebb04e8b0148L,0x000000000000009dL } }, + /* 20 << 448 */ + { { 0xe16f41aa44be78bdL,0xb5436f21e014c4aeL,0x9711718156ea17d7L, + 0x77eb089b37c05237L,0xde53fd3bf974ea0dL,0x27c4c2e9c84daec0L, + 0xa7c70452639b53a2L,0xab34cef9122cac08L,0x00000000000001cdL }, + { 0x45942b2b520fed74L,0xe339e2ec18185e32L,0xef29fc70799a5848L, + 0x81cd214092bc0623L,0x56f9ba7d7502fd5eL,0x8c0c55507d06b23fL, + 
0x4835f4e6f6780730L,0xe0c0adc9f3b8b6a4L,0x0000000000000078L } }, + /* 21 << 448 */ + { { 0x93c6329431a43160L,0x19f6aa9bf70fdafaL,0x74922f2451c93c44L, + 0x2771a1d8a482a3bcL,0xdab23427ffc50d8fL,0xb06ca094a3138e86L, + 0xdef003634bc843a1L,0xd6bf9e1228d99dc8L,0x000000000000014cL }, + { 0xc618a4c3d49e9c02L,0x202558298a891299L,0x8acb42a297e53ed0L, + 0x012a299092167efcL,0xfde4d81cc425cb45L,0xa7a4f2b788705295L, + 0x958260fb365f6d39L,0x24d12f9c7281e7d2L,0x0000000000000039L } }, + /* 22 << 448 */ + { { 0x51fbbf5fdb40af14L,0x0e795ad376869107L,0x353827bd99d8accbL, + 0x41b9b0f3f0a03c54L,0xdf2e0c6ea3a0f1bcL,0x80c2b91e83d84847L, + 0x28240a87333924fcL,0x0cc6dc8139166ac9L,0x000000000000019bL }, + { 0x150e42b72c3c6ba1L,0x165bafd96b10b2d6L,0x990f141114a18123L, + 0xe89b21600746c897L,0xfbc4fc51f7efda9fL,0xf2e2285526fc48e5L, + 0x95ea0ba212346b4bL,0xfc5b414977e223ceL,0x0000000000000186L } }, + /* 23 << 448 */ + { { 0x0be0a5de07a16d69L,0x9ef32d03e11ca186L,0xccda2049a9ffe33eL, + 0x3470ac5a4b670afeL,0x8770a1ef74a0d67bL,0xe7011ca9df62093eL, + 0xe08c557ac48bddc9L,0xd458493937876750L,0x00000000000001f9L }, + { 0x8a93b565a84fca82L,0x4db14cc9c6be1ba6L,0x06cf1b331830b8d7L, + 0x048707b3eb991fd7L,0x005ef51378b92e81L,0x444b5dbbbf444785L, + 0x158fec89923f5885L,0x2a681a4a1900dfb9L,0x00000000000000abL } }, + /* 24 << 448 */ + { { 0x22a96092b0fef65aL,0x75400e255b4b6264L,0x2d6bf21fcf25cbb8L, + 0x2fd07e1c9289dbf3L,0xe56e840c6814e503L,0x15063366005f14d4L, + 0xf156222e66516b79L,0x56d9d1fbf5167303L,0x000000000000002bL }, + { 0x9c095b7cbe93ea4eL,0xbc26271f7eff54ceL,0xfdca10ae48cb19cdL, + 0x7c582ed41967cc71L,0xd5de0048870d2bd6L,0x92973e875914e7cfL, + 0x56e0b7a321edd49bL,0x1bea592023d39a84L,0x00000000000001d3L } }, + /* 25 << 448 */ + { { 0x830fb6b0b22e2bddL,0xeecc9175ec46dddeL,0xbc5653dc92ba4ea2L, + 0xda57d028df0f6920L,0x0b6373b613d599b4L,0xe3408fbe5a64be7aL, + 0x835d777666ee7d46L,0x2410f0dcd60a8af2L,0x0000000000000116L }, + { 0x74a4658a92a29714L,0xf8a8f1fe844af49bL,0x15d77ccca2b02157L, + 
0xa33511645b5c9343L,0x2bd68a266e54c8c5L,0x88d773dd041c6279L, + 0x4aee4590d6a3c00fL,0xa1697b605166b43bL,0x00000000000000fdL } }, + /* 26 << 448 */ + { { 0x08757c74ec97eeeaL,0x27e8eb74fd3c52b8L,0x2809524d010a4f7eL, + 0xc9e0dd4a1377b31aL,0x9cb226d1e082c564L,0x901073ed5af132e3L, + 0x8ce944c8188dfe5fL,0x347892858c5260a5L,0x00000000000000c1L }, + { 0xe363f14e1cc9ba1aL,0xd4d8c73f194037afL,0xc9b307c79c2f34d7L, + 0xf02b5ddb947966b6L,0xbaa7d2e0b7ff7f37L,0x78c228d0f6991d25L, + 0x9bd96a4c0aa5bae4L,0x93155e7c16f862c3L,0x000000000000019dL } }, + /* 27 << 448 */ + { { 0x3a730ac01ccc9a22L,0xe876db8a07b71c34L,0x5a4aa392fe69aae3L, + 0x502aa22d9db171d2L,0x7c5fb005ad0f33ebL,0x5a2169a88c3bec21L, + 0x76efaee9781c7629L,0xfb933efd72fb60dfL,0x00000000000000b8L }, + { 0x1d4fca2eb1e91e17L,0x7187a86f864fd2a9L,0x896e3acbd37c6e14L, + 0xb10aef2d42cb0645L,0x5c0c4b235e72c54aL,0xec0ae29ed1e2f299L, + 0xb72c7f4c77e91c26L,0x41c241c4f37a54f7L,0x00000000000000a8L } }, + /* 28 << 448 */ + { { 0xca49e4056736b7a8L,0xbae02b57f438c9a8L,0x871ecf1d8dd5bd49L, + 0x46b05ef1974903f5L,0x549128c348aa1a22L,0xe8f8d085ef5b4439L, + 0x62fbea2fefee9906L,0xecd06f55c49261ffL,0x00000000000000f8L }, + { 0x603a56fe1def19d6L,0x48d65f1d3e4cf99eL,0x6b92c5694559a51cL, + 0xd133cca108c45240L,0x9656a7c46c134b2fL,0x7e407c2907d6c7f7L, + 0xc346409adbb7cdf8L,0x4ebc0b91eec47250L,0x00000000000000aeL } }, + /* 29 << 448 */ + { { 0x04b789f81da8a341L,0x81827ac482bac9afL,0x85a2510cc0a342a4L, + 0xe3d17837200d2557L,0xf951b01ff55cceb7L,0xa00d66f41472e87aL, + 0xfe101e3e0a0b1ff6L,0xfc4b05f60f9e8587L,0x0000000000000173L }, + { 0xd64fdd33fbea0ce4L,0x8aa0680aedb5fcdfL,0x78876c00e08cda89L, + 0x9c3424f9034f2cbcL,0xea7170d1a86845c8L,0xd36a89c084650b43L, + 0xfa5ef9e6ab2a83ccL,0x0458b4b756c03914L,0x000000000000000cL } }, + /* 30 << 448 */ + { { 0xef816b5cbca24a14L,0xe56ef49d80999021L,0x5724b7fc5984a0c4L, + 0xdf9250fc23c4f692L,0x05b5d16a4a329eceL,0x389dd3268f017928L, + 0x49cce29d7376ef1fL,0xd005fcf9adbb3387L,0x00000000000000ddL }, + { 
0xf7d7a2da1919ee1bL,0x99312ba17108fc88L,0x6e0fd013819b2ab5L, + 0xac6dde9ca0415b09L,0x76aa7ad61116cb56L,0xa4164cb64d7e2873L, + 0x25a4982b00d5204bL,0x3d528fb42ca1bc08L,0x000000000000011eL } }, + /* 31 << 448 */ + { { 0x7eaa6f4f202177aaL,0x407c3797edda27a9L,0xd1cc20254d51d733L, + 0xc7950454c1c04a95L,0xb657108ffbb858fdL,0x6068f5a94b7af32dL, + 0xbe741bb72701b289L,0xe786ab377dca6c64L,0x00000000000001f8L }, + { 0x9f04d1950f884ecaL,0x036589956f965937L,0x62e3afa42c33ebc6L, + 0xa2b8ba656f0e5fbbL,0x798f812abbc0c7c6L,0x7cc29ea1537a5d81L, + 0xccd03bf8f1509aafL,0x3cf256c590799937L,0x000000000000013fL } }, + /* 32 << 448 */ + { { 0x30b495ff9b09ac83L,0x7767e8a105d9c9ebL,0xb15ecb0a1690a79eL, + 0xa7b87cce1b301763L,0xc6849deaf28b14d2L,0x16d0b1585e5c94f2L, + 0xbfda45241aee3ca8L,0xe52961635d737c3eL,0x00000000000001f1L }, + { 0x339f76cc589b8ae6L,0xa133954ae02ea428L,0xf11e77f7980481acL, + 0x890c3c2c87596275L,0xe0cfdace80b79bf7L,0x0031db26a3e3906bL, + 0xedf2e3dda1c31197L,0x888f12192bc3f1a4L,0x000000000000006fL } }, + /* 33 << 448 */ + { { 0xfa11efb73c598a06L,0x1a9de85eaf29fc12L,0x9e436a4a3a85a95eL, + 0x169c615c9e3c41d4L,0x2b29db6bf79e4c98L,0x22a26e1e1a467ad5L, + 0x14e86abf782ac769L,0xfeed751a0e260ce6L,0x00000000000001e9L }, + { 0xd4355ffdfe99690eL,0x186ec88dcf7d461dL,0x8422a8fed2d736f2L, + 0x18f55e53ad868624L,0x1020ec9fbc659311L,0x86ddcc05affee0adL, + 0x06931cb237608917L,0x850013ff1ef526c7L,0x000000000000002cL } }, + /* 34 << 448 */ + { { 0x125c5fab85fdc7f1L,0x78c6271112df7083L,0x2fd974779a17dfc1L, + 0xafce9bff8de8fb96L,0x7a45cc8723fd724fL,0xfd1522f2de2fe1f5L, + 0xbc1eceafbe939d20L,0xe8f9b64c5d35cbe2L,0x00000000000001a1L }, + { 0xcebe167b4e9edaf5L,0xc40095abd68c6c3eL,0x5cbd305e44500fa1L, + 0x52d56df841fc36ecL,0xc6dcaf21a267407eL,0xbef96b050a0d5c0eL, + 0xd6cca309cf13700cL,0xe1efd83734b2d474L,0x00000000000000eaL } }, + /* 35 << 448 */ + { { 0x47d2bcbb3a59d179L,0x84c060960c814989L,0x25021261b2543f3cL, + 0x5a894a77824a6674L,0xa80055cff8a90d72L,0xb67da52c3bbd3840L, + 
0x41b6cf441d172937L,0xaecb4a0fd0c7f2f9L,0x0000000000000184L }, + { 0x4112abfdadec6f45L,0xec2292fd5af997b9L,0xec81483ea66da6b0L, + 0x2d788ca6bf6e9369L,0xee6d66d64c9df525L,0x652248dfc233275eL, + 0x26ab55ff60d792dbL,0xed6c78699d30d510L,0x00000000000000e3L } }, + /* 36 << 448 */ + { { 0x2827f5a2cfacc519L,0xfa201f6328fe5462L,0xd12ba386e76d1ed1L, + 0x4eecc04b3f1857b2L,0xab2d2292bac8e16cL,0x94ff8f3ddc3f22dbL, + 0x63248a3603b56f1cL,0x391de5d28c4539e2L,0x0000000000000100L }, + { 0x58dda167baea3e6eL,0xc9c93a6d88e1646aL,0x16c540b124b20d76L, + 0x9cfbc92113a036b6L,0x2c3b07ec6322b9ccL,0x88c8cdf43f0fcf9aL, + 0xfd62cb99da598c89L,0xd6a4bbf8fa164bd8L,0x0000000000000146L } }, + /* 37 << 448 */ + { { 0xc393c42ce88d83acL,0x5c8b69266733c944L,0xda086abbca3012b9L, + 0xff283a9b5acae577L,0x61bd875c3059cf58L,0x994aa16617621d60L, + 0x519f8cd8be49f68bL,0x3a0a9a9e98e2cb2dL,0x0000000000000154L }, + { 0xf9b9bd45dc4fa221L,0x0ace7ab384d38b83L,0x88d9fc2be55c1c3cL, + 0x84905b18c6565620L,0x55c76c803bf25fe7L,0x96c9375fcd06b977L, + 0xf890bfe99ce18390L,0xa6d94dbb19361d60L,0x0000000000000001L } }, + /* 38 << 448 */ + { { 0x708f272f49cf39c0L,0x10a48d4cc2ee3fa0L,0xab1e16f901c642baL, + 0xb5eedde41983ee46L,0xe3c55cd6899e461dL,0x23000eba18d744d0L, + 0x2276e90d9eca5818L,0x28ba08fea79959bdL,0x0000000000000174L }, + { 0x19c844006e8985cfL,0x8c47325f19c95cd8L,0x8ce8f9be8cc20f75L, + 0x5c1f05f4ccbe5f74L,0xc7611000293f57a9L,0xdf4273f284dad825L, + 0xd90562d63ba6f844L,0x70362280592e0687L,0x00000000000000d3L } }, + /* 39 << 448 */ + { { 0xff33e72ebc72e5fbL,0xbf62bd87e7d6882aL,0xda5147fdb89aef99L, + 0xb6f32dbd17a2f509L,0x9bd0eddfb286a468L,0xdd4d1b28b31307b0L, + 0x4866ded9c76ded47L,0x22513a3a1ab2e1a8L,0x00000000000000ebL }, + { 0x89e2f77acc1225a9L,0x6557eb9ce8a4d6a7L,0xd06696e55d2b30d3L, + 0xffdbd1fa38241fb1L,0xece18f786da9b3a1L,0xc7be81d7f3778b45L, + 0x551942079c94ba90L,0x4a4151d863583b90L,0x0000000000000165L } }, + /* 40 << 448 */ + { { 0x482c4b824cb11206L,0x2201973cda6ac67fL,0x90df76215995d743L, + 
0xee362af67e7b155dL,0x7f3893b65997d300L,0x9cb6280f48c1f500L, + 0x334dcc96c04921e6L,0xd657696028bcbad5L,0x0000000000000182L }, + { 0x9dd406cb615b1c61L,0x7de33627d9cd2e7bL,0x7bb1260c8775b9c3L, + 0x8660e68e17b15699L,0x679fda73ed79d75fL,0x10d1c3e755a10c2fL, + 0xc4a3f01c652e4c33L,0x34c91d58b7c42d3eL,0x000000000000002bL } }, + /* 41 << 448 */ + { { 0x9747a5bd85fa2a0dL,0xe15ad21ed11886aeL,0x2eef7ba93a2651f0L, + 0x9e77aa5962686644L,0x9e7b79d4fb694bafL,0x3a430ea7a6b0849aL, + 0xfb6f511c8c34f2b0L,0xd5c58ed10140425dL,0x0000000000000166L }, + { 0x98c16d41e9539297L,0x287e5e5f403d122aL,0x52ded89abda5f4e9L, + 0xa541ea800d429eb4L,0x763796a0dd2eb689L,0x5e35cb2d2001730fL, + 0xfe0f4cade305badfL,0xd110a0cf2ee75cc5L,0x0000000000000029L } }, + /* 42 << 448 */ + { { 0x516e69a0c5a71125L,0x998a22f25c95ad7aL,0x8a14544f648f80ccL, + 0xa59f36584ed65117L,0x4b5a92a85dd7e675L,0xd262307ac9fa87afL, + 0x936957f30fdc3362L,0x994f83054b783d0bL,0x000000000000006bL }, + { 0x56d8d538ab411cbbL,0x6c3be61485d78e8dL,0x4234020c833fc8e1L, + 0xe4fdbf97b41ed949L,0x8c36618c0f3a1f2fL,0xe7dae0ee58f49465L, + 0xe5b2b939b6a95411L,0x822fe9e126a6e121L,0x0000000000000094L } }, + /* 43 << 448 */ + { { 0x9cebe2a9d059f7d5L,0xb4c8f6deabc86eafL,0x7dacabf6c6e5ffdcL, + 0xd8b22f229922c453L,0x676154f2174ea57dL,0x4d7ce6fdf9819e39L, + 0xb19bc1bbd47ebfc5L,0x24383cd04709da01L,0x00000000000000abL }, + { 0x8d5e8f8a3cb2e7eaL,0x2899be2733d8d462L,0x5e6e982153eb2879L, + 0x04637531a17178c2L,0x8ce508f1f4495e86L,0xbbcc3223935c7f1eL, + 0x72c33d4fd2e53b1cL,0x3f02c919bab47cdbL,0x0000000000000056L } }, + /* 44 << 448 */ + { { 0xf25929a379c36058L,0x6d0eaa35a743c8a3L,0x24943137c5850310L, + 0xb78fcdc74d01efb0L,0xdf7a2cbd2a5e9f3cL,0x6d8c2cb9df226a5fL, + 0x32f82caaedfffa33L,0xb520a44381c3f37fL,0x000000000000018dL }, + { 0x477f91b7e71efa02L,0x9bdb34a93e3ca801L,0x29bdb247fbaa8273L, + 0x1094b513dd94b717L,0x97c93aa247d54bfaL,0x8899075c84807d06L, + 0x6cb0824b57f6ae04L,0x5d1def90e6c92379L,0x00000000000001a0L } }, + /* 45 << 448 */ + { { 
0xa9abb86dc72edfbfL,0x976e9021ac090215L,0x045277f4e22e6e61L, + 0xc7659062b8e5caa4L,0xfb951c4a8313d948L,0x5cf7b980aab03d43L, + 0x638ad72853011671L,0x8982856158bf9edcL,0x00000000000000b6L }, + { 0xa9f8e9d5670893a0L,0x950c5bd4695bfd4dL,0x9385ed198b1c6915L, + 0xe65fea3ce8a000c3L,0xf115e65f45299c03L,0x5ea19e3260785ed1L, + 0x68335fb1829d2dc5L,0x486ac0a949a1c2d0L,0x00000000000001baL } }, + /* 46 << 448 */ + { { 0xd1b72ef8e18d7e88L,0xe97656fb4c48d545L,0x72f28d05eb70cd01L, + 0xace95a386825c358L,0xfa20240fed25cc72L,0x4bcd0bba8fc571c9L, + 0xa8a77a6940165a4fL,0xaa0d24d5f1a5e08aL,0x0000000000000046L }, + { 0xb2704a1a257cb8cbL,0x0606229140b7acd7L,0x3fced7fc0418bc8fL, + 0x1d063bf612b0ffd4L,0x7f35d6845b8c2990L,0x11d5f9a3e8035f81L, + 0xa8eb0824708300e7L,0x8bbae009cdff1bddL,0x00000000000001d8L } }, + /* 47 << 448 */ + { { 0xc98465cf1e099827L,0x231b39ff67a1f512L,0xb71a173966598bdaL, + 0xaec36ef7b70b4422L,0x6115ba31fc2de257L,0x2caff11ae2fe1837L, + 0xd1e8fe55a0aab548L,0xdbc7863b19ef5f6eL,0x0000000000000011L }, + { 0xde6a08d39bd49a13L,0xa9b22ee15b3da1eaL,0x69f16ae2b6f6a425L, + 0x81647bfef93b2684L,0xb20f7c385a5214b6L,0x9f3e7078d7dee661L, + 0xd03efad577719f99L,0x58a09d6e42df766dL,0x0000000000000020L } }, + /* 48 << 448 */ + { { 0xba0a5d23dc831ac0L,0xcf0fe6fcbd41cfa5L,0x11d490d3adee7925L, + 0xa8c359fc491d988aL,0xb97a48a53ff98345L,0x6c8ac5f21b8bb845L, + 0xa9c9f2a85f157746L,0x79125887cf6ad9a4L,0x00000000000001aaL }, + { 0x3dc5db89e8469532L,0xa9610220639df36bL,0x5806f730828bababL, + 0x5ad5e58f60ef0fbeL,0xbcdfe863aad0a654L,0x818cdc6134f936daL, + 0x3b6839b4d47dd376L,0x80db0098be99035fL,0x000000000000002aL } }, + /* 49 << 448 */ + { { 0x57e66d8002ac31a1L,0x7cfa3c5249aa4fa7L,0xfa49bed0cbe9b21eL, + 0x52cb15789db98c26L,0x1d47d195301a3fe4L,0xdeb6e083d69b7093L, + 0x1f5ff8447d4af7a7L,0x1d5de0701e12a92dL,0x0000000000000035L }, + { 0xd78224473e451fa1L,0x604c458c535301ecL,0x25c9c867dd8c2390L, + 0x1810f8d0aa93f0a7L,0xc2210ddb316fefb5L,0x25a94d5b68af35f8L, + 
0x53300a3e4a46f220L,0x53cd0eff6567eb3cL,0x0000000000000107L } }, + /* 50 << 448 */ + { { 0x463a0c31858ac43fL,0xf4549a21501dc385L,0x349e94abe9aa4f09L, + 0x3e0650cb6df0e53eL,0x266d445b8a31d2d6L,0xe9bcccfe8740fae4L, + 0x2497641bf4678f4cL,0x1399e0a702904c35L,0x00000000000001f4L }, + { 0x2b3fa40fe14ed6faL,0x6cfd3a80a2d6adfaL,0x69b4e928f0fd74f4L, + 0xc1392f2469c56022L,0xfa22fb98a24b1facL,0x5f69d1a2a25757a0L, + 0xabd01cd5c5482128L,0x8c45115297d5a39eL,0x00000000000000adL } }, + /* 51 << 448 */ + { { 0xb0dcdad079443d3dL,0x2d9a42fe3a52d58fL,0x7955958903b2d65dL, + 0xf1b0c12f8a0b9cd0L,0xbd734262222bdb0fL,0xb035bc18d2f0707cL, + 0xaa3516119c456cf1L,0x46f91b9228a7d3fcL,0x000000000000018cL }, + { 0x32b8a62934c1eda1L,0x0ecc02bd130644a8L,0x23d8ce282bfee465L, + 0xbb8a3ed82e8997bbL,0xc46d3480432ff8c3L,0xfc2ebc497233d495L, + 0xe55621e587b0b6f9L,0x928ca258af75145bL,0x0000000000000129L } }, + /* 52 << 448 */ + { { 0xf49c63f00ddef055L,0x48aa7a19d452b85eL,0x2e25071477b7fa65L, + 0x214846773c8ecc33L,0x0bb90b9b9f3eaf10L,0x37b1620414c4c51dL, + 0xc872bb78a1a5a425L,0x9346566a2f80d134L,0x00000000000000c4L }, + { 0x68ac1057a1a2f1bdL,0x12c6c5bcfd33db61L,0x99298470a35fa26dL, + 0x54af5dbf7dfb15c5L,0xca576278357de229L,0x003d3219872e6db0L, + 0xbf3420e5722a6d5eL,0xe3256c9ec29eaf3fL,0x0000000000000024L } }, + /* 53 << 448 */ + { { 0x1bf4838b2a29f47fL,0xfbdc24b3518d9454L,0x75a3de3cf8240b38L, + 0x6da0b6f7893631d1L,0xdfa309cac90586f7L,0xaf89f6d1801d0c3fL, + 0x37544bb8ccddc955L,0x8b20db078bbcf759L,0x000000000000014aL }, + { 0x3ad9424106b6601bL,0xf26564e2d069e9c7L,0x9370b0d5417b0b98L, + 0x572bdd32e88921edL,0x2468cb1b6b12f081L,0xabedc7f96d585c3dL, + 0xdd6625e6956519e4L,0x39d98d7bd3896f1cL,0x0000000000000012L } }, + /* 54 << 448 */ + { { 0xb8bdf12a5e0dafd5L,0xc84b1a43242f9295L,0x7b2ce3ced23a98f3L, + 0xddc3cd517ac86f66L,0x53c1528aa5228de5L,0x8d1ff4e4ba69123fL, + 0x32f4bc01b5ce90d4L,0xad5158cc8cb1409eL,0x00000000000001f6L }, + { 0xb5ffd15aab839bdaL,0xd4652cb2f6c03850L,0x5ed9ea35ba6cdf1aL, + 
0x6963ebc45529204aL,0x7f5c0d7f3cbe2e09L,0xf7aa9f58878afde6L, + 0x078e5e24e9c780a6L,0x78da0aee78a975adL,0x00000000000001c5L } }, + /* 55 << 448 */ + { { 0x071cf98df7134d20L,0x707f5869578e1b0aL,0x07a11e13a0c74aedL, + 0x1c32de4466086da0L,0x0ce4c3dc156e6365L,0x3ca495174ca4ea96L, + 0x929409321b049f10L,0xcdc6c5e45549420bL,0x00000000000001d5L }, + { 0x11092c8b111881e5L,0xfd4467c943845ac7L,0x43a474e99b0dccc1L, + 0xb3eb7612ed69f0cbL,0xc5ea3915e920a3faL,0xa80778f17580cc78L, + 0x58bf893d588d51bcL,0x3ea95a47c036844fL,0x000000000000018cL } }, + /* 56 << 448 */ + { { 0x5f8fb091c8a11088L,0xc528c08244a8e84dL,0x2a38b398f419ab48L, + 0x3e6a4d574f9e3fffL,0x84dbeb5e0f91941aL,0xa5d2364e8df0b630L, + 0xbd79a9f578edc885L,0x20cca408771efab1L,0x000000000000000fL }, + { 0x92c00dd612581153L,0x76a00a16d160483aL,0xc5f2f97ef8b8c8cfL, + 0x9c51e9949039fc0aL,0x9ae8239115a1e161L,0x611f60f3f04c80b7L, + 0x7ddee53b5930f39fL,0x139e22293734c045L,0x0000000000000169L } }, + /* 57 << 448 */ + { { 0x492a0d612b8f7df4L,0x9e5938bfdaddc812L,0x270ed13acd3591a1L, + 0x96c535ad5b526ef5L,0x3fe87cb1c08417a5L,0x535abbcbcafeb810L, + 0x682cfa1029bb9f46L,0x5cb429b34e142fe8L,0x00000000000000ffL }, + { 0x3daac71ed0634825L,0xcf6e237fe85711fdL,0x804f34c977556d5bL, + 0xe34f12c3f521b81eL,0x9f4c137f292e32a0L,0x3a8a1ccac9694a76L, + 0x6e2378c78c3b70deL,0xbe0f52a6c07fe2ccL,0x00000000000000d8L } }, + /* 58 << 448 */ + { { 0x118fab8d79a79a9cL,0xbc2c9888bd218b46L,0x4ea3bd8c096a5354L, + 0x75a99a2783850578L,0x2498a867b61ed37dL,0xd99eca87a1b4d8cbL, + 0x78069810e502e8c5L,0xe124a2b0c1ee2bc0L,0x00000000000000f7L }, + { 0x0fb991f57ea8324fL,0x5d03f2fc1b2fa900L,0xd7aecc39a2afaf48L, + 0x6f5834408f8eae4eL,0xcbaf72380821726bL,0x5bac3a5826891763L, + 0xb032fffce3476ef7L,0x5cbb984b60c21791L,0x0000000000000037L } }, + /* 59 << 448 */ + { { 0xa2cdcf6fc7a6656cL,0x3890d3dd2a668d45L,0x234f2e594925e5c5L, + 0xee308962c91f5611L,0xa5c2244dafd2a34cL,0xe1f87b4883b55aa4L, + 0x073efb988e8d2151L,0xf5a2060add8c1fd4L,0x000000000000010fL }, + { 
0xb36704d4f67f4c92L,0x5187b6fa3cf744ebL,0xe9e542fd24e4a727L, + 0x80e7047c49cfad5fL,0x289d555413901f7fL,0xb3aa2b678638b4c1L, + 0xf849ebf97ec3d745L,0xbf3ae59d9d645f83L,0x00000000000000ddL } }, + /* 60 << 448 */ + { { 0x62e3e3726899a80aL,0x84d09be49df44b97L,0x6e05ba9f147b7cc7L, + 0xdbc7ebb4786f89e1L,0x59bd342f5fd35148L,0x74ff10abf21cd7bdL, + 0xe9adec9a4963a567L,0xdf49024bbcf19942L,0x000000000000016aL }, + { 0x583ad342e4e67976L,0x384f24864b4036acL,0x2d1de0460fded50aL, + 0x5979c2d3231e9f33L,0x4357f19a6f1f6362L,0x03dc0490c7206d38L, + 0x0a809b04d1f6a3d0L,0xb86a36390dad4ae4L,0x00000000000001f7L } }, + /* 61 << 448 */ + { { 0x4a43f2f8ab33fe25L,0x9b5f5c47cd37c48bL,0x9638182e53cd35b8L, + 0x15080c5a2a01d11eL,0xa2ad1d1681cbdad0L,0x4d368c121388f3d9L, + 0xa52874d32b5fca12L,0x3144ef491c9937e7L,0x00000000000001e4L }, + { 0xaf4c873c1bef2db1L,0x854dbb8b41802396L,0x04ed772d8a107858L, + 0xc302309c9922bb22L,0x8934bf22f903f12fL,0x1895a13453ac5388L, + 0x4e3a136f9795b063L,0x132201f7ed77451cL,0x00000000000000cfL } }, + /* 62 << 448 */ + { { 0x3af5f35a9f1ead38L,0x2251fb919de293edL,0x35e60ce0549a3799L, + 0x2daca779449e2012L,0xfbe464d153de4a11L,0xfc18cb66fec4b981L, + 0xcdcc49d118082f41L,0xe06054d362025456L,0x0000000000000008L }, + { 0x5402f1bd9bd9602bL,0x2f1c926627c3b232L,0x06b4503e2de448d5L, + 0xbbeede93092ef103L,0x266cb7d16ba9f227L,0xaca181901b3802f8L, + 0x358cf8b0902424a3L,0x03288ff4cfc7b845L,0x00000000000001e4L } }, + /* 63 << 448 */ + { { 0xd647fd5243cd6708L,0x3f6caa7ce80ca7a4L,0x446fbd00d53a4555L, + 0x50e707aaf87efc06L,0x6194ffea631474abL,0xe59478e2846b6fd2L, + 0x589039414b8854f7L,0x3b3c679daccf580bL,0x0000000000000034L }, + { 0x3388ccd6d6d55e6dL,0x4f1e9f0c878cd970L,0xc2f128f4c7501b77L, + 0x69d023525f9082d3L,0x350954abbe02b4baL,0x1d5f5bc828f937dbL, + 0x9b01373dc36b85acL,0x0f3810828be8c7d0L,0x000000000000001fL } }, + /* 64 << 448 */ + { { 0x394cdaf5af579e74L,0x892cb4839f0656a6L,0x0ecd401f3e8e1495L, + 0x655f01524caf97c0L,0x9712d15881912568L,0xd16e997a89dbdb52L, + 
0xeec17a551a8c4097L,0xbc19e865322bbb31L,0x000000000000003dL }, + { 0x6b181fb67cfb9654L,0x3afdca352d651201L,0x19d26af461453278L, + 0xeba73d693ebeaf7eL,0x39f7171e1832e1e3L,0x84b46c0e31a9f559L, + 0xaea096cdd89db3c3L,0x193c91e76c409761L,0x00000000000001fdL } }, + /* 0 << 455 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 455 */ + { { 0xfb24c5b4c56caae5L,0x3855473b71a1a7f6L,0x225a79370cf6ff1dL, + 0x3cdb0634b5ace9e5L,0x46de798d26b6384fL,0xf1ede134ebc11704L, + 0xe80e4e37554519f9L,0x812e76db6349eebfL,0x0000000000000179L }, + { 0xb0b69f323b354956L,0x27c782ecfc0d2a5dL,0x2bb2ebefa0dff80fL, + 0xae8e58262c887ac2L,0x2e4886170ca3bf38L,0x77b1b7b434c9e517L, + 0x5d42ca18db708de3L,0xc1b0482a4305eac5L,0x0000000000000091L } }, + /* 2 << 455 */ + { { 0x4331cdcdd6a37babL,0x67b8913785c741c8L,0xaeffcbdb4fc72389L, + 0xcc12e59a1c7ae6ddL,0x4a35540bee1e838bL,0x6148fb60ecd3e84dL, + 0x7319e868b2290730L,0x4321baae4de5208bL,0x0000000000000160L }, + { 0x10c813e987717bb9L,0xd9e1f5a8cfb9252aL,0x0983c1f7d7bde47cL, + 0x7446023b5474e52dL,0x1f189de81941b518L,0x1997c69bb425810aL, + 0xe15b5f1e58bca332L,0x715da4c949092985L,0x0000000000000076L } }, + /* 3 << 455 */ + { { 0x384e3f6f58451e49L,0xaeb80f831e31dd0dL,0x4533e3674b9d5a8dL, + 0x531d29f192c91343L,0x3ad61300badb78f7L,0x9430ca90b88c7e34L, + 0xb29faff5d04c4cb7L,0xd1d5172032b962a8L,0x0000000000000037L }, + { 0x15e09965dec37027L,0xd2cb9e7ef367e0baL,0x64c02f1bee17c9d0L, + 0x64a78127eb7a74d4L,0x94bd3d97e2733ab3L,0xefbac0a22548d742L, + 0x1834dd09691ced31L,0x5cd623906f6cbf38L,0x0000000000000193L } }, + /* 4 << 455 */ + { { 0x3e5927537600c9f1L,0x115da68ae4653acaL,0xa83d3cdea21c03b3L, + 0x5821515780477c90L,0x876e0150c43df921L,0x176767d6410801f2L, + 0x95e1b6c9b0b638edL,0xcda8e401979e84c6L,0x000000000000001eL }, + { 0x5500c69c85cb274fL,0x5f2d975bde004d59L,0xcd545d2aa005b4deL, + 0x4d3d02ae4e3b1b94L,0x53e64ccdb8ec522bL,0xacc3386f12efbb9eL, + 
0xc8f529c21cbd540fL,0x8428942d2f854a91L,0x0000000000000182L } }, + /* 5 << 455 */ + { { 0x52ac93ab9d106cc0L,0x6ade3bb2a1a00a39L,0xa6de45cbfe378be1L, + 0x77ad40dcd0670c0dL,0xf86878493a2759c5L,0x483cea2b331d66d3L, + 0xbf7da5e30c0a8a8fL,0xb6ae584ae13ed38bL,0x000000000000011dL }, + { 0x80e9d0a253710983L,0xf52a813a2499bdeeL,0x3abc9538b0a63f6fL, + 0x1abbdb5e88454852L,0x42562c99b16c6366L,0xb320389a1fe20061L, + 0x3dc5368a0aafe965L,0x22165cb8786c4c36L,0x00000000000001a0L } }, + /* 6 << 455 */ + { { 0x2cda398733430edbL,0x7f0593f8ee487eb9L,0xe458e275d8765895L, + 0x45c2e09311be3df5L,0x32545b1e51450f54L,0xf35bc1ee49d52cfaL, + 0x59b8b719da91cec2L,0x4426171bab7fe631L,0x0000000000000135L }, + { 0x17c4e11ce00a5618L,0x7a6ff5b1fe8be941L,0x72e91c8c2c90c5d3L, + 0x9f45087980cfdf98L,0xb0d5d0d106ce62fbL,0xd9deb71557fa7188L, + 0xf1d09702ac85d50bL,0x65ab22b5a170e75eL,0x000000000000018cL } }, + /* 7 << 455 */ + { { 0x8dad33f0dc5d94abL,0xf4db966bfac13747L,0x8a77811c06e37e4eL, + 0xf9363ed30c3604e7L,0x90756e243ba7fce8L,0x3696963496b80bedL, + 0xee684c2077d60148L,0xa5c2fb34655054d6L,0x000000000000015bL }, + { 0xc3add12d16926ab2L,0x05ec8eafff3a3fb8L,0x0d04206ee9427d91L, + 0xbeb4f190129e73b0L,0x17261339c4d688a5L,0x24dad00d6ed53db0L, + 0x2528c44d45319ea2L,0x934c89cf849c5440L,0x0000000000000018L } }, + /* 8 << 455 */ + { { 0x92c099d20bec2201L,0x364fa15ab67ff463L,0xfb32ffd0b02f7e0bL, + 0x5b57f9939d4c9f2aL,0xba5f437e4996b151L,0x961bd5fc431462e9L, + 0x1fda019fdcd39411L,0x8ff5288eccdfbe62L,0x00000000000000ddL }, + { 0x43f49a318ef349acL,0xe4f5a626301bae8fL,0x945e6a62d2681847L, + 0x8ef66b51857c09beL,0xdaff64743f15435dL,0x215793cd3db84e5dL, + 0xb2044d4bab1bb6c2L,0x4c7ac000580855ceL,0x00000000000001b8L } }, + /* 9 << 455 */ + { { 0xa136b9c582159046L,0x140057f04c4ead62L,0x8ca6289ebc981d19L, + 0x4d649954c7decb99L,0xe5a5dd22865132baL,0x997c4359c0ac595dL, + 0x0de89fbab741c450L,0x88ff00d621eaec9cL,0x00000000000000c4L }, + { 0x588c3562c9825937L,0x36715b023cee8780L,0x2a36c31fbc044ec4L, + 
0x3122c43ddc1830c2L,0x502d3747d0ad0521L,0x0154ba8f1f2a32daL, + 0x81ba266bb3b79bb3L,0x20f1812f498a41f2L,0x00000000000001b3L } }, + /* 10 << 455 */ + { { 0x52bea9dbc2a1b7edL,0x7844cd85e0d786e2L,0x17168d7a4d565a2eL, + 0x1213c72b155affb4L,0x26a4857d606a308eL,0x6f0560d6b55b0018L, + 0x7c8549e874e1b9cdL,0x322ba0629deeec3dL,0x00000000000001cfL }, + { 0x0a7016b8f59fd1d3L,0xa81ce2f8aeebdf96L,0x3ac89a8a0f7e145aL, + 0x3fe8a38d70351907L,0xa636fd48bf10697aL,0x4c0f860d7f93878cL, + 0x546e63f5957d25e3L,0x1fd01e4a1df8262aL,0x00000000000001baL } }, + /* 11 << 455 */ + { { 0x247a1ac087e07d2eL,0xb3ba31de267cec57L,0x70c28376dfdff923L, + 0xa5b0933e98010a4aL,0xe8a5384f36233cf8L,0x629f4df7bb8d7c34L, + 0x08d3ce1f86e72077L,0x42d10ace698660b6L,0x00000000000000bbL }, + { 0xcb6f4d27db329183L,0x702ddbde1d7d2f87L,0x3b33b2b0efbfd731L, + 0xecbd887c8623c86dL,0x33cb70623a026857L,0x37e317913b7ed919L, + 0x51936c18079f2a15L,0x7118bad7bc8668cfL,0x000000000000002cL } }, + /* 12 << 455 */ + { { 0x37e766a6c64f9c9bL,0x6cf5e0b616c4ce5aL,0x31a2b1e27bbe708eL, + 0xf784a55291e24f0fL,0x1cdf807a1fd25709L,0x2669d80270ced72fL, + 0x79e4492dd49751dfL,0x7fe014d87e7f3162L,0x00000000000000b4L }, + { 0xe0a2be33ada43f51L,0xbc88356945f6e16cL,0xb3d2248ec5873251L, + 0xd1118266e3f9b61fL,0xc427e30bf6d1dd12L,0x43f0925b0d8313a1L, + 0x729dfe2a39edf59dL,0x7189f5a4ff4b7293L,0x00000000000000b1L } }, + /* 13 << 455 */ + { { 0x39502a01368c3556L,0xb35130c9e9123869L,0xe3134618586c2e0fL, + 0xdd3442cf24f259d3L,0xf57c35ec40687844L,0xe90532e576f52793L, + 0x55fc2464359633b4L,0xcae06ebd1dbc4636L,0x00000000000000b1L }, + { 0x5489517f5b600d27L,0xc219eb240984d56eL,0x7b016f7ed9384e65L, + 0xfb57337cb879d9a8L,0xb449a322baf2c015L,0x023817267f138634L, + 0x1368bf27c9b321fcL,0x6385cb06247247faL,0x0000000000000004L } }, + /* 14 << 455 */ + { { 0xe4ea02cf8f3110b8L,0xfed60231de50a1e9L,0x62bda5e80a8fe01aL, + 0x882f5313ade31339L,0x30102cb926f12837L,0xbbfcd761c1851e44L, + 0x6e1c5d2b073b2ae5L,0x1542324c357ad561L,0x00000000000000faL }, + { 
0xa8eac464c37d1a01L,0x8e5b465daeedd902L,0x7f7b68f5fdaf7257L, + 0xd88a2cf6b4badb28L,0xe0dfde89e4102b12L,0x9ed8dab7ab851c47L, + 0xbb44bd5db4865d1eL,0x0c6d69d57f2f666aL,0x000000000000004bL } }, + /* 15 << 455 */ + { { 0xf1db7612f8af29f4L,0x0da52d90eec6ab10L,0xd16bfb69b851b5f3L, + 0x8c660efa6c961596L,0x3f6f6951bea2c3ebL,0xfdf53cb06ee47dd0L, + 0x305e36d03805cb39L,0xb02da6181c1bebf2L,0x00000000000001b0L }, + { 0xfefb61ddd5edce86L,0x2f7081225cd97a1eL,0x120d97f785787836L, + 0x10ba1b5da5c3f3feL,0xbf2320827474dcf2L,0x94f2cfe21431647cL, + 0xcf6e137c7d9e88bcL,0x77f6796bcb8d5d69L,0x00000000000001d0L } }, + /* 16 << 455 */ + { { 0x9fb0a0d27a2ae93eL,0x89c9dfa314d3224aL,0xe70f39ef1ec1ea92L, + 0x8c7c0024ad2dfd78L,0xc285ed1c3c5b7c09L,0xce6436d11fb27511L, + 0x65f427ade3d07577L,0x22ae30d26827554fL,0x00000000000001dbL }, + { 0x35ab2a33fe772f97L,0xb26d3f9db6b12767L,0x0a55e68e6c9da0fcL, + 0xb481a8f865d2662bL,0xa670cb04ed1fc69aL,0x77e568a7f1c98c0cL, + 0x85c30516952db9e8L,0x8a4ea765f15c664aL,0x000000000000012aL } }, + /* 17 << 455 */ + { { 0x24b51a35aef7687cL,0x77bcd6cd1d943eabL,0xf969c9d01f866f56L, + 0xf7027c8e59f4f482L,0x1261f7e83e164d3cL,0x41ca85ec8188b6c7L, + 0x6958934e4e646b45L,0x26a31442b45b4318L,0x00000000000001a5L }, + { 0xffa95fba76172fe7L,0x8f3bf348c7dfc41fL,0x7b47ee9d9a3249b5L, + 0x91b2331ee3041b11L,0xa8e47621c81d85ffL,0x69c6e66a7d34d69dL, + 0x746f8fa00228f5a7L,0x49b1ab297ad79030L,0x0000000000000117L } }, + /* 18 << 455 */ + { { 0x3346c5da3b8d5865L,0xf9e603778f785ca1L,0xcd7267d26d5dc885L, + 0x96ddf769d69ccfafL,0x2d6dbb25d9594959L,0xa04cb2836b6f3992L, + 0x1a525509f70ba665L,0xd92bb7dc62535572L,0x0000000000000089L }, + { 0x11ced4d3d325263cL,0x7722335f67a97800L,0xc5a85f2dffdb96cbL, + 0x1f96d41972f44be4L,0xc1d49b3ab9261115L,0x0e5855b52419b535L, + 0xd30bc50e4ba83be2L,0xc32fc472bf33f0dcL,0x00000000000001eaL } }, + /* 19 << 455 */ + { { 0xd22d064fd55dd06cL,0x8c131707554e5c06L,0xd0834b1c5ab31d71L, + 0x068e97c08a8ec55dL,0xb354b9fbf2c24393L,0x68526695f43824caL, + 
0xeca17949714f7eb9L,0x3bac155a28673581L,0x000000000000016bL }, + { 0xd623a454e1405449L,0x6f6344968257e0a5L,0x48a6f559686924d4L, + 0x183eafd0b335dae6L,0xb0016fca611ff16dL,0x415cb8b78b736901L, + 0x5613c8cb24ce9cd5L,0x923c4567f82c8007L,0x00000000000001b3L } }, + /* 20 << 455 */ + { { 0x5c720be258ab3605L,0x2820c97e11919f9aL,0xa4bc44dbebae5562L, + 0x8d6d08d350fb9167L,0xde7d84ec8006e434L,0xb6e321bd9a42fff2L, + 0x70071b01de4127fdL,0xbb67a667f73c64a0L,0x000000000000013fL }, + { 0xfb25e553f31d98b8L,0x70c787fbe10132d0L,0xf28de5cf258b7508L, + 0x709924fbf8b95916L,0x78ee0fef73060a42L,0x94eba1441ca70596L, + 0x1d9405604113ad0eL,0x6ebaeb9d5eb59b5eL,0x00000000000001deL } }, + /* 21 << 455 */ + { { 0x8f3c34038513ad8eL,0x23d1068ce02335e8L,0x4df15aa26c8c07faL, + 0xc8c79d63208b6e5cL,0x3baba1b5694025f4L,0x7537c5fcfc644cfdL, + 0x67a988643eddd726L,0x5e802190290837ccL,0x00000000000000faL }, + { 0xc972581abbdecc67L,0x1d388e42d42df1edL,0xa36e26d73014b6bfL, + 0x68e7d409c17eb104L,0x1fba2f37c9bd416cL,0xa70c61d86280285eL, + 0x1cca0fba92a82faeL,0x6b7f1886957ab120L,0x0000000000000156L } }, + /* 22 << 455 */ + { { 0xd55a28a7dc10eb96L,0xad0d50f0b772c881L,0xe1574305f391a2ccL, + 0x0406799baf3c73f6L,0xadb82a35b80d865dL,0xc4aa10c63e74ddb2L, + 0x0f9fa673a31c89b4L,0x134b2949ab05eb58L,0x0000000000000101L }, + { 0xd3718bcacd00f753L,0xa01c7f4c5581cdc6L,0x25e7432b8ae54a23L, + 0x9f1f4be4a8864359L,0x2578e281ba8918f6L,0x3df9ca3b8527e2adL, + 0xcf13344198b23373L,0x32ad0ce945d70a62L,0x000000000000010bL } }, + /* 23 << 455 */ + { { 0xebf15c12cd6df125L,0xf8742bede0c3bb09L,0x7400e7b42949e0acL, + 0x25ebe2a5b908bebcL,0x06c181ac9088df95L,0x17250bead541a8aaL, + 0x3b57a812cae85f6eL,0x675f0f006d47478dL,0x00000000000000dbL }, + { 0xff41c2f7a425450dL,0x7e448c6ef6b5814aL,0xc18ac9561125f917L, + 0xc9b09818cf6dc728L,0x0ffb8f8ba74b6314L,0xbd600655ccdd3a35L, + 0x5abf65d02dc8b7d4L,0x3ab6cecdb06470ceL,0x00000000000001e7L } }, + /* 24 << 455 */ + { { 0x4fb8b429f594dbfeL,0xc57ec74f3702a306L,0xa88664eb38a5e224L, + 
0x03bd2d507529c812L,0xb4890478fee034deL,0x77840108a3b354aeL, + 0xffb76cdc93f4eb36L,0x0c211ef043c4667aL,0x00000000000000eaL }, + { 0xc3e39ee2336294afL,0xc13f4a278b47f0bdL,0x90c93312fe8d9430L, + 0x23e4f98c8899a8b5L,0x4e623a41007c34a9L,0x22f9e5d9bad91217L, + 0xdace8fffcc6f1916L,0x49cdd45f2be203b1L,0x0000000000000166L } }, + /* 25 << 455 */ + { { 0xd62f4e0bf838c030L,0x5ee8619eff5e4c04L,0x573f6322f2ae5f65L, + 0xf521755a815f196aL,0x52ddf3b992e013edL,0x9b1832f75789e543L, + 0xb79c173b1aa8ee89L,0x6387e6de7a2a6291L,0x0000000000000141L }, + { 0x2851d4792e19982dL,0xb824811991e95866L,0x986123011f6a3323L, + 0xc3acf6105e17b5efL,0x3fc21269157bf97bL,0xacb8c9fe75aaf98aL, + 0x6b4d228c3c3bdfa3L,0x4baf44ceabe5622bL,0x0000000000000189L } }, + /* 26 << 455 */ + { { 0xa8228eb3494ec6d4L,0xf4c3f2df66e6036dL,0x4a0bcbe1987742cfL, + 0xc95b99a8e26fc56aL,0xc97d73eabc55ac05L,0x4ed64770e346b78aL, + 0x3be208dfff5ba2d8L,0xdd2b3048d9de7d1cL,0x0000000000000134L }, + { 0xba1c047088c3bbb7L,0xec2a0e1b7a3f7f2dL,0x6714b1767a37c017L, + 0xf37978ebfca5dcd1L,0xe12941486fe75a3dL,0x563cb5467b5a6d85L, + 0x730a871ae6255730L,0x0526e4aa06b13c50L,0x00000000000000b0L } }, + /* 27 << 455 */ + { { 0xe6e5671391d9ff3dL,0xde573dad43ff0f2bL,0xff2b0f78ead1ef3aL, + 0x65d277c1a953c752L,0x86b27d0729c92783L,0x115d71bbaac092adL, + 0x49abda98f48e8e98L,0x4a8dd8b689b41f8eL,0x00000000000000e9L }, + { 0x3cb6410a0ba5dbe6L,0x59714ff58f9c260aL,0x76c17272988b3f5fL, + 0x82cb77d8e171e7d7L,0x736ad952e19f1172L,0x4440cf6ffc6650a3L, + 0x1dedd658fd78e3b5L,0x44cf2b3b8e33297eL,0x00000000000001a4L } }, + /* 28 << 455 */ + { { 0x71eb9485fb8547b3L,0xaf1da3442e086ab2L,0x6dbd491639614d0cL, + 0x612a6ca365e06d5dL,0x2707f1b2e07c53ffL,0x790f26d549783b98L, + 0xa940f16e94bad8e8L,0x2e1a7bcadb6ca03aL,0x00000000000000b6L }, + { 0x3546c7ffccafca6bL,0x63272d70e14ce7d3L,0x27bf1e92de8a95b7L, + 0xa8f658b4e40923ffL,0xe7e7d90319aaa4c2L,0x31419704616938f0L, + 0x60badb4c73607bafL,0xf5a8c9f71a7f8968L,0x000000000000009eL } }, + /* 29 << 455 */ + { { 
0xe8a1be53f7cf8076L,0x835d75075ed17febL,0xdd4e7d82a137d007L, + 0x034b4a4ebec4128cL,0x97a6e77869848477L,0xf5f497447b7cd22eL, + 0x7ebcd6f2e4ae90c3L,0x639a23d313d1fe20L,0x000000000000009cL }, + { 0x63a94a750b5a12eaL,0x5ed1316535a18dbeL,0x108f40b1250766a5L, + 0x0699fd0f3be758a6L,0xef0651edf555cef4L,0xc7cddb0991e0d002L, + 0xd812e01ecc15cb77L,0x45a3ead3ae02880cL,0x000000000000016eL } }, + /* 30 << 455 */ + { { 0xd958953f38053cfeL,0xd0dca4087982a66eL,0xd1e7e8ffedf296d7L, + 0x89403f2546c71095L,0xf9dd442a2197e27eL,0x04c09109a1856828L, + 0xe0f10851c30471fcL,0x9845881cc5fbddfdL,0x0000000000000024L }, + { 0x296b2075de6d1a79L,0x31700b8125baf333L,0x7eacd4191e723113L, + 0xe296b9124dd7b2f2L,0xec4a609c3284a3d7L,0x20d5b58c0edcd7dfL, + 0x4ae17e8562a0a946L,0x05db3034acd03e7bL,0x0000000000000037L } }, + /* 31 << 455 */ + { { 0xe117f7c6c77ec006L,0x52aa819720d4293eL,0xa513494e12cb1789L, + 0x6bc91133c8458cfaL,0xef421ded0667ad04L,0x383dbeb1edea9c8cL, + 0xd47d49b59db7b079L,0xa39d6cb4a990d3b3L,0x000000000000013fL }, + { 0xc7d38b61e69c3f9aL,0x851ecd8f6b0830b7L,0x8a4086e71e5c2abeL, + 0x3a717b444f21af8cL,0x15b162ef29f03bd4L,0x046ebf9d8bff7773L, + 0xdaf386f8c226205fL,0x3b6c08c60f50c5c5L,0x0000000000000098L } }, + /* 32 << 455 */ + { { 0x6ad03106c4dd900cL,0x8227fb5d7ca08923L,0x2e3c1e7f863ba5d9L, + 0x5f76d6bfb5be20d0L,0x841dc1d29492569aL,0xfda5b20e47e0dc0dL, + 0x65aba1062d720dc5L,0x9eab8ffb03e6765eL,0x0000000000000046L }, + { 0x32d92252497da0c6L,0xa2d4c8b1beb7b1f1L,0xede29968f01c7409L, + 0xad902eb5da06607fL,0x2c60236c5c1e7d9eL,0x51f51ddd17a3b1f0L, + 0xb71b393f40ab7cb8L,0x710fc9f9a16ac6bdL,0x00000000000001ccL } }, + /* 33 << 455 */ + { { 0x17a9f511c817a522L,0x43a3b550007aece4L,0x96a038d0c081e0b5L, + 0xbe555f9fabc7c9fbL,0x3d97da9bd3f2d813L,0x924e5ed26daa6206L, + 0x6060389e149fdfb2L,0x63405c57e663161fL,0x000000000000012cL }, + { 0x1d535749a4996c62L,0x154d702fb7d9b367L,0x0683f9a7958437ecL, + 0xed90375c67c11730L,0x25ec469479487577L,0x63d81f9ff945815dL, + 
0x62fa26f1ad0df408L,0x443de5eb41432e39L,0x00000000000000a0L } }, + /* 34 << 455 */ + { { 0x9a7ae6cf1937fbdfL,0xe520bc25b3b127d6L,0xbecbb9ad3f934c09L, + 0x4eea4a4bad3dfdb0L,0x44a3e5454d6092d7L,0x79f2a6084b131c63L, + 0xacdc5157338e48b2L,0x5895180b31633996L,0x0000000000000034L }, + { 0x53d6e9a967add219L,0xdbda58e57a0acb39L,0x2beb62a9a501e4fcL, + 0x09ae642eb8bcd189L,0xd99bd74cd3070382L,0x818602ea5fa033d6L, + 0x2c7a8b66bd6174efL,0xc528911ad73d7039L,0x0000000000000107L } }, + /* 35 << 455 */ + { { 0xb097a9c2f6574536L,0x01dfb1ead319bb2eL,0x919b41fd10b8126cL, + 0xd387f3fadafc4fccL,0xf07fe9237b4b8b0dL,0xaca2f681bfdbb170L, + 0x2e229073a14a8bbcL,0x1e583333a004c3e1L,0x0000000000000121L }, + { 0xe9c44b5713153005L,0x74f67017e0319549L,0x893b54f0aaafa8d1L, + 0x61f414ef72b64577L,0xebe1f4aa89e38aecL,0xeebee7400152e2c0L, + 0x63822c46e22b96e1L,0x8145b880f9e04ea4L,0x00000000000001f6L } }, + /* 36 << 455 */ + { { 0x70f229cbb6f71f93L,0x527c300f096b2a6dL,0x7eb15fc35d2b2a1eL, + 0x073037886bf3f2b3L,0x09f662c4d861d407L,0xd56f054e95f869cbL, + 0x2bff96ccdb9ac292L,0xee2d64d25e9b133dL,0x000000000000014fL }, + { 0x5369672b2305675fL,0xeeee27df7c52d654L,0x3e3f0183901109b6L, + 0xf71689afd656a411L,0x7c3d84abc38b5e62L,0xcea0ef8521a746b3L, + 0xc5bcfd6af0d2c28aL,0x90cb94f6898a7fa9L,0x00000000000001c9L } }, + /* 37 << 455 */ + { { 0xaba2f77640133b1aL,0x9114351127a6fef9L,0x31687b81884480c7L, + 0x6c7052c84c380de0L,0x7dfebacf0c44050dL,0x29a6e20b301d0518L, + 0x83f7b2ef05e760cfL,0xa7761fa0e4c7bda3L,0x00000000000001f3L }, + { 0x5e81cf8f5f22a316L,0x0eb5cd7a5b6e2718L,0x311f1e41a7c3b19fL, + 0xfc878ca8e0c1b8adL,0x9ba4b865dce88c89L,0x42170861e7a6c11dL, + 0xc302bd176a3c551eL,0x981f282f0b95bda8L,0x00000000000001c5L } }, + /* 38 << 455 */ + { { 0x45e81509e8e44c3bL,0xb45241a79f5ecaa8L,0x07e66b9bb2dcacc3L, + 0x40043c9c19768addL,0xbc8029652dd99cf9L,0xfab3cdfb005a5e9eL, + 0x36e1d42bed774e95L,0x76967c78d3e4bd9aL,0x000000000000002bL }, + { 0xfbae6c36ce44de1aL,0x0fc8f7010721ad16L,0xde6105a31dd0cfc6L, + 
0xb7a3f2c209ad74b6L,0x516e7cf8fcf82cecL,0xd836e0e6b5552158L, + 0x3eb895880579b9dfL,0x2fc2edd075f65cb0L,0x000000000000005fL } }, + /* 39 << 455 */ + { { 0x209fd86fc4137737L,0x34bf527b978a68f4L,0x567eb20b0524f6b9L, + 0x86c410e79eea1c80L,0x97dee397954cc697L,0xc18b5bd59a84dd36L, + 0x2a2da3a7ee023a11L,0x86a285102dbc351bL,0x0000000000000093L }, + { 0xfbcf67f59f2e22b7L,0xfd1c4381e2309b87L,0x70afab784b2ef658L, + 0x97e54cad9d17cb2fL,0xb0c6b4df35aa6422L,0x83fa36a31237f048L, + 0x9ad8104a331e9c53L,0x8bb00b123d97ab25L,0x00000000000001b4L } }, + /* 40 << 455 */ + { { 0x918afde6dd0f65e6L,0x667ac91da60ee85fL,0x0b1a2ad0f5ff339dL, + 0x3a6125964b0cfab3L,0xaa581d090d017c03L,0xbeddacc0fb5e501cL, + 0x55b8408587c6b6c1L,0x00f407609e6153d0L,0x000000000000010fL }, + { 0x205043e2c8e8cb57L,0x3d3727c5823a3034L,0x23370101d9e5df15L, + 0xf731c40fc5114c53L,0x8e75d60430a82cc0L,0xd37ccfb8121c66f7L, + 0xedf7a2a6c8593bd5L,0xbbf51d6535e1fb10L,0x00000000000001deL } }, + /* 41 << 455 */ + { { 0xa17bb177dc0e1ef8L,0xf584de3446ed722bL,0x4b0603607255afe5L, + 0x6d3f576ca596d508L,0x48631ef184744e19L,0xd46c50ad0ef56fe3L, + 0x6ddb4398ea9feb8dL,0x5657e597d644bcb2L,0x000000000000005eL }, + { 0x70c62c4674ceb129L,0xaa4fb4f7f7dc52e2L,0x0d4c8ffe05ec9b3cL, + 0x99df5287e6ec0a63L,0x3113604225242ecdL,0xcd6a183fe5a9f9d5L, + 0xd637a75ce46032a7L,0x3d1a48b60ab918b9L,0x000000000000003bL } }, + /* 42 << 455 */ + { { 0xc171893ab62f6307L,0x79d5af9085ab06e0L,0x9c3679f3599351c4L, + 0x736031906d60ff6eL,0x2c98c205c5148a73L,0xe81ca4cb9f935ffbL, + 0x5101448879df55ceL,0x10d3424ab4c0e674L,0x000000000000014dL }, + { 0x00aeb5cbe469eef0L,0x07c01cfc0f37d3e4L,0x4e267bc688ced992L, + 0x237d6f19027453a6L,0x6ce9a46ca02dff38L,0xb48c2de3e9b59a51L, + 0xf35d0e66ddf74ad1L,0x86f7437185cd3698L,0x000000000000000aL } }, + /* 43 << 455 */ + { { 0x263bf75377aee0e3L,0x5b295858c1eedb82L,0x5331a56b952329a5L, + 0x48a0796cf6a59c36L,0xe85f29551b055becL,0xf93e92e2d0dbccc0L, + 0x3ef186a50aa5763aL,0xb29991763e1eaa92L,0x0000000000000038L }, + { 
0xf79fdc4de38ccabaL,0xe7b0c6558caf6844L,0x296cce34714fcdd3L, + 0x1a48345621cfec87L,0x2a8312d2358e9594L,0x610e6381a0afd4dfL, + 0x89e6492a8e1029e6L,0x21374b02a785d5a7L,0x000000000000015dL } }, + /* 44 << 455 */ + { { 0x2861a30eb68c0635L,0xc2129da0128d086cL,0x71851e3d95fb00d8L, + 0x6be824592c8800ceL,0x5521a722ce3a6551L,0x695af21cf0572143L, + 0x1b01ba0f9a2b9d06L,0xc7a637ebc61d4109L,0x0000000000000176L }, + { 0xde540faa89107147L,0xa69764f80cc6d6dcL,0x81569f23de0c90d9L, + 0x517113424a2222f3L,0x0c490876bbbe9e08L,0xb218ae1da3475159L, + 0x0eadae96c8e51833L,0x7993eb897570ca6fL,0x0000000000000003L } }, + /* 45 << 455 */ + { { 0xe9d8cf2d8785a41cL,0x36959f1c8d4f1dcaL,0xb0f16556f44c8f0cL, + 0xf788aaac699f6e69L,0x8abb277294c578f9L,0x8c38c394dfc06c9eL, + 0xdfafe95575442f3cL,0x2d9e16378435de99L,0x000000000000007dL }, + { 0x774db458e4ef85edL,0x198e3637ea0e6694L,0xa5e77830e40e988aL, + 0x180c3b5b74b081e6L,0xa39ebb6fe5bc23f9L,0x5c98492369c1387bL, + 0x0d9d7416587975baL,0x424b5201488c1367L,0x00000000000001ecL } }, + /* 46 << 455 */ + { { 0xff5a55c214efd7e6L,0x1adca75e365a094dL,0xafa2b130605e96adL, + 0x4aaf11849898ced8L,0x5599ad68f50c7414L,0x94be391326bd2eabL, + 0x54ec8fa2f0cb23abL,0xf5920c89b6d8b96dL,0x000000000000012fL }, + { 0x017d7945611261a3L,0x542c34269a3b84e9L,0xe2f9e3b19aa43757L, + 0x75f2e88847f67da4L,0x64eb39e58eeee318L,0xdd8fb1eab23b0d46L, + 0x0971767d60e07045L,0xf8c25ba53967eedfL,0x0000000000000036L } }, + /* 47 << 455 */ + { { 0x7ac23ed5462ca57aL,0x859f1e17b839657cL,0xfd9e2e325a5e48a8L, + 0x45bbd738aeeb1b7aL,0x1786362c8e2b75c2L,0xcb5a7415854623e0L, + 0x1b71ca28e753b2d4L,0x92dfa281fb021727L,0x0000000000000070L }, + { 0x0af516c1c9ffa091L,0x42980007ddbf8d4fL,0x3a9df51f9a67fae2L, + 0xa984f61c8383e456L,0x6eba62bcde9db2f5L,0x612efdba5b905e6cL, + 0xfe1e383995aae128L,0x75a64613ed632571L,0x0000000000000015L } }, + /* 48 << 455 */ + { { 0x2ada7940334d380eL,0x02f067bd7076d98fL,0xbe6af9ceb6975322L, + 0x1d145ee3dc8fec6aL,0xa222be87758076e9L,0x1a8e5d43b84e9fedL, + 
0x137692d4ddf34cbbL,0x6dfdb9297eae4ae1L,0x00000000000001d2L }, + { 0x7e231c00d1551943L,0xcc44391628a2f7e0L,0xeecf6cb476040912L, + 0x6d4c14e8be5a22d9L,0xf90d649d3282241dL,0xb5419dc2e561a554L, + 0x4b69138e82ab84efL,0x5224b9b561afd072L,0x00000000000000adL } }, + /* 49 << 455 */ + { { 0xce08fcf6d04171b8L,0x4470ba5dab720d25L,0x2e7666ad5071f025L, + 0x15f9107c0490cfbfL,0x03786dfbf7edec5aL,0xa3ab26f2371f6194L, + 0x665c2e17943af89cL,0x181c21f31c3493cfL,0x00000000000000a3L }, + { 0x9a689ec87ca04084L,0xbc10f3e5a1b7ad4fL,0x3848a05d0e4629c1L, + 0x5d1524e1594465a3L,0xe73e3208939b6dfeL,0x394cab75dcea7042L, + 0x4e6325fcf6c3716fL,0x68a3e3ed7796be72L,0x0000000000000176L } }, + /* 50 << 455 */ + { { 0x599cd0ec19878894L,0xe7143cbef201ce5eL,0x10da3714a5fb9cf7L, + 0x1c9122b2773764c3L,0x4635467e2027dfaaL,0x891a7d9dcd55ed99L, + 0xf343aec526f12cdaL,0x12c08f3a438e2784L,0x0000000000000179L }, + { 0x8e2ef2fe5bae4783L,0x9d7cef170232cbbaL,0x083db67c52253a59L, + 0x37e59d4263da345dL,0xa8e574ea3d0d079cL,0x86ef4870a66d7cc2L, + 0xe52c34274734a3c0L,0xbb6fbdb41bcd144dL,0x00000000000001d3L } }, + /* 51 << 455 */ + { { 0xa00168b412379ad3L,0xa2868b5b4a48c0e5L,0x3e403c86504a9c17L, + 0x75b6907209543de1L,0xb75b42c499043a93L,0xa2e009d45e566fb8L, + 0x11d754e0a583b69dL,0xbe0fa1b4de63df49L,0x000000000000015aL }, + { 0x612814a12c6aae03L,0x408b7372915e29d1L,0x61f61e12e73428dcL, + 0x2085d678f7e77717L,0xb1029211f963b36cL,0x25bd69c89327c6e2L, + 0x7dd7d1a1c981dcbaL,0x2d539c7ce37c6a61L,0x0000000000000022L } }, + /* 52 << 455 */ + { { 0x8d21680e0728f4ffL,0xf7fcafebd3f8ff89L,0xd2d0f396fc6b6546L, + 0x8cf7d514e633a609L,0xeec1ab0faa3e51acL,0x0a48bf23f0f80226L, + 0x4cac2e1fc760431cL,0xcfdea2348ed7cb1aL,0x00000000000000d1L }, + { 0xe9f93bca5750470cL,0x152b0ce438fc7e24L,0x1ef36c9ed4a355beL, + 0x749ecc7dc9cca53dL,0x4ef6355437f7c108L,0xd52c4ba48aedf964L, + 0x8b7ea74c64626574L,0x932ea2a11b2b87f5L,0x0000000000000164L } }, + /* 53 << 455 */ + { { 0x9ff3327685f612fdL,0xdf4f0b6dc3c52e70L,0xa1a1e4a964bb28a6L, + 
0x36d03f2ef1d7d1c3L,0x713fb6f44ae93407L,0x0c2663100ec30593L, + 0x66aca9e329b5d9c3L,0x7d37a8fc0a1def97L,0x000000000000000cL }, + { 0x98796288fb425696L,0xff1a6d29cfefe3a4L,0xcfa5943801fd64baL, + 0x6a4024ce28438b13L,0x7f2c7ef6301c73c1L,0x5157f61082d44729L, + 0x9cba0eb974e95fbaL,0x2af6f3c9da31cc7eL,0x0000000000000164L } }, + /* 54 << 455 */ + { { 0xff6f5774e9c6175cL,0xb2021f132c173fc7L,0xc83a71f46216e5f5L, + 0x92cdfd754c193206L,0xde732cde4d22b91bL,0x4cc370e5d7a01b58L, + 0xbfa177b448ea185eL,0xbf12228870f5f0acL,0x000000000000017bL }, + { 0xd724f42f8316c8efL,0x6905c0c0ac459200L,0xab0cbe91dbc7be67L, + 0xf06b9cbae5fef79aL,0xd7a0f1a2cb237962L,0xf69b237bb47d05aeL, + 0x8a81d536c2c53527L,0x51225080b0f26302L,0x00000000000001d5L } }, + /* 55 << 455 */ + { { 0xfea9cc0820d402a1L,0x509a70ebc76080f2L,0x7580e94558aa7d0fL, + 0x6471e00c8d610043L,0xf7c55c9f0fb867ecL,0x7d6d03dd6b89866dL, + 0x3fc18d3420c4eff8L,0xe3dbea145d1d2313L,0x0000000000000179L }, + { 0xb2c10b3c736797f4L,0x9c095e9dd5fb62feL,0x743ddb07edfd0c2eL, + 0xa8800366fa8d5851L,0x29347e8eb6cfa213L,0x42ad89046c7c633dL, + 0xed00235329db442aL,0xad54d66623a2dc5bL,0x000000000000003aL } }, + /* 56 << 455 */ + { { 0x2ebae39103bd7a6fL,0x13ff72a0140373b3L,0xa5b141a530872b9cL, + 0x61ac67b5d9c814ceL,0x6e6ab6920438b9b0L,0x6f4aa29364f88c78L, + 0x72d11944e9552beeL,0x7d9f109277e33318L,0x0000000000000054L }, + { 0x1c5268b29acfdd22L,0x228c8fc024fac5bbL,0x016752ec0ec7ed62L, + 0xd8ef13a7b5a87ec9L,0x9bc2ae729b4b08cbL,0xd893fd89557487eeL, + 0xafa93167c5392bc1L,0xd9c4fb99ab6145d8L,0x000000000000005eL } }, + /* 57 << 455 */ + { { 0x1149336e72ac95d1L,0x4c91f13b36a6036bL,0x225902f97996e931L, + 0xc909dad12d1a49cbL,0xa04dd8823e602465L,0xaa74cf8f81ad983bL, + 0x763f241dc0602498L,0x01e44d072ee437faL,0x00000000000000a4L }, + { 0xb3db828d8f95d8d9L,0xe9b27a3e4450b812L,0x52d0967e11b19723L, + 0xbd50c86ce35c5c80L,0xdb536850b0b1f3efL,0xf5ac19c371efd402L, + 0x54bef49e59cd819bL,0x290ad09a7465ee11L,0x0000000000000082L } }, + /* 58 << 455 */ + { { 
0x8c81d20117796415L,0x96df8961a41a5c87L,0x2f683ed593038a8eL, + 0x716a79c9ed5e731fL,0xe68ef96066ac1e81L,0xc9bebccec524da56L, + 0xf4b5d4a9177bbe84L,0x6ce36c9ce6186b68L,0x000000000000014eL }, + { 0x161dc2e2883dff67L,0x51307f27eb431a89L,0xde5d4eb546e62c45L, + 0x8fe6dd46e534d7ccL,0x36425cca2c53c201L,0x415b963968c204e7L, + 0x89e7bdcd9f23732dL,0xfe64f5cfe68f1042L,0x0000000000000173L } }, + /* 59 << 455 */ + { { 0x5435603769b30790L,0x51ed00912f5b77d6L,0x6b54b15b131a7282L, + 0x211ad378cbd36a8cL,0x34c16f4bed3eecbbL,0xc24f076bd9171627L, + 0xd886847908657939L,0x244ec1baafebdba7L,0x0000000000000192L }, + { 0x1b800e47fd724355L,0xcaa24c65dcc4bcf3L,0xeaa8571a7e3df4ecL, + 0x2dcdbb0c5958fc6eL,0x2b1576b4cdd587e0L,0xf4e38c6cf3704941L, + 0xe8e9bc2cc74de3a0L,0xd8594532fa4c5a2dL,0x00000000000000c7L } }, + /* 60 << 455 */ + { { 0x1a2de621d3f97283L,0x6490069191ca7921L,0x8ccc97d8fc69ccc1L, + 0xae42e47f82a38371L,0x6903459a7e1d913eL,0x195ce6e4a78938d8L, + 0x166416eb2712a0d2L,0xfc643243186b4b06L,0x000000000000019bL }, + { 0xd54ca42aeee3a04dL,0x2d541413d5efca8eL,0x49115fe689fbe015L, + 0xbf27d2678dd0851dL,0xbe50e034bc0281a7L,0x3f4e7ff1ca2d692dL, + 0xd14dce191ca3c7b1L,0x27f9941a395f0b98L,0x00000000000001b4L } }, + /* 61 << 455 */ + { { 0xeef667b0e59502baL,0x7004ebb230c2dc42L,0x7dde5dc19816002fL, + 0xcabedadd130cdbe2L,0xd3e313886b61a332L,0x9361770331e77c39L, + 0xe152f854d2e9a54cL,0xf692b0e0409b7761L,0x000000000000006bL }, + { 0xcdf2a769675db132L,0x9cf60fb4c021c4b5L,0x85ae29ccc6f8ab53L, + 0x3f2604555d8ba929L,0xf12440613a526ab4L,0x07f6786582d7e4a0L, + 0x1ee6dc4a196b3b01L,0x63d990fbfe1ddb4fL,0x00000000000000a2L } }, + /* 62 << 455 */ + { { 0x1582b86eb4498ac2L,0xf11f80e2ee605f5fL,0xb7c8dc4e60dcd7f9L, + 0x2a06825b4b80356fL,0x41bb0da77e959943L,0xc0d5e398b4590db2L, + 0xcb3232f28756eab9L,0x9d4ceef76e88c536L,0x00000000000001e5L }, + { 0x73935b8f4dcbc4afL,0x5695ec670d9f27f0L,0x53a03172d530386bL, + 0x51b1e293a6e98f81L,0x1ad5c0e0dc3e5b39L,0xa5414a2128acb62bL, + 
0x96a450f791d1f852L,0x398f2a08aa5db342L,0x000000000000014bL } }, + /* 63 << 455 */ + { { 0x25e1f974e7957ba7L,0x2926ba4a36f165afL,0x92eada47e14be343L, + 0xc7acd9f7d2eb5160L,0xf2f1c7004a2f58e3L,0x991e3726c2d83bf9L, + 0x3526d6bb8b367f2aL,0xad87a25e2365148fL,0x000000000000010aL }, + { 0x04bb40a8f2902a8cL,0x69316b4a004e47eeL,0xaf8f683e1259c9dbL, + 0x36f36eff9ce6b2c9L,0x061325ecb4327f6fL,0x4d8a6a22ba5f20a6L, + 0x7ac29df33e3eab51L,0x0cb65295d4063ef3L,0x000000000000009dL } }, + /* 64 << 455 */ + { { 0x8a3e381ccdcd741bL,0x2c5acc0b14e8f964L,0x209d040429621441L, + 0xd27a65cb2995c3b4L,0x3a776ff06ec13968L,0xa2d95ca7db4f8a0bL, + 0xfdbc547d6c04ea3bL,0x30ba75a8db40f58cL,0x00000000000001adL }, + { 0xf6406a6b84115d90L,0x7d2e26ac0f228b65L,0x1e17672b9029a8abL, + 0xeaa84d813ecf05fbL,0x9e16e91db5f5b798L,0x7287dbd5e978749dL, + 0x85880f457fe092aaL,0x80b3126c514afdcaL,0x00000000000000e0L } }, + /* 0 << 462 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 462 */ + { { 0xadbf4f9faf2ed12fL,0xce1d19e4f380fd8aL,0x0957bdb5a39e81aeL, + 0xf9833321626ef6bcL,0x110ae5ea0cf5b28dL,0xab15945020392cd4L, + 0x67c498876bc67855L,0xce7e5938a3fd61c6L,0x000000000000004aL }, + { 0x59c5b9ef28c7dea9L,0xd02f95ba0a6a7184L,0x034dc2578202769cL, + 0x213b0b0894dd6896L,0x03730b7fb5dea95aL,0xfe243ed0617ca889L, + 0x16cf4d17fb1ba052L,0xd8691d6b226f96daL,0x00000000000001c0L } }, + /* 2 << 462 */ + { { 0x15a257c527a668abL,0xa437f499c920d8dfL,0x297b6764d4cd271aL, + 0xcbedd33b87ce8d52L,0x4ab3b76110385c75L,0xca68612323436b58L, + 0x44d5a94d8d52f4e5L,0xd125e021605bba0eL,0x000000000000001eL }, + { 0x578cd28335e40c74L,0x898f4684244ea58eL,0x158ae785154fc334L, + 0x466c1c29d8d62231L,0xef781790c48b1013L,0x7a902e83c3253275L, + 0xd0aab751b1aeae85L,0x89248d120ecd9b0eL,0x00000000000000e7L } }, + /* 3 << 462 */ + { { 0x92e5bc5c8c7671dcL,0x0eda1a20215e6b5fL,0x158462923104e97dL, + 0xd0513d5868bfd00cL,0x3439d9cdbd88796aL,0xb3d0c7919ec37b3dL, + 
0x8e0ac88900c49d4fL,0x3ee90cb4441e1f08L,0x0000000000000079L }, + { 0x24c36d60fe7e66eeL,0xff0aae245f990abbL,0x78308e0d041b19c4L, + 0xecaa1b05c7ed7c0eL,0xd1d45e1cd86c1272L,0xff74e3e91b7ddd34L, + 0x17b6d9984948d333L,0x1d2c2bae12ead07aL,0x00000000000000c2L } }, + /* 4 << 462 */ + { { 0xe12d8922cd5f918fL,0xec7c176349f73f1bL,0xcc5a3a14c744eb3dL, + 0x2b96f342782dbda1L,0x87284905ab68dde4L,0x647354b9e720668aL, + 0xe07da694739c4393L,0xbae499bf2cc145f9L,0x00000000000001aeL }, + { 0x74eeeaa477c16db0L,0x297624e8eb22ce92L,0x9d3944bc45098c91L, + 0x6ff31fa0fa4dc50cL,0x3c7fb6dacc40c211L,0x74d95dec96b4cd1cL, + 0x829423dba6388941L,0xaa2b7d45fc3123b5L,0x0000000000000119L } }, + /* 5 << 462 */ + { { 0x90a1a0ec3f49a005L,0xacb7506b94508ef3L,0x3e44866b8af044beL, + 0x00240490deda67cdL,0x88a83372b773399aL,0x84c5c9ab2969b007L, + 0x9fab80da8cc3eb59L,0x55b3ceef0472347bL,0x00000000000001b7L }, + { 0x886acce0c83ffd24L,0xcba2aa08a61f2f61L,0xf4ff7cecbc7ab151L, + 0xc825c0ae23a23bb1L,0x4bbe707cd3b316b7L,0xcab4a987ca9bbd00L, + 0x4cb9d017a4741041L,0x3e5014b4718db768L,0x00000000000000b1L } }, + /* 6 << 462 */ + { { 0x3f614850400ed29aL,0x6ff097c0d252cdb3L,0xafbeb6cb589c3201L, + 0x0f900ae78d3782d5L,0x69902554ce4bfee8L,0x91d1ce3e51965228L, + 0x61c93b4ae8630088L,0x066dbdd793d1c0beL,0x00000000000001adL }, + { 0xf65ac289fc255d76L,0x1ad9d35d122e0cd0L,0x3b29d3fe4f08dde9L, + 0xa1a3a083d1829c0eL,0xc7500da1469b3ce2L,0x8cba6c9931ea259dL, + 0x6724d90dc78af820L,0x0be3b075170d6e15L,0x00000000000000dfL } }, + /* 7 << 462 */ + { { 0x108c69d8ffdb2cdfL,0x95008e039022f9dcL,0x9206904662c29258L, + 0xc388264486233b0bL,0x79862fb5f196c059L,0xe47b071f515c7d8bL, + 0x019d7c9a96ef0fb7L,0xef99e754d21aa1a0L,0x0000000000000068L }, + { 0xfb114e4348013bcfL,0xd510664864739742L,0x4a4f811b623625f2L, + 0x7f36f16b9c64ea5fL,0xe829e645bd173948L,0xfde730475af3200aL, + 0x8bbe62cffc337d06L,0x98549a55c985b1aeL,0x00000000000001b7L } }, + /* 8 << 462 */ + { { 0x404216db33849ba6L,0x7979ab027b42813fL,0xb0cabf29117e27aaL, + 
0x3e449cb65a0ab76dL,0x34efba77460b6964L,0x7553e0a66fead00bL, + 0x8ac8857116963cf0L,0x01716cbe017ad9d1L,0x00000000000001f8L }, + { 0x30cfe5c1d21fbd36L,0xc7a0e466df1fe3b4L,0xca38fd1cc8ea2c29L, + 0xde2e20394e54630fL,0x3b152ec3885b124fL,0x704a1d76d39818c8L, + 0x0e74a9825207d2c2L,0x2b2dc7c8ec4d71a5L,0x00000000000001f8L } }, + /* 9 << 462 */ + { { 0x4e5c2656a20ee12dL,0xc4beb2bbf38239bdL,0x8911466401e32b71L, + 0x9282668c52808f4bL,0x99825c89586715f7L,0x73e4d0b0f6760accL, + 0xf24743d27956e8dbL,0x219289ab9074bb3bL,0x0000000000000147L }, + { 0x0ce42484e18d70c0L,0x865b0a12ed9a8b42L,0xe9e2c3a206e2060dL, + 0x11ad93e462d02433L,0xf9faceca382d0deaL,0x13fdae7bc2f70442L, + 0xd5ecf82693862d21L,0x3286fd7361440676L,0x00000000000000d1L } }, + /* 10 << 462 */ + { { 0x9059c13b115d7186L,0x42c5cbffaee2253fL,0xb326c80627914590L, + 0xd78cd2604c197c2eL,0x076b4d3c84d14e78L,0xc75fba4cbe666d35L, + 0xe564935922126692L,0x62773e9a1d415c9aL,0x0000000000000144L }, + { 0x39518f5ffba49366L,0x15a2865a6a82670eL,0xe6e08537e05bc13fL, + 0xd69de2c00bf3f3a0L,0x75096c76d362830eL,0xf3744a2e0177a66aL, + 0x3de2a9ddc9baf0e6L,0x3c0f980fb6d810d5L,0x00000000000001b4L } }, + /* 11 << 462 */ + { { 0xcc94284db189ddb8L,0xb485dd3e810cb5b3L,0x7962589cac622d13L, + 0x66d67d6807da5abfL,0x159b0de064fac523L,0xfb87c9e036ee4172L, + 0x8efdd2605f67a45cL,0x967626223addee84L,0x0000000000000185L }, + { 0x612588e0207d2d04L,0xfcea78b33a859390L,0x73e90526997cba0eL, + 0x805d7844afdcc542L,0xc3f76999e81e478bL,0x9cdebd300a4966e3L, + 0xbf267a7251a2070aL,0x36c52bf4cd0f0a22L,0x000000000000006dL } }, + /* 12 << 462 */ + { { 0x01ef33cf16e0f612L,0xbccc51c3eeb7a33dL,0xcc35c1e346a12c50L, + 0xd2151223b915b370L,0xb03030143aff625fL,0xb7e34e67b76ed5c4L, + 0xd5b48e2210b988f7L,0x5edada0557f1882fL,0x00000000000001eeL }, + { 0x70919a6126a93c88L,0x2729baf111491a74L,0x895470b5541b2e39L, + 0x88e52a03c85fef39L,0x605d48085b35ad5aL,0x3203783c87561036L, + 0xac1c8568546e9de2L,0x61c5703c62d3bc75L,0x0000000000000192L } }, + /* 13 << 462 */ + { { 
0x0f55f94ed641f8ddL,0xb2dec9ebc1daf64eL,0x5bcf6cb3f70c3974L, + 0xc0e4304c24e93de0L,0xf3d5cea7b59b116dL,0x34a1d943e9dfdb47L, + 0x908d0e134f9d468aL,0x52b536cb61c837e5L,0x00000000000000d2L }, + { 0xff2678be174d1c4dL,0x952da2e880b4eb5cL,0x474b27b13e739143L, + 0xfc9871cd70a02e7eL,0xcfd72308c6d5fbdaL,0x0a9812e6ecee2813L, + 0x1461ce73b4f29fc9L,0x2f643e82c08cc647L,0x0000000000000131L } }, + /* 14 << 462 */ + { { 0xc1ab1a2af439f3f8L,0xc1aaac5405a1ec00L,0x5df5d950abab5451L, + 0x13114abf23f29c1fL,0x0cdd9aade17cf9b0L,0xd16efb64b78fcf8cL, + 0x9fd47a7ac0d274b0L,0x9bab71c2fa1da55cL,0x0000000000000095L }, + { 0xdccdab46293e406aL,0x40e3b0cc310ece4dL,0xd18c09e52463ace2L, + 0x65122363364dc7ffL,0x77fc1848657b65f0L,0x422392cfc4011670L, + 0x382cd9ee2bb61fa4L,0xdcb156d14558bb3eL,0x000000000000003bL } }, + /* 15 << 462 */ + { { 0x8e96bef0dccad82aL,0x0ff19a8e4919f552L,0x9dd5e116498d3dc5L, + 0x5d25b8a36173057dL,0xdbef9c9586f15c1aL,0x445d807d6f12eaa2L, + 0xe494386943108eceL,0x5736908d88b2f68eL,0x00000000000001eaL }, + { 0x9757f44fbc6b53c9L,0x61faaf06c2072e4eL,0x9fba7b4bd5ac4681L, + 0x130e8e184ecba083L,0x9c12e614b297edbfL,0x746c82a360efb65eL, + 0xb42ca8c31cfe64c2L,0xaabb762ffac5abb1L,0x00000000000001b7L } }, + /* 16 << 462 */ + { { 0x493d8a3fce43380aL,0x27d1657b1511e823L,0xf09125a7e6112eceL, + 0xafd3f48218c2ddc1L,0xc1207ac1602ab092L,0x22fdfe83f80fe3f9L, + 0x736ad27ff479c304L,0xd1fc1905784ca960L,0x000000000000008fL }, + { 0xe87798d27fb35409L,0x86b0257161cffd40L,0x12d991f963058303L, + 0xef9e933e033f10c9L,0x8221774cb298a118L,0x95dcfd1080dc2858L, + 0x83747d7168bfaa1fL,0x03567035a213129bL,0x00000000000001dcL } }, + /* 17 << 462 */ + { { 0xeeaaf96644dfbf9bL,0x86df2b8af44c56c0L,0x2bf28ec1976c6682L, + 0x50c5528dcb7a8ff3L,0xcf2be4a643420ec8L,0x4c214e8984341eedL, + 0x7b65e1aa232ab304L,0x02622feebbffdf3aL,0x0000000000000077L }, + { 0x907131fff1637afdL,0x51693f9b107885e0L,0x7035ec8c41aee9aeL, + 0xa45531d5f94a191fL,0xf3556642188aa104L,0x58aee7a7d7992ed8L, + 
0xc1bde242750e9e10L,0xa2bee7d8b6ac4e78L,0x00000000000000edL } }, + /* 18 << 462 */ + { { 0x7ff30e30b92acd78L,0xf64e2e1e397f204eL,0x85ceca93595a69aeL, + 0x82d987dd4dfec414L,0x41e53688f2448e97L,0x9024642e5a064c46L, + 0x706066f5e60807e3L,0xa8743118e8cea80aL,0x0000000000000167L }, + { 0x3a88f81b112ee142L,0x1d43947513e86304L,0x73ce796df75c9d7fL, + 0xfa5edc926cc4a9ddL,0x2b1bbc20981ed026L,0xb632ae87c6a7a48cL, + 0x27425c2d622960f3L,0x4501465610fc0860L,0x0000000000000072L } }, + /* 19 << 462 */ + { { 0xd10883102f412cedL,0xeafe72656e718a1cL,0xb632786f6f5a00a5L, + 0xfb0909685048cf77L,0xf80eaeceeb8fc82fL,0x3a6ecdf3610279c9L, + 0xefa7c7b42855c157L,0x46a8af036abf4d8eL,0x000000000000011fL }, + { 0x492ffe9726805929L,0x1619613e41e559e7L,0x97e9a4f38e33bd9aL, + 0x9ad49693091798d5L,0x356bebd74653b12cL,0xfcc7f27e9fcfd552L, + 0x5ed9331c742cd38dL,0x1645ed1198af87e3L,0x0000000000000088L } }, + /* 20 << 462 */ + { { 0x15aea420d5ac5d48L,0x6e11bf30178c5a77L,0x01f1180d2651dee6L, + 0xd7602ed459e7a8f6L,0x5d3ec901de3746deL,0x314d1580e2400304L, + 0x08f8259265f15588L,0x22e8ff88d9ffe64cL,0x00000000000001d2L }, + { 0xf3f23abc99e0f136L,0xef5d94840f1ae78cL,0x616e957ff6e34ca1L, + 0xe2e237b080b7054aL,0x7987761592baa214L,0xcd321299dd291296L, + 0xf9b74adb20d8bcf6L,0xefcf323631661404L,0x00000000000000e6L } }, + /* 21 << 462 */ + { { 0x91e0a5c0c94fba83L,0x367b5806dc42c01fL,0xe88e6ae4b4373ef2L, + 0x3f51875cd7874231L,0xcfcb6890f984b4a4L,0x8da4e461d4276ba2L, + 0xe47110934f261c73L,0xf8e578b314cbc501L,0x000000000000005bL }, + { 0xea568d5507f17331L,0x4a2e61fe70a5e9ceL,0x1df72d8e970cba01L, + 0xd03d5d92c8f4777aL,0xde53ef7f2e2d88f7L,0x32e0862d4fab2632L, + 0xbd0feb3e82194583L,0x2f2def128b4b82d7L,0x000000000000004eL } }, + /* 22 << 462 */ + { { 0xba8ee1394fa0962aL,0x0f568c5be733014eL,0x5a16b363ffb66b9fL, + 0xcf83600ddb746ec8L,0xaa67a3d2e6bc9390L,0x801fa15f8930fcddL, + 0xfb374cf0005c0eafL,0x66552348ffad00ebL,0x000000000000006eL }, + { 0xb8529088874d7ed9L,0x64a7e8e0843ddc7aL,0x26a9496205980ec7L, + 
0x6e219d44341b4fa3L,0xe07b2068161ef467L,0xe6d89d6d3e504a9aL, + 0xf473ae9c290f40dbL,0x250a539ef51dcc67L,0x0000000000000018L } }, + /* 23 << 462 */ + { { 0x8f3d851cbc598deaL,0x95c3f4fb968d82cdL,0xb406ff2f644e4030L, + 0x8b084b6cfb0c34c7L,0x978c1241d9b9337cL,0xdd156dc3d2d630b2L, + 0x976b5a2df1795879L,0x5a188835753b7f8eL,0x000000000000013aL }, + { 0xeb5df8ff0100cfd2L,0xbf96012da14b7ddeL,0x9228c2c593a9a2f4L, + 0x66ea58ffc695a320L,0x5c370488bebf78ceL,0x14c0ab834750ca0bL, + 0x0beb6f8e8fc91786L,0x714eb7802291fce2L,0x00000000000001eeL } }, + /* 24 << 462 */ + { { 0xeaa89410f084380fL,0x9491f70e458ab9e4L,0x6b6772051d8ec684L, + 0x1a9fc7951ef7e9edL,0xc47e84efb05c41efL,0xee377f78c6014e09L, + 0x91d14a75521171ebL,0x1b11fbbc32361217L,0x0000000000000111L }, + { 0xf789cc6ba63bc7a1L,0xbbd74f359393b5cbL,0xb43ff6170bb024a4L, + 0x73800410a844508cL,0x93a28632c5468780L,0xc96c62e52aefa8c1L, + 0xa500ce0b7df9b61fL,0xb59527f2369f304aL,0x0000000000000172L } }, + /* 25 << 462 */ + { { 0x8674334a8ccadedfL,0x2d016c2b11f6012aL,0x4ec4ee5ac3a12902L, + 0x26217299b221e73bL,0xdf28bb47a5f9357aL,0x3676902cba35c37bL, + 0x90e2f3cd24232740L,0xa30b34e8d62d95beL,0x00000000000000bdL }, + { 0x1671d5affeef8af1L,0xbcd9d2aa4c3a31f3L,0xa38ec0bab0adaed1L, + 0xa2cf817f0bd7172dL,0xa30940a00f887402L,0xba1b3af828713fe4L, + 0xb837467dd1b89a24L,0x13361a345c6d8b94L,0x0000000000000103L } }, + /* 26 << 462 */ + { { 0x3f3f51f3dc8fbec3L,0xe7b0c2ea83a6a2b8L,0xcbf7b399a2b70075L, + 0x74e2302823ca18faL,0x0c1600f1e46c6cddL,0x5434852363af7990L, + 0x10506b67950245b6L,0x9d277adb7ecb14fdL,0x00000000000000dfL }, + { 0x9f8cd4536be91611L,0x362a07d783e99454L,0x9e89aa5e1065da40L, + 0xf7f67219c5087c27L,0x1a0dd1c0a36553f4L,0xcb8da38dfbbc5aaaL, + 0xd43603bfe8784073L,0x9354aa7e0acbeb25L,0x00000000000000fcL } }, + /* 27 << 462 */ + { { 0xa7f369b6a543148bL,0x08eb19b28b24443fL,0xca61b0501b1d7de5L, + 0xa17e2e434d630122L,0x77ece0be0b065578L,0x85337244b4997376L, + 0xac8d2abab970ee48L,0x724afcfed85c9efdL,0x00000000000000a2L }, + { 
0xaba323225c2cb2b7L,0x63f1f3f64757f00fL,0xf1be3eff90e18446L, + 0x3e5a2a9a9a238a61L,0xfbb3e075a135b254L,0x7187eb847361e1e6L, + 0x340db506c881addfL,0xcb356eac5468e658L,0x00000000000000b0L } }, + /* 28 << 462 */ + { { 0x68b81be9052d2ea5L,0xeadfb53105294d6eL,0xcd41f773a3258647L, + 0xf6b1c867d70fd9d0L,0x9a89cc017be523e7L,0x2499d793487ebf56L, + 0xc82cefe8c0d072a2L,0x91c2bfbbe82cb4f8L,0x0000000000000105L }, + { 0xd705cf5a6705dce5L,0xfcbe204e4462a67cL,0x5e198bc3430d627fL, + 0xe13da562e765991fL,0xb6cfca287032c5f8L,0x0868d9ab1783308aL, + 0xc5b628c6c8050c8cL,0x243e2ec13a93a8deL,0x0000000000000139L } }, + /* 29 << 462 */ + { { 0x98d6256c20245960L,0x457d105137b7415aL,0x0cb5d0dc767184c0L, + 0x8652a67b5bde6afaL,0x42679642e838bb0dL,0x0b577bc7178558efL, + 0x1e03da5c14212285L,0xa591ff706f6c2a33L,0x0000000000000031L }, + { 0x49a0d6f9f68489d7L,0x87763a7005cbcc01L,0x7843af2c848170ceL, + 0x9f7c25bea45226c2L,0xfe56752466707eecL,0xea029f2f0db1412aL, + 0x4f2c2e5d93a8d4f9L,0x405015ba0b3b9910L,0x000000000000013bL } }, + /* 30 << 462 */ + { { 0xa4dcc51572a88c35L,0x132016d41a8ac7d6L,0x65acb09f7a272350L, + 0xc77cc6401ff191caL,0x3fa4c8e460673588L,0x4770229b646a14f3L, + 0x8dbfb85fd19872edL,0x0fade56dd7f01b99L,0x000000000000019aL }, + { 0xbd59629410a943ffL,0x74b893733857ba32L,0xd22c2856fc1ff38aL, + 0xa8f2aa31b1c6c561L,0x033a7f8b5d322178L,0x3efb1ee6a075bde1L, + 0xd9ff5d90aff85287L,0x1c909307f8075ebfL,0x00000000000001e8L } }, + /* 31 << 462 */ + { { 0xd8e925414f766448L,0x06618fed856b424eL,0x25293a2cc4774117L, + 0x0fe866dedcd89015L,0xd06fe1e1625164bdL,0xa5be1bcaca21865fL, + 0xa15048438449a87dL,0x1342fd55911a905dL,0x0000000000000069L }, + { 0xf90da038d98d58c1L,0x69444f7f114e147cL,0x42211c1da866632cL, + 0x78828291b657d2c1L,0x014c0cb987bb87f0L,0xed3d697194d14553L, + 0x5b9df642dbbedc15L,0xcb94c1f3ff65dc9bL,0x00000000000001dbL } }, + /* 32 << 462 */ + { { 0x461c1512910f4978L,0x8ca7f0dd63d2ec28L,0xda9297d2607ce3c5L, + 0x592ba13f81bb2dbbL,0x157d4ac7f740c331L,0x1d5311e782c49ae7L, + 
0x5c3046713bec9a94L,0xd81fa25b0a23ab8dL,0x00000000000001eeL }, + { 0x6d8c6efaf51614ffL,0x20cfd353868b4fbcL,0x9b9dba152201aac2L, + 0x6bbd8ded8d1900d7L,0x6ca3e71afa946925L,0x7381e45aa0c3735aL, + 0xcd9db7a2b93d4feeL,0xae40115145bdce69L,0x00000000000001bbL } }, + /* 33 << 462 */ + { { 0xa6dbe971dda4b355L,0x8f07b76adbb73b5aL,0x1aa44431454114d1L, + 0xbc540435c1036c8cL,0x79b688ef916b6533L,0x47a020d49bf029e2L, + 0xa669d577c093a75cL,0x734c102c6a6c09b3L,0x0000000000000184L }, + { 0xc90106c15b9211b4L,0xf076663eef5e999bL,0x50143616d8aaa415L, + 0x62e33d915dfa07afL,0xd0bbb32724d176b2L,0x091bb80a291db76fL, + 0xddfe277c8c809e75L,0x4c890027e3160b27L,0x0000000000000063L } }, + /* 34 << 462 */ + { { 0x30f1ef52ad35c0c9L,0x612892843c67bce8L,0x139970fcd128fe01L, + 0xf5b1d3cf33cc222fL,0x972e134b9547e5eeL,0x5d32f64d19aecc5aL, + 0x5172742be7c61b95L,0xd64abb911323f811L,0x000000000000005cL }, + { 0x16463fb98f92a5bbL,0xd32f5194b1fc0a53L,0xf36b396b4bfefde2L, + 0x83eec78ac9f4aa74L,0x1e4f3f25af333f1bL,0xc5f9a8710407544eL, + 0xe2268d05367d93f1L,0x6634b2c450c02febL,0x00000000000000a6L } }, + /* 35 << 462 */ + { { 0x4dab3300137a409aL,0x045e43dd13a206c0L,0xea9f0e00a76a7c9cL, + 0xcdddc2b562c647a7L,0xaf673bf31d6f043dL,0x7d16a2d372adb549L, + 0xe25427e8edd38a7fL,0x090e5b177a9e105aL,0x000000000000010fL }, + { 0x2f64bf1100d25195L,0x6439c231eed4846dL,0xad6c59f7318ab661L, + 0xb9883d0585cb9b6cL,0x646d32e70d3f1dfaL,0x8165416abd4a686eL, + 0x4b7f45101d638a74L,0x4d3d143be8dae5c7L,0x0000000000000094L } }, + /* 36 << 462 */ + { { 0xb20d7fef01dd7411L,0x9f9a104047aa41c0L,0x37677f41aaedbfb3L, + 0xe3e23e4722350c64L,0x93964e8a30ff0ceaL,0x956a88124b5b4758L, + 0x6bad595e7552aff2L,0x0de81b3d2b93308bL,0x0000000000000199L }, + { 0x64899c662f0bd592L,0x41616275f3969762L,0x8cfc9ddac434793aL, + 0xeeeba2d4cdf048f3L,0x6f2d05b12b87e0c5L,0x1abf46121bf8117bL, + 0x674deca6b630a10aL,0x061cca5779acde0dL,0x000000000000006eL } }, + /* 37 << 462 */ + { { 0xbf73223a615df738L,0xb0f9d8e26e8ee169L,0x7f7aaed7a4f7f685L, + 
0x7e16553a207bca3dL,0x29893c7c9342b6dfL,0x6a4b16aa6e7f48e4L, + 0x4ea7fa828eb3893eL,0x53e4146d6dc5de8aL,0x000000000000004bL }, + { 0x51549c3a9eceaebcL,0xf7484acb280d64edL,0xa5a7b9319947daccL, + 0x949b17ca4b59849aL,0x52689f53bd033b58L,0x23696fa3f648f6d2L, + 0x1081e1fb55ab3259L,0xb240a833ece6561aL,0x00000000000001f2L } }, + /* 38 << 462 */ + { { 0x000b7bdfb3e1b3beL,0x612d3ff2b5bc3fb2L,0xa3d0960b093edcadL, + 0x3b256f3bb671e1faL,0x97c8e0f576a1f2faL,0x7ed1db11a5c9641aL, + 0xb46ccc7d006051bcL,0x932c2d1a403a21c8L,0x0000000000000063L }, + { 0x81a9d8e2ad21fcacL,0x1a1ad119061dc386L,0x1cfdb59dd2c66b25L, + 0x80a2f73d5b1d3223L,0xd214b239a132a94aL,0xfd263d6816369c47L, + 0x511bfed818c54c13L,0xdb57736c68a9b65cL,0x0000000000000044L } }, + /* 39 << 462 */ + { { 0x254c45f5fc8a16bfL,0x0998c4a73e0e13feL,0x3678d827444e685fL, + 0x7e81038b391118bbL,0x1b49935aa752e752L,0x3c3d9ce4e799cad9L, + 0xed9bc30f2b1b2569L,0x8e4faddb0dd293c2L,0x00000000000001f0L }, + { 0xf6d16e3934933756L,0x5949198eab748e09L,0x5e5accbad4b3d3fcL, + 0xa96f70b6f0177d5fL,0x226abfa1a0e50e79L,0x8ea06e063323f2adL, + 0xb05b8c59c31da2faL,0x7cd44db8c6e09ee3L,0x0000000000000135L } }, + /* 40 << 462 */ + { { 0x19f565b51420debdL,0x9a19ab6e5291fe77L,0xd729fff2b428ae74L, + 0xe237994c6d8256beL,0xb74f543cd93d3e98L,0xc460e7cfd599c9ffL, + 0x6bd6d31f7924ebf9L,0xad07316cceacb2b2L,0x00000000000001e6L }, + { 0x1436f3231735fe0bL,0x2e13910896a8983fL,0xb18c48fc100cdb54L, + 0xd777496c81e38ca4L,0x9dbc2d4457bba02eL,0x0b5949a121c1d384L, + 0xd1cdb9b170e6d68dL,0xc6b803539621e4c9L,0x0000000000000164L } }, + /* 41 << 462 */ + { { 0x6da3a967d08f917eL,0x090db6a01bdb8109L,0xd087e8ad559a4ee1L, + 0x5e3104faf6af44a6L,0x15104cf52a8bbfbcL,0x5af76555ef32bd3aL, + 0x165a9153f38429cbL,0x03ff29aa3eaa52d2L,0x00000000000000a9L }, + { 0xb841ae420ca5f75aL,0xd119e95ff2f4063dL,0xbe45f4d88e1dcd37L, + 0x56ca9d99446b1fe3L,0x1c796119161e7c84L,0x91b1ea75a6098a52L, + 0xa2248d4a4c94a9c7L,0x4b86cac0e9cb5d3cL,0x00000000000001e5L } }, + /* 42 << 462 */ + { { 
0x8970aacb2a4748e7L,0x6887665163ba1e04L,0x08e1037e56a8f850L, + 0x88edd5d94a14780cL,0xf85dc5e2d01fb425L,0x41ea90dd96ebecf4L, + 0x052996e99442c9f3L,0x3ca7dddba5fcdb15L,0x0000000000000034L }, + { 0xd541059044b9fc63L,0x9f546de20a523dfbL,0xe1d18513d39a6bcbL, + 0x830c4a77e61b66c3L,0xdf0da6545a4d1d7cL,0x620c1d84287aba62L, + 0xa0941743ba971acdL,0x28b96fff367a1d57L,0x0000000000000075L } }, + /* 43 << 462 */ + { { 0x0125e7cb3d67d593L,0x948900e272eb89e6L,0x4cf465978810287dL, + 0x086d2b5bc21c7406L,0x996874adcf082ce7L,0xb72176f2b8523d29L, + 0x6be04d3184c4a83dL,0xe407ada12517a615L,0x00000000000001deL }, + { 0x35ef6a5d0d931f5fL,0x55f15fdd69c1db16L,0x43d2e66069c6bfc8L, + 0xb67513ea8993e68eL,0xd0591d864892844fL,0xc530cb5925b1216dL, + 0xbc971e9a92a1390bL,0x5ad3d777b5efa5d7L,0x00000000000000beL } }, + /* 44 << 462 */ + { { 0x8aa2fd3a0324bbaaL,0xfb02da2e28cb3ea1L,0x938876146493b3fdL, + 0x68c6a8a4da6d66b5L,0x31477d83222398fcL,0x9498701a98da59d1L, + 0x73eafd532bdd61a1L,0x6dea3d8cd8b1b1bcL,0x000000000000005eL }, + { 0x30d3c83a929e403eL,0xffd69ca5c2c1881eL,0x36e2dd8f21383e3aL, + 0xc0702450c42cd33bL,0xdc2ba2ae01146955L,0x74ae932a3060efd3L, + 0x1f01d81b1f96136cL,0xed2a00caa434d7d4L,0x0000000000000121L } }, + /* 45 << 462 */ + { { 0x1aa3adde98e3be01L,0x11b63a1782d4a9a6L,0x48452f3d8f8d8fd7L, + 0xa83c3daffd794b08L,0x9cc7138f1149204eL,0xbac5f41644e190a0L, + 0x1b835e8d2533b4b8L,0x1486716a654cf4e9L,0x000000000000013fL }, + { 0xbb1300a807389decL,0xf4d28849eab42f87L,0x0cbde28ea8e39949L, + 0x3837fddab7a80ec2L,0xee970327aa8141f1L,0xc204fa0442caaee8L, + 0xe516c2275a08d44fL,0x7b706f80ff20b54fL,0x000000000000017dL } }, + /* 46 << 462 */ + { { 0x96c595d04345853aL,0xe761a852cdbb2404L,0x2a83d0fab05cd025L, + 0x067908543d1728d6L,0x0cc868e988564b8dL,0x72a369ffe715f13bL, + 0x4a09518266b3aea0L,0xe4498f3188a89d31L,0x0000000000000186L }, + { 0x669bd0b549234ea0L,0xee783a18c484a4f8L,0x1219541596adeaf0L, + 0xb1b395886a1084e2L,0x9c553221983d47a4L,0x97bd48e4e145793aL, + 
0xee9bc1d4d465ffb1L,0x81a0e9d512deb77dL,0x0000000000000189L } }, + /* 47 << 462 */ + { { 0xff7a3b81c38382d0L,0x0e47569c54b9c17eL,0x9d9bb42f06eed152L, + 0x0f584d32ba6d6964L,0x1a73edeef6aafa41L,0x1a9ae3e5b4442b3aL, + 0xa70e0e98592269a5L,0xa91be178c77cb2f9L,0x00000000000001c1L }, + { 0xcc0528718ca1e18dL,0xa2f8ec2d89bc1ec0L,0xa4f2de28a1dd1f74L, + 0x93f7e481a49289f5L,0xcedad21b51007754L,0x5b2cfc40f9df77aeL, + 0x77d5bd687e39e15eL,0xe3d5331f241cc064L,0x0000000000000195L } }, + /* 48 << 462 */ + { { 0x90d2dcf1251fe06aL,0x4a9e61312213edb2L,0xa86cc889db34c6beL, + 0xc7c9ef492fc20da6L,0x62831aa947aad9d8L,0xb8ef1f7f79aa0e99L, + 0x4d8254a91e016254L,0xaaeb6757c5cea077L,0x000000000000011dL }, + { 0xa8f5e18502f4709bL,0x3cc2096d28f8fd0cL,0x6d384ee1bbabbcdbL, + 0xaa16805edc2fdc58L,0xd5a61658a4d13408L,0xae9c4071255c1b61L, + 0x1588a828a6436d33L,0x0e07bad24e74d1a7L,0x00000000000000c5L } }, + /* 49 << 462 */ + { { 0x6940796af899523eL,0x94846b1d315ec56fL,0x025781d6859d96c5L, + 0xe016820639653920L,0xf4ec2f4e51ad51d9L,0x225ee1b72ec9bcffL, + 0xdeb2e6d76317aad0L,0xcee9b056541569b7L,0x00000000000001b9L }, + { 0x2c60d275d2a7a25fL,0x09200147306bb343L,0x8b3bdbf0178a2c60L, + 0x1abb7417cc0cb699L,0x50f8f8200976f08cL,0xbf142ef66273fba0L, + 0x1eea00f53268f7abL,0x141ecaa300cfd5dcL,0x000000000000003cL } }, + /* 50 << 462 */ + { { 0x6f522b1b2190c116L,0x77a85bf90072e3f0L,0x7bbb62f976b08b4eL, + 0xab4878bd0c4970b8L,0x758b6e91dd7752ecL,0x3979894756baec80L, + 0xc8860bb4aea97d48L,0xfc446d97154895f4L,0x00000000000001b3L }, + { 0x8d8f9950072c678dL,0x3b46c8529ace0a49L,0x356b87c69f2f64c6L, + 0x2f0e47accf59cf99L,0x73c0fea460efde36L,0x64ea7c7f00aa8852L, + 0xb6f020820d9b7b8aL,0x1edc53d62f92a2a8L,0x0000000000000057L } }, + /* 51 << 462 */ + { { 0x8d66caaa357071a4L,0x635b07527ee00192L,0x517a6792c818c034L, + 0xf93830a391d08486L,0x1368f7b673d5b938L,0x077f9d50cb4ebfa5L, + 0x8a7855ecce5a34dfL,0xffa4063f122de424L,0x0000000000000171L }, + { 0x1410dc66ab6a99feL,0xfa2c782485c80476L,0x4fa85fa6b00bd91bL, + 
0x9fa465c2e83c2beaL,0x866e4cd18c657a22L,0x0c49aaa1995f77e7L, + 0xf7e5521b2b0e30c9L,0xdac4d4b507f7b749L,0x0000000000000128L } }, + /* 52 << 462 */ + { { 0x3ac08717b8501555L,0x884c3347812fbee3L,0xa934580777363609L, + 0xdb4de6841aede158L,0x6d3850ea6312d55aL,0xd4247574a47c5a6eL, + 0xb5cd31b2047c8ebfL,0xcca5ed851d8cefe6L,0x0000000000000124L }, + { 0x4afe51372a51a7cdL,0xd28b0d5de510ba3bL,0x1ac5ade85d1c1d2bL, + 0xddecd8024d330dddL,0x7f03c6419259bc0fL,0x9901f74bb3cc0e2aL, + 0x997b76d0adee582fL,0x4562419478dff0f3L,0x00000000000000adL } }, + /* 53 << 462 */ + { { 0x0a05118e4f11ecf4L,0x0f0df5d498d05869L,0x0703359921333009L, + 0xcfcd5c427de5b677L,0xdcec94971bb48849L,0xab4f855137e51f02L, + 0x0ff5e9dc686f1e15L,0xce51572a73711fd2L,0x0000000000000053L }, + { 0xedc9c0318dc19110L,0xf701cf6e9516787bL,0xef8846d98b359d47L, + 0x59c48207309176deL,0x2b820298bda74af1L,0xbfdaf25de022e2ddL, + 0x906a64e0a21e66dcL,0x35b661b622569e50L,0x000000000000002fL } }, + /* 54 << 462 */ + { { 0x3d2a23547d37e036L,0x15fa6258b069c547L,0x971084595a585197L, + 0xa1ffc74e71f550fbL,0x1fd862279a2a1355L,0xeebca426540f8363L, + 0x9511a7b70357d4f4L,0xf55a55a2786a3114L,0x000000000000005dL }, + { 0x4d95d924d2dfc1dcL,0x02bea96358f88b67L,0xb29bb82aa213c829L, + 0x5c334c6c4fb103c7L,0x1788c6f3237b3fdfL,0xe04ca128ff38a49bL, + 0x657435fcec49e78aL,0x86cb93b2b77ff94fL,0x00000000000000d1L } }, + /* 55 << 462 */ + { { 0x09336b9eb22a4bc2L,0xe5b69e8454d2cd44L,0x21f75483f29260f4L, + 0x95aeabdb24e2ab76L,0xb2d234334ee7e921L,0xb989e4477eb10e8bL, + 0x11f2aca492c511a1L,0x2fa2108fb4411745L,0x00000000000001c5L }, + { 0x9ba0fa7de8e834d5L,0xcde5ab91e5fb25ccL,0x064eba31b24855ccL, + 0xe6c909ff648d6e5aL,0x937bb03c03acf9a0L,0x2e8988a3193b5fc2L, + 0x670a1f72fc822accL,0xdbf52ce2aee811a1L,0x00000000000000b5L } }, + /* 56 << 462 */ + { { 0x0261a6571304c934L,0x86e5b425f4254d3dL,0xd6b8c7293330a402L, + 0x67ee96544281090dL,0x3056a9113a1d5fbfL,0x7dfc3e9e98494b38L, + 0x12c53a18777cea04L,0x2cc4ffec23165e61L,0x0000000000000005L }, + { 
0xf16c1e271d3c8db7L,0xc4954310ab46ca93L,0x1a5a066b158cfe00L, + 0x62b499a9f192e1feL,0x0c9a5e489a9bb573L,0x2907b4fb630008c1L, + 0x9c832da019ae083aL,0x6159daa1c26961f7L,0x00000000000001f5L } }, + /* 57 << 462 */ + { { 0x73cf6f1f0a7cff9dL,0x5990e7855d970d95L,0xb0a27227d43818a1L, + 0x8416719cc806298dL,0x9b9c4684a499cd3dL,0x80e188e59dae634fL, + 0x96d56fd6999b8803L,0x25747b11714423c0L,0x000000000000000fL }, + { 0x109847c5145aa843L,0xb4610155b5c2d75cL,0x9193c45dd9014b50L, + 0x8102f8118a050159L,0x46c50351e1d9e3b2L,0xb7992a3b0535b00fL, + 0x416cc678dafb0a88L,0xda1554890526c87eL,0x00000000000000cfL } }, + /* 58 << 462 */ + { { 0x3b96330d5a106b1eL,0x4261e39e301af093L,0x96b81a66369534d3L, + 0xea444b4a97b9861dL,0x19a98ade47fae332L,0xb540c85ebf30cbf7L, + 0xbd205dd1bd6a3c2aL,0x53606fc7d30e5984L,0x0000000000000193L }, + { 0x43f3560d4cc4d643L,0xd02e6d1ed892f3afL,0x410d637e8680b671L, + 0x91cc5b1bb60494edL,0x6a19cc7f1044ee62L,0x378962fc56a89c1aL, + 0x4c9eb689adadce81L,0x4f9ffad100d88194L,0x0000000000000147L } }, + /* 59 << 462 */ + { { 0xef4f9fa755ae6dccL,0xec7bf270f1f5304cL,0x2bc4d7a9401b4a40L, + 0xd0cd7b680044f20fL,0x0e0e7f8dd324e238L,0x2c13feb72fcdc80eL, + 0xf1a695f47f4d3dbfL,0x931c17b7233a8395L,0x0000000000000048L }, + { 0x3e5b9691f598ebb0L,0xf343309a57eacadaL,0x5ff3a06acce81fccL, + 0x2bbd1f7dfad4a66cL,0xa3185537e78d466fL,0x69121af74d50571bL, + 0xce3f6351d8d90fbaL,0x2e89c90e992614dcL,0x00000000000001e1L } }, + /* 60 << 462 */ + { { 0xbf9c09d3e9b072d5L,0x627e64871aceaf57L,0x21938240829175eeL, + 0x781ea3f25bc4d226L,0x996532aca6726027L,0x567da8151177e922L, + 0x9b3fec8aee0ea637L,0x793f1038091eeef4L,0x00000000000000deL }, + { 0x30a23a7581d2a890L,0x41f22a0c973e9ba3L,0xda3e78685c1a136bL, + 0xbe524c93084420d6L,0x53518e4026661908L,0x7bd7c768fa169b4bL, + 0x0477e8834709e45cL,0x41b66d3827ed78a7L,0x00000000000001a0L } }, + /* 61 << 462 */ + { { 0xf684635ac1aff7f3L,0x815ad1a840c52364L,0x1aa44c22ee68d865L, + 0xd4ccb5a9d42e7c4cL,0x280ee2b19a67bd2eL,0x8eb3e67583e89a28L, + 
0x7adfd31240147486L,0x5297a92c030f6dbdL,0x0000000000000074L }, + { 0x93f574b97ddac781L,0xb60f1f78431eb598L,0x76ce97fd505b1c12L, + 0xf129e08ef9584629L,0x99d5c2eb189756f2L,0x077576da3f376986L, + 0xebac685cf932a27dL,0x22bf40db85bfc6b6L,0x00000000000000c4L } }, + /* 62 << 462 */ + { { 0x1d9044cd1279e866L,0x124f41d1376eb5c4L,0x5ea4fb8d97030140L, + 0x38339643a1412be1L,0x5dc984b181b0ca10L,0x3caca47205353734L, + 0x6cc45d1cc111cd9cL,0x27abce536b44e337L,0x00000000000001a8L }, + { 0xde5d6867c8beb49dL,0x1fe2ba1f6e6747d7L,0x0c6ea670fec38d24L, + 0x28b701a08091cf52L,0xca72ca34ef2c8462L,0xd6e8d3fd9fb18624L, + 0xbb1c102467c9700bL,0xb78362320eeea37dL,0x0000000000000118L } }, + /* 63 << 462 */ + { { 0xd32c1d2cc13c0d24L,0x03a69200a8085693L,0xf18102e90989d81aL, + 0x0d57994b8af284d3L,0x54a3c55b2ac803f0L,0xc43f6f5ae4601c43L, + 0xb3184a8f59c5c667L,0xd8052c20a179b319L,0x00000000000000fbL }, + { 0x78b9d9197a9af989L,0x5df6c709c925390cL,0xb442eaf5e4846e5bL, + 0x680158b4316ff4e8L,0x528f587fa99e1dc6L,0x3788609a6fb34357L, + 0xbb9ea9c9d901d43dL,0x09cb2dae346643c0L,0x00000000000001ceL } }, + /* 64 << 462 */ + { { 0x2bf35f94ab7a841fL,0xdcbda33878e3fb0dL,0xb913947aa0195f98L, + 0xc8e30a9c4370e51dL,0x8a403e1ff0e558eeL,0x7dd167c4606db1dcL, + 0xaa343cbe5827ef91L,0x1404f2d6aace0f43L,0x00000000000001acL }, + { 0xdfe19ea53db1c500L,0xdd04717148ef4a56L,0x38ab939eb9a5ec76L, + 0x8f2d56855648f852L,0x7f81313f2726f67eL,0x41cf3794f68f54bbL, + 0x9d400e52de14d6ecL,0xc046fda14c9eb1baL,0x000000000000008aL } }, + /* 0 << 469 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 469 */ + { { 0x563f57ff87744684L,0xd8f2f1476562ce2cL,0xbf323b07ce7b4b70L, + 0x67b2111a9098ed36L,0x0c8a85c10b687259L,0x23a569d745d51e50L, + 0x6643f19b88d97057L,0x1eab273a875181d0L,0x000000000000013eL }, + { 0xbd33de9418a1bffbL,0x3edd0df89700ac60L,0x3757dbeb61839145L, + 0xd31a9aab1a59a97cL,0xcaffafa2faf790ebL,0x9f36c13414676e8bL, + 
0xef6eda5b18545787L,0x806a74c509ac46a4L,0x000000000000005dL } }, + /* 2 << 469 */ + { { 0xe6b236300de2a07cL,0x64d219926e52ba0eL,0xe5786eda68d1e768L, + 0xad163f8ecb6a04f5L,0x5a09908a101039e2L,0x8708327611a5f49eL, + 0x231d61fb5446a61aL,0x39e98ad1e7ddbba8L,0x0000000000000185L }, + { 0x54ca5f0f3861f6e7L,0x0e0f1667d20d86e6L,0xcf229fd613d9adaeL, + 0xc8852aedc488ec9fL,0xfee0c03dee1061e2L,0x7167271a35298521L, + 0xf4fcf268fc0cfd1cL,0x645a9d2fdf809af6L,0x0000000000000057L } }, + /* 3 << 469 */ + { { 0x38c2b6bca021ca92L,0x715f2bf72d1c900fL,0x79d4428f5dce0826L, + 0x7466180355b0f95aL,0x59d9ba60059f6b0cL,0xa3a157eb1cad0687L, + 0xcb4e3f7aeacfd2e4L,0xdea5cbcb1e36ad17L,0x0000000000000090L }, + { 0x49b0c5be4040f60eL,0xd354d658cadc11bbL,0xb17f1aa48b5ab35fL, + 0x09956859856bdc09L,0x4bdd06fff18da95bL,0xbf66d16d235e890aL, + 0x57d83d0076201868L,0x0bb0291f91e1a0c3L,0x00000000000000e6L } }, + /* 4 << 469 */ + { { 0x7a6b3fcb3b407efcL,0x05f6d0a3063ced31L,0x4a79c880f3f03bf3L, + 0x94181fff7a1438c8L,0x47fa4e4956106ed0L,0xaa97355dfaf7201fL, + 0x1264069e02f7c119L,0xf702c7f04d06b273L,0x0000000000000187L }, + { 0x368dcb7ae7e258bfL,0x5f8a64eae8e4597bL,0x98bbcf2b6f13bcceL, + 0xc00c8e627d099f3aL,0x4803c4005e326891L,0x229deec9342badbbL, + 0x3b810a6f49e0992cL,0xfb55c6cd63758695L,0x000000000000002fL } }, + /* 5 << 469 */ + { { 0x07da16efbb07562bL,0xb7f9c5dbde14c8a6L,0xe687783fb79d420bL, + 0x14fec271567ceec8L,0xd765d3336d608c4bL,0x70e67e3a92960599L, + 0x24060de6d9a5a16bL,0xadc2fcf91c6f83b5L,0x000000000000014aL }, + { 0xb78d9ad205aaecb1L,0x8e984114bcdeedc1L,0x0bd480509394336bL, + 0xfb533f263a0aea2cL,0x6308aa76b8c58b3cL,0x80124d89f64d3a4fL, + 0x58064b407bc9e04dL,0x64e21d55adbeb1b6L,0x00000000000001bfL } }, + /* 6 << 469 */ + { { 0xe0096d0f5c98771eL,0x05c2327096228ce9L,0xb38f45accbc9c99cL, + 0xf44d39cbb7abd830L,0x583a57461600275bL,0xe7911b696bd758e9L, + 0x54545d59b691c578L,0x0b03c85c8394c30eL,0x000000000000002aL }, + { 0x988ac22042c8fe20L,0xde56a00b7832fc78L,0x8ea363bae0cd6d15L, + 
0xfdcaee21cf51cea7L,0x104bd64ccfee6e76L,0xd9a935a575e0cdddL, + 0x775cbdf5da1bdc5cL,0x019215c361758097L,0x0000000000000113L } }, + /* 7 << 469 */ + { { 0x26ad960425b64fa7L,0x65d59abea783eacdL,0xa2c81fe86f55f993L, + 0xdb6068b8fed6d301L,0x52f76e9add95612bL,0xce2060f002858a68L, + 0x369c2092ff3912cdL,0xdc2d072aeac4c085L,0x0000000000000117L }, + { 0xf6142267de8e4075L,0x74ffa58005e07abeL,0x13331f5dbec8999fL, + 0x08c7046124188d1dL,0x63a12765a9375cd0L,0x5afe946f3b9f95caL, + 0x2ced26b921e69960L,0x614458d7512f10cfL,0x00000000000000f4L } }, + /* 8 << 469 */ + { { 0x02fb84fd5f7abaeaL,0xa238ad04e991e445L,0x0d9b2d417c972addL, + 0xac2439f9478c558aL,0xc3c78a2c1ec86ed8L,0x3735839b2d3b2f29L, + 0x259728d450317454L,0x7b53a9cbe2377abaL,0x0000000000000179L }, + { 0x95f44add5e4846ceL,0x85d2a8e87211225eL,0xcbf0abd8091a6646L, + 0x9d8b3475102deeaaL,0xe2a0a4ab602ba91cL,0xa6276d7f131de0d4L, + 0xbb90d1175f215c70L,0x49a60e3e07e3aa82L,0x0000000000000019L } }, + /* 9 << 469 */ + { { 0xeb40bcc4be860ee5L,0x47622caebabd1dbbL,0x5b4881286df7411eL, + 0xbb0a57dc8e4ae017L,0xdecd1b251a27c9ebL,0x9c33044ebd3d1b20L, + 0xd9e1d80e3205ffddL,0xa5f51d61a07581caL,0x0000000000000096L }, + { 0x51a3a0a4441ea9b2L,0x0034263f805898deL,0x20ffe434628e71e1L, + 0x51b50e3897075cf1L,0x556732f72ae952c2L,0xcae37d27433977e5L, + 0xa79160c2bde9cfafL,0x2dd6cce9b25d68e5L,0x000000000000014fL } }, + /* 10 << 469 */ + { { 0xc5edbc6f85e4dc22L,0xc5933f26efc6be8eL,0xfd818a793f5edbf6L, + 0x09af29cde4c05b4bL,0x386ed43c5cb088d5L,0xa50a8e572fbfa65dL, + 0xfc3e90492effb79bL,0x8aced5f19883127dL,0x00000000000001e0L }, + { 0x8aa6a432b05205a4L,0x7d5100c71ab38cdcL,0xed0d59ae9f026787L, + 0xf9812b5d04497c3bL,0x66ed8c8cfbcc0dbdL,0x9597f83779f6138cL, + 0x076883cd899808c8L,0x9884a305f4024d9dL,0x0000000000000118L } }, + /* 11 << 469 */ + { { 0x87d449701c005da6L,0x109fc435e49f09ceL,0x3070e9b5df09fb82L, + 0x6627c2c8fb79e375L,0x23c488d5aacf7040L,0xf7d96b62c4f9bd5bL, + 0x54e5c4d917734569L,0x02d5cee9240dce26L,0x0000000000000152L }, + { 
0xb4f24ee03d62aa32L,0x41ebe4e11051470dL,0xfc3a01594ef1cf93L, + 0x80efd898e64e8687L,0xd9b3993e8daa3365L,0xc0de82692618ee26L, + 0x461447cb4c5e1d3eL,0xa28313e53e23eb2eL,0x000000000000016dL } }, + /* 12 << 469 */ + { { 0xd898aad7aa89f44eL,0xa62a2d93c518f791L,0x6acbea45ad2d934eL, + 0x4dedeab78d3bd1f7L,0x072894788c0ff7e9L,0x11a995f7e6828443L, + 0xf0c92ed4d44b4181L,0xb7440b69416d7dd7L,0x0000000000000121L }, + { 0xe800b3c21e920c11L,0x5c7dc454e9d3af72L,0xb2211645283116e5L, + 0x71675dff4ad35006L,0x2a8783eb5231dea8L,0xf79c777c659a3cb6L, + 0x1e8eadecee43df0aL,0x7ba45eb224be6f96L,0x0000000000000001L } }, + /* 13 << 469 */ + { { 0x4044fb9aa0e53379L,0xd71b9050787f853cL,0xd87e57ee44779c58L, + 0x5f3009e4470fb0ebL,0x378b1a209941a1c6L,0xa677ef62504924faL, + 0x7be3f1274b7545a1L,0x3ff5508eb5960cd7L,0x00000000000000f2L }, + { 0x4cef6ffac4fc4869L,0x6898cb2900c32214L,0xdd8345ec9f931b5fL, + 0xd0a51e6c1777d16dL,0x4e3f7a028833256bL,0xf2eec60d0be037ddL, + 0x8ebf92143a28cb07L,0xfe94e5812e9a7bd7L,0x00000000000000f0L } }, + /* 14 << 469 */ + { { 0x9cfd0d70379920ffL,0x5b138849a7d11592L,0xfa3e1debda21ceb3L, + 0x65a7da37afbce19cL,0xad36c1e2ac24e4d2L,0xe82248727b2f4c24L, + 0x6b62f348d52a2de0L,0xf6e56f5762566193L,0x0000000000000018L }, + { 0x5231f0dff354e7b4L,0x677e082ce7a8c0c9L,0x8d3b59ca91e10de4L, + 0xc8b5d573099d1495L,0x33ace8529f6dc625L,0x78bba7b132668cb4L, + 0x20f626184a9b0c01L,0x641ba19c9de82920L,0x0000000000000078L } }, + /* 15 << 469 */ + { { 0x701b527393cf6217L,0x1cb48f663555e29bL,0x22181c63c8f9bf06L, + 0x12da892bdc0c2b3cL,0x57522bc3cf766a0dL,0x521ffabc04f79b89L, + 0x1d10e64ec217b9a3L,0xf613de04359f3d30L,0x000000000000006fL }, + { 0xeb657ed419c0bf3dL,0xa9ccee58bb8fd14dL,0x27bc141e9e50598dL, + 0x6b5e14c593fc9ab0L,0xd050fbb65ae50f0eL,0xf9f8760fa0521b92L, + 0x1ff8bff96c853693L,0xbe061e788df6d469L,0x000000000000010bL } }, + /* 16 << 469 */ + { { 0xcb30076118e71dd8L,0x01d2684604886b27L,0x22871d7b7cc1d69eL, + 0x1d138a47d0b3caf9L,0x53aeb97400a7c0b0L,0x712aa1a2262f9366L, + 
0x58c84da46bcae07aL,0x521b7dbcae8708b5L,0x0000000000000045L }, + { 0x03bd36438d3cd884L,0x20a737ee1d62a335L,0x37e1775f904f5c53L, + 0xb76783dd3baa91fdL,0x7c5521189a84e971L,0xd085796e4488ede7L, + 0xb418e7085068d6dcL,0x4417e81ae5275a73L,0x00000000000000d2L } }, + /* 17 << 469 */ + { { 0x81c12b63e30e72b8L,0x354fe20db4ad0c2cL,0x3bd33ee584fb3da9L, + 0x80c265beab0605a9L,0x4652ec39d3ab432bL,0x85b6357af4ac355aL, + 0x824d9e895096834cL,0x5b6886fce0e0bd26L,0x00000000000000a7L }, + { 0x9747dd8b530ba303L,0xca95b8cff5a8cf9eL,0xcf0766268c2ca53fL, + 0xd62aa41a2f2b99fcL,0x21b4c001ead7300dL,0x536b0a59e026d010L, + 0xcb625f1b66ca8331L,0xcb613a3a56b6f938L,0x00000000000000ceL } }, + /* 18 << 469 */ + { { 0x1e26efcad3dad3b5L,0xc3f580c0ec404efeL,0x63e4f54bba07fc5fL, + 0x2c208d6c1655cb23L,0xb10a1a18e482f788L,0x7f6fd02dcd7f89f3L, + 0x5411e0380743db4cL,0x0e22bd76ce4efb17L,0x00000000000000a2L }, + { 0x873c0af60d01af74L,0x2f4a0ec9ba060e1aL,0xded0dfbdaf48d8baL, + 0x5d2b528d780ec754L,0x42a02e2dbadf7060L,0x90d1c9a037ba2665L, + 0xb002f7c4a1007a96L,0xfff6be26355ad02aL,0x000000000000008dL } }, + /* 19 << 469 */ + { { 0x0bd4a3f0f9bb7cfbL,0x8d61b1c4087487ccL,0x9fc2ea4af9c93934L, + 0x77ae38d82326e828L,0x0c0ae32cd531fc73L,0x46b6a9c04b35fa57L, + 0x36798f1570f76923L,0x3941fd5cf13b2b6bL,0x0000000000000047L }, + { 0xfa79c31c97ad3427L,0xae83362f4b6a8c6aL,0x19da33c36893337fL, + 0x15d666fcd8edfad6L,0x378ad97ba6648fe5L,0xba2d4463ff23dd7cL, + 0x687e42e6868e99d5L,0xf6c5d9dd88f69974L,0x000000000000000fL } }, + /* 20 << 469 */ + { { 0x14d14f636b330a4eL,0xf943c489289430d8L,0xc220f9b3dacb5aa6L, + 0x5f64ce3dbbdbc661L,0x32bdc1d5ebf6d100L,0x6d2cefa026f8304eL, + 0x55bc066635196423L,0x099f19e813c68f56L,0x000000000000017aL }, + { 0xf8898c0067fd6808L,0xc9fcdd64aedb5d38L,0x141911bc93ecf107L, + 0x7e2034093432891cL,0xdd9f385e1b9f5f0eL,0x61b231e96f97e818L, + 0xec1dd4b9e8cc8371L,0x4e0a1cb9f44505a4L,0x0000000000000127L } }, + /* 21 << 469 */ + { { 0x929bafeded53c5e0L,0x14ddb02139df049fL,0x06226f149b246597L, + 
0xf60eb64085bee3faL,0x37606f309b15a907L,0xf5b90ecde7fbb734L, + 0x038410daf9e730afL,0x4e2ee195f5d72e08L,0x0000000000000134L }, + { 0x461c2d552cebcef1L,0xa43f38b5a32863ddL,0xfc3a8ded6b8d3365L, + 0x03b48889360f80bdL,0xd22be91598cbea9dL,0xc32a9ab72557b88aL, + 0x2ce4a476ae78f249L,0xae5831e640f9bf03L,0x0000000000000095L } }, + /* 22 << 469 */ + { { 0x41b796bf4796af15L,0x177d42f351a325a2L,0x487f3715601c15d9L, + 0x15ca3b1b40649c1fL,0xb06abca7f36a5e64L,0x14bad851e4b57bafL, + 0x2d42059d1862f500L,0xd2bf6d28db084902L,0x0000000000000186L }, + { 0x128345b83e7774bcL,0x4598fe8563520a7aL,0x444316e72587d345L, + 0xc44643f03c926f91L,0x1ee021d93f3cfca1L,0xf5c84920bda26ccaL, + 0x673620287af1bb9cL,0xe05d4e740075bde9L,0x0000000000000061L } }, + /* 23 << 469 */ + { { 0xedd410e397f8467fL,0xf16210b41a5ccaa8L,0x2f078b037f73dd71L, + 0x2c2a5f448e99ae3cL,0xb2bf1970b7dfc8fbL,0xf37185b250883783L, + 0x7d5412a36a24e693L,0x18ee647497be701aL,0x00000000000001b1L }, + { 0xdf6b6594435265b4L,0x876e6a7a9004e6a7L,0xe3ac6f6873cf7ae0L, + 0x965460544bda6cceL,0x84e762aca94218f7L,0x06425f2496b3336bL, + 0x569da13247d63056L,0xb4cb63d0b2d5a789L,0x0000000000000135L } }, + /* 24 << 469 */ + { { 0xee2ca82b76f521d5L,0x404c95b98d54184cL,0x7ef78330d43e178eL, + 0xed67f94305f36bd2L,0xd734f6b845f09837L,0x8760f0f7bd8f5a29L, + 0x2e7cf02112059285L,0x8d0be801ec54d3afL,0x00000000000000a6L }, + { 0x51ba60c971042440L,0xea9c35ae614a4512L,0x006bbca51d410f81L, + 0x3d31ac431170157dL,0x69b0be25fc7e723bL,0x6da152ba38276789L, + 0xbe486c0593492808L,0x3fd119d980afa598L,0x000000000000008cL } }, + /* 25 << 469 */ + { { 0x8c94532ffb7677d0L,0x397f7f18f8a5be84L,0x94356c7499cce279L, + 0xb334201593980d55L,0xbf6ef9d18cb8520aL,0xfb28b68edd535916L, + 0x9cb149ed4dd8cc9bL,0xb97ea42aa9e2c064L,0x0000000000000035L }, + { 0x9171a89ad37f9c82L,0xddc272092d5c7bf1L,0xadc3af681b9e94b7L, + 0x613f81ca5682caf4L,0xaedd0d66e3453e90L,0x651161a92971b9f3L, + 0x0399d25284688cecL,0x40d177e41be785fcL,0x00000000000000a7L } }, + /* 26 << 469 */ + { { 
0x6871be7478d0c1f5L,0xf6c364f7456a1d88L,0xee53602c1666ff19L, + 0x8b0a05995621622eL,0x8937537341feb4d2L,0xf34622cf94a0e8f0L, + 0xeb9e1ee76efcc350L,0x1312f3a350063323L,0x0000000000000055L }, + { 0x2fc73efe356883d3L,0x62a03b106d884fb7L,0x901bc04d7cabf01cL, + 0x2511c5e129ebb711L,0xf94d884739bf842cL,0xe50486da63edc964L, + 0x8cb348db60b10d7fL,0xe29a998c08ede95bL,0x0000000000000174L } }, + /* 27 << 469 */ + { { 0x3f8ddcd46347473cL,0xc61ed0f9a7e239f6L,0x45dbea9289ccadc2L, + 0x2c5683f3cef07a9bL,0x51df4da3df13c487L,0xd78c61c30a089e81L, + 0x47302b22831fa26aL,0xe3a79e8940504a7bL,0x0000000000000088L }, + { 0x16d3f12ba4b11470L,0xa70377ad6ea07a36L,0x959342d5cc15cad5L, + 0x1fd7d8cd8e4d42b7L,0x70696e3cb694988aL,0x7c73847433e52d9fL, + 0xe96d5471c41dc241L,0x49cb9b9ca6f8eb8cL,0x00000000000001a7L } }, + /* 28 << 469 */ + { { 0x29dac47503b83778L,0x13fdb36b56f8223fL,0x15108cf7b189bf97L, + 0x04cb678c3d959be9L,0xbfb72511cddec7cdL,0xd255fb17b2a649f7L, + 0xe7b7616ff0210128L,0x49891af158674716L,0x00000000000001dbL }, + { 0x5fa647c585e511daL,0xe6d774239b517adfL,0x131596649e9e0ed5L, + 0x6a090a1234cbf60cL,0xc213cfe51b5cd8e9L,0x152c5434d7236d27L, + 0xfc7df774944f90c9L,0xb1890bc9c119589bL,0x0000000000000167L } }, + /* 29 << 469 */ + { { 0x9b16f2144a14d787L,0x1036d9fa52079b2aL,0xa83ff9ecce07e344L, + 0xddfeb66c46311e85L,0xbf554a70cfb9513fL,0x467bab678b31b889L, + 0xdfae7a0fe9372329L,0xe4f2f5917e51c8f8L,0x00000000000001f2L }, + { 0xc486a286d75feb83L,0x69c676f3087f2443L,0xbfb329c2ad4f54e1L, + 0x301e122a09a7f840L,0xdc1ed7aab169543eL,0x3ae1310799c64627L, + 0xdfbfa25ca5c949fbL,0x6eb39e641b020190L,0x00000000000000e5L } }, + /* 30 << 469 */ + { { 0x4d98b2ac32d8dbe8L,0xace115d91cc99ad7L,0xb8f9b3315543aef7L, + 0x7d65d76b79819d68L,0xe70fa04ca7b35f9cL,0xf0679fe75fbe62b6L, + 0x8e878e2f00de3fc4L,0x1a90ed223a42887dL,0x00000000000000caL }, + { 0xd80cbfe401063d2dL,0x6cdc0a2c27cf1c3aL,0x447c49dcc0141542L, + 0x98f2ec0c0cb0cf80L,0x07549703ff297bd2L,0x833c79f2a23585e4L, + 
0x4224adb3ddf0936fL,0xecb57a5a699ea6eaL,0x0000000000000018L } }, + /* 31 << 469 */ + { { 0x56e17019ff059f29L,0x002581359d92fa55L,0xfc9b7a6925777d70L, + 0x9870806ae3f5ce87L,0x99e086f26d73ecc3L,0x3b2e4d9fa35841dcL, + 0x485817f1b62ce114L,0x0bac63bdb5c1be62L,0x000000000000018eL }, + { 0xcffa5dc1b9ac6c2bL,0xc3a26fdd56f9eb8aL,0xcaef1a4f3898781fL, + 0x426a86c5a8cd88d0L,0x1716fd7cab614e6eL,0xe12c9101c06bbbb7L, + 0x63e9171f62757ef1L,0x4cec09acd1f320f6L,0x00000000000001a4L } }, + /* 32 << 469 */ + { { 0x099c2888d6c5b0b9L,0xe35ce1381ffe8a3bL,0x133c35ca74939411L, + 0x13521b322ff27ae3L,0xbde73625f760a06eL,0x199340715ee54408L, + 0x69db07e102f9b217L,0x1389c23586a5e6a5L,0x0000000000000112L }, + { 0xcf5b0e89ca1aa74fL,0x0cfa76325d844ccfL,0x04be199c65c3286fL, + 0x483e0ff9d204e249L,0x9df8ce9704c424b5L,0xe35cda3ec05c0793L, + 0x8a595df3f2798180L,0x7522b808fbe7f5c9L,0x000000000000009cL } }, + /* 33 << 469 */ + { { 0xcadd02a27d7784aeL,0xcce86dfea7fcf493L,0x87cd6f95bdc14ad8L, + 0x27d2fe24611512f7L,0x9bc408a777dfbb5eL,0x9cb1f6603e961625L, + 0x08542ccc1c253a6aL,0x1ec8713fa541f855L,0x00000000000000edL }, + { 0xc358a687448cd884L,0xb75d140050d6ba88L,0x137a7569196b52c7L, + 0x0cc62c0712f61a5aL,0xaa3653cf8d2497abL,0x9c79351e5d45d412L, + 0xecababc65af55894L,0xc409198b52097ed1L,0x00000000000001c6L } }, + /* 34 << 469 */ + { { 0x0e472757f7011a71L,0xcebb0eba09da90e8L,0x933ee6b4aef7b7d2L, + 0x0d2e742cb68ae8c6L,0xf373fc3f32de83bfL,0x0b51f343d249eaa0L, + 0xef0bd45abab03af6L,0x965b8fefa1c07e8cL,0x00000000000000cbL }, + { 0x571817f5ed0017b4L,0x6aee829608047226L,0xf4334820a389806fL, + 0x2e0c2dab167c885fL,0x2c1894f48d6c34fbL,0xcba4358013ce3154L, + 0x816b7e7132649e05L,0x3b02046e20aebd59L,0x000000000000019dL } }, + /* 35 << 469 */ + { { 0xa949bdcc0216439dL,0x72e5adadad102c3fL,0xaa9183ba0af396b9L, + 0x41a83eacadac5678L,0xda7577495ea96f88L,0xac27f511f7e5d25fL, + 0xbc92e35389a6c9eeL,0x472b947c1dd5234dL,0x00000000000001c9L }, + { 0x1c9d64a4fcb14344L,0xa6d842b3c98a85e8L,0x1b4fb2636cd3f69cL, + 
0xc484dcfa2189d8c3L,0xe4e45792e280a9eeL,0x65da8ef6202443b7L, + 0x2ee30251e71dd154L,0xf650203adea3ba9dL,0x000000000000016eL } }, + /* 36 << 469 */ + { { 0x0fd34803710bfda8L,0x697671fd37d08a77L,0x61590350952da84aL, + 0x6355b6ef4eaffb94L,0xa654647f3712f7deL,0xf7d3db844bf5551dL, + 0x803b4b6f92e47512L,0x2dac8ff69e3838c8L,0x0000000000000109L }, + { 0x55e242b9515c2e04L,0x6802de1d9eead096L,0x3e2098970399ca1cL, + 0x62dd1077a4ef1ac0L,0xb89f5b0c24e1d439L,0x8ff7dc3093678ea4L, + 0x9858576394713929L,0xc14b25abdc8d0a7fL,0x0000000000000042L } }, + /* 37 << 469 */ + { { 0x7e70f8794d7f6f2bL,0x419829cc4a31a41aL,0xb4eeaa99324fd76dL, + 0x95c37844d200095dL,0x95873b642c3e8cf4L,0x9fc8600ff3733834L, + 0x13ea6eba7403564cL,0x2d4f9ec2f93389fbL,0x00000000000000d6L }, + { 0x4e0f87b3220667ecL,0xbe666aacc0f871aaL,0xe22ba2305c46c0a9L, + 0x3f7f397cc2fb5d1bL,0x701ca7f8a310ab98L,0xf561043c97da4402L, + 0x9d7d99c7dc5e2e08L,0x854db674bbd1ff7eL,0x0000000000000065L } }, + /* 38 << 469 */ + { { 0x48b0c44b10fbb232L,0x23efcb9474bb12cfL,0x2283fd4289254aaaL, + 0xf824e6cfafc254f8L,0xcaac3118aebd820eL,0x9a69ec62a1e6b686L, + 0x50ccc5245df7b70eL,0x2bf74a2119c0ef11L,0x0000000000000097L }, + { 0x1cb448466450f3d1L,0x316968226ff2a91cL,0xa32c75e1e3e83f42L, + 0x45cdb5ddc766d607L,0x44e4f4644405a2eaL,0x1ce0895a2af43ed5L, + 0x546bcf8f5d4fabddL,0x2ae184db680ad728L,0x00000000000001eaL } }, + /* 39 << 469 */ + { { 0xf873545aadd1bb8aL,0x6df75855b3381916L,0x85a46959282f5600L, + 0x7f63ee00cab1b8d7L,0x61d5dc47e3960f89L,0x2ec0164befcfb0d7L, + 0x05fe38cf325dbeaaL,0x786e7fa34b0292e5L,0x000000000000008bL }, + { 0x29c1395fce8b30e7L,0x0aaff0c90c3ea317L,0x3b0ef202a4dfab3eL, + 0x675cd59aac01dacfL,0x8a22b232aebb8ca6L,0x552271b7f28586ddL, + 0x2a23a56dfd03890bL,0xbdc3ca28de16b4e5L,0x00000000000000eeL } }, + /* 40 << 469 */ + { { 0x14596043e92e85b5L,0xe3caa4f1309bcc86L,0x0946eecd6a004131L, + 0xa8f39a0a5d559370L,0x2907f0fb9dd39852L,0xd26639c2b3624b6cL, + 0xb45f0d32034709b8L,0x0ae962d48065c9d8L,0x0000000000000144L }, + { 
0x3de036a5d521ed0eL,0x5ce0990022e88bedL,0x2170357d527f6be6L, + 0x7cc9e9ff5b47a6c2L,0x801ff33b9748eb94L,0xcb3961d6372d1840L, + 0xc3db0b3bbda53a97L,0xfff1a0789bd74f00L,0x0000000000000127L } }, + /* 41 << 469 */ + { { 0xe2beef9875c4e3d5L,0x6e2985be41385dfcL,0x3dc139491d5be62cL, + 0x421ad84a59db88a2L,0xe290f65ef1cd9c62L,0x8cad81dd031fdf5aL, + 0x2ffff38b7a70772bL,0xdbcae403aed61b17L,0x000000000000003aL }, + { 0x685cff22a6961366L,0x46543384b061a17aL,0x1d93260699b2b99dL, + 0x115b73e62486c41dL,0x6cbea3427fea8a8cL,0x21e238b85fb70622L, + 0x13df7dc40dadf79dL,0x89b2c737bb4fed33L,0x0000000000000008L } }, + /* 42 << 469 */ + { { 0xe470a3f68811f377L,0x8389beb00236aa7dL,0xf0f6c917982f42daL, + 0xa8d017fca5e6a3dfL,0x00a281a86f6c2422L,0x5a713942a6cdb487L, + 0x4a1b57083f4d0029L,0x64d0114ab2000863L,0x0000000000000145L }, + { 0x842dd8a0569a9ca2L,0xb55b007b3b347c48L,0xba12f822b3667837L, + 0x67579db2bbe1a4b3L,0xee34c780b2ee7cd0L,0x08c9f20244fcd799L, + 0x592790de609495baL,0x6c279b065fb8bc01L,0x000000000000009bL } }, + /* 43 << 469 */ + { { 0xd9ec089d45a61664L,0x6adda151ef4d7edfL,0x788c96573e74cdaaL, + 0x2b73df110162a42eL,0x70fa479627178991L,0xc028dde06b7360eeL, + 0x683cb8de527692e6L,0xb06c972e4039737bL,0x00000000000000aaL }, + { 0x6ecd795e03cf1545L,0x4cb6ef7ca49d14f6L,0xe7d26ea24c9f0793L, + 0x74f86a00c5bb3a0cL,0x2f029188b022a7c1L,0x1f5a67a5d11a7ac5L, + 0xac81b2fe6a5724bbL,0x8c149f3d175d86e0L,0x000000000000019fL } }, + /* 44 << 469 */ + { { 0xec1c52dfbcd98d89L,0x311fa61f10eefe3bL,0xc46f60f85e624ea6L, + 0x30c2171be17a2b62L,0xb12102e30b8729aeL,0x8be6c0ceb4723895L, + 0xde06c2aaee640520L,0xa71ec10da86e11baL,0x0000000000000051L }, + { 0x73019b3097a7bd73L,0xa01007744052d44aL,0x52cd660f34686dd3L, + 0xd02b71fee0879244L,0x8efc04c0f2266eeeL,0x1ee192c7b71ba925L, + 0xa6d63f084408ef54L,0x23adc3915c77390fL,0x000000000000003cL } }, + /* 45 << 469 */ + { { 0x84ba05e3bd62d818L,0xfa9746922dfb9f05L,0x6b15453937718201L, + 0xa6624bf9106687d4L,0xe09a44c3d9f29e00L,0x87d2e4bc137d2f83L, + 
0xff90b283b9b00e40L,0xcfb6978b87df7a57L,0x0000000000000129L }, + { 0x4aa5e02c503bd70cL,0xc985efc743315974L,0x61e2ef6999fd881dL, + 0x8ddec42d5c84dcd1L,0x4eb1d068f6d37ccfL,0x39e186198b84a79fL, + 0xf3924d7f4a78ec0eL,0xbb702e055c90b885L,0x00000000000001e1L } }, + /* 46 << 469 */ + { { 0x2885ea42d044abd7L,0x13f7e9191c8843b4L,0xae0401519f420498L, + 0x8829dba445329e31L,0xd1a19fcf17754079L,0x367b6764c564ea64L, + 0x5f27569de885a386L,0x660bc6bb97b48147L,0x000000000000000bL }, + { 0x5936d68d04ab459dL,0xe45234d817c2a65bL,0xd76cb1fef46a84eeL, + 0xc310f74af3bcd4a2L,0x605f3f125c2dacf8L,0x3e533d128bef904dL, + 0xd389dd9b76ea5518L,0x7fb8d72eebce15a8L,0x00000000000001bcL } }, + /* 47 << 469 */ + { { 0x1b39356dadbcad10L,0x611d06efd2c02a3eL,0xce5b763a4c497670L, + 0x48fc1378ab055283L,0x46b39cb9678c2c83L,0x03a783b393eb7d11L, + 0x72b6d5f8f5a13b37L,0xe69316dfa85fe503L,0x00000000000001b7L }, + { 0xabf89b61520f51b5L,0x93e7cdc45db81dc7L,0xc4be33eef9ee3d28L, + 0xe1e43f98d5983740L,0x8f37e5a35ad31353L,0x97083b97a896a61eL, + 0xd117eb5e292c0624L,0xba08108c874f328dL,0x0000000000000081L } }, + /* 48 << 469 */ + { { 0xb758f2cd23509208L,0xd4f7af5088ba1557L,0x8ff26bbde472d9feL, + 0x071723868960ac4aL,0xf63e404e9025de52L,0x05053c3fcacbca10L, + 0x55ad71f0e90cdfb2L,0x25d5716580e65089L,0x0000000000000125L }, + { 0x5de6508419a94651L,0x9aa2dbc01bf9c999L,0xc97a237737d94752L, + 0x9963d33ca4d21f9cL,0x679f837fd92c494aL,0x1876a9cc69fc4047L, + 0x028e4edfca690225L,0xa49474168359a44bL,0x000000000000004bL } }, + /* 49 << 469 */ + { { 0xade71ac8fea31c97L,0xcc8f381103919f2eL,0x066d7a1c0fd575abL, + 0x5c1bb1f252a3e9d5L,0x830ae9819689a600L,0xac9bb5b66763b7c2L, + 0x40db293aab2e1ad9L,0x8f0de39f530fa624L,0x00000000000000cfL }, + { 0xc963031f35628945L,0x9a2e7f3e99ea6df7L,0x8b759a2fd3491ee5L, + 0x88136e3f009653aaL,0x48198194c5239084L,0x9fef1f19e4af6853L, + 0x42cd9f4adaab6571L,0xd834784943ac38daL,0x000000000000010bL } }, + /* 50 << 469 */ + { { 0x77b95920abd07a42L,0x66ae951e1b05f9d9L,0xa0b29e1f1b845797L, + 
0x460087bdea66fa1eL,0xfca234ff973965e8L,0x4ef0d58742218113L, + 0xbb90f9c0e58e2ba6L,0x2106ece57a1ded9eL,0x0000000000000004L }, + { 0xa9d190182787b7aeL,0xb127a7d318cc1c8dL,0x46455feadf775a3fL, + 0x4bc1973ab91f6f24L,0x2941ac6d1a3e4cbdL,0xedc8328bf1740029L, + 0xe4068b2e54ed0273L,0xa7228a7e8c23a1d4L,0x000000000000019aL } }, + /* 51 << 469 */ + { { 0x957939dae7894375L,0x1b2775f257816ca9L,0xf67d7c380004bfe9L, + 0x4582ae0821eb8de4L,0x55df582abcbb4aa4L,0x30e5b50b16723f24L, + 0x91f672daa350f89dL,0xb77fe0544be6380fL,0x0000000000000117L }, + { 0x1710653ceeb1c98cL,0xf82be08c6ae35cc8L,0xdbcdf1421680e596L, + 0x2032b7410227e6c9L,0xe478bd37f07adb40L,0xc8b549c8aecc5475L, + 0xff5f364c7ba0a27fL,0x654355061768ee6bL,0x0000000000000042L } }, + /* 52 << 469 */ + { { 0x22fe2e191a3ccdfdL,0x55f8139c13a8c7cfL,0xedc73b1c01e0f56eL, + 0x5a9c1968a3fec725L,0x782182d6f8ae2cd5L,0x5f66b8c17f9f64b4L, + 0x17a2bb332c101e97L,0x187ef5a232a96850L,0x0000000000000197L }, + { 0x5d5fb38747d1be1bL,0x3713edbf1de57442L,0xb04956d328a92374L, + 0x9c92546515debd61L,0x0e0f52fec13defccL,0x891c71261307bbbdL, + 0xa772556bc296d5aaL,0xd7eb1d8ea8f6b37fL,0x000000000000018dL } }, + /* 53 << 469 */ + { { 0xf241efe3e318cb2aL,0x03033711ac571aa6L,0xe3efeeae1b42ad11L, + 0x49bed2b2dcf40cd9L,0x3c9fd4976f2832c9L,0x598a96e609c7dc68L, + 0xfbf270ecf5686223L,0x3c0871ee7cbf72a3L,0x00000000000001e3L }, + { 0x477eda86b7fefe1aL,0xd36256c26d037d5eL,0xb0df707391f39886L, + 0xbf2c97fbaa590f83L,0x7b8e2aa452150aefL,0x1d0fcc93482aeb90L, + 0x5086665c90256ba7L,0x90a4de3f64e6ee28L,0x00000000000000e3L } }, + /* 54 << 469 */ + { { 0x9880edbb634779f8L,0xa78c28e60c979f1dL,0xe94854d81204dd29L, + 0xf4a09ef365453269L,0x8015c054bc074682L,0x3648356358d5013eL, + 0x0d2b20adf0cf4a28L,0xb96421ac0a192c94L,0x000000000000017dL }, + { 0x923b881f9418af47L,0x5f86d74cd0744d6dL,0x9bbc2f0317c2ede0L, + 0x2ff3016d4d7f81f2L,0xf7e99bdc588dd88bL,0x60988dfd36e09430L, + 0x7fcd2c1911cc41eeL,0x7dbb6444436063b6L,0x00000000000001bbL } }, + /* 55 << 469 */ + { { 
0x13a7925b4f357fe8L,0x54250aaab46ba080L,0x6b75101f52792018L, + 0xc1f2737f27e87303L,0x8496a09b7abc0687L,0x36a23d44f65d79fcL, + 0x1079339f87d831eeL,0x529ae62aa07364ecL,0x00000000000001caL }, + { 0x2742ce4c81b0623aL,0x513a65fa584a9f69L,0xe5b6b1ece25bd3f5L, + 0x1a6a7359d5883ef7L,0xd5a90c6dec6efe65L,0x5db42aea38802659L, + 0x31671373bff583dfL,0x9fccd94f6920e040L,0x00000000000000dfL } }, + /* 56 << 469 */ + { { 0xcc51cb4b1aa88055L,0x55c2dfc46dac9c77L,0x4e0019affb5e8de9L, + 0xd686f86dd0d60937L,0xfe4bcb522f3ac7dbL,0x102adf661cbd8b5bL, + 0xfe830c11f89ad228L,0xb635e0e1969b169bL,0x00000000000001a6L }, + { 0x6b51c9abd5b1c229L,0x1d2912be9ee262caL,0x6208f3dd838c6d6eL, + 0xc3b9a9e5d6e11544L,0xb88e99336a2696e9L,0x81b5b9c8ea9d3206L, + 0xa7f2f30243123369L,0xbb51e48673bc3a3bL,0x0000000000000113L } }, + /* 57 << 469 */ + { { 0xdd0f189b4c77e727L,0xfe4cd2f6d9bd0fe2L,0x6454e3cd94c2a526L, + 0xdff102f317ad2166L,0x461734c46191e95cL,0x04cc3f496c830f05L, + 0xa8740c863dd2ed7bL,0x5dca654622703431L,0x0000000000000019L }, + { 0xa80ffaa51518d57eL,0x9e1fcde11f48dd75L,0xb0498a4b829f4b88L, + 0x4823bb28b7f05dd1L,0xd9c6f5d262938b03L,0x4d1660c414d15f13L, + 0x444a74a0a0fd9a40L,0xd764f15e3770cf5fL,0x000000000000014eL } }, + /* 58 << 469 */ + { { 0x7c62a8d4146c122cL,0xe575b2ca1799b0a5L,0xe5195dae1b06ffeeL, + 0x5f0dc83a847f27fcL,0xcc10735305898b9dL,0x7f2ef8ec4713c3b5L, + 0x653844ca4760afe0L,0xc6d972950f6152b1L,0x0000000000000065L }, + { 0x996a0d1a59bd137bL,0x6511d7685abb04b7L,0xb916dc2f78ef48c9L, + 0xf46d1bf3de129423L,0x3f7f3de897ac10d1L,0xa37c5c11676afc7dL, + 0x39dbdf85d2671f5fL,0x2b5607945657c0f2L,0x0000000000000088L } }, + /* 59 << 469 */ + { { 0xb24089fa6d540d96L,0x77c1e4b018a57d1fL,0x1c62eb46e056b93cL, + 0xb98bc55624e9658fL,0xf225c49850aaadb8L,0x9843ca5ece753e67L, + 0x27f231f12c66df92L,0x9825c25b34934cb3L,0x000000000000001dL }, + { 0xf92f334c84795c3dL,0xc1f54e7b36b19eb7L,0xe891a3cc496b86deL, + 0x405cac982f43e3a1L,0x319c8569e8d9c821L,0x8ce93789ca7db1bdL, + 
0xa4a4227155c7920dL,0xf63a98e73fb59f04L,0x0000000000000088L } }, + /* 60 << 469 */ + { { 0x3d9e327a3dffaccdL,0x23e7de91b57be47cL,0x34af6490996a4df3L, + 0x9024fdcd23d46ef7L,0x9b55c37bef6749e8L,0xcc0e2ea4750217ccL, + 0x9c0aa3a54f07fee1L,0x678db2f28154dfc2L,0x0000000000000007L }, + { 0x3ca56f0f1355da47L,0x7ab6eb7d4cc64fbcL,0xd68b2467a367d8dcL, + 0x908dadb995cbc15fL,0x0694c136c5ca5ea6L,0xc444772432837dbeL, + 0xb29f989c23df02d4L,0x43a6d58a7d60fe91L,0x0000000000000103L } }, + /* 61 << 469 */ + { { 0xaf60ddd4fd331e77L,0xdb2b23b6e4bda275L,0x50de018692fabaf2L, + 0x18f4ee1e439f1467L,0x1d6ad1c55f41e1d8L,0x1870539a6185d956L, + 0xa469cd954b7c195bL,0x0512eee3a8f71640L,0x00000000000000c0L }, + { 0x649527de55b56f56L,0xb6c666f14fa873e2L,0x8c12fcc042440b9bL, + 0xe44ce0c97ddb3192L,0x5c9d67f0c5e1f50fL,0x4d2e9d041c957109L, + 0xece745bf334a3f13L,0x4c0fdc639473d8cfL,0x00000000000001b9L } }, + /* 62 << 469 */ + { { 0x214dd24ea5dda3ffL,0x4423c61b58d48ab1L,0xed596e642546d55cL, + 0x3e0c6b0726ca41edL,0x47b13e973bd71b9cL,0xdca94741169e56c3L, + 0x6c380c7598078a65L,0x364caeb1ab093024L,0x00000000000001ecL }, + { 0xf5dd2e474ea3d662L,0xaadb18becc01369cL,0xbedca5296be99116L, + 0x4dca80e0934f07a8L,0x99adb72766b0b50cL,0xb478a6f79766ca8eL, + 0x771ac1e2d3573d18L,0x0c64c745619fd002L,0x000000000000014bL } }, + /* 63 << 469 */ + { { 0x1ff694760c4e06f6L,0x00e69af1532a26ceL,0xf86f1f52c6b2dd00L, + 0xfdf77e8a92114d6dL,0xf5d2ac406197eb57L,0x66465d59ca274247L, + 0x708608a82c41da24L,0x3f3d3a4afc46e7dcL,0x000000000000000bL }, + { 0xbf456142b8e1680aL,0xe98df23e3d5d3376L,0x4e0c78faea79d6a5L, + 0x1e7cfeb16436e15fL,0xf7ba6fef589b8115L,0x1cee877d7b5d2271L, + 0x1a9a5e2faa96639aL,0x1b756ea93bd35dc4L,0x0000000000000171L } }, + /* 64 << 469 */ + { { 0x5ea6d7ec70be25d7L,0x7123951418497519L,0x86ec4211661279b0L, + 0x3c673364d41deb71L,0xc7f5bf951884db68L,0x17eb082e77ecec04L, + 0xec159bb14ccef5a8L,0x630d61dcee69d948L,0x0000000000000130L }, + { 0x0d0db8b4c1a69df7L,0x28590b46cfc11acfL,0x5137b85bb19d5a7dL, + 
0x6069874da376813aL,0x871126092ab28149L,0xddc1900069e6d09cL, + 0x7b3544c70114f9bbL,0xae2d081fe842291eL,0x0000000000000110L } }, + /* 0 << 476 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 476 */ + { { 0xc7cc50fb09b5947bL,0xe166a9afd5eae02fL,0x47c7953fda349c90L, + 0xc3d4bcae21c0984dL,0xcee13c2f45ff2419L,0x334cc1f147780ddcL, + 0x21381fb6f1157180L,0x28ff4f86eaede87bL,0x000000000000017dL }, + { 0x10c72b8260e096a3L,0xc73306ec5ff796baL,0xb3f60666fecc17ecL, + 0xb9e0d51f46f82c8dL,0x6cbe5c7e296da50cL,0x87cb11044fa6c1ceL, + 0x5d5e0f6d491f9b8bL,0x187baf6970eb3d18L,0x0000000000000103L } }, + /* 2 << 476 */ + { { 0x2fafc73f189a0360L,0xe08428f1a68509e8L,0x4700e3eb3a2af597L, + 0x143c0bf14b298188L,0x9fb03775f0b9b8f6L,0x7c9e5a3a2653d193L, + 0x41109c62d1f8ab93L,0x8ad7cb71434536b8L,0x0000000000000073L }, + { 0x82c43b2daa844eddL,0xad5be7ac3cafb62dL,0x8dd99c5b4a5b4b0cL, + 0x2a11dc4131c113aaL,0xe62332e93390e968L,0xa912cdea4b073b39L, + 0xbb0b8141bc11cb2cL,0x504272d1c9dca239L,0x000000000000007fL } }, + /* 3 << 476 */ + { { 0xde0324030689ca3fL,0x4584fd9714fd0202L,0xbea63186c84bfe39L, + 0x0e579aadaba48f51L,0xcda28545c1a4c309L,0x33629507fd645c90L, + 0x0c77cd5627b58e13L,0x4d84313664dda00fL,0x000000000000016cL }, + { 0x281cddf1e91a439cL,0xaedc15d538bf386bL,0xf2f22343f35ea657L, + 0xf9fe82ea18a01f1fL,0x04f9f4730b5a27bdL,0xd296626d025df338L, + 0x45b950ba633dca68L,0xeeef58ad19f73b2eL,0x0000000000000008L } }, + /* 4 << 476 */ + { { 0xd92a04c26573f504L,0xb69c2dbb85de7ce9L,0xb2b82853a2a36153L, + 0x2899bd376af7feb5L,0x301805e5433c9010L,0x5bed8acab616d5dfL, + 0xc8afda4270e4b63eL,0x496eb13248cce468L,0x0000000000000103L }, + { 0x6ebcd0500973771cL,0x6377d971474e651bL,0x7d0a936b0f6dea19L, + 0xf7cf4fce0aeaf504L,0xcee2076c170c2fbaL,0x158448263af78646L, + 0x67737e06e27f2225L,0xdfff84af4601bd5bL,0x00000000000000c1L } }, + /* 5 << 476 */ + { { 0x035d0a019df3894bL,0xfcfb77c6c2f36a07L,0x04957358c9319662L, + 
0x52fb2c650e0fbe5bL,0x26d1fb01cf3ebed5L,0x3a417dc06f338b83L, + 0xfb234b48d11a9621L,0xbba96393db3f5cbbL,0x00000000000000d1L }, + { 0x78b95f1495129ff9L,0x10504de14d5f0abeL,0xe8c7250d3f4dde14L, + 0x20c04ab4ea00ac99L,0xfaed11686eb2143bL,0x23e32bf4544284b5L, + 0x0512a28dbad3acb7L,0xaa1926aa86d008a6L,0x0000000000000192L } }, + /* 6 << 476 */ + { { 0x81f919bc75e0a1f7L,0x89de5dd7cd6a017dL,0x14d88451c9a96fcfL, + 0xf3f61d046c5b2967L,0xd21b7e9c453a4189L,0x18e9c74dc8b7bf46L, + 0xf8903f4f63921ee6L,0x1cabb3f025060de5L,0x000000000000006aL }, + { 0xf15cd5c8c291f1cbL,0x26c8a484d0311e2dL,0x1e5d93b9d9e366c1L, + 0xb037cbdd812fd505L,0x8614127c0c567514L,0x4125bc9b4256930aL, + 0x84f62f976a0a82acL,0x5b12e21e8dbd5c67L,0x0000000000000192L } }, + /* 7 << 476 */ + { { 0xbe5764c9caf7fff7L,0x16cc873d6d606b04L,0x6a6f20784268cdfbL, + 0x06cd7ce627bd5738L,0x532d5c4242e47936L,0x704f0b2a7d0f8758L, + 0x6046d9f36d2e977aL,0x53feb41a1327c874L,0x00000000000000fbL }, + { 0x9425b4d1013ef2beL,0x98b50c6e98a45194L,0xbf181a2b9df34265L, + 0x39c40ce4c2de575cL,0x6757daeb883b4dccL,0xcb224d4bf8b5efbcL, + 0x2dcf10aeed49a2a0L,0x9c1fff7a9ffda898L,0x00000000000000ecL } }, + /* 8 << 476 */ + { { 0x7d2636053e0c991eL,0xee3ed571275db80fL,0xf20cf648561dd429L, + 0xed7ea515f12bddc2L,0xb84130bd0a544f33L,0xd56fb7297832d23aL, + 0x728ebeec0bbd7452L,0x6006394d28216463L,0x00000000000000d2L }, + { 0x43b5561b228b41f1L,0x90eba77819b17767L,0xca1ac49ea2533ac2L, + 0x23b080f75ee9175aL,0x98a9470773a70bb7L,0x4ca74d0815cecbebL, + 0x5141db56b4a97b7cL,0x758fe34ef1450caeL,0x0000000000000111L } }, + /* 9 << 476 */ + { { 0xd2b17efdc9bf0c9bL,0x79352df94a3175e0L,0xde19af86841b5f46L, + 0xd2bdefc98317fe9fL,0x4349c726a5ae3bc1L,0x9991211e43cad229L, + 0x3245ce6ef16a1f19L,0xb7a43c99f6b0a022L,0x0000000000000027L }, + { 0xa0d43e88bfdd7fa5L,0xb9fdb126a56aef59L,0xe36fedb3eb1646b9L, + 0xc2a5b17fb2d25b2aL,0xd1db0f0f7526ee70L,0x526e46c6739fe3b6L, + 0x5cfa3185a6320001L,0x908282721717cee8L,0x0000000000000054L } }, + /* 10 << 476 */ + { { 
0x61ed6a7f6e9e6e88L,0x0456d4c464e89e5aL,0xea5fc9d8f9e17ed0L, + 0x2dc7dfafcb542d12L,0xca40f95c8c90f19dL,0x347f0c9f375c2872L, + 0x3a5e1ff600f96cd2L,0xe3442888a9fb6359L,0x00000000000000f9L }, + { 0x16e56ae6a6743465L,0xc9e24ada478dfe6fL,0x298ff6dfe42d5813L, + 0xfacb1cf51531c516L,0x9b778fde097be036L,0xaa17ea655155b221L, + 0x8fad7e57c8266064L,0xcbb079a05536759fL,0x000000000000013aL } }, + /* 11 << 476 */ + { { 0xbc23aeb2d1fc50f7L,0x5a63b2379b184c0cL,0xbc43ff0ee6794086L, + 0x64713b62031f0138L,0x3db4b0bd81da7dacL,0x4d6a996b36d6fb58L, + 0x6b064df122b4e62aL,0x0612931c7f6b9cbeL,0x00000000000001c8L }, + { 0x55503228462bbc5cL,0x5651ec3a244999ccL,0x93b5b3d709872c80L, + 0x8cf163af070882c7L,0x7a2923223b30afe3L,0x8d92b4aaee45a4faL, + 0x94646bd3697a47b4L,0x99bde8f96423023bL,0x00000000000001feL } }, + /* 12 << 476 */ + { { 0xb627fdfbc0062d02L,0xa527c2db7c3ecc6eL,0xbd959f5d49faa954L, + 0x5e110744f6c6a3c9L,0x40f10e096d3907ebL,0xa94e585ee27341f6L, + 0x8f0e11851cb56481L,0x0f687b831cec3022L,0x00000000000001b3L }, + { 0x5f8c166d962a12d0L,0x00b41edad2445d5fL,0x2b259ccee9690a54L, + 0xe4bdac1d6bfe61bdL,0x30cf14bcf51dc15bL,0x43e72573c600759bL, + 0x0bea781ff86f10c8L,0x3259d6086a363f83L,0x00000000000000c1L } }, + /* 13 << 476 */ + { { 0xc288ee74cdf30f6cL,0xa8fa52b3d8b19859L,0x1ff94a3023f725acL, + 0xf02bef5b58b44ec1L,0x0af81bdfb862bec6L,0xcb19979d38d65bb7L, + 0x4469bcf7c662adf9L,0x6f29e33195e8305fL,0x00000000000000bcL }, + { 0x72244e292093d379L,0x590c16a0ec9d9897L,0xc635439ee45d50c3L, + 0x5b5562779b40b375L,0x9c9c449643d0381eL,0xd61c59c2378d8002L, + 0xf0811d15ac014d42L,0x37da2de58afa297cL,0x00000000000000f3L } }, + /* 14 << 476 */ + { { 0xdd15b8890006da10L,0x415306bf6643193fL,0x79036f1f242d9d57L, + 0x6c9d5588b36f9ecdL,0x58305c8bf19d2f9eL,0x383d6245f5c17b58L, + 0x9c6af8d8640cabedL,0xe8d10e728adb59ddL,0x00000000000001d5L }, + { 0x5490f2f98e99708cL,0x5373965efdd3f754L,0xade0363389e6bff2L, + 0x59d82f758ea6e9dcL,0x3ddb97148ca95fb3L,0x25bc3b9a85713ed3L, + 
0x0b118e05090cf1b8L,0xd95ea9cfc707cd92L,0x0000000000000028L } }, + /* 15 << 476 */ + { { 0x73bbc51cec9886f0L,0x6d91832d76c9105aL,0x3a4938ff7734de0dL, + 0x68613d279f71510fL,0xecc8b09f9c201233L,0x2b426b0d02f20792L, + 0x917bab5133f87aebL,0x21014269b921e7e8L,0x0000000000000139L }, + { 0x366b3a8b91a3cef2L,0xbf63ed9910bba815L,0x98143ae944329e33L, + 0x7bf47db912c92e6fL,0x3e298e35de7bf2e6L,0x517d6a51770daa8dL, + 0x80a04c0e85e292c7L,0x252aac413a6ce0a0L,0x000000000000009dL } }, + /* 16 << 476 */ + { { 0x62a1f73e3fbadc57L,0xeabf4b68b1ba2d35L,0x711d7f6376f3bde7L, + 0x162f5183b31c3fb3L,0x0dcd66fed7d716f7L,0x6e1dbb78e46816d9L, + 0x532831b8f833a6f7L,0x64191ccb4cfed0cfL,0x0000000000000190L }, + { 0xdb3763396d7824adL,0x6b948aefb16a391bL,0x2d174fd64ae9013eL, + 0x9b5c22ee16a57de2L,0x3bfed1da9e857a25L,0x635187a64d644bbdL, + 0x94786e3a6fe288acL,0x827c8b03ece25962L,0x0000000000000092L } }, + /* 17 << 476 */ + { { 0xdb67687a67d36ad4L,0x3d1af6ac40f1e687L,0x2c71a3ddae798682L, + 0x645140cad7a2071fL,0xc3517c50eb0d963fL,0x355c6438ab08414eL, + 0x4cc8ac9e0180c6cbL,0x561c7ca326fbc369L,0x000000000000006fL }, + { 0x0342d1496e7e396dL,0x5a5cd24a1ba4fac5L,0xdaee6359a50d62beL, + 0x05d3140c9f8ec814L,0x1468a0a3faaf4476L,0x57b01227507375b4L, + 0x4577d26211476222L,0x6b9ff34821fcaf64L,0x0000000000000169L } }, + /* 18 << 476 */ + { { 0x053b265090739d6cL,0x81b004bd712093bfL,0x7f50af875afe50a8L, + 0x9b5ffb5e616741b8L,0x681137aceb3c3a6dL,0x69b12c2245ec1c2eL, + 0x43bf3b334d35bbdeL,0xd78593fe2a340a3cL,0x00000000000000d1L }, + { 0x995af65a2422397bL,0x832b654bd1e5f2efL,0xdbde76cee8f57027L, + 0xbf107d39ac9d310aL,0x028a3faf7ef5943cL,0x2a51126a9db46e73L, + 0xaaf481c0b65d02dfL,0x6c0678a8cdd9a840L,0x00000000000001abL } }, + /* 19 << 476 */ + { { 0x7509b5b0a968ac0fL,0xd55260f8c75e5f25L,0xb517c59a16a4515fL, + 0x85e826661c3270d8L,0xf42ef8d3b92dcd94L,0x0cf375adc36f162dL, + 0x4335872c0da09278L,0xe1256b6a06d898e1L,0x000000000000011cL }, + { 0xbed033f087f764ceL,0x55e48707aed0398eL,0x721335dc155cc588L, + 
0x97b2e056d98215c1L,0x6348051fb6149163L,0x005844d2ea832fe4L, + 0xec5b0eb5c8d9470fL,0xd89a66da78fa2054L,0x000000000000019fL } }, + /* 20 << 476 */ + { { 0x4fe79ab617384010L,0x0113f08cf667c198L,0xacb83a5dc1b7d603L, + 0x3894655a113e8492L,0x4198a4dda53d7e23L,0x4154cb63a659d9b2L, + 0x87dc05902048cc45L,0x75bd93c9c871f14aL,0x000000000000017cL }, + { 0xc5d2c8571c27cd43L,0x63c9590620c15abaL,0xb7338ea047b68435L, + 0xa328b780f45c64d9L,0x5830791649070f2eL,0x00fcabc4434c1e1bL, + 0x49af9ad69c28099eL,0xf45e8e264cb91946L,0x000000000000012aL } }, + /* 21 << 476 */ + { { 0xa566513cd10f4d45L,0x96be203262cf6caeL,0x3c901109bed653c4L, + 0xc9ddc6f5c9e6aac3L,0xd71c7f5cdf7736dfL,0x8e668bacc14d30f1L, + 0x8472c592082aa9f9L,0xa956d0c59e379045L,0x000000000000009dL }, + { 0x2a30d8f689f409f5L,0x959721c74f2040f2L,0x4f2e28eccab44c6eL, + 0xd03ad278fa69fdbdL,0xf23605b67d5434e3L,0x5b23a0fb99dfd293L, + 0xbc9005f5cd1aa7b2L,0x9b380071de406705L,0x000000000000003eL } }, + /* 22 << 476 */ + { { 0x9cd76d418f852e50L,0xe53ef54d74e73fedL,0x719560097ff71362L, + 0x7063bd9fb11e07eaL,0x612e763de01438fbL,0xec67b225fea1f881L, + 0x689a7e7f935b985bL,0x32110c564098c919L,0x0000000000000052L }, + { 0xa6f57214c1811d27L,0x72f18b108b658477L,0x24c30d8cf4eeab7bL, + 0xaa6d60131c6f8ab5L,0xea6b996378dba5ceL,0xdb404acebca2295eL, + 0xfe583aab514c8334L,0x6497bb4d15394b37L,0x00000000000000acL } }, + /* 23 << 476 */ + { { 0x199de705d09dce53L,0x3677a5add36a0a45L,0x1f052544a09d949cL, + 0x79cd422fda21efadL,0x01669ab6f92c9479L,0xec05bcd94b070ffdL, + 0x78cd06d3915dd773L,0x657eac7820febb9eL,0x0000000000000078L }, + { 0xdbc2430b4f4fde91L,0xd68c37e8d9330002L,0xe37f66a0a1ca0636L, + 0x42420f8b219f789dL,0xddaae3cfc9391916L,0x964b9ea1de5040b8L, + 0xba0d5701f5637d90L,0x2433cff430254a91L,0x0000000000000136L } }, + /* 24 << 476 */ + { { 0x19427f7c3a0dd44fL,0x2422033f899cb9bbL,0xca5616531c414cdaL, + 0x2a6dd24ff611151cL,0x785ac036cb1ae063L,0x523c8d8947ed9de5L, + 0xa73e893cd2a988cbL,0x2c3013cce4b8ed7dL,0x00000000000001a6L }, + { 
0x4eecf37e9d92a8f3L,0x5e624fc1a23ca927L,0x4093ed7b54675ceaL, + 0x0ab770fc3071943fL,0x2d70962e31069eeaL,0xb1f1c5acae0038d5L, + 0x1b3164d04aa60fcfL,0x941eb6e60de4061fL,0x00000000000000daL } }, + /* 25 << 476 */ + { { 0x3dd4ee8e19a73314L,0xfe0237c31ee71340L,0x304ef3c4dabd8295L, + 0xa470f16e2b0e401fL,0x9b9b18bda90af043L,0x16879d527d4b1066L, + 0xa64163238a3d7f8dL,0xaa9ccc7e65ace4acL,0x0000000000000026L }, + { 0x57dfdb26efd2582aL,0x31a4cbd5816472adL,0x3696706110bc7c16L, + 0x5a4b9b40653ced98L,0xdb6ebf15a0a04521L,0x7837ec24873b5291L, + 0xa7e51a29a5790ea5L,0x4fe225932c1f90a3L,0x00000000000000a7L } }, + /* 26 << 476 */ + { { 0xe04751fd84b60e15L,0x907bdd54dfdb9fc8L,0xde29491fccea9731L, + 0x08b53084558b3469L,0x932829537bf3fe60L,0x9542d0cc571d9a75L, + 0x3e280110dcd0424dL,0x1c653fd7f1fffe62L,0x00000000000000ecL }, + { 0x0fe3420222f47f20L,0xc55aa305727b5ca5L,0xda700ec9cb053631L, + 0x5b540840d29a3db8L,0xa3215fb991f76098L,0xfd43d98f587dce0cL, + 0xaf9356d4b0186f34L,0x42fae7d342631c7cL,0x00000000000001d5L } }, + /* 27 << 476 */ + { { 0x809a4ee19e29d9bcL,0xca98f3eb866d85ddL,0x8b012a73c6076d52L, + 0x2d3c172d25f44794L,0x9c51f8b7aaba69c6L,0x0d8418773aaf1dfdL, + 0x9f8d5aea3aca08b2L,0x2d259c4d82594f4bL,0x000000000000016bL }, + { 0xbb8f9a9ec0d718f2L,0x6731472692f7dd58L,0x30d7fb584429c13bL, + 0xbbb606d88a6127bcL,0x40e9739fe02e8f49L,0xc14feea8cd2d0037L, + 0xd08e46c31c288befL,0xad94174d58456ddbL,0x000000000000004fL } }, + /* 28 << 476 */ + { { 0x918ddc53c03584e2L,0x63792fc32a8e3474L,0x8f62970f8d6bf25aL, + 0xa865f2e231898fe8L,0x39dc06d1ded08fd0L,0xee1d4e6f603536e0L, + 0xcd4951727a60552fL,0x427b8e9eb63f05a9L,0x0000000000000045L }, + { 0x2229440303fced47L,0xec0db4da27fc6286L,0x0ec8947b4f7f7b5fL, + 0xea2cae19d7dbce85L,0x9bcf3f58900d2b20L,0xc8688ae67529f727L, + 0xd45e9ee17169208dL,0x1b6c3acc1513e205L,0x0000000000000080L } }, + /* 29 << 476 */ + { { 0xc9ea814896361293L,0x7e8dc3b2cbe8593fL,0x133f210f3a9b8cb6L, + 0x90a01852e96e46b4L,0x5075484c849ce67fL,0xb452de9c8e5b2fc1L, + 
0x0a3a41c03efc8f60L,0x2466d6310970d1beL,0x0000000000000056L }, + { 0x3ef2e84ffc6a068bL,0xe878a857b90a37d2L,0x983320953070a043L, + 0x3a884fcd9a96f4a2L,0x023ece985cfe8c13L,0x5d397f8566a6e2bcL, + 0x566cb17a6e325e8cL,0x8583e930d325b396L,0x00000000000001b7L } }, + /* 30 << 476 */ + { { 0xe4e1585378ad5c40L,0x88bb989adac2b22aL,0x0640a72813d25937L, + 0x453c268adc17fe6cL,0x1405015ffff90425L,0x6df4af98c282227aL, + 0xe903cd7dcd424986L,0x3a642d65ddbe81a6L,0x000000000000009fL }, + { 0xf69256ff0b072bbeL,0x025e83d594792f5dL,0xfae73b56e1a0d4c8L, + 0x41b4794fd773135cL,0xbf533baefa6a7c6eL,0x8d10d72c9370e86cL, + 0xaf4005bb45c5b34fL,0xc929d758bc34a7d8L,0x0000000000000149L } }, + /* 31 << 476 */ + { { 0x4595f8cce9d67ceaL,0x6943ca4166942ca4L,0x53b1cac7945ab052L, + 0xc6c82175cf0ab86cL,0xabf93f532f16a36eL,0x29a01d58ccc0db55L, + 0x71cc7e2b2e389be1L,0xef5bd8ce5f396997L,0x00000000000001e9L }, + { 0x2344a22bfd5a46d3L,0x0e4804a34e184097L,0xb24186d5ae5021caL, + 0x7688f016081e9625L,0xb42d53683dbc5299L,0xeb8db964fc0f984dL, + 0xb4ef82a2421e1f8cL,0x1af0710a07050a74L,0x00000000000001caL } }, + /* 32 << 476 */ + { { 0x027f4ff2d35b5acdL,0x0e0fec359eee8cafL,0xa99aa6e2544c2a44L, + 0x0548945fa9de5ee3L,0x02ee7fb3c4c5263fL,0x3fae05eec75a400dL, + 0x50ecff533640dfc6L,0xe57c180fc0cecb87L,0x00000000000000c2L }, + { 0x22b3504387ba8e83L,0x6c47f5d21c652199L,0x2ec15d3d19f3260dL, + 0xcf9e5050e95faea6L,0x7adc35cca4f5d193L,0xa7b0f1f33ddee652L, + 0xdfb751246e213c07L,0xe33a115a7946ddc3L,0x000000000000009fL } }, + /* 33 << 476 */ + { { 0xbad8626ba00b3223L,0x28f525c1276bf50eL,0x5473d3efd0d599f9L, + 0x8327c435483a8f1cL,0x0d6bd8e891b88a89L,0x3ed2f18bb691fa7aL, + 0x4bb37c999089f2dcL,0x7872a7d971139d0fL,0x0000000000000073L }, + { 0x5075c85dce1794aeL,0xc677297b24378c22L,0x9f7ff8617d981d06L, + 0xdff027c8defa63c3L,0xebe3157882b6ae41L,0xad2afc54c16e74baL, + 0x29f51ea9c2b7ede2L,0x8af4f36efdc2ad9dL,0x00000000000000eaL } }, + /* 34 << 476 */ + { { 0xaeedb8fff0380ebdL,0xb4c5f02397de16e4L,0x6d48399034558657L, + 
0x7c35f8d58dde237cL,0xf6476a21ade8cc2fL,0x610f9504bf380f9cL, + 0x66d63301ba054ee0L,0x2f1bcb891340be68L,0x000000000000019bL }, + { 0x4a514ab9bfab6e8bL,0x6602369b2eb7baf6L,0x6642e1f1c7c4bd05L, + 0xaf22b1bfc31de5beL,0xac45bff337ba8dfeL,0xd3d3be278a0f3484L, + 0x39ad805e35d3e9cbL,0x5cad4328fc61a017L,0x00000000000000a1L } }, + /* 35 << 476 */ + { { 0xb485946aa3d8dfadL,0x1e0ef60f4d09e4edL,0x2465fb251f947cd4L, + 0x86ac22024f26eb6bL,0x1ef5a7e60ade65acL,0x7e9e1e9ad1343201L, + 0xaac3955c7be0aaf2L,0x59c532f852752348L,0x0000000000000136L }, + { 0x502f0d9b95d6df86L,0x695153fdc8819108L,0x46488395b504460bL, + 0x0dd02f9d7f9bedf1L,0xd03f5b64be406a37L,0x81ca920f0d3f5fa5L, + 0xabc154fa56564543L,0xf7e482f6ad9dd5cbL,0x00000000000001c5L } }, + /* 36 << 476 */ + { { 0xed7c87484052967dL,0x539e1bc034ec54e2L,0x45a999801315ccc6L, + 0x6ebab37dfd3c16fdL,0x35c88fd9776a636aL,0x1d5074b9545a61bbL, + 0xe13c2ef55e5fa705L,0x161fe458b33824eeL,0x00000000000000f3L }, + { 0x6e403b0577aa5135L,0xf830daa6bc413029L,0x778b4c30533435acL, + 0xd8436f9421a4dbf3L,0xfb75c9677ef5d437L,0xbf31e122f1c93e89L, + 0xeba1e3bc524a71d9L,0x68458dac4b75e86cL,0x000000000000006fL } }, + /* 37 << 476 */ + { { 0x6f4f3dd0662499e4L,0x52fc1991d3366014L,0xf3e18982832a72b8L, + 0x139e6a0aa61c4ed6L,0x30f28391fc271a46L,0x0567b845d969103fL, + 0xa37f178406ccbc6bL,0xef56b5fda5093031L,0x00000000000001deL }, + { 0x1254563e29bdc854L,0x51dffa0a17e7bfeeL,0xeb23b7b813055a8eL, + 0x6a0c00672f115b69L,0xfde42254f5451cf4L,0xfad70d3250091f5cL, + 0x8e3344f7169ff7dfL,0x509c843aa437d427L,0x0000000000000195L } }, + /* 38 << 476 */ + { { 0x34a512a52212da86L,0x35e49c33dc085fa5L,0x67559f2c3268fb8fL, + 0x5a0e4c28fa04bd3eL,0xcc71661f406b00b9L,0x10b53ac137f6f369L, + 0x322427439de56f17L,0x50992ef21074232dL,0x0000000000000086L }, + { 0x2710c0242e9778bbL,0x6a445e195e6730f9L,0xe9db7c6b182bcf75L, + 0xcc2c0c169cff3237L,0x38495c929e91016bL,0x14a3999b65c4b494L, + 0xb94667e0ab92f8ddL,0xc52df245a2e4dddcL,0x00000000000000a7L } }, + /* 39 << 476 */ + { { 
0x24b43f0449465931L,0xe9846e833c923c39L,0x640cf800272924aaL, + 0xc9b6037cebb88275L,0x3e920427a1656018L,0x7cb62370b8f85c77L, + 0xa9df472ad4347c6bL,0xe438aa66e042c30aL,0x0000000000000031L }, + { 0x853f201d6011ebd8L,0x065d9edd4ce480a2L,0xc2eabdfcee6a18d9L, + 0x9a03831d83905426L,0x09d2b3fc3a039862L,0x75040f7227c291b3L, + 0x77dcba094598435dL,0x1391704ac9b9e54dL,0x000000000000018fL } }, + /* 40 << 476 */ + { { 0x0cfe94974ad63b91L,0x39411a2cadcc08d6L,0x198aa66d98605b89L, + 0x0c1153da708841cbL,0x7edd21f29e46a446L,0xa45f38dce62221fbL, + 0x9cf28ee83580482bL,0x06aeba1fa674e7a4L,0x000000000000015aL }, + { 0x38e7fd4db449387fL,0x6179190109421a33L,0x65c1a5844fe13330L, + 0x6fbf269567144a82L,0xa06241c4d40afbe0L,0x9af35b88eff6a461L, + 0x001b6b24e2be2435L,0xf1ec4414905dddc1L,0x000000000000000fL } }, + /* 41 << 476 */ + { { 0x00de445f7006f9feL,0x0786824e311bf00dL,0x8206644605c0fce0L, + 0x25cc4fd9905278f0L,0x1882ee6eaa7c33ffL,0xcd1577bc651cdc8dL, + 0xd31936e360161788L,0x9ab9852fc5fa4f46L,0x0000000000000170L }, + { 0xc3979e72c25c1871L,0xe6ab079252c82442L,0xa1da5c25e8949acbL, + 0xd91b3d9bf02b7d0eL,0x54fa0575516699a2L,0x8efb5dfc11397f0aL, + 0xa694a0446f27a9ddL,0xd776b8b7635f3527L,0x000000000000009eL } }, + /* 42 << 476 */ + { { 0x18ceeb27896f7815L,0x2e6a7298196a8bdeL,0x3a104d3eacbe43a9L, + 0xcc207227823bbaa8L,0x6d905c49f492f53aL,0xe6ff675f6839c256L, + 0x0458eb94c0dd385bL,0x0fdda10a3f558bdaL,0x00000000000000c3L }, + { 0xaf516cffa8da33b6L,0x1ed91b01df22270bL,0xb11f0c5561393addL, + 0x44c5ae29d31390f1L,0x5b2b530a894ae78fL,0x4dbdca81df45dc47L, + 0x5f0834c94e3827d0L,0x302a6a06ea2a27c6L,0x00000000000001d3L } }, + /* 43 << 476 */ + { { 0xbd2cdcafcbac1d22L,0x8b67a628bbf18095L,0x8360fbd4b99755deL, + 0xaa3c1abd36b6a3d7L,0x90258eccaa6f0471L,0x75df5700a954109eL, + 0x6ed4ac78486b2593L,0xb456ccbf8b51a370L,0x00000000000000d0L }, + { 0xa6aa0ad865cf6c0aL,0x3b1965ac95863b68L,0x129262647c831811L, + 0xb02283d73e9eed94L,0x7314ae01b2126091L,0x39219a7287aa13ddL, + 
0x2ee49375c32a59baL,0xcb689e7335851c67L,0x0000000000000109L } }, + /* 44 << 476 */ + { { 0x75c28be714a5265aL,0x8dd13c72b4c7802dL,0x26d2cee7c8d89192L, + 0x83c5d55d24272316L,0x882f4e1c4c5b6041L,0x477bbf51d3237af9L, + 0x929ad1da89e4635aL,0x462e54e4440dcf82L,0x0000000000000115L }, + { 0x92e86b7bd6710cc8L,0x9ae2cb4c210a3bdbL,0x5e366b0765cc970fL, + 0xe0c9c82b7713075fL,0x4fe46ed1410513e1L,0x655944acf5b5edc8L, + 0x328afb24e3986180L,0x2c86c170ec210c87L,0x00000000000001afL } }, + /* 45 << 476 */ + { { 0xdf078ff3615fae7bL,0xbd1e1ff130dfd4aaL,0xb5acc56700ef9988L, + 0xc8deb6c5e593e61dL,0x9705431612ce76fdL,0x466d5faedc8235fdL, + 0xa7887816e3802e8bL,0x8c477b7168cee998L,0x00000000000000caL }, + { 0xf7f7097b7146d6d2L,0xbf5012c5ede058c0L,0xf093ce34a1cd690aL, + 0x6412920239deeba9L,0xfad016ef45d0ec64L,0xb8006ac513ce1bc2L, + 0x519ff55a527f8971L,0x8203739fdf660805L,0x00000000000000d2L } }, + /* 46 << 476 */ + { { 0x589116a0f1e8c56fL,0x6ea85ed6cd20d5b0L,0x5f1f5d6832ad1f37L, + 0xf98c761eba7ec611L,0xf5295fcd103f1f7aL,0x26b7bb746cd41f07L, + 0xd8ab44aaa0d20471L,0xbf4002f4cac0098dL,0x0000000000000182L }, + { 0xc6e82ac8c6c269e5L,0x5140dd2468160d85L,0x98e7440ceaed9d4fL, + 0x72eb1107ba61a8c7L,0xced4753f28c51ea0L,0xfbae3303b6a8c96dL, + 0x2b03bdff7c3e52e5L,0xefe149227825d3dbL,0x000000000000002bL } }, + /* 47 << 476 */ + { { 0x42359d455c79dd95L,0xc58e19f927540084L,0x430f0e5278fe7d63L, + 0x16a8589a0faf615dL,0x3839b8a7503a9f7fL,0x0310ebe623e6b6b5L, + 0x2c585bbb62de235eL,0x91cdb04da2d3388eL,0x0000000000000035L }, + { 0x31b09c074ccae626L,0xbad7e2a38eb5fd2bL,0x609f75e05bed3b8cL, + 0x61ae6c2b8b23d6a9L,0x7f9c74bb2b493455L,0xa7ca5bb0beb78ac5L, + 0xb2413a13d4af6a09L,0x23169f3e554697ddL,0x00000000000000baL } }, + /* 48 << 476 */ + { { 0x6e965255ef4b1b98L,0x3f63b6bd8ba97bf6L,0xc6cacadb19bb0ba0L, + 0x9f88f0c83c8edcc4L,0x3a2d4bc51a489aceL,0xaa3075b75529bb67L, + 0xcdb0c4d6ffbbd2bfL,0x4a3b41d33a568fd4L,0x0000000000000146L }, + { 0x46442711effa56aeL,0x941858d847acc062L,0x9480d79c7434d565L, + 
0xa92e885943322eccL,0x650cd6de5f82931eL,0x4ef5d48397876156L, + 0x04ca704aaaf946f4L,0x75258aac0a95e02cL,0x0000000000000132L } }, + /* 49 << 476 */ + { { 0x2544f5d10856d9fcL,0x29678dabe80f610dL,0x0987e134066ea695L, + 0xa208023070fe1ac3L,0xc65df557fb51ba2bL,0x60a2d5693320b29dL, + 0xd097f83de67d902dL,0x7f92f3d3d76522f8L,0x000000000000005fL }, + { 0x7ecc057cbd6ff176L,0x5abcb1d0d907062dL,0xc49f4bed9fd1bcf9L, + 0x4042dd6399c27c6eL,0x1b1a7480a867644fL,0x8d65632c49d3134dL, + 0x6f31a12a548695a9L,0xa0abbea7be0ed228L,0x0000000000000170L } }, + /* 50 << 476 */ + { { 0x16e0e933c494da37L,0x3018f05aa75b90ceL,0xa382c38aa8dffc72L, + 0x66775c45f105047bL,0xd4f234b08929555fL,0xfc5c829646525c45L, + 0xf65136353442a793L,0xcf862712fbdc67ebL,0x00000000000001adL }, + { 0xa1afce91c8e7f888L,0x23a79d7eefd25a7bL,0x36d5b89881e2f333L, + 0x4e9f1db4e12880deL,0x4ae2df6b6ea83f9eL,0x3dacb4b7f81caaf5L, + 0x03d46680fb01b238L,0x3b6ef2c1077e2c41L,0x0000000000000104L } }, + /* 51 << 476 */ + { { 0xb5549eaaade2d6e8L,0xecee8b8177078851L,0x75f43854e0913393L, + 0x5752bf7208c25924L,0xcf0f36aa1873d036L,0x6fb99df52d814f28L, + 0x12fcade24f4460c7L,0x1f39dfc0d1ff8940L,0x0000000000000073L }, + { 0xc4821f9e1f3474cdL,0x52eae506d410f535L,0xe5fdda1f337122f1L, + 0x7436c966551d7704L,0xb1315a9b7e5d83ebL,0xb47b7fa163dbe687L, + 0x9de3f163e1ad5528L,0x4eab78571865d6caL,0x00000000000001e9L } }, + /* 52 << 476 */ + { { 0x4e989dfe03410c9dL,0x33cba247a0b9dc26L,0x00a287260ec09c0fL, + 0x5ef32ea9166a3d1bL,0x11acc0b22efa02c9L,0x78780ccbc37bb3a9L, + 0x80695d7957cd294dL,0x2d2fcf8d22a843baL,0x00000000000001e3L }, + { 0x01b2f4a035455b91L,0x0501fad5292c8300L,0x1d2c358027a13460L, + 0xc58b09314cdbe18cL,0xa00615bb09c40371L,0xc07dc42a31e3bb1eL, + 0xb85f8730f5bd0e84L,0x9f6fbbfff41772afL,0x0000000000000030L } }, + /* 53 << 476 */ + { { 0xd09c91bc77553a15L,0x361e49601bd5da39L,0xd3980e9f72042f3aL, + 0xfcd4543d671a9b4bL,0xe0f776a8a6acfc0bL,0xca99229a69a5624aL, + 0x831862aaa1f64885L,0x5883a7af179436caL,0x000000000000019dL }, + { 
0xc40411817d06a8c9L,0x8a0941ef8965fae2L,0x6150d992eb25d71bL, + 0xfbaf356ee2350cd8L,0x5cffc7c6a439b155L,0xc3566052a70c21ecL, + 0x9d4432847f016d22L,0x30295f053f58948bL,0x00000000000000ecL } }, + /* 54 << 476 */ + { { 0x6f279f62ede689e7L,0x9b66e05622e958e0L,0xbf0df539b143ca4cL, + 0x945ffe3c094f5f2bL,0x425a9a9008dcd6e5L,0xef36de89a6fbae42L, + 0x9e5291e573bb2760L,0x93b5925a57d1173aL,0x0000000000000183L }, + { 0x4dda1a9760968355L,0x7c45e7440760b638L,0xa80c805f102af8e2L, + 0x3b784e7d0d1802b6L,0xae9196a070c07c40L,0xdec2da01ef7d0af9L, + 0x0159c65e60349221L,0xea3091d6801d78d6L,0x0000000000000080L } }, + /* 55 << 476 */ + { { 0xa09d33ba8c2aa8bbL,0xa73e2da2a4db5575L,0x62caaad1556990feL, + 0xdaffaed72d31b382L,0x8504f2e08be1f79cL,0x15f8630b9a60fca3L, + 0xa2ac9180540630ccL,0x1190d92dd7720e42L,0x00000000000000d9L }, + { 0x208070c9c36d06deL,0x1b195581806e6fc5L,0xe358e901b6f52524L, + 0xdc2bebf342ab3a2eL,0x4825f836a7a28526L,0x64ea6668c0a29d15L, + 0x8c66792927af62bbL,0x3b9b031889fd3214L,0x000000000000005eL } }, + /* 56 << 476 */ + { { 0xa901a71312538fe5L,0xce0c9871f2f66dccL,0x05e90fbadbfa8c57L, + 0x82626b4d4a84065aL,0x95101f3bcec2fdc0L,0x00fedbd1c2389459L, + 0xc38e00945c1ce6acL,0x07d68900b4ef5efcL,0x00000000000001c7L }, + { 0xbee9a6aacab8c27bL,0x5a8c52d998486109L,0x68351a08da2cf44fL, + 0x83e72bad29e7f1a4L,0xf6397648b1220a5cL,0x791fb8c6230e46b3L, + 0xab83c6386b1dfa43L,0x8b7d1b711d99ac55L,0x00000000000000f8L } }, + /* 57 << 476 */ + { { 0x765aa8e7876c772bL,0xe6843325b5b97f6eL,0xfe9fd6c58ab54c16L, + 0xd6a1e62441d8de82L,0x42a5842800260cb5L,0x407b0e42520aab9fL, + 0x039d551ec8ce6af2L,0xa884cf26f0a5f4d6L,0x0000000000000155L }, + { 0xc32e89bf69c53346L,0xeaabbd18c5e05a39L,0x88520159b89c3d28L, + 0x9d7431d255e2270eL,0x6a27c977c289dff5L,0xfcd3b6da33729f3dL, + 0x30a1e20283c7398bL,0x19f5f537da4e5125L,0x00000000000001dfL } }, + /* 58 << 476 */ + { { 0xd6b1be19b2227e11L,0x089708a5103e4640L,0x234103f813d0eaa9L, + 0xcad17afd3fe18f65L,0x86b69be58933c95fL,0x3cb944820cbde5b2L, + 
0x63ef96ce553e46ecL,0xfcc54f027ee7ab01L,0x00000000000001edL }, + { 0x6c85f7c1b24788e5L,0x3b7135a7ed618715L,0x7cbeed1899453d4fL, + 0x2494a201ee56ea6fL,0x718327b3cff78c9cL,0x0b51a87c87f006c6L, + 0x0da34203f2a2a6e4L,0xdef43eb70f3d8ddbL,0x0000000000000118L } }, + /* 59 << 476 */ + { { 0x5660ef89fca5e4dbL,0x147cb2c562b1adafL,0x656e8f4565f6200dL, + 0xed790c437415cb36L,0xe9edf21257aa09adL,0xbc19c5fc95d36d02L, + 0x43f17f8678de47e7L,0x2850fe7691b3b0c6L,0x00000000000001f9L }, + { 0x5d9cf8962551c84dL,0x66bba935edaea8e2L,0xa8ad879ddd2adb96L, + 0x332b3b65a8ecffc5L,0x598adbd76449ab1bL,0x92d14bb3e3ab3305L, + 0x5288b2dff02d0b51L,0x63556a666aa36025L,0x000000000000008aL } }, + /* 60 << 476 */ + { { 0x26693bfd33fd2555L,0xc6ffca6e0d7d3eebL,0x3df570919647dcb9L, + 0x05ee7744cd5235acL,0x4f33bcbd0acccf3fL,0x10ff4e69e5176e6aL, + 0x059a1dbefd230eecL,0x1589dce81c63e145L,0x00000000000001d0L }, + { 0x283ef53ad892bae1L,0x25787ebb1e7b35adL,0xe4d612fe84ddaa2bL, + 0x1350f60986da9a4aL,0x0daf1a86d28940d9L,0xb58efecedc742269L, + 0xb316aea1c05daa85L,0xfc0c48a1c8adb92fL,0x00000000000000caL } }, + /* 61 << 476 */ + { { 0xc8e460eb5db3df35L,0x4a3b8c99fc914e7fL,0xf8352c30e961adfeL, + 0x2c2fe81b2f9c30f6L,0x3f8d9d32676a3d21L,0x663e1dea77a990a5L, + 0x54974a741998b5f5L,0x78968bd0304b02c2L,0x00000000000001afL }, + { 0xf07c9eb52479b8baL,0x90a4eb34b2f78c13L,0x8d4c1421d769cdf8L, + 0xc2ff257feac177e7L,0x5a7f84c740363d1bL,0x845c7d87cf3e6f23L, + 0x0801827661f1cc24L,0xa6f875b23bf25783L,0x000000000000019cL } }, + /* 62 << 476 */ + { { 0xf9fefec9d7a4cf66L,0x38ce98608bfc8387L,0xd72acfec1c10d914L, + 0xc98c23e6f2daea2fL,0x9395abfe41cec6a9L,0xedde92f2ecab3380L, + 0x403c12cfece67cd5L,0xdc77f38e72ef3b83L,0x000000000000015cL }, + { 0x05d31e27bdbfeae2L,0xeed3521c3cbc6102L,0xdae5b97d85b16c89L, + 0x2211c4770d03349eL,0x8de8a13fd5f6ba3cL,0x769676f194a76b70L, + 0xd7487982a0097db3L,0x39c43746da241827L,0x00000000000000cdL } }, + /* 63 << 476 */ + { { 0x5c311656b5797336L,0xe90cb3caa840d076L,0x20bbd61e2cf37453L, + 
0xa0d0e7e30487e7aaL,0x56ab5c950c73dd7cL,0xd239c650a42de3c7L, + 0xf6bcaab92e59b709L,0xfcd8f1277623641cL,0x0000000000000162L }, + { 0x6025552300cd2ba8L,0x95be73bfe5ad4751L,0xac7188bc800a9c4bL, + 0x7f30bcb9e920f752L,0xc9b970f03e122bbdL,0xb95efc15eb82e5bfL, + 0xed09e0d7aecce9f3L,0xa75e92cdb123af88L,0x000000000000002cL } }, + /* 64 << 476 */ + { { 0x7aaa8f37d3cb8e9fL,0x8f3b48a91a73d853L,0x783ebbe7ef285a53L, + 0x4f9d97291c3ef389L,0x130f276c6453da69L,0x76683273834facedL, + 0x026a3722af5af17cL,0xd588aa37f08a5e50L,0x000000000000016dL }, + { 0x81b6610bbbdc76b3L,0x406785dd563a6902L,0xe0755f6ced775283L, + 0x08d6c84753b16d0aL,0x48ebb570654f906aL,0x3f0c62b7184b373cL, + 0xd6bb91b6314d776aL,0xab9bfb3a224744a7L,0x000000000000012fL } }, + /* 0 << 483 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 483 */ + { { 0x5abd70212d7e7be5L,0x5ae72dd6b01cca76L,0xba2a179499fa6345L, + 0xbd22d699b5d0cb9bL,0xde336adffce9c6b9L,0x602dc43cfb816436L, + 0x9d86318f33b17e66L,0x43f8ee75dcdd628dL,0x00000000000000e6L }, + { 0x1bf13bba3d223930L,0xd77bbb123b6b0034L,0xa4ea21c4be34bac8L, + 0x95e5f221aab61589L,0xb4494eb79d411c28L,0xa423c6f5cbe6faa1L, + 0xab882ee7a6e4e735L,0xb36eacdb546545b3L,0x00000000000000c2L } }, + /* 2 << 483 */ + { { 0x651b47c813b42e63L,0xe705441316bee2b6L,0xdcf60ca386a71ac0L, + 0xabddb357e3baf787L,0x99d631d181a2db2dL,0x3f1e3c7fc8c9b834L, + 0x3b30fa1861adf4d7L,0xd1d8d98014b9f46bL,0x0000000000000139L }, + { 0xae6f6b72f6af1252L,0x0fc583984ff8a00bL,0x1b2c0cc5a81fd641L, + 0xbdcaabe6e68b6fbaL,0xd1af82fe70b14e44L,0xe9c35419e55ce735L, + 0x3c83d9dc01dcefb9L,0x42604200fb5824c9L,0x000000000000011fL } }, + /* 3 << 483 */ + { { 0x83b863c448455ab7L,0x76d7a946b45ec4d2L,0x3a26946f1a9dbb92L, + 0x3912023daae7ac94L,0xa55fdf4ab43e7baaL,0x0b84464b5164d35bL, + 0xdafbaecd58ac3925L,0xcf5aa8cb1f7b30bdL,0x0000000000000089L }, + { 0xb3fa2268858cb669L,0xd325b4a6074aafacL,0xe364297587e39973L, + 
0x1d9aac78d8125423L,0x205ae65940a7d2d6L,0x102e8f88220f449bL, + 0x5b98b50f429d6d97L,0xef341ac978674bafL,0x000000000000002dL } }, + /* 4 << 483 */ + { { 0xc4c1d5064d245501L,0xc7c62e341083602aL,0xd5d6895961f152c7L, + 0x3b142866906e6f00L,0xfa4d7aec3468335bL,0x6c36da5cf4687f7fL, + 0x6643389b0dc658daL,0x99cb5866d8447de8L,0x0000000000000037L }, + { 0x0fecfbab807a703eL,0xeca0bd01ee68c875L,0x375589c920d8bdd0L, + 0xc0fbd62f14025692L,0xbf5a0c1ee6b70d27L,0xf9630aed9cbbb1aeL, + 0x9776abebba99b392L,0x883c8b53c97978c6L,0x0000000000000095L } }, + /* 5 << 483 */ + { { 0xe79aa3fb79b1fd27L,0xf4613c54cb46f207L,0x9a56f2c3b0a5bcd7L, + 0xf6ce188a5ecd38d4L,0xfd52c36f90250becL,0x182bd1b97d054438L, + 0xb1401bdf2586edc0L,0xe147512afa989224L,0x0000000000000044L }, + { 0x68fea57855a2d55cL,0x547f9129b57a7470L,0x4627c2ca8df3b0d9L, + 0x4ef699bee8aad7cfL,0x62677d53f996c4f1L,0x7f79982e924cf349L, + 0xbd5db9da4d785216L,0x2554390fe96df8acL,0x00000000000000baL } }, + /* 6 << 483 */ + { { 0x753ce74ae22f1171L,0xae29377f15f959eeL,0x90d8c9c5dace4bdcL, + 0x45f210f7ab4dcffcL,0x32135e10e1283166L,0x23a0610932072197L, + 0x4d3cccb74d767572L,0xccdc43a3807a50d1L,0x00000000000000e5L }, + { 0x6a5534206880d712L,0x92b60f04dd80c4cbL,0xf3538ba2dcc9d35dL, + 0xd3166fafd0787305L,0xd2a1b177b7d68560L,0x847aaf905e9f7915L, + 0x210046e55b83cc7aL,0xd26ce0bdb4964926L,0x0000000000000179L } }, + /* 7 << 483 */ + { { 0x2a569603208e6bcfL,0x8d08f4df9f8f8597L,0xc13bef95f9d62574L, + 0xc17f7fae4fdec97bL,0x7810ce14a511c162L,0x10fa0d23941c7cd4L, + 0x2f41e9f76aff6e68L,0x70665ea2ffb8f648L,0x000000000000000aL }, + { 0x0b5f267fff4baad5L,0xf1f79c10cb85a780L,0xd2003dee61e33305L, + 0xda556c2fbc3b90cdL,0xbb799f45e116a936L,0xc6183fd80ce85ebeL, + 0x57b3224fa035eef6L,0xf6840485e96bc901L,0x00000000000001baL } }, + /* 8 << 483 */ + { { 0xdcbfbb8be5f5826eL,0x31fbd1c02954969bL,0x452c254d09536e44L, + 0x6b733e3b36cd821cL,0x8fdfe02f2d8260f9L,0x26163d810e2a44e3L, + 0x23e435a33e2e3a4fL,0xcd8529b5777baa61L,0x00000000000001c0L }, + { 
0x634dd97aa246d14eL,0xdfed13a7d443a978L,0x4668e97a8752bec5L, + 0x1f52f42826a0f8e7L,0x0d69a760308ddff1L,0x54602bf7591ed51eL, + 0x223e342b3bbab1b8L,0x1cf905cbd5003ef7L,0x0000000000000132L } }, + /* 9 << 483 */ + { { 0x070339e46d0ba610L,0x57de05dccb0e3fccL,0xb4f1cd37d61a1266L, + 0x92ca341caa4f2316L,0x4ae641fc258976faL,0xdb7cb7cb079c8bcfL, + 0x88c11cb58ec68df1L,0x42cb801909df9b8bL,0x0000000000000198L }, + { 0x3cd7238e57aded5eL,0x0773bd3cf50528f0L,0x4c2b4f24c93b6753L, + 0xcbd40a940481c43dL,0xf37fc077262905a7L,0xf079fd24bcc8664cL, + 0x6e7453fd34a874d0L,0x41d6bcfaae635cd9L,0x00000000000000f7L } }, + /* 10 << 483 */ + { { 0xdebd96ecded7931aL,0x409a230cdfdb9955L,0x0eae02d5b65c48e6L, + 0x3ab3dff83515edc3L,0x0440a0c53d119f82L,0x7601d278f7b8048fL, + 0x3e0da58153655d73L,0xa04162f342c66d32L,0x00000000000000b9L }, + { 0xdc305022e0d030b2L,0xb812c6a1af7d339eL,0xf845455f91973a30L, + 0x3a40c4d454ade939L,0x7d5b0f691e0972bfL,0x7eadf7a28839861aL, + 0x87a2e7067d36e5a8L,0xa25554c5d13c7670L,0x0000000000000055L } }, + /* 11 << 483 */ + { { 0x2f313869d8f59e86L,0xab16466f00c93e40L,0x534952a8f5901890L, + 0x597b8bbfd3e7e334L,0x2a1d5909bf2e93e8L,0x74910268a78e52c2L, + 0x3c2077e0e09c8fbcL,0x36e2b7500dd6ec37L,0x0000000000000168L }, + { 0x21c2d362c58660b7L,0xcf97b440205ece90L,0x81e5da200af6629aL, + 0x285fd88ad30ea06dL,0x9da2d653cf67478fL,0x6cb24c21fc1128d3L, + 0xc32d0dc119cc5d3aL,0xffc4fcd6ca1c7866L,0x0000000000000113L } }, + /* 12 << 483 */ + { { 0xfc6e3abd7e791a5fL,0x8b60a8b7394f9865L,0xf07f164cb0f31984L, + 0x3b0f7be135fbcdd5L,0xfa914a53c372e0ecL,0x6142002a0392f749L, + 0x8f7ddc3b952ccab5L,0xcec17ddfa5e85e1dL,0x00000000000001a1L }, + { 0x376e182e3bfb6450L,0x34275e3b5874742eL,0xc6c3bbc436fd4d5bL, + 0xd5359a5f17024a3fL,0x690901d1be783d99L,0xd98f0714be92b949L, + 0xd7ff8e0ffa44b971L,0xae0b1ea4a248a978L,0x00000000000000a5L } }, + /* 13 << 483 */ + { { 0xf6f8b353b1fa4f9dL,0xbc0ae01304d67400L,0x95e7d657984c5864L, + 0x6e6799c1923cd2bfL,0xa98f35f7c07c86a9L,0x5149ab912c79f53eL, + 
0xb8219f4e278fa756L,0xfc6562d707de4775L,0x000000000000007eL }, + { 0x92731c6de48adba4L,0x479c5d6bec0f7335L,0x8cf900f0a1d5e919L, + 0x4bf6c08bfc45f29dL,0xee14bfe8c59cdc1dL,0x134bf2088cb8a93bL, + 0x8d9f4d2221f26bfeL,0x602528a79bebee86L,0x00000000000000f4L } }, + /* 14 << 483 */ + { { 0x8ec2bc822c79e1e7L,0xec5c692cffb8b4d4L,0xa0d4d5090846b9f3L, + 0x41eaccac33456629L,0x43e23429d484f510L,0x0bf959d35018896dL, + 0xa817bfedf257daf3L,0xcd04bc49a1fb8e26L,0x00000000000000bcL }, + { 0xda060441d92bced4L,0x278c04c95d4ef4f0L,0x8b6f0231396b753dL, + 0xf68caa504533e041L,0x49d2f5f1d52bc9bcL,0x83ac55150141e329L, + 0x453951c6c866b038L,0x09b032d479acdc69L,0x0000000000000196L } }, + /* 15 << 483 */ + { { 0x067e44283cbbdce6L,0xb8ebf52529eab785L,0xc28bb79079474bdcL, + 0xc29cfe635c83856dL,0xd95ac7b911322ac1L,0x8e3d2b8879c119f1L, + 0x9870c2c572896608L,0x2adcb4c763374d0cL,0x000000000000005bL }, + { 0x2dd8023e63551e79L,0x5ae537db0a8e6116L,0x4fd274598917909cL, + 0x5e805e1cb79a58c9L,0x12d0b7a853b21d48L,0xf80dca62cba9f1baL, + 0xf90fe39681b2231fL,0x552083b960bab640L,0x0000000000000170L } }, + /* 16 << 483 */ + { { 0x1ae0819be9933309L,0xdf8bdb84232a74bfL,0xe0cd42ac756981f5L, + 0x658128186734b942L,0x6b63d0a2f4accf5fL,0x4dc5d87a55294162L, + 0x0ba927b230bc9a8aL,0xcdaae01ed8e36a78L,0x000000000000013bL }, + { 0xbb632bfe8e1f565fL,0x29ebfdda53e94443L,0x1f98a0d3e10d165bL, + 0x9f82830c637c795fL,0x3b33128740e0c47bL,0x8940d375f4bee639L, + 0x05cc4dc38b8ad839L,0x3b4d9dbe10d505eaL,0x0000000000000117L } }, + /* 17 << 483 */ + { { 0x7549c45aa2216b1aL,0x221596f16518866eL,0x4f8ca3c1fe5fd8e9L, + 0xb78df22a96363b34L,0xd189cef05ae37982L,0xc157b84591a02318L, + 0xe1b74e4197511a4aL,0x5877f96c53320c2fL,0x0000000000000025L }, + { 0x12689dc6861c57bfL,0x598b361ad283a108L,0x939b82fb7fa13a89L, + 0x311cca2a36cfe81dL,0xec335b17bab0c07eL,0x440b8c5c65642a12L, + 0x813884de04a2ed28L,0x7d19429e87290a2bL,0x0000000000000069L } }, + /* 18 << 483 */ + { { 0x5753bbb8a848cf13L,0x82b32a43d90c938dL,0x7f5770d7e6faa6e2L, + 
0xc7cbcaf37d684cbcL,0xcf943b1b2e7b48a1L,0xd1a7ee6f8d5c5321L, + 0x1ab34e2d15cc06dcL,0x27d2cfbb74b5a5cdL,0x000000000000006eL }, + { 0x022c5255ad3c3bf9L,0x3026a8b4f744d697L,0x5cfb32d674b6a2ceL, + 0x8df70a0b5e50e51fL,0x8c9156a9ace784b5L,0x2efad6cd487c3a9bL, + 0xf5e804c0aa2cec88L,0x5780cc59f9bd480cL,0x0000000000000035L } }, + /* 19 << 483 */ + { { 0x89c3d30477369722L,0xae2ecb09201a68c2L,0x8d5769fecab6da8cL, + 0x6782778b33aa7ec0L,0xc74ec1254fc65e80L,0x72cc87bf6bdfc390L, + 0xc8689471ddc0391fL,0x6f58f3f3482baac6L,0x00000000000001e2L }, + { 0x27ccce608b129566L,0x41008f6064f9fa40L,0x2aa9067f4fa0a001L, + 0xa1f2ba9a46716791L,0x2d4d15cf3a5e32e4L,0xdc75218aa4f0884bL, + 0x349e45bfcf86c131L,0x1e11015c3228a631L,0x00000000000001aeL } }, + /* 20 << 483 */ + { { 0xf240d36be4ab3d55L,0x7acb5bf0593c4a6fL,0xb7456194130fdfbcL, + 0x3d3af6220ec1bca5L,0x14a5cbd878b4f4deL,0xde12e1cd6c32e96dL, + 0x5063ff425b7bce8fL,0x37efaca1003dc72aL,0x00000000000001b0L }, + { 0x4274de57ccda7e54L,0xff66c962086c6d20L,0xca33121d4c9071d9L, + 0x97b4200b3e732b5eL,0xec371b1543208ba0L,0xf6aaeaece25d09ffL, + 0x226e14d2e18bab6eL,0xda5464f9459a3df6L,0x0000000000000130L } }, + /* 21 << 483 */ + { { 0x017af4517f7b8b0fL,0x0dfc68bca4a49a70L,0x789e4ac8e231f9d6L, + 0x20db8ef6065ede81L,0xb739cb55d638a77dL,0x79d863e1184c2b26L, + 0xca7312bbc0240acbL,0xf300fb15d3645326L,0x00000000000001beL }, + { 0x1d991e4e459dc169L,0x7000c7d42cd45ebeL,0x96e9b26038f97f29L, + 0x0c3f5c5fddb39121L,0xabdfe949839ccf8fL,0xa5c2a20559df3d2eL, + 0x39295c3c114de48aL,0xbf438537678d7b4eL,0x0000000000000158L } }, + /* 22 << 483 */ + { { 0x9e96ebde76178c94L,0xb30646eb53081e92L,0xb2541cf8f47b37ceL, + 0xf5899b4a72cdfe16L,0xe86f9bebbc9855dfL,0xcd97de762a3c7176L, + 0xe792c9ccad4af521L,0xf4efad3de059fcecL,0x0000000000000153L }, + { 0xad541c6a53a88fbbL,0x817ceac1fb7c79b5L,0xa4a04dac7daa33c6L, + 0xfa09f56ff3d2d674L,0x9f53ef31eec7d069L,0xad1e5af49211cd93L, + 0x0ff0a6dc959d77d0L,0xcb232def76ed9affL,0x000000000000002eL } }, + /* 23 << 483 */ + { { 
0x9d7f958a886f6543L,0x0cfd178d4b11f4f2L,0xbccf93cb83966213L, + 0xa733d7f47868f2bbL,0x35b2cefc80a83472L,0x791fd530e3108c17L, + 0xd8b2fc0e8366c15eL,0x4aa1a7a237d4c1f1L,0x000000000000014dL }, + { 0x965850c704d1e2acL,0x0cb5a3c0a6d83cc9L,0x1e354634a97bdc9eL, + 0xc40e3a6cc4361defL,0xf5383a2d1945be19L,0xe2b5c363e4245615L, + 0x685cd8c1716af301L,0xb0817184a59dc09cL,0x000000000000005cL } }, + /* 24 << 483 */ + { { 0x9134dac8966dd7c6L,0xb53909cb72bfde57L,0xa06f7191d2a4fae0L, + 0xb22a5d860d8aded5L,0x9b0869337ba88547L,0x6a027da5ca1b3e75L, + 0x55e8542182831df6L,0x52811cacca4fd543L,0x00000000000001d1L }, + { 0x9184b34252d1a260L,0x046f70f2d4ae87f3L,0x75f0132452052638L, + 0x9476a1609a4e8cadL,0x1485632621396b03L,0x381f263ff5af85aeL, + 0x8e77b921f7010305L,0x7fab4d8838f30a00L,0x0000000000000072L } }, + /* 25 << 483 */ + { { 0x1ba8c7879deee4caL,0x0de11c34b39ac162L,0xe3c13cf606577091L, + 0x4afa40f1069ce9c3L,0x8c4c9362c9588cb5L,0x70cefa89fd8388d2L, + 0x5b93bd0bae70801dL,0xbf19651b29e3f773L,0x0000000000000173L }, + { 0xcc716100d6635a0bL,0x785ab9e497770cd3L,0x97ad8f05f2d3d0f4L, + 0x9153f661210b85efL,0x6ea72d9be05e09b7L,0x674d169512a35919L, + 0x734afc5174d1756fL,0x685d097d4e4a7d3bL,0x000000000000011aL } }, + /* 26 << 483 */ + { { 0x586a5a4f92f845ffL,0x4181e8bf433b9969L,0x8ca665031bb30e35L, + 0xddb8880029ef7c76L,0x91ec943993a91394L,0xa102438734acf11fL, + 0xb31bab71e3a99420L,0x46671df0823803acL,0x0000000000000182L }, + { 0x4311612323967817L,0x10bcc7e1d83add78L,0x5cb65ce01b4d7ea3L, + 0xc204e7395fc7aacfL,0x138fd0eb0df86632L,0x5a7f3bf92920e7aeL, + 0x323b55da74fb04a2L,0x87a1cd2dcf6b4579L,0x0000000000000105L } }, + /* 27 << 483 */ + { { 0x99426408c2095c41L,0x77924d94d2203699L,0x63282e3b8ad5282cL, + 0x0e342f0a9c3dddd2L,0x0ab9c156579aa49dL,0xfd67a28d49baf18eL, + 0x9dec1fc0f0ff38bcL,0x2120dfdf91e8bd79L,0x0000000000000038L }, + { 0xc33a4e7791297f43L,0x90d0ca4fd1d21410L,0xf5e6111862b0ce45L, + 0xc99e5063601631abL,0x3d22412ec894eb2cL,0x78ce86202ddc8d51L, + 
0x38961c0bdfc1a41aL,0x2b435857e1b449f2L,0x0000000000000190L } }, + /* 28 << 483 */ + { { 0x7df95a3c1b3f5cf1L,0x3b30e0f2ea0077fcL,0x733880b50aabcc34L, + 0x2050bc2701193010L,0x2dd36def5c80fe3eL,0xf2e14b2be8e228d6L, + 0xf13a348574802ebdL,0xf08bb1ef17f57cb1L,0x0000000000000087L }, + { 0x4f81dad0a662456aL,0x8c645bb185f5b388L,0xa39acf0e19503f7aL, + 0x214d1956d7530999L,0x2dfb62dec339d2aaL,0x9c54f1a27a04264aL, + 0xde5e1fa9bb74e313L,0x3fb024216dcdac67L,0x00000000000000b0L } }, + /* 29 << 483 */ + { { 0x23b886a9393c3fe7L,0x7408b94f4b1b33b0L,0x01c0254af542525eL, + 0x4ce60b318ddf1982L,0x97a6fee7ddee6485L,0xa2f7d7b2a99f2778L, + 0xe685a50bbaa0ffe9L,0xbb82afef930aaf29L,0x00000000000000d2L }, + { 0x32e878790644ee4eL,0xf736c70517d83725L,0x44c56e15b3b2e87aL, + 0xd3eec9133baeb44dL,0xbc10fa0741ad6c2dL,0x26c8d0c696115e03L, + 0x8d826950a411254cL,0xdde6af743abd2faaL,0x00000000000001bbL } }, + /* 30 << 483 */ + { { 0x28702dc496ab1d1bL,0x2dbd79460dc11ba4L,0x0956696b883faf9fL, + 0xcdb23f5f227932d1L,0x34957d5e0de851edL,0x40065b9ed930f741L, + 0x1be825122138d1e5L,0xb9f0d75934c7c9b7L,0x00000000000001e3L }, + { 0xbaf4143b567ead92L,0x5eef3357fb9cc866L,0x4095892ccb122a8bL, + 0xa3e50ae814b8aa9fL,0x72024c42376f6b0bL,0x6c63ee2ddae219e2L, + 0xbba101782069c115L,0x3ba8834d7f96def1L,0x000000000000015dL } }, + /* 31 << 483 */ + { { 0x2f7beb7f56438c0cL,0x42a16fa2ab4f67d3L,0xbc8d85f6e40ce460L, + 0x3428e0fb2f34378dL,0xea54de6ddc41a2f3L,0x5ccf4b0d17ba5e5aL, + 0xe27843c9abf87924L,0xcf117ea6101dab80L,0x0000000000000082L }, + { 0xc340c90e1e82611fL,0xb894452a8e4173a1L,0x9a4b787ab9afa27cL, + 0x417736a5d0f14693L,0x5f1a49924ae95026L,0x1206bbd394734633L, + 0x5dede89ad6f4a4c9L,0xb766440df1f24b1dL,0x00000000000001a6L } }, + /* 32 << 483 */ + { { 0x8dd33d1c66bc31a5L,0x80a074959d19c1c0L,0xda13c485e03a1ee5L, + 0x316d85f968acfd38L,0x8617c80d0d9d6273L,0x6ddebb71f94d5f22L, + 0x1c3452038efd0a44L,0x7ca0b4067b3746e3L,0x000000000000007bL }, + { 0xbec7b15087436d56L,0x07cf1192a01f6c04L,0x047b37f6f73971b0L, + 
0x7e7d8b9b5ae1f9d4L,0x0e311b4fca1a9900L,0x5dc55f3d6a81fb38L, + 0xe257401a2956af04L,0x90c2ad09f66ae95fL,0x000000000000016bL } }, + /* 33 << 483 */ + { { 0x6f3e5a621a735a98L,0xf17c344e4c0f9811L,0x0985657887068d99L, + 0x3c9e47d0fdd2aac0L,0x9ef34869797f8a14L,0xbb27f6da4ee23bbfL, + 0x51b7f59c213596cfL,0x60aed74c54a4b339L,0x000000000000012fL }, + { 0xbc361782384a09baL,0x6fb5c273ea0b54d1L,0x41d2c9755423bc32L, + 0x5938aef020a6d38dL,0x07e0841a132dc550L,0x0b8fe98196944602L, + 0x77ce11d1eff12314L,0x13b30d3f01450ca9L,0x0000000000000127L } }, + /* 34 << 483 */ + { { 0x6167ce3b4c38b2f0L,0x78bfe46ccf1a74b9L,0x40519177b17eb1c4L, + 0x5379831fa2ea88f5L,0xc7aa54520f90d032L,0x1b77f1a1b1ad4c9bL, + 0x1456a598aa55a518L,0x638b62720114002aL,0x00000000000000afL }, + { 0x2c04235caac99882L,0xbf34e8dd41d65ef0L,0x49e51a8c79af103bL, + 0xce352f3a901a1a57L,0x00e123f6a49e5646L,0x351a25801d22efc8L, + 0x883f53b385bbb439L,0x7ad4f5539780ce59L,0x00000000000000afL } }, + /* 35 << 483 */ + { { 0x317db0ef6b43f86dL,0x187e452ec376cdd6L,0x1ee31d29b1c17bc5L, + 0xf0c4ae43e8a19f7bL,0x71525f553e9bc311L,0xe948de7a66d91aafL, + 0x947a0c70638f5b34L,0x8affa837e7c40657L,0x00000000000000e6L }, + { 0xc76f3535c3acf132L,0x6066128710d80c77L,0x55db52e39cfad0bfL, + 0x9d65e598c72d88a7L,0xffe8b7d5a016cceaL,0x73cd6e7bfcc198bbL, + 0x90142e63f7277c8aL,0x73d673ffedace584L,0x000000000000009eL } }, + /* 36 << 483 */ + { { 0xfa1c0bda6a639f04L,0x0d34fc8850a3c441L,0x0e2d322ad15965adL, + 0x700433af853b1f5dL,0xce7454a25cafcb5aL,0x2201bc0473b16a2fL, + 0xc178af49986e01feL,0x00383c44f4196a72L,0x00000000000000c1L }, + { 0x26951af44027cbd8L,0x0a64b5bd58133d67L,0x55c70f8bf9edd0ebL, + 0x43762ee7c4ebeb49L,0x5d5b9d8b33a6b1c1L,0x1446f8bcbdc002f1L, + 0xab85db78ce19c7e4L,0x4daf43d163f0ad8eL,0x00000000000001b8L } }, + /* 37 << 483 */ + { { 0x3afd28b85d94ef9fL,0x81257a58c3177e20L,0x38dfaf6c65eb35a5L, + 0xe6f3ef64288e980aL,0xb97158885cedaa77L,0xe99de3f8a7eb0cdbL, + 0x81059e958eac3ddeL,0xf37c0a32c9c83c67L,0x00000000000001faL }, + { 
0x1823b2fcd4f70ceeL,0x57762228e9c5bd49L,0xa17f38348a42a86bL, + 0x1cb50d437b41063aL,0x12a24600639b5b82L,0x70cead17dc2a9befL, + 0x8604e4aa0bcaa435L,0x8425f4b68890b5c5L,0x0000000000000059L } }, + /* 38 << 483 */ + { { 0x9b9bed42938f1c9eL,0x3d20941e97fd1135L,0x441f7e6ef64abe42L, + 0x1def7b3f0e07fdd7L,0xcc052cc7ba19fac0L,0xad9a823b94b5a3e5L, + 0x76a1332af3b3bf6bL,0xe7f4dea925d8294bL,0x000000000000008bL }, + { 0x1327bf2381bf2ae5L,0xda8af8f9c5ae36bcL,0x5a03be7b253aa57eL, + 0x16c4d1e0d8673996L,0xdf37a5c5ade6c913L,0x326cea3193cd3c79L, + 0xd5bfca3f8b2ce002L,0xde1ef94ad2966ee8L,0x000000000000016eL } }, + /* 39 << 483 */ + { { 0xe848a22c8a20f973L,0x7d4838938707d89dL,0x743e47af3a602f42L, + 0xa793999ee80346e6L,0x70c2abb4072cf57dL,0x10de7dd0f254926cL, + 0xac0465d0b3f05ba4L,0x37222ff549ebdf47L,0x00000000000000adL }, + { 0x84a14bc688843ac5L,0x251de0ffa7cc807bL,0x81df07af0e43fdfdL, + 0x6b799a7269b2f867L,0xa4a0f81fe9f40220L,0xa4d91dc17e0d6fbdL, + 0xec9940f238cb4a33L,0xb802f2e17771ac3dL,0x00000000000000c4L } }, + /* 40 << 483 */ + { { 0x82fb9f71437c9b3cL,0x8f892941aa7bbbd8L,0x8c0f30acb4aef227L, + 0xbcddb282afb0ffd0L,0xd6ac26bf9835b70cL,0x2a2483ff28422e87L, + 0x3839a2cbbffd1ff1L,0xa3c660355b9cd379L,0x000000000000006bL }, + { 0xe6dc8fb8e50a7d38L,0xdca90512836bfcc1L,0x7688a1b3e4e9ce06L, + 0x1740f1a5f17e5457L,0x697380917ded4120L,0x33d97aa918814d34L, + 0x530cfc7af7927f48L,0xe6d6c9ec929172b2L,0x00000000000001b0L } }, + /* 41 << 483 */ + { { 0x428216a6c2b89eaaL,0x80af7400c889421bL,0xdbf3e6d5ab483733L, + 0xa174a26fc80b04a0L,0xf43d9f300a0a9600L,0xf6bc9b055a36c398L, + 0xeec10bbaaecb2d9bL,0x4b6343a90db85b00L,0x000000000000019bL }, + { 0x8f3b5dc05b69a5fbL,0xfe36ce3f962d1949L,0xb1ed23e1b901c965L, + 0xa27d8354310d5c7aL,0x8d8cd709cf1c8261L,0xde318e922ec59425L, + 0xcd74892887cdd65cL,0xd5000ab0cc3df271L,0x0000000000000145L } }, + /* 42 << 483 */ + { { 0x70d80c68accc8cddL,0xf18270444d36dae4L,0x1de1d63c5c1b23dcL, + 0x3f692f0db6af112dL,0x90be5f8cd1627389L,0x269bcd96e3cb096eL, + 
0xdb3da141e52f7191L,0x020fd372236f6719L,0x0000000000000026L }, + { 0x32d2aa912e6b5359L,0xc69d58b61355b897L,0x019f986593f03d9dL, + 0xcd3ab51998cc76f4L,0x0a8d4dabe1f31d9bL,0x13fb607b605b601aL, + 0x3412ab49d1b0a857L,0x9d4f6cc56a59c8e9L,0x0000000000000024L } }, + /* 43 << 483 */ + { { 0xa4ff17fb90aa8d12L,0x4b11676ffe3b60e5L,0x4b6736d5520b491eL, + 0x111ed0242549df91L,0xcd9a9f14e120c6a2L,0xe9a1ab1a16e281bcL, + 0xa8c787c8464e5043L,0x7b8ba81beb71f3faL,0x000000000000008aL }, + { 0x9b0a74f9ee5f33f4L,0x1387695a7e5e34afL,0x378e761f008494b0L, + 0xa9497876b556e962L,0xaa58debd121a018fL,0x97f52405a924e778L, + 0x432f362dcd07aedfL,0xab22f4c6f814c3caL,0x00000000000001cfL } }, + /* 44 << 483 */ + { { 0x75a95d201ba19e5aL,0xc4ac280da7814129L,0x39d17e5a0053757dL, + 0x8b5fe8f5c29a489eL,0x2616039717aace25L,0x561c8f439652ddaaL, + 0x61822a50f5c95227L,0x072f8e5dc007cc38L,0x0000000000000152L }, + { 0x9ff7313cc50fad23L,0x8092be14bb03aff1L,0x922d11fc480cb8dcL, + 0x9c2524ddf22d2e95L,0x5b2ab1f4f8672817L,0xb77fe4669b52a810L, + 0x09ad0883f510b41aL,0x302b7c261b4138d2L,0x000000000000013fL } }, + /* 45 << 483 */ + { { 0x18e8c1d2b312d646L,0x00e3253bceda36afL,0x45d2d5d0752540a0L, + 0x87a38fc7a3d85d35L,0x2c3862aefd90f76cL,0x5af9307c79fe8ba8L, + 0x35214c7c8d686649L,0xa7fba7f225ef82e4L,0x00000000000001faL }, + { 0x66014ff18124515aL,0xde9da3353fced30cL,0xac96a6ac7f50e6ddL, + 0x0f96e01a39abdad2L,0x67a39cb6b46bae57L,0x596edccd2fc272dfL, + 0x4ebdfecfb15ca893L,0x3e5f5903e0b3c942L,0x000000000000006aL } }, + /* 46 << 483 */ + { { 0xb17ba42760f48d19L,0xa4ef46ab969dcc37L,0xee09b13a004629c2L, + 0x7936d9b1947f6795L,0x7b0167f8ed153962L,0xb3dd551c1b18a245L, + 0xb15372159f7517f8L,0x3f6cdd5d9f9aa1ebL,0x000000000000018eL }, + { 0xf72f53d22ff0201cL,0xf072228ac9d861c2L,0xe35876de8febce56L, + 0x5fc283033c8f7bffL,0xa12b0a81d2893b8bL,0x57ded7bde89263f6L, + 0x81494bfcabb2822bL,0xdb399fa9f2c8ec57L,0x00000000000000dfL } }, + /* 47 << 483 */ + { { 0xaefa7fb0808cd5d5L,0xe80f21e484884ca0L,0x9d0486cfae65b5faL, + 
0x4b54c759e8168cc8L,0xfd1c1a068ddd9c6aL,0x8c04efb02a8d8bebL, + 0x33d61fb78b5ab913L,0x4fafce55715f0cedL,0x00000000000000e6L }, + { 0xb9c352f46c0d348fL,0xc60a94bb45bc984dL,0x9f743556605c71a8L, + 0x176de676067997e0L,0x7ddd2aec7d72394cL,0x3de5a535411ef5c2L, + 0x818a2fb45e052585L,0xadf2f7ea83ce38d1L,0x000000000000018bL } }, + /* 48 << 483 */ + { { 0x15f956694d4a5461L,0x56f1b9232775dac2L,0xa4f77a80b3028c01L, + 0x4c7c09f84a216681L,0x1469e858444401beL,0x9bbe83c5f4fd909fL, + 0x3f52c7eb914d3efdL,0x769e218db97ccb6dL,0x00000000000001b4L }, + { 0xca757e3b80dd9fbcL,0x741fee50f5fe9f79L,0x8a7bb30cbab00224L, + 0xa33bcefe4ec16137L,0x62d0604c2fdaf3e9L,0xad66f9f66d7376b3L, + 0xca856f8d02c9ab0dL,0xaad774a6df87aa79L,0x00000000000000e3L } }, + /* 49 << 483 */ + { { 0xf446887e5b80ef17L,0xeef1494d936989d3L,0x6058c0405f82a6f1L, + 0x6f97d2f5624c5294L,0x4772ad1a59e08502L,0x41ca4882993ec149L, + 0x4890ef7309e3ec9eL,0xe1843ede041133feL,0x000000000000012bL }, + { 0x42d78ebb7b2f37e1L,0xd130e9a18b4ea8c7L,0x002cc021fa9d9adbL, + 0xf26e382454996fd5L,0x206eda2db4ee2ad9L,0x95e069ea3a1147e9L, + 0x8f1ce0085798b869L,0x96f99e7fc24b7809L,0x0000000000000131L } }, + /* 50 << 483 */ + { { 0x74bc731aa521a2dfL,0x2a7dad589ca35915L,0x24ba1b555ce46968L, + 0x1c00375723481711L,0xd1b381c6e1986244L,0xb2c4710846b9c4c0L, + 0x11ef4e90a2ed3152L,0x4c1c7086f0863dafL,0x000000000000003eL }, + { 0x795131fe66eb30a2L,0x0ea3589b94b68f85L,0xdc7e3fd070ef34d4L, + 0x14a5499a44df4e9fL,0x9b340e60a69e2691L,0x91c5e50752f35777L, + 0x7582716785c69903L,0x547f96ecd8536e21L,0x00000000000001a3L } }, + /* 51 << 483 */ + { { 0x96fe0761a501282dL,0x79bd2cbe04a97c94L,0xb2445622206c0bf9L, + 0xfa2bd5260a8c8b82L,0xc22acd0c7aa2cd47L,0xd541785b67bd2672L, + 0x5644081f97b909d2L,0x97ba1dd70779b879L,0x000000000000003dL }, + { 0x3586ceff5a15e6adL,0x347005ce76b70d67L,0xafe431e24ae4b2b5L, + 0x6de0cdb6183abbf0L,0xca058f19fb220de0L,0xb6284b1d1e1dbd82L, + 0x81c3068ccde36a43L,0xf45a2d76aebf2615L,0x0000000000000197L } }, + /* 52 << 483 */ + { { 
0x2fed71ab13191ef0L,0x4d093b4bcbc0201fL,0x04d6d7b64e54b64bL, + 0x335641e3afaeeab7L,0x36a8bf03f565f906L,0xe0a1162706db040aL, + 0xfa9564f345ce75e7L,0x62220cd0d1651390L,0x0000000000000172L }, + { 0xb86f0816fe6efb3aL,0xbe06aa2752ca0a50L,0xfe5aff01ca6a34bcL, + 0xf1d3f54920f73725L,0x16db75d8f822f3e8L,0x3aac55a586dea1b2L, + 0xb4e6e8f2e6a3437cL,0xa8239de5f86343ecL,0x00000000000001bfL } }, + /* 53 << 483 */ + { { 0x8d18bf08a3bf7997L,0x391cf08b6fade171L,0x7991a05de5ca321cL, + 0x23ece7ee451ce87eL,0xa6362e03a977f619L,0xade9302513907e13L, + 0xd91cc1f52e2e9088L,0x44e5824df58e5620L,0x00000000000000baL }, + { 0xf8e7816527b063b9L,0xbe43d2c36eb79697L,0xcf36bc31c97e543bL, + 0xb2051af018cd43abL,0x9ed9341dcdd344e4L,0x30701b2957da9489L, + 0xdad39c53f3b5d5ddL,0xa2116edac4c47541L,0x0000000000000047L } }, + /* 54 << 483 */ + { { 0xdeb91f90da9ab6daL,0xcb56bf2e406a3cd9L,0x58b62e487e8b0b3eL, + 0x48b2605634436be3L,0xd6cbd675edbbccf2L,0x84ca877854f9a5d2L, + 0xb63f26c8f06dc3e5L,0x9d447a727c79478bL,0x00000000000000c8L }, + { 0xbe4c5f568720253cL,0xd4079cf2a66930ecL,0x262b2dd86742b311L, + 0xf72c0a9437c98222L,0xb375cb9908db8f62L,0x034efaa36502ab48L, + 0x2f679b5720420bffL,0x0834c67ae890bc7cL,0x00000000000000acL } }, + /* 55 << 483 */ + { { 0x2f056bb90280425eL,0xc045a362804b751fL,0x8267e19fb9b62232L, + 0xd25162b3f6b83e5aL,0x4bd3f73c52872a79L,0xbea0f70e2c4d8454L, + 0x93630251bdc7bb71L,0x62421c79d6eb5964L,0x0000000000000152L }, + { 0xc6ef31326a4c9b1fL,0x10407eace4383f40L,0x40efa08cf6ba160aL, + 0x61931947438f0e09L,0x083519fa2b79a0ccL,0x1323a71d85b17aafL, + 0x3c745547b5841d46L,0xd3b26e8bb60d1f8fL,0x000000000000013cL } }, + /* 56 << 483 */ + { { 0xbc02d7b5a958b119L,0xa924be8546b327caL,0x5964c5e9b0a46409L, + 0xac8450ca97d92118L,0x693b41b24e926688L,0x03099cabb8d094b0L, + 0x02b779c117d76963L,0x92e5b73f8eb4ddc8L,0x00000000000000a8L }, + { 0x36c561713f20d59dL,0x12f7a1815115b8b4L,0xf6e1be6845535c8dL, + 0x7706701a41ec92d7L,0x7569d232e849b822L,0xf6adae238fe67de8L, + 
0xe43d347c6d2f8982L,0x734a8be36cc683e1L,0x00000000000000a0L } }, + /* 57 << 483 */ + { { 0x715334a7e1a910e1L,0xa92a800408ac113eL,0x8bb38d39d14c4a79L, + 0xfbc37ee2a2f2932bL,0x3e57033229575dceL,0xeafb135a22021b9bL, + 0x72efc8661066b3ebL,0x03b2dbf7b7719907L,0x000000000000014dL }, + { 0x7e7331c7f6d4e630L,0x1e13582ade86bf74L,0x18907e4bc3b9eed0L, + 0x79374dc6003080b7L,0xcbf4f614c62cd738L,0xccd41e459f20a97bL, + 0x96f32f37ffcf7fc3L,0xd1e204949781c875L,0x0000000000000035L } }, + /* 58 << 483 */ + { { 0x9643c5266c6ea0b3L,0x4702ff43bd69bee2L,0x2670cb7e6cc182faL, + 0xe98e50f972705286L,0xbec3b5a84fdd27e1L,0x72f1a20be3ef8bfcL, + 0x918033ccd2ba4245L,0x0e41de68e1b00b7eL,0x0000000000000127L }, + { 0xc0ed8187396b5808L,0xca20ef18132aab1aL,0xfbb1f99a81240c77L, + 0x2e7e99c89dc5003dL,0xe0bf0f3d599c63d9L,0x5caf5b2cf267734aL, + 0xa22e005dcd02332aL,0x4e905dc503980512L,0x0000000000000096L } }, + /* 59 << 483 */ + { { 0xd2169049caab35deL,0x6c5528742823e046L,0x9a79cc29108047a6L, + 0x97bbf70d9d8ede4eL,0x977167668212110cL,0x33e99c39371101edL, + 0x09d4af70ecaba866L,0x3d862acf46064375L,0x00000000000001acL }, + { 0xac064f7c335c3c36L,0x6407ec11432968baL,0x440593247aaaddd5L, + 0x3d2a9b26f25718f9L,0x03aa5c73ce3da559L,0x40154af74b8098cfL, + 0x14ef244fc84d6eb4L,0xce11d75ca4fb0dffL,0x00000000000001a8L } }, + /* 60 << 483 */ + { { 0xdf396a64435e6567L,0x515561def1b4f6f0L,0xa2f3488c17f354c4L, + 0xa93b55f2015e8c9fL,0xba4caa85f6e40c57L,0x540a0df42dc22395L, + 0x09d3208e4d93e932L,0x53902eee0a578b7aL,0x0000000000000123L }, + { 0x431a21ddaa811b87L,0xf7dfaa57e535f1f4L,0x76ec86761a516854L, + 0x914cdc51a55047d5L,0x8ca17868e13643f4L,0x2e0d6bbe3507d525L, + 0x614677534be4b04cL,0x66fa0a07d060337dL,0x00000000000000b0L } }, + /* 61 << 483 */ + { { 0x6d505ed813aee1c7L,0xadb253ecb510cac1L,0x799dc47e52df92e0L, + 0xe4a6f8667e721b9cL,0xcc769496928e3833L,0x77f90f407b701ed8L, + 0xad4605cb6c36f3d9L,0x396b31b5fd3dcf33L,0x0000000000000129L }, + { 0x01a28e7ce5e1e93bL,0x72105a72947e6131L,0x1ced05353a4700e4L, + 
0x3a2868be782e5b4cL,0x4b8a224e7e1f4bf0L,0xc984bd6aeb002e28L, + 0x75ff62558ae5757aL,0x573907a8f4b0f10eL,0x00000000000000a0L } }, + /* 62 << 483 */ + { { 0x7435bf81b0aba4d2L,0x2f9cf230a9304dc5L,0x8d2a2f7efef7bca2L, + 0x6a7bc8be83196a5eL,0xf1307d2a3fc22eadL,0x40ec9e45e1063531L, + 0x663d9867fefecd17L,0x0b271efb3c0bbb6bL,0x00000000000001c7L }, + { 0x7c694979933db66bL,0x962d1c3acae9cbc9L,0xbbaa0d5b73298f99L, + 0x950dde19bf0c7515L,0x3689529e6f23e672L,0xe9ba0a528e7c9720L, + 0x98841855b3d3fb27L,0x9fb4812a1b62a7e0L,0x0000000000000026L } }, + /* 63 << 483 */ + { { 0xffc68ca0becea497L,0x67aad6d19361196bL,0xf73b3170f64ff2eeL, + 0xd6cd04b308cf964eL,0x6f025d2f2af74054L,0x6a75708d110f5de6L, + 0x459a54092f314ddbL,0x8b546498a68bdd6bL,0x0000000000000053L }, + { 0x2ecc3c54d94ad9c3L,0xb4aa6c1674d8ee26L,0xab443dd9a6e38149L, + 0x1fa9aa6983111140L,0x416ce37bb0d97e03L,0xb4058478f2437c90L, + 0x8fca985139e81813L,0x0a0454b3c956b700L,0x00000000000000a4L } }, + /* 64 << 483 */ + { { 0xa949016c027effaaL,0xd4fb4288e691a213L,0xd18a53b17fcdfd96L, + 0x6808bf4c0ce4cac9L,0x7e7dc551bed1f520L,0xb84f242f2db78ffeL, + 0x7e6d15d34261fdbdL,0x836f8254a8ffc498L,0x00000000000000f0L }, + { 0x029233daabe4e4f2L,0x9617f000d0327536L,0x444fdc65de62e889L, + 0xf9170a9969512dc4L,0xa37edb8d4e765028L,0x7c003ad00852dc5dL, + 0x628756447cdf14efL,0x87ae7bb59b509913L,0x00000000000000a5L } }, + /* 0 << 490 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 490 */ + { { 0x51199e42088a01d8L,0xa9c970190e6c6197L,0xcc8bba8aec9f00dfL, + 0xdb0e728aa3534e0aL,0x4f130e04b74d62e5L,0x3e26abba0368bf53L, + 0xf97571a599d4743fL,0x64863a87c7eb2500L,0x000000000000003aL }, + { 0x0cbb2434a3d02698L,0x3c37545572eb32aeL,0xf8555f578806b951L, + 0x11215434ce2dbe0aL,0xf0d7a7f00470fb38L,0xc4e0ad472707cb8eL, + 0x4caef200c03c9c3eL,0x5ba3893846612de4L,0x00000000000001b2L } }, + /* 2 << 490 */ + { { 0x4f3c3bc3a1e703a9L,0x134edac80f4d9e8bL,0x55da5480fe4727cdL, + 
0xa8f98d584c37a7b2L,0x06bc173cc5f9592aL,0xd1f2103c5220ee93L, + 0x956efef67acf01bbL,0x55746c426c878b89L,0x00000000000001a2L }, + { 0x1927395435dd9b7bL,0xb81ef6482a61252bL,0x508f2cb818236f94L, + 0x655ec86d31b04f92L,0xdc06ce6a381743aeL,0x46059e3637e92347L, + 0x6dac815fbe6a29d8L,0x4b4fcf720b369888L,0x0000000000000181L } }, + /* 3 << 490 */ + { { 0xf8106d6fdb1a83c1L,0xc55816668a3b992fL,0x30be609761e65036L, + 0xb7e260eded1ad38fL,0xe51ec351f98254fcL,0xfd1e301827f9210fL, + 0xc209ab2533522612L,0xf68c7a487baaa1d8L,0x000000000000007cL }, + { 0x6677dc5814fc9ff1L,0xe57b02bb6a95a729L,0xf520b6157618c694L, + 0x61b4174f60b020afL,0xa0f2cb83185d0531L,0x647a5daa640f434cL, + 0x1b1ea4352d853d74L,0x222ea56cd31bcdceL,0x00000000000001bdL } }, + /* 4 << 490 */ + { { 0x6106402972e2518aL,0xfacd33fa7b5fbfc1L,0xd50f77717632ad67L, + 0xba8688071f0d2282L,0xacacd91104d90803L,0xf94b3e0f51496ee8L, + 0xfb8a5406ece3c365L,0x96f5c5e7620400f2L,0x0000000000000109L }, + { 0x7389f3dd36988cbaL,0x4ab5c460762ca5f4L,0x9b96cf82b5cf8989L, + 0x2ffa0db5158a4c79L,0xb5821b1a3a3d24f4L,0x17771cb7f8c464d0L, + 0x9ffd764662c59a09L,0x0eefa35fdc162385L,0x00000000000001e1L } }, + /* 5 << 490 */ + { { 0x020e0b3723563c41L,0x51cb4e065011a829L,0xd4ce6fbaac22f4c8L, + 0x953bdb3db2415370L,0x316a668da8bbc065L,0x6b97ad5dee418011L, + 0xc595b7d16c4bab89L,0xd5a5e27163504ba4L,0x0000000000000104L }, + { 0x9325755d1808ab7cL,0x89bae8ad5bc9c781L,0x690f475bbe6cfdb1L, + 0x12c3ce21a07a4c3fL,0xf71a959da04a872fL,0xcf877db397297763L, + 0xc7549bfa18e0c680L,0x98e9a6092fc07c3cL,0x000000000000001eL } }, + /* 6 << 490 */ + { { 0x9a672f1d10413087L,0x411e1e5bf1f3ff42L,0xe82c5986273d35f7L, + 0x92e879a6b685dbadL,0xdecbf5453ddb51bbL,0x9ff90e3a39eec3d5L, + 0x08a257ea40810789L,0x62bf063097c15f38L,0x00000000000000a0L }, + { 0xadbc2699525f4599L,0x431b2afdef56cc0eL,0xbbf0c5497df748d6L, + 0xf68f2e050031a3d8L,0x30e5621f74d09f61L,0xfac58606bf9b7376L, + 0x83216950be2ea8e4L,0xf039db24af8340a5L,0x0000000000000187L } }, + /* 7 << 490 */ + { { 
0x741ab54678b66b18L,0x1c5f59b4e9fa653aL,0x9e0085be4e2c1d9eL, + 0x69c5ed9c89b83f53L,0xee80d5102b74ebf3L,0xf1ac08ba53fdaee3L, + 0x89551a3689f58574L,0x55298a412d97b54aL,0x000000000000000bL }, + { 0xc12b2160a54c169eL,0x557ac5fad1ba765dL,0x8fc94a99af6d1ff0L, + 0x01ffeb4f50587c6eL,0x030b43f838ef4acbL,0x0f9f1e53091d7b0fL, + 0x2a6e11c6edd00a6aL,0x6a611260fa85f47aL,0x00000000000000a8L } }, + /* 8 << 490 */ + { { 0xfa050bbd035b25a2L,0x2a14076e36ee92fbL,0x3e95a7eb2f514218L, + 0x8a67615dea3b77d3L,0xe2764f071da9263eL,0x47bbf1ff92b51d79L, + 0x82cd065659df9eacL,0xc80bb0f05e020e6fL,0x00000000000000b2L }, + { 0x7dac49e7a7560919L,0x7238fa6ec82b9da0L,0x3ffcf645ff7cacecL, + 0x72087198be9302ceL,0x13094b5c513ba920L,0x8cca51f343eb77deL, + 0xeffe0e7d60b89283L,0x036368bfdf9075deL,0x000000000000019dL } }, + /* 9 << 490 */ + { { 0xae445f227b4593b8L,0xef35356ad4842ce6L,0x2db43c86bf3bac89L, + 0x414690ed7dc182c6L,0x84a514a50ae14289L,0x5ecc2b3c805b103fL, + 0x0bd5f847fad21f5bL,0xee1dd686b358a036L,0x0000000000000000L }, + { 0x22052b302fee1645L,0x84910ab0d993b563L,0x58431b24c62691faL, + 0x0d573a5ad696d720L,0xdf3b69ed9c571a76L,0xef2bdca8814c3185L, + 0x35e2a1ce22a6530dL,0xc3d59a8c72873a48L,0x000000000000013aL } }, + /* 10 << 490 */ + { { 0xea12baba36c07770L,0x7149b07d7aa338dcL,0x7dd392b45d156c18L, + 0x84a12f7cc4365ee0L,0xb44e45db4084be8cL,0x5d54b2fd5fe57a77L, + 0x302ac5bb71c43590L,0xe66a79c59fde373eL,0x0000000000000134L }, + { 0x2bfd7beab5bca463L,0x928522e5980a8823L,0x00a2c869b924694aL, + 0xdd8a185649d07554L,0x16b0b4c34cec885cL,0x37f1fc54599c8460L, + 0x7d27418a7d607e4aL,0x8fb68d5ebd28f94aL,0x000000000000004cL } }, + /* 11 << 490 */ + { { 0x9f19c16c46720d7aL,0xf378e65e2c7603f1L,0x4767d22438b49a6fL, + 0x54745d098b03e0e9L,0x4b04b342d735cda5L,0x4f2b0ea305c600f4L, + 0xa951a8762c6442daL,0x306df593775e0120L,0x0000000000000114L }, + { 0x626a917bf58d9acaL,0xcb451ec176b10f78L,0x93da7a771559c731L, + 0x7fbec23ebbdac761L,0x5fdd3a75a36dfd28L,0xbf715077d53ec252L, + 
0x0b7d2f675fe204ecL,0xe2d696507278bc38L,0x0000000000000008L } }, + /* 12 << 490 */ + { { 0x2ff0f1cbea39d3beL,0xa756a96ec7f3195bL,0x1dfbdedfdeb42713L, + 0x7cb9e1be4a87567aL,0x64541aa30d2cec73L,0x8eec22690a0327b0L, + 0x967b1bc10c157f20L,0x2a6f1a402d8969e0L,0x0000000000000181L }, + { 0x42547f4cc471c0cbL,0xb990b1a2815ef0f1L,0x672398cbfbc29aabL, + 0xfb7e649b104ac38fL,0xf0ba1354e9cf1e5aL,0xb0c191da2cd48a07L, + 0x1fc70fcdb41854f5L,0xf11295462504091bL,0x0000000000000195L } }, + /* 13 << 490 */ + { { 0xac46f9b5cb50100dL,0x515da1b65497f79aL,0x5e5cd6053fdc142cL, + 0x40b22f9fd29696b7L,0x6f54de30d431a575L,0xf031047cc22eccdfL, + 0xb93ab0eb3686ff2fL,0x2916ce36d93d864bL,0x0000000000000060L }, + { 0x09309e3b78bcbe2dL,0xedaa63647012435cL,0xadca8f2837274001L, + 0xaf2ddf5fb46f8b32L,0x1cee73d6478e7979L,0x76de95d86428aba3L, + 0x13e5f39308dd313eL,0xcff92fc04cf616f8L,0x00000000000001b5L } }, + /* 14 << 490 */ + { { 0x3546089d84333088L,0x60abe80c5c2a72d2L,0x1c1c8e6a1698cfadL, + 0xcd2fcdddc6364862L,0x858e2049539275c2L,0xf238daa1d1452d8eL, + 0x6c50daa23fe94cbeL,0xebaf801da026f28aL,0x00000000000001f5L }, + { 0x28bdb6603bf45e6dL,0x44465513fedab00eL,0x0a86e5a08f0bf163L, + 0x6d14d86fc43eca4fL,0x0c6eaaad4aebdd05L,0xb974e68bd4a58166L, + 0x7208623eb468b01cL,0x56cf14ab87826367L,0x000000000000009aL } }, + /* 15 << 490 */ + { { 0x51531ee8e746180bL,0x43121900720c9cb4L,0x2e2dd6ab3e0d0ed3L, + 0xd8c964b4d1bb19beL,0x693688ccbf410642L,0x5cc38a4f81ef0113L, + 0xe4564abcafc1e72aL,0xdf8f203c9c2ebf93L,0x0000000000000099L }, + { 0xea13eb46763e5b3bL,0x945069aa4cc61cddL,0x3758dc5ce40e7f01L, + 0x7ec20717b8a812aaL,0xbc5492e988dd7752L,0x993eb9cc2d7336f1L, + 0xe10cbab49e4ff77aL,0xd4ca5b63e32f8934L,0x0000000000000197L } }, + /* 16 << 490 */ + { { 0xb8a3f6bda1bd56ccL,0x3b8f16bd2e598d13L,0x304cd218dc92bf6dL, + 0xec30d12dac0b947eL,0xf7fa65eccf12510bL,0x9c45d10c7f2df1dbL, + 0xaf49aac9cc89cbbdL,0x7771c58aa3936ff0L,0x0000000000000141L }, + { 0x486b202acd4e00aaL,0xd4ce59c3e9cc0cffL,0xdc299e6110dcc4f2L, + 
0x64f83481559569cbL,0x005fda65297ad136L,0x929eb2207cad3258L, + 0xfd85773d62647bf0L,0x776fbaff177af6a3L,0x00000000000001a4L } }, + /* 17 << 490 */ + { { 0xe79e46cd5fc30347L,0x674285a267e94f51L,0xd31d7d420c5dd942L, + 0x7172dd7f3a511c36L,0x765b80b7db09bf5cL,0x5c13a3197e80a403L, + 0xbc4af3e874c08746L,0xe103ca58fd80a7f5L,0x0000000000000025L }, + { 0x8ca87d3999ca3f40L,0x9db2fd4e540cb22dL,0x757103a3d792fc7eL, + 0xffe2bc304dd1afbcL,0x9d12a8273ed96ed3L,0x2e3ce176b658113cL, + 0x95938a67fc9f6e7fL,0x9794212ca41fb2ddL,0x00000000000001d4L } }, + /* 18 << 490 */ + { { 0xd361033049be3163L,0xa62047a34d55201bL,0x6a4e87786f886ff0L, + 0x604ce4c96b10def2L,0x786a6da1db90c42dL,0xb8d75e55c29e737cL, + 0x1c19c2d0b9a6bf51L,0xe444d64f8fc3a7a8L,0x00000000000000d4L }, + { 0x7853870ac196ee6eL,0x1be278b5c127bc47L,0xdcb5a00c50ac1962L, + 0x37bd8ee5ff0e97bcL,0x88847dbd1907b331L,0x66c67e868ec11029L, + 0x0e664bb33a23b51dL,0x3b9552ac67489da6L,0x0000000000000185L } }, + /* 19 << 490 */ + { { 0xb21548262e25f5c0L,0xd2ec6bc2992d921eL,0x2dc4a7a869093ae0L, + 0xfa7d331b63a7e7daL,0x625d366c96e835f2L,0x796cd894f83cce59L, + 0x296bd295152b78f7L,0xaa68fb2b0807b951L,0x0000000000000150L }, + { 0x7f6be3866d0cd79dL,0x04e17e832b100dc2L,0x34edaf1a36c37de2L, + 0xaee7d6ac66758307L,0x0cd64d977e13f95dL,0x94ec5730a07f1e19L, + 0x5bd6888ac4aa4311L,0xc81682ecfd0c4555L,0x00000000000000b1L } }, + /* 20 << 490 */ + { { 0x49082b82f794bfedL,0x090ad58da7508ba4L,0x016c6c61abf36de8L, + 0x69744e76c17d97e8L,0x64ba471f5087cce7L,0x45bac2bbf3fd22daL, + 0x1bbc08f0e8f6671eL,0x6e45905d268427beL,0x000000000000007cL }, + { 0xfdc632d4d810ab70L,0x2d68f73cf9546668L,0xf506ef8b178148abL, + 0x95db78df162c23c7L,0x4df86cb933dbae55L,0xe47825b77d81edc1L, + 0xbe78419056e1496eL,0xd707fa3a48f497ecL,0x0000000000000008L } }, + /* 21 << 490 */ + { { 0x02255e039c52c74fL,0x9d3fcbd7f200eb00L,0x16aa73a9acbf4abbL, + 0x70e1ca461687653aL,0x2756e86c3bab9f5aL,0x4443f19c6e3180d7L, + 0xf6da43fedebf5b52L,0x8c4901ec7788d048L,0x0000000000000172L }, + { 
0xb9d9e5d51277636aL,0x0277a4d11a31a1f7L,0x5f4b0a7c948ccda6L, + 0x1548a1eb9a6c4ddeL,0x8ccec5bceb7e26b2L,0x955a00b8ae69a2f1L, + 0x4cf07abc10fc1875L,0xebcffdabf4395ff8L,0x000000000000013fL } }, + /* 22 << 490 */ + { { 0xac56388e6483d8a4L,0xfdfe4569e227c038L,0x450294e22eb72fabL, + 0x88409dce0fe1329bL,0x29fcce43fe7f2e69L,0xde4876f54b1a398eL, + 0x3801bc1a201ae634L,0x0fb643e1c7b52ec9L,0x0000000000000126L }, + { 0xa11b43e67ca49857L,0x23d4c95c7a771e14L,0xbe8fcfac9bae2b51L, + 0x777f2e78be5a50fcL,0xb8cb4c6fcb75d302L,0x270687f1d494af8dL, + 0xb2c986981e494043L,0x2b6e60e78a8179f4L,0x00000000000000b0L } }, + /* 23 << 490 */ + { { 0xe6bdf3b488959264L,0x5542a7fd644ce746L,0x63ab33f2dce11936L, + 0x3553de8e474efdefL,0x8af61691c83e85a7L,0xc884742bd56fe2c5L, + 0x3f040aca37f03427L,0xb60c9b41263319d9L,0x00000000000001daL }, + { 0x2c57b645883caaf9L,0xcf22170bd9ae6a66L,0xd9df329915b5c403L, + 0x9a8e8a4aa5c81461L,0x48a6ea41145a6d0dL,0x82539f6ad425ace1L, + 0x79d9a233ad3a89f9L,0xc7f8856bdc08a1e9L,0x00000000000000b4L } }, + /* 24 << 490 */ + { { 0xc2ec5aee77528da4L,0x3c8c6f50b12e7da5L,0xe36d0b4775e8fd04L, + 0x49affce87e0189c5L,0x2c23922de166e234L,0x44f5033a7e52c62fL, + 0x1d3b1a480e1dd67dL,0xc530693580ff9f3aL,0x000000000000012fL }, + { 0xfacfb18bf94695a9L,0xda2ea3903a8c9298L,0xf8a29d06b18c310aL, + 0x10bee839046bd445L,0xa78643a0094093bcL,0x77cd1796a2c10fcaL, + 0x5fcfed6a5ac6c799L,0x8331b5b782907caeL,0x00000000000000a7L } }, + /* 25 << 490 */ + { { 0x782b00c9ff08cd68L,0x3bebf3ecb5c82844L,0x0db519c634fbe0edL, + 0xc6d0b96b80968dd2L,0xca8a16449fd14732L,0xdbc7ba82770939aaL, + 0x57b96f47947233f8L,0x94cc0ccd686cb7d3L,0x0000000000000092L }, + { 0xa1ec820aaecee40fL,0x81fee315a9e2e645L,0x4794883f242620acL, + 0x68adbdd9890be143L,0x1dceec7b76458c1dL,0x90737afab7c270a6L, + 0x640520d1c504fec9L,0x7db9a04721283767L,0x000000000000014dL } }, + /* 26 << 490 */ + { { 0xdc7ea260fb335eecL,0xf4fb771645e4162fL,0x83083490caf2fe89L, + 0x236f08720175995cL,0x36111ad435504affL,0x0071b03ff342248aL, + 
0xafa645143c794f89L,0x81caa96447a160d2L,0x00000000000000e1L }, + { 0x3dbacd20ac902e52L,0xf65e2a91a533f9c5L,0x6cb5952b31b56177L, + 0xb9d864204aed174bL,0xdad31e286f481fd0L,0x47018a784b45b876L, + 0x2de43a62934e6063L,0x8893fc2e48f73becL,0x000000000000018bL } }, + /* 27 << 490 */ + { { 0xf3a63c14bdae5604L,0xe11ed2086d3e10efL,0x10639e5e6021fbffL, + 0x6b6f4e5d76c3b6f2L,0x153e9ad8ae20d93dL,0xa0d205d3a3c51052L, + 0x84802526d77f38d5L,0xd14890d7bf780614L,0x00000000000001c3L }, + { 0x9aa8671588be4774L,0xa877900f0edbc5e3L,0x1106cd8a0231c6c9L, + 0x2a08b3dc26146217L,0xa98a1202f7041571L,0x2352cf2fec9e24dfL, + 0x990d2feb89c59932L,0xaca88d14d00fbc45L,0x00000000000001b9L } }, + /* 28 << 490 */ + { { 0x9a9d1b1914198957L,0x09272b36b8b27468L,0xa8adb29478d8695eL, + 0xb2a77ef928e6b551L,0xb92c59a171983bbdL,0xae27218037661116L, + 0x8aedb3dd9058858fL,0x2ea5bebd716b7a4dL,0x0000000000000188L }, + { 0x43bcd03eb534ac3fL,0x0a3a64298d025a80L,0x3e6a0674dcaa5ea0L, + 0xa0b446922a7bed53L,0x962c45437d1d2fdcL,0x4651ccf2e3ec4a2cL, + 0xaa08f169a883b3f8L,0xbcd939e4513a5d60L,0x00000000000001b8L } }, + /* 29 << 490 */ + { { 0x3ebb07d66165d930L,0x8e91d5450ae51c2eL,0x3583819e644fcafcL, + 0xa50fc18d091544e9L,0xc0d83ac6d906526bL,0x8dda11369adafa5eL, + 0x641ec76d47b69511L,0x4a8c8b3c9b78be9bL,0x00000000000001d4L }, + { 0x9bf7a595b578fba5L,0xd19f5d5b35363b48L,0x59ffc205b60d4397L, + 0x566d5dc8405a1ad8L,0x6fefa0d077eb44b3L,0xe541d6ce224d90b8L, + 0x2ed4482e17f7feb9L,0x7f19a002fa2bcbbcL,0x0000000000000027L } }, + /* 30 << 490 */ + { { 0x547c70b4548ca6d2L,0xd6a7a51ebce5664dL,0x92e8ccd00472cd19L, + 0xbb32b3050441f3e0L,0x9ba8b75a17a40be2L,0x544c456d08adeea8L, + 0x37972d606ad0417aL,0x812b7c83d87711afL,0x00000000000000f2L }, + { 0xdc0584d5f57681f0L,0x4a450ce001727532L,0xbeeb07d401eb264eL, + 0x0d6b72c444f15915L,0xebf41a6f2ce8922fL,0xf342d770f71b84a9L, + 0x45e38af44fa730c9L,0xc65be7b2ae916caeL,0x0000000000000168L } }, + /* 31 << 490 */ + { { 0x346422ccd8be3ef4L,0x78e55bbd6b6397c3L,0x1122c7c23eb94d9fL, + 
0xe335c88d6ebd1011L,0x5d69022902d6a508L,0x32404617bae4b52eL, + 0x4105738dbe1bbbf4L,0x4633d69992bb6653L,0x000000000000007fL }, + { 0xecdb3d91f642459dL,0xda072b00b406bec3L,0x15c08fda4b06ff3bL, + 0x8a66e8cdafcfb908L,0x4f142a2cb4892aabL,0xf6c64b9220d47b2bL, + 0xfdf4c62737194064L,0xd8ab8881d881142aL,0x00000000000000bcL } }, + /* 32 << 490 */ + { { 0xaf9bf352b2648196L,0x00a9569d042fe760L,0x43854e69b9d94444L, + 0xa01a26456e3392b7L,0x43d8ae3686c0702cL,0xb658edeedd0f9617L, + 0x0676b0ef8b270ab3L,0x71ceba8f02072f7cL,0x00000000000001daL }, + { 0xaf1b398d4d9bc7bbL,0xe6f033c930e4a71eL,0x38919b421c8d3fd5L, + 0x4dca8a1f31065bb9L,0xba5a637e0bd4bea3L,0x30f886eb9a42d432L, + 0x1e7c0fa3000fcd13L,0x9dffac1d7299786bL,0x00000000000000daL } }, + /* 33 << 490 */ + { { 0x9c6981caa81142f7L,0xfda7dbd4fd156b03L,0xb55db395f9ad1b37L, + 0xd7614d623bc530f7L,0x0cdae9d22c905663L,0xbf9d97cec181cc24L, + 0x84e1995a0462b7d2L,0x02eb429532ed0544L,0x000000000000016dL }, + { 0x37f3da4c8f85488bL,0xdd74c11700010fdfL,0xd73da972faa1b459L, + 0xcfa6099fc4fee2aaL,0x831e38705c0b914aL,0x0c7e3a8fd4a13005L, + 0x4b0c86e03a3d633cL,0x9c104f0720231bfaL,0x00000000000001d1L } }, + /* 34 << 490 */ + { { 0x2c6420ccdc38445bL,0xf8636ca67fa9a5cdL,0x4222168ede7cafe0L, + 0xbf9f6254f0e34b77L,0xdfcc8d0688f9e0ecL,0x06d3252d988e02aeL, + 0x56557b2ce3609068L,0xc4d157a4b96068e1L,0x0000000000000032L }, + { 0xf1135648cf72d12fL,0x37e911e5811b868cL,0x2cb5772f4b7bcabbL, + 0xddf409b0c02c026aL,0xdfcb7373a6c344d6L,0x2b71c0dedf718cfeL, + 0x8107675ff9043a2bL,0xc4cdb35c04f0f193L,0x00000000000000d0L } }, + /* 35 << 490 */ + { { 0x677701ccd5a7e191L,0xab23235dd26500eeL,0x0802b5f4f83e8885L, + 0x0113e7f2072026edL,0x343f785ff5757027L,0xcf49e2e57b6406ecL, + 0x5fe76f7dd846655aL,0x13a05fec06dd0b73L,0x00000000000001c9L }, + { 0x3ced3cbe8b640e13L,0x945068dbd6b72e95L,0x2181cee07981bb0aL, + 0xdf3fb0d3fe9aca7fL,0x4b29a06f1a49a253L,0x4f29456252436f51L, + 0x50798afd75f5ae8eL,0xeb4fb55d4b0e21c9L,0x00000000000001cfL } }, + /* 36 << 490 */ + { { 
0x35900c17f31f03e4L,0xd46ab57ecacb20d0L,0x346e2e7d1f2ec957L, + 0xcc002ceded3e9556L,0x34af851dd2729afcL,0x38f4c7007ef8ad10L, + 0x9383b41e2514f43aL,0x41f5a0c43156a417L,0x00000000000000c9L }, + { 0x0430eb170573cf6eL,0x75cc52b227b7f3d0L,0xa0306601a0c813bdL, + 0x202894d4bf1a5bc9L,0xaa87196d9ce7d613L,0x23a3fb14644485acL, + 0x5d9b9f3ae9854099L,0x76d3383791c4b3daL,0x000000000000007bL } }, + /* 37 << 490 */ + { { 0x7be352469c44d309L,0x864ae27e8f4e2484L,0x9f457a4f3af5a594L, + 0xf215103c858aec93L,0x792d6a3f138244e1L,0x6bb16e51920ab210L, + 0x2560020ece573467L,0x2d6e20b5bc565058L,0x000000000000011dL }, + { 0x429c2f1cb8826bb4L,0xfb092daa14d953aeL,0x70d6a39a9ea3076bL, + 0x8bbad6153bc68687L,0x1f429219a3f6d0b1L,0xe335739bd50a3ca8L, + 0xfb76e216b8f25500L,0x55e606b970172ee4L,0x000000000000009eL } }, + /* 38 << 490 */ + { { 0xfbdc60f37bf5b26cL,0xe42fed785e165d3dL,0x8eae9ace8103041eL, + 0x68c8d798e97cbf71L,0x4d7dc5156fbd8c5aL,0xa047cf13f473bce5L, + 0xc7e18565a71c679cL,0x12c321ccfaae7237L,0x00000000000000daL }, + { 0x896bf0d3e357c359L,0xb05051dcd83f4e44L,0x2cacb7bf71a73589L, + 0x7418b85011c1aa4eL,0xf5780ebd98f16d37L,0x01ea234502b2e8acL, + 0xc4210ee8ca205ac8L,0x4e166dcb055d8862L,0x0000000000000077L } }, + /* 39 << 490 */ + { { 0x4926e24e5b63b15eL,0xfb0ad1e7ccca4dc5L,0x13c006d3529ecf40L, + 0x03ef68cb6fd4c202L,0x56267eb15fce340aL,0x116fb062ca544956L, + 0xc318e26a69799c4fL,0xe700e63ac7b0e582L,0x00000000000001cdL }, + { 0x492f4fc30a8acdc6L,0x58dbc159ca8e09a4L,0x837eff9405a42e25L, + 0x2ab5849271b0ae77L,0xbbc2c2541b9eae08L,0xf7ce5295b6fd5969L, + 0x7358028beaee4711L,0xb2b7233552b004d5L,0x0000000000000046L } }, + /* 40 << 490 */ + { { 0x295b183393dc69c5L,0x8eb4b6ab3c37e07aL,0xa6e07d0f04408713L, + 0x2cc845f2c69254d1L,0x93f7aee14396112eL,0x44c963cc55c306dbL, + 0x7e18cecdd5eab009L,0x4397a27ff9a2c2d3L,0x00000000000001e2L }, + { 0xadb5af020031c31dL,0xe273ade87aa7656bL,0x3c97406a8cf07d6cL, + 0x348d729e460e4aafL,0xae24daeeb2373a39L,0xef275cb1b3b2264aL, + 
0x947252cfed43148eL,0x4237243e33c44033L,0x0000000000000105L } }, + /* 41 << 490 */ + { { 0x6a9270fdb51c81c3L,0xd9e2b1e12e4b9a01L,0x303aa12d63918a66L, + 0xf5128af5ef62e4c4L,0x91518d003bdbfe66L,0xe877b72a2e705e37L, + 0x5825aef41e608e82L,0xdb47715ea274cdfbL,0x00000000000000bcL }, + { 0x8e760b7fbc4f794aL,0xfed90cdb13aa043bL,0x2e34fab55f258013L, + 0xba455f09e6282952L,0xb2a41de10be69b3fL,0x6a1d3f45312ba3f5L, + 0xd92e0895e78874c2L,0x77439f3ab99cb460L,0x00000000000001c5L } }, + /* 42 << 490 */ + { { 0xad16ea1a15f7f16eL,0x7536eaeaf0d41825L,0xc8db9cb5e341f9caL, + 0x4f565eb5627278b9L,0x2016945238adfec5L,0x754261a91855a678L, + 0x8b8e9eb9b6341562L,0x55119faaecb6f837L,0x00000000000000aeL }, + { 0x06bd4c7f758c138bL,0x859730874ae4e99dL,0xfeb70bbc6eba9b30L, + 0xb5e394a4d593b54eL,0xc54ceab4e18068aeL,0xd9d13043702dad7eL, + 0xa822afc22d6a1621L,0xebce87fe99b2c7edL,0x0000000000000057L } }, + /* 43 << 490 */ + { { 0x210636ee6e2f194eL,0x653d19cccd3aa630L,0x1211c87bc72f054cL, + 0xaadb7dd07de52f23L,0x3ac4892a014a9826L,0xfc871d728c3dfe32L, + 0x453434ea1acc75ebL,0x15668e025611b7a1L,0x0000000000000060L }, + { 0x1e4c1d7254f88188L,0x06891c05992e686cL,0xb8b93008914cae00L, + 0x2e1a4ca56e494b72L,0x63a4fdaa50f56be5L,0x9059e7c4b2d616c6L, + 0x586aee6d4bda7608L,0x90428414735778caL,0x00000000000001f1L } }, + /* 44 << 490 */ + { { 0x3aedec9f37932307L,0xc90ba7cc3c278c5cL,0x2172d22004be1c8aL, + 0x9d9943b37ce8e3f3L,0xbfec2077211548bcL,0x938e1d2f1d011ee4L, + 0xac9bb9d5f14f2246L,0x44152b2dad89a636L,0x0000000000000182L }, + { 0xee7da6c1c7e6c8a8L,0x3c6d8dbf61fd9a43L,0xbf8daaec74a9c52fL, + 0xf71e888e9bfad08dL,0xb16028e58aad6006L,0x203343ca89a9ef88L, + 0xb218490db9b624adL,0x8b7ba480d01f5fa5L,0x00000000000000e0L } }, + /* 45 << 490 */ + { { 0x36196133272975fbL,0x01384c75cb6a974aL,0x7fa975c36340a057L, + 0xc24df57784a3583dL,0x2da75192bb84c62cL,0xf1f3a70b14579cf1L, + 0xd50474daa2e588a4L,0x4aabd6d26f1e9de5L,0x000000000000018cL }, + { 0x76e60ea6fab24b05L,0xf42bbbd2df7d817eL,0x3520cef607b34b07L, + 
0x2def153d9996c9c2L,0x179dce6195252be7L,0x34a49977959fb4f2L, + 0xbb05b0c6414e4630L,0x582b85ee8e51ba0bL,0x000000000000010aL } }, + /* 46 << 490 */ + { { 0xb3f85e6bdcb0257fL,0x9f7c94610db3ae5aL,0xd1309e33f7db50ceL, + 0xe6be4681d69ff6cdL,0xabcfb97f45b3e25cL,0x611bde5acf2cbfa2L, + 0xcbebd63d8a0ee819L,0x84f589922a881952L,0x00000000000001daL }, + { 0x375d99a15eafd5b1L,0x1624edfd343c8399L,0xcb922ca0486cb49eL, + 0xda5bf79afb059ddbL,0xfbd112db909e0533L,0xe45142308e9322c1L, + 0x4e4484879fe403eaL,0x90fa5474a20f933bL,0x0000000000000148L } }, + /* 47 << 490 */ + { { 0x2f0d4c3b48d8f2cfL,0xe7c0c77e31ac3990L,0x8ecc391aaa04fe9cL, + 0x6de447a596d400f8L,0x1e4e295ef1a240d8L,0x68850f917621d6d9L, + 0x56018d58316c4b1fL,0x814b364b00c15d95L,0x00000000000001acL }, + { 0x5bef4d63f3de314fL,0x396301ec89c3021cL,0x248f51e95efc1e80L, + 0x1e9acc4af7b3209fL,0x17094679973357c4L,0x7ce6f1b39506621eL, + 0x564fd1c1b2bdc480L,0x574423eb3a4c1975L,0x0000000000000177L } }, + /* 48 << 490 */ + { { 0x7078a5f8c793fc62L,0x5ed9d1d57dfa971bL,0xe1f701b4a25cdee9L, + 0x362b88524b57f87eL,0x2448012320869233L,0x025ff4771481f116L, + 0x3ca9f2f68a6275deL,0x562666c7da246190L,0x0000000000000086L }, + { 0xb5cb110004d25dafL,0xc869f7b3748d3d45L,0xa68f07320c847ab3L, + 0xe03e1ae202c1d0a9L,0xb4f46c5ec2a80ef6L,0x79b46f90606e1371L, + 0x0bc1f2a0a728979aL,0x9e8f17833037fe72L,0x00000000000001a1L } }, + /* 49 << 490 */ + { { 0x92c876eaba57a5ecL,0x91b9ccc35f0a0aaaL,0x6a208e24040313faL, + 0xbac2e28d87983b26L,0x5956ba23851ee137L,0x6efec8e850c067c9L, + 0x7bd5118ae2dc92d8L,0xc88977760dd4481fL,0x000000000000002eL }, + { 0x170d9e4bebf66902L,0x7dc58ca4351c94c8L,0xf0338c8a9f4bf58bL, + 0x54dab2aea52e0c73L,0x0f4a6a5177eda7a7L,0x5356635827e03485L, + 0xa43fa520a87f96d9L,0x8b82a2dc026c86a0L,0x00000000000001e5L } }, + /* 50 << 490 */ + { { 0x9a17a22e28c209c3L,0x4c3776f5275c7b6dL,0x2b78c520e4b62488L, + 0xef7cf9ba8b99989dL,0x540c3a11a7883805L,0x314401635fcc1197L, + 0x27a51e77406245d2L,0xfeb5c66a87da3cc5L,0x00000000000000adL }, + { 
0x2406a28ff97dc5c0L,0xc44de6717cc0304bL,0xd8b8c3a7a893730eL, + 0x1755fec500c14fcfL,0x34e16a56e61a6b56L,0x936f471d8469199dL, + 0xeee12092ddfd51c6L,0x912b3141617a3645L,0x0000000000000112L } }, + /* 51 << 490 */ + { { 0xe68216c4acabfc0bL,0x575047098206f7b7L,0xeeacfd72278d0fd6L, + 0x26009914125b3bffL,0xf2c4d59974430c62L,0xa883c18c6576fecdL, + 0xe8c29e2b58f5c1abL,0x19715f47d63f80ebL,0x0000000000000136L }, + { 0xf1fbd8413907f8e8L,0x3b6d0ee100b6840bL,0x582779a8e7ea2006L, + 0x24d05b01c79c4ba1L,0xc11c23d1e48ed567L,0x31ca1d96ca92fcd6L, + 0xfbae1fdfdc9f7647L,0x47d7d61d94549301L,0x0000000000000035L } }, + /* 52 << 490 */ + { { 0x31da39dbb68cf907L,0xb02e9060d1e99729L,0xadd55a9689bb1507L, + 0xecc7856a4b5470a1L,0x40a83460eab8cb1fL,0x1c1f3c112b2c9159L, + 0x7b97b58f7883ff7aL,0xe7e97a4abde1caa7L,0x0000000000000139L }, + { 0xfa1da9d4b8ecc0d1L,0xe6212df16e7641b3L,0x828c55c6ebb36f6aL, + 0x5b7ead1609d6a861L,0xdb35a05fc2844248L,0x3c5089b2776dd2cbL, + 0x2b6595bfed9eb5b0L,0xbddfaf004d176cc9L,0x000000000000008fL } }, + /* 53 << 490 */ + { { 0xbcd382ebea56a75aL,0x53e072c1532eddfbL,0xf9e876e05394a791L, + 0x8d1b21bc90cd2760L,0xf1dfbd70d35c0c7fL,0x5f974acd25822227L, + 0xb7f59ce3ad8a5cbbL,0xdf4c1e5a5266c9abL,0x00000000000000d7L }, + { 0x49c8ff4e5896a558L,0x38da671d1b3eec29L,0xe8ad9b620128778aL, + 0x99478bf41d53b8daL,0xe0196e99b521a1c4L,0x9075689f5ed4551dL, + 0xa89983aed3b8db57L,0xbf2e18e0920ed9beL,0x00000000000000b8L } }, + /* 54 << 490 */ + { { 0x0f498b6bab33f9d8L,0x4ad288a62ccfc0fbL,0x7115968d312aaf15L, + 0x3c01241a2ecffcc8L,0x80f841855f6704e8L,0xbfec176e279033b9L, + 0xa72fec76e9634860L,0xb3b673afafa844ccL,0x000000000000009eL }, + { 0x0c0409d7f67aa5c3L,0x06d70f351f87efb6L,0xb203904279578f67L, + 0xd72c5ae7d4a73775L,0x15146ae382883850L,0x368d7af83f25050aL, + 0x1fbedcfc4025b2b7L,0xb75fc92f5ab52601L,0x00000000000001f8L } }, + /* 55 << 490 */ + { { 0xbcea48fce7f90fefL,0x2948f00104a5cad2L,0x741f5fd7381f63c2L, + 0x2480d66320c40d0cL,0xcc71dd56fa3933c6L,0x32537996b42aaccfL, + 
0x2058a90ec1de949fL,0x8cddb8a4337c3d9cL,0x0000000000000118L }, + { 0xbec707c69f6d7d34L,0x3961a86455e2adacL,0x6ef452a7e74039a5L, + 0xc7dc50b0ad925d03L,0x9abece09b0c3266bL,0x7099d99a9bed8b60L, + 0xcf86e9b4dddc307fL,0x4b55f07acff68c17L,0x00000000000001d5L } }, + /* 56 << 490 */ + { { 0x3e39683dcd351b10L,0xe16197210a895f34L,0x252e80da411d3459L, + 0x1cf1f5433456c4a8L,0x7d561558fbe9379bL,0xb851ccfa95445524L, + 0x63047f7cd73af554L,0x9414829ccd036021L,0x0000000000000138L }, + { 0x1a267c2a72c27112L,0xa663014f7031c061L,0x6c342632bdb60991L, + 0xeabab7ce0d230b20L,0x0bb0e281c3bae5dbL,0xc46b86b7d63a462bL, + 0x1964d38cb34aeafdL,0xc165169bb2548043L,0x00000000000001e1L } }, + /* 57 << 490 */ + { { 0x0069f78595054d41L,0x1915d067ffb41edfL,0xd2a26117106803ecL, + 0x01aee38f45acc15fL,0xc17f9a13e5e0ca69L,0x9650dd183b3c005eL, + 0x80dcb7b979ea11a4L,0x8016e13f161b05b1L,0x0000000000000005L }, + { 0xc34447de63240c80L,0x7055b0885924cfd7L,0x0827c7ddbd76a15aL, + 0x2c0986b73aa18316L,0xa6c6a88f2ac32019L,0x0738b1b3cf9a7941L, + 0x7b55ebfc25337538L,0x533fcc0a015f8678L,0x000000000000014eL } }, + /* 58 << 490 */ + { { 0x1a96801aadb8bfe3L,0xe75278daf390769aL,0xcb8de5149bc85577L, + 0x26d506ca59959b5eL,0x134f13278a3ecd50L,0x990cb7b3bf13e3c4L, + 0xd2c83b2625b4cab7L,0x762f5e88306fda07L,0x00000000000001f4L }, + { 0x4c07d7fbada306d4L,0x4bec248d3e507275L,0xd2e58fd9b23246baL, + 0x9cd7d608419a7944L,0x85b076b56ff066e8L,0x75815995ad21887fL, + 0x898f9f2d778e8eb2L,0xda74a1c47d16b4e7L,0x000000000000010bL } }, + /* 59 << 490 */ + { { 0x5a99a36a89e67b6eL,0x6a9fadb934faa0e1L,0xbff8efff81886662L, + 0x6d30dbba5e4281dbL,0x793e30fec50c8b7bL,0xcd8b5ca3d31f298bL, + 0xc3618e2f8b3ab7b9L,0x6d9afdddb566cbf6L,0x0000000000000192L }, + { 0x588a45bf8c86b790L,0xe384791ccbcc0262L,0x94dcfb139220da30L, + 0x0a11742af8a98f9dL,0x8b32affa180b4e5eL,0x009e3017900f981bL, + 0xdc0cdb67077045ebL,0x6370593425e84b0bL,0x00000000000000deL } }, + /* 60 << 490 */ + { { 0xd8a907f1495af650L,0xbcb3f6e27a3ac35dL,0x1aced00f606706adL, + 
0x8ce32c00a2e1cf9dL,0x1e0e979705f91cacL,0xab21e96b9e98af3aL, + 0x92f4e6dbbbe3c415L,0xfd9b407646c929afL,0x0000000000000097L }, + { 0x5da0209deee8423fL,0xa22af5cf098eeb14L,0x5e379be216e722beL, + 0xe06cda5b9680b5acL,0x832c5aeb311d0269L,0x7471c5d7e4943d03L, + 0x148e2ef20776d3ecL,0x658a17b50c51efc3L,0x00000000000001f2L } }, + /* 61 << 490 */ + { { 0x661c9b4356ddaea0L,0x293c59e2922d963eL,0xc4cde92daaebd3afL, + 0xab071b5f1d039ea1L,0x001a3455800846aaL,0xed60d946be47068eL, + 0xae57d28e81e38d14L,0xc1ebaf7aea1b4b13L,0x00000000000000d7L }, + { 0x813636f2f6189471L,0x18081bb61053facfL,0x5df021c61a72f2bdL, + 0x95867c61e6221fb5L,0x98230da20f68c36aL,0xf8a71504bb930cb1L, + 0xc66f68c49206110aL,0x6d859d5b4a9704b4L,0x0000000000000107L } }, + /* 62 << 490 */ + { { 0x945527c00753ebb0L,0x58d67d3c66168307L,0x0ec55d7dcc6d285eL, + 0x53f3f9edfd180ff3L,0x862c163e841badf8L,0x3a283e8fe6405388L, + 0x1f21be3ca83e0e8aL,0xc8be2fba8d854e58L,0x000000000000015eL }, + { 0x9f10d5bcb68293e1L,0x634a08d72787aaceL,0x29edab8649be3cd9L, + 0xbdaa11c791666966L,0x42a13787d7f98440L,0xbeca84bfcccbc458L, + 0x7c9e9a69a565c2eeL,0x1a0af783e8b2bb1cL,0x00000000000001d9L } }, + /* 63 << 490 */ + { { 0xab166e77803ef03eL,0x02c155561695a136L,0x0d569bf059f0a685L, + 0x71c9d373fe7d3aa6L,0xb0560c7ae92b0c1fL,0x0f31f03af89493abL, + 0x9a403a9f2e67af99L,0x39d67688b84fd6dfL,0x0000000000000079L }, + { 0xa1544f774f550f0fL,0x4dde151021bdfcabL,0xb20ff20adc61a321L, + 0xd7762640a6b40f30L,0x1ceca244750f159fL,0xebdcad0d46ce6583L, + 0xae45629873f0b152L,0xa7d11d19be995716L,0x0000000000000141L } }, + /* 64 << 490 */ + { { 0x0ce1f2d1945039acL,0x1d1e6504fdf94d41L,0xf36eafd58752d630L, + 0x24477f7fefbf0132L,0xae8fba2cc02bf85bL,0x53a24ca8eb510e61L, + 0xeadbc7f79c2c453dL,0xcfdbc9dc310eda46L,0x00000000000001a9L }, + { 0xfbf287c8d8f82b51L,0xb7ca60967f0ecadaL,0x1dc411b715242666L, + 0xebf82d6c3dc6ed96L,0x8f82229dd0a92b49L,0x75a6a4a79dac585aL, + 0x443ae75ec839248aL,0x4defb89f1ba7c9c3L,0x00000000000001b5L } }, + /* 0 << 497 */ + { { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 497 */ + { { 0x20e2fe1972925756L,0x4815ae3332ce7832L,0x183d90c3eb8f4eb7L, + 0x0e088af52a8045d9L,0xf8232956da04790fL,0xc24661b5ea0118c0L, + 0xe4152695a72924a6L,0xffcb277840ab56b7L,0x0000000000000120L }, + { 0x1c766208aae9af57L,0x21769dda239689e7L,0x9dc2194a235ecf56L, + 0x930e993fcf83a067L,0xb6725ad332913e2aL,0x09bd9ff9fc942b50L, + 0xdfb47d67600d426dL,0x4cc06c589def7175L,0x000000000000003bL } }, + /* 2 << 497 */ + { { 0x9c3f082939926493L,0x6bf1089ee3d0f530L,0x3e91285d7d7d5b4cL, + 0xd8d70f1b02bb958aL,0xe4b0e8c1bdbd2b6eL,0xea70f1079239f187L, + 0xd30f4ada27d37ca6L,0x0358f6b3c88eda99L,0x000000000000010cL }, + { 0x8957ea97e385ac45L,0x707e7445b4ff3d17L,0x4a6d4dc840a361cfL, + 0xd086353b13aae7dbL,0xd856f4c9a478b479L,0xf687400f4d7cf67dL, + 0xfa3000b6ce52336bL,0x805459d7d0b88a94L,0x00000000000001ecL } }, + /* 3 << 497 */ + { { 0x96632b75abe029fdL,0x38280dbd57aaa853L,0x2a49ff1956556bf7L, + 0x76af8029017ad9e7L,0xe231979265ea6edfL,0x71b0793c9cbb4069L, + 0x1f89d54df62d86ddL,0x48601cbd52809190L,0x0000000000000116L }, + { 0x825a9ec295f5317cL,0x9af044995b568f34L,0x0a74e92670c96172L, + 0xa87fcc849cbe4ca2L,0x409e58c017b5587bL,0xae221d81686599f1L, + 0xdb87c25e9b04ae84L,0x2a847873f9a49cb1L,0x000000000000019aL } }, + /* 4 << 497 */ + { { 0xe862a17bbeb4f9f1L,0x5e701369310c4b65L,0x01ef66253377f678L, + 0x56157709da8beb61L,0x7ca21b049ef7da41L,0x9f776dac88e486a6L, + 0x53dd04cf8aed6f6cL,0x4a7df43e953c2d95L,0x0000000000000158L }, + { 0xe5bc0ded1fcc4484L,0xb337eb4eef6d0e8aL,0x02208b42dabf0d9eL, + 0xe64c8529467bfc55L,0x0cf409592e141810L,0xa34af9d656fa0badL, + 0xf3471d5a5345141cL,0x89a7a82c1ff7597eL,0x000000000000014eL } }, + /* 5 << 497 */ + { { 0x3fe4077368245bb9L,0x89a284bba3626a8bL,0x73c190915a9d7ff6L, + 0x8e3baf6fcef0b5ebL,0xe3d8006921aa008aL,0xaa3f179932b686a5L, + 0xd20cfb52eb1ea953L,0xd62b692f360c2ac8L,0x0000000000000050L }, + { 0xb1b8bf37f4c933e6L,0x090d3bdd5af935bcL,0x8e4854d2ad9d74e0L, 
+ 0x5b38d8eba00bf5f8L,0x837582475f478824L,0x5c7e2413f71a8d3aL, + 0x955642770665c873L,0x37d95026d1f7a638L,0x000000000000013fL } }, + /* 6 << 497 */ + { { 0x10459adc9fae827dL,0xb9c6d71d622dae77L,0x8d379015bed58a7eL, + 0xe6b71390b90143c2L,0x08eb29c40a8985f3L,0x30c331ff08db169eL, + 0xbe336067077fcc98L,0xaf6477724e16c914L,0x00000000000000bcL }, + { 0x1836d521e134601fL,0x8365f5c7a58ceda3L,0xc4c114f6a6fe142fL, + 0x4651388d461a4d3bL,0x1b4cf10a639f7948L,0x140f9efd361665e3L, + 0x436f20b0c743b8c7L,0x4e1d732d710cc0a6L,0x0000000000000186L } }, + /* 7 << 497 */ + { { 0xbd6db523c85a0848L,0x987c70fddacb2b06L,0x3dbaa519b16e9914L, + 0x14205aef40b2923dL,0x5d09323ec1d161dbL,0xa68b68850e462298L, + 0x0eecab4fdb91ceb8L,0x10ff0480339fb226L,0x0000000000000099L }, + { 0x4bfe6f364def0c94L,0x21b3cd30ae7c0ceeL,0x2396ba6610b510caL, + 0x2c9b665184b52fbfL,0x25e93bef7a84e6afL,0x83846a6623f7a204L, + 0xf5eb2e3e5da84c02L,0x73e65cc62c870984L,0x00000000000000a3L } }, + /* 8 << 497 */ + { { 0x4270a7973c80f820L,0xe1455e1cd8e3db55L,0xfc7d04b5c31eea36L, + 0x86d4b43d8237619eL,0xca90fead5696ac8bL,0xd1df350dbe0a5d49L, + 0x844e818faea06270L,0xfa19b70bd89cc1dcL,0x00000000000000fbL }, + { 0xbae72ff552fe816fL,0x7b5d9e529a5a3747L,0x7a7eadcca689a68aL, + 0x18dec239330ba649L,0xafc49e4ceb30bd8fL,0x2504271680c1a715L, + 0x4d5aafffbf6e92d9L,0x94af51fb7577cc4fL,0x00000000000001b9L } }, + /* 9 << 497 */ + { { 0xbb49e8054732030bL,0x0829d482bd03cd5fL,0xb9a6136c5f6950e0L, + 0xfc781907dd4f9182L,0x9b4141a166ec206aL,0x3577a8fe18a46639L, + 0xd1aadfcc28960c88L,0x0b13bc74f4dd4d20L,0x0000000000000142L }, + { 0x99edf0c678898705L,0x9177bd2fb71bce68L,0xe0ce6c6305ba1470L, + 0xab99c6573f5fe41cL,0x437f2df7367dd373L,0x240fe1bf4d543d1fL, + 0xd4f2abbfc0ddf245L,0xf230491ad6da4440L,0x00000000000001e0L } }, + /* 10 << 497 */ + { { 0xeb2ac3780ca7c04aL,0x6503ef0d2c07f4ddL,0x25f353f3eab6a53dL, + 0x53d58f68879a37d4L,0xc63460daf7a73064L,0xf6d94b623ebc4da3L, + 0xd32258dbc810cff9L,0x3aaa05f66f1b0c2aL,0x00000000000000ddL }, + { 
0xb16bf6925cdb2275L,0x3de2596915cdac6bL,0x9e073ef027ab70e2L, + 0xe343955416e91fc5L,0xbf3afa34b457e4b9L,0x4f7538f76659f402L, + 0x0eee4ca52c76dd2aL,0x4d756cfb8c55212cL,0x000000000000014dL } }, + /* 11 << 497 */ + { { 0x909a8cb6915e964cL,0xba20835036b97440L,0xa316b2e155cffb24L, + 0x86b2afd21b6c690eL,0x8d3121b8ef32a58eL,0x243e5a3755faf21bL, + 0x86a70ccd48e1eb3fL,0x7fb7ab7b856482a1L,0x000000000000017dL }, + { 0x9c565882ab37df1aL,0x2c2e3f640ceb193eL,0xde7462f66fa488a9L, + 0x9505158bc0a49496L,0xf8c7087263ed98fcL,0x90d169ed35962a1aL, + 0x9c5a71216e74e666L,0xd0276edf3a407e03L,0x0000000000000005L } }, + /* 12 << 497 */ + { { 0xd8e41426a9df462dL,0xb74954b0ed2c3482L,0xa89b9b49cd400ca3L, + 0x300d10c3ca71c044L,0x6a7d6d0c3f027519L,0xbff329d8daab84adL, + 0x9df171a25a1bd3caL,0x4d3ac287f908de36L,0x0000000000000155L }, + { 0x3c39aaacf399d7fbL,0x155af8e309b50490L,0xf1c274c44e8b235bL, + 0x80dc35149732546aL,0x17bf13806555e151L,0xc41ea61064ee4231L, + 0x997707e1a29c4555L,0x7c29fdf3658270b8L,0x0000000000000039L } }, + /* 13 << 497 */ + { { 0x4470a10c15b5487fL,0x59e0034abf2901ebL,0xa004882f96f26b66L, + 0xf0747b1f55ac9ec0L,0x00b2726126587c55L,0x08a8217ef3a868f2L, + 0xc138619039ab6420L,0x75a2f37063ef87ffL,0x000000000000015dL }, + { 0x31e93fe5ca9cf04aL,0x86cea3141ab48f4eL,0x61576808902a3aa7L, + 0xdb2d78c451cebc3aL,0xb20d8a8239946890L,0xff048d9292b61791L, + 0x87a1e8d88e48280cL,0x09921ba4db22a5e1L,0x00000000000001aeL } }, + /* 14 << 497 */ + { { 0xf4390ba3fa83b37cL,0xcb3a94f06e227864L,0xd13f93362d54afc0L, + 0x734b02866bf22e28L,0x027d32d36ecd5c60L,0xb7c8a2936ea8017fL, + 0x9001729bbbfee411L,0xedc83e030bc69acbL,0x0000000000000042L }, + { 0xc848406df076c0ffL,0xa7a5157da1258854L,0xe68222a851655acbL, + 0xd63300044c79c785L,0x4bae7f1b974690d6L,0x934fac86bb32fc1fL, + 0x8ebf3a73e88a72d7L,0xadac779440b91c50L,0x00000000000001f1L } }, + /* 15 << 497 */ + { { 0x3b64a126b7335d8dL,0x2961ee1ba9248dddL,0x640c9146e01dbb47L, + 0x0b7a2164c2ff9613L,0x5056eb03fdbd4002L,0x0ccaec0a02c765a2L, + 
0x20064fd4a46f0c72L,0x0f0710b082b4ef91L,0x0000000000000067L }, + { 0xff5d6b7ce04b5b0bL,0xfba3d44ac1f15aecL,0x89765ae5ce66d753L, + 0x65b3330be6e0a598L,0xd5f357987e388524L,0x1fedce600b10eceaL, + 0xa49a65d453659065L,0x685a55643162e5ebL,0x00000000000001edL } }, + /* 16 << 497 */ + { { 0x513a0c6260637ab1L,0x4a25bee092c5b308L,0xcd3dde9265d8b6a4L, + 0xce8ece9b60ea70d4L,0x289b7557af5d779fL,0x1df029a4ff6459cfL, + 0xd00ac6d5ce09288bL,0x9a9f96859a9f5b92L,0x000000000000006fL }, + { 0x18cb0922b5b63cd8L,0xd4753ff126fd1c21L,0xa4d1f3571080dac9L, + 0x7fe4f3ac24de88c3L,0x4ba767da3e60655cL,0xaf4a647436c75e45L, + 0x4854eb2195f2505fL,0x831446cffe3b33efL,0x000000000000016dL } }, + /* 17 << 497 */ + { { 0x4fc3ceb50d704c8bL,0x76277d00511f43f0L,0x81319fed1acdcf62L, + 0x9e45ed07c9e7557aL,0x57486b5372a27c01L,0x37dde1bf44718054L, + 0xf299012b5f67b6d2L,0xf611272d52b4570aL,0x0000000000000119L }, + { 0x4e9f26295623b9e1L,0x5e63fd62e8c47729L,0x31c312230ecddef0L, + 0x5ab7172a2ddf7e5dL,0x916a08f99de4622dL,0xf70f248bc8f3a4bcL, + 0xe90e066fcce7573eL,0x4a8d09745854a217L,0x00000000000001a7L } }, + /* 18 << 497 */ + { { 0x35ce884eef326466L,0x12ea07254fd35784L,0xa8eda224b6c12e71L, + 0xcd4088a98fca4b4fL,0x2c65668458e0c8fdL,0xa1992c076c3e0026L, + 0x4585191b0de1ff73L,0x41e41c18b945e9d5L,0x000000000000015bL }, + { 0x95d265696d686841L,0xc3b5ec5b7c82209eL,0x708412906b2f6dd1L, + 0x62711988377ec7f0L,0xc5a41362f8044572L,0x5c152ddc6c97a652L, + 0x315a403c78043e3fL,0x00f750863cae2d02L,0x0000000000000041L } }, + /* 19 << 497 */ + { { 0x69eaa031e84a32efL,0xf64318511f18bdb0L,0x45a72bae00ea0ec0L, + 0xa58114764f8cd7e5L,0xa2d4f720df207a7cL,0x10f9d9582e4b3929L, + 0x1919647cafd9a331L,0x932093227bb3b2d1L,0x000000000000002bL }, + { 0x1a489aa9999524adL,0x461e5e8f47ccbca6L,0xb112fa557218dd85L, + 0x8d6a44e90f5f41bcL,0x8b5dc510de9a654cL,0xe03a3eb28069bb7aL, + 0xe4c8200320045ff5L,0x683271bdfa37db56L,0x0000000000000085L } }, + /* 20 << 497 */ + { { 0x3bf023ad4af30875L,0x618ff68809817545L,0x07d4ead2451da716L, + 
0x794aa49829eb0835L,0xbd59562f42924bccL,0xafcf72eb72e399c9L, + 0x8bf1a1a963e80b19L,0xbf8bee820f4c4aa8L,0x0000000000000161L }, + { 0x020961f55448c727L,0xa703507a4eb32427L,0xe1778e68d45d8b76L, + 0xe649604ffad79eb2L,0x61f41e6254ad0127L,0x0b54947fa01b89d7L, + 0x5642af5b120716e9L,0xd9aafd3ed0876d5cL,0x000000000000004fL } }, + /* 21 << 497 */ + { { 0x24b368c4c9fc49d8L,0x5901a2196ffababbL,0x2b27398ff8d51ee7L, + 0x7bbbafa1e07890acL,0x4e54902636dc7e93L,0xb9b7f2be5c0aa112L, + 0x4ef4a83c5f948620L,0x851400c7ba7ecc64L,0x00000000000001e1L }, + { 0x8e5ca181b6067620L,0x5793d961496faf2cL,0x8ac484364996f453L, + 0x8df70948137517bfL,0x90cf159779458200L,0x949c38c4aca5b183L, + 0x319b3c666211c917L,0x57bf88e3500ca01aL,0x0000000000000098L } }, + /* 22 << 497 */ + { { 0x78811b6e05fddb66L,0x5658f3a212a3cc05L,0x5b35c76238f7cbe2L, + 0x57f36fb154f22cdeL,0x4a825341f83dca4cL,0x87e1c04e42965f1fL, + 0x2791df2518daad8dL,0x14132863a8944d8eL,0x0000000000000194L }, + { 0x41bb6a0bc1925507L,0x045af8dcd773e57eL,0x19c14986fff5e4c3L, + 0xdc7c96ca0f7586c0L,0xf37a6e7b439c87c4L,0x1e4e93876e166cffL, + 0x2111c8ab6872cc08L,0xc078f60d86c301fcL,0x00000000000001d6L } }, + /* 23 << 497 */ + { { 0xd79e9b9fa38e1f16L,0xc8ee2fbd74b510ffL,0x9be3175ac09d14f6L, + 0x417030d3d350614bL,0xb90492c8d8dd55f7L,0x00bd5e70d09ee667L, + 0x144eab3f9c4f5197L,0xa066177fd1ad6d51L,0x0000000000000149L }, + { 0x20db4c6c0879aa61L,0x5ff9dc7d1071725aL,0x1d80ccd2b39e07feL, + 0x3bd60188b3f048d7L,0x5f60142b994c914eL,0x5e858454058ef641L, + 0x02e1e4a5ff4ae974L,0xc5f4e021399f23b0L,0x00000000000000e1L } }, + /* 24 << 497 */ + { { 0x8e1a6c2f3e0bef3fL,0xb715403e14018b23L,0xefc99827613f0c37L, + 0x92437b5613170a30L,0xe84cbe8f9468e300L,0xe69b08f14bab3970L, + 0xfcb78f771558e9d6L,0xc0140529af25ed50L,0x000000000000017eL }, + { 0xff3fa668168134c6L,0x46e8edc72bcc2e99L,0xbbebb21c86bfbf06L, + 0x6b7d6159f204dd32L,0x75bb85ec1fc7e40aL,0x0f3e1043ad14c187L, + 0x44187117000a86baL,0x556420bb13d09defL,0x0000000000000177L } }, + /* 25 << 497 */ + { { 
0x554448c0fba3377fL,0xe136db9d3902cdbaL,0xd47511ebad6e52e8L, + 0x7339fa6cd9b3e28dL,0xb7642d4a6ca6868dL,0x943007ad8c8fde03L, + 0x9cc9807a123d09e7L,0x841e97739beae0eeL,0x00000000000001cfL }, + { 0xe7a94fa8789a0156L,0x76bbe08089619245L,0x0a7314b211444aa3L, + 0x8984ef43d2639f78L,0xd5a4a5e517699a5aL,0x6c3988ba314c3535L, + 0x7c82b93ebfc25e87L,0x857f7c3cba2d2b98L,0x0000000000000039L } }, + /* 26 << 497 */ + { { 0x7426610eae8eb2f9L,0x2e63e518ed0157abL,0xf4cf1d37ec0141d1L, + 0x1fa5a97754c8a497L,0x90e2750c50f7cbc3L,0x2465f6e651392c9fL, + 0x9d19fff295ceed17L,0x13020fb3d3023087L,0x00000000000000faL }, + { 0x56de6ae0065bf7eaL,0x793e1c385c37fa6bL,0xff3c043b4b74f191L, + 0x8d7ad72faf2ea8a7L,0xe606b3fde9a85535L,0x6167d112c411751cL, + 0xe594b53785ed0344L,0x5d10ae657197503dL,0x000000000000000dL } }, + /* 27 << 497 */ + { { 0xd75d6bcc525a4ba7L,0x75831970ff5192dbL,0x2881fed46ebf94b9L, + 0x9287616b74c60c34L,0xbca8e1929750259cL,0x5f06620d9cce8056L, + 0x88a1cdbdec42039bL,0xd2d7f35782851092L,0x0000000000000194L }, + { 0x6f7297c9e99faef8L,0xce5aefa067ca905bL,0x8aea3927825cf9abL, + 0xc61c45a46c6b844bL,0x218b1a55c21688ebL,0x84f51d90b50e00c0L, + 0xab5528156920a836L,0x19900cfa6be8182eL,0x0000000000000125L } }, + /* 28 << 497 */ + { { 0xe27b7fffcdfd27a0L,0x081213e1de02dddfL,0x0506179a90c68515L, + 0x91f33ddac6979fdbL,0x7a7b766f6c075aeeL,0xf5e9f6bf4f16c84dL, + 0xa1502a0ec8fb6599L,0xba8682718ed8bdb3L,0x0000000000000182L }, + { 0xcd8039b45e609153L,0x45112e760e819341L,0xc2dc373c023ea208L, + 0xe45a2f499dba881cL,0x50b2e55ad8dee4dfL,0xd15b89ba99ccc4e8L, + 0x943358a9fa42dc9dL,0x73659482f0193e0bL,0x00000000000000d9L } }, + /* 29 << 497 */ + { { 0x99aad65bbb3bb203L,0xe0310f2e07991fe0L,0x528a081533b4e15cL, + 0x342e836e408a61c5L,0x7cfb0b0fe062a87fL,0xee028eecbd81b770L, + 0xdb3ba8f9f978510fL,0xd5a19a2c8beac70fL,0x0000000000000052L }, + { 0xc7cc2ec1a4e6baedL,0xf1f5e6088ede29c6L,0x9ab7a0251f2a4e0aL, + 0x0938e50c2216f6fcL,0xd3fbf3a2d36da4dcL,0x14eb4e27d87198d4L, + 
0xef09f9566a7d394eL,0x82f28f877059da0aL,0x000000000000005fL } }, + /* 30 << 497 */ + { { 0x39e766d6fc267beeL,0x21eaa063a51bef4aL,0x04b31088d5b1da6cL, + 0xa9fb47bdb21fa3e9L,0x2f50eb96cbd14de7L,0x14de2d69f6109a85L, + 0x00dd17255716845aL,0xb8edfda66e70389aL,0x00000000000001bfL }, + { 0x1032c9f212d25839L,0xee5027717e862a70L,0xabb31c0594b14062L, + 0xd9ab09d45d063501L,0x96ff13a5130e8e50L,0xa6c8803b27ba8876L, + 0xd2e3e7f07dfbbefeL,0x9d2e5ac4eb8ec9eaL,0x0000000000000026L } }, + /* 31 << 497 */ + { { 0xe9d412ef1bbceb78L,0xfcf978bd208b992aL,0x6a53da6f703e13e1L, + 0xaf8956f7a8424181L,0x81a70a68e97cb9e6L,0xed0fbc16341b2d69L, + 0x70cf66385192869eL,0xbf9dd960f5a0dd0bL,0x00000000000000b8L }, + { 0xee6e26feea5c7ed1L,0x81c271315321b93bL,0xc66fa92b7608aba7L, + 0xb63345cbeeef7219L,0x4ae5e8c22895ec0dL,0xeeb3fb467f3b6e74L, + 0xeaff7601191adf9bL,0x44eab56470dc9ef1L,0x000000000000010dL } }, + /* 32 << 497 */ + { { 0xedbb59dca9aff80cL,0xb32f9f1d305107ccL,0x7a0de3d199d4d1f8L, + 0x701b04850637729eL,0x13d85cc4b86c08afL,0xfe4b62a8cd43b0e7L, + 0xba8ba3f38a012627L,0xa589240b2168bdf6L,0x0000000000000073L }, + { 0x028c8c11d8f16b56L,0xff6f2aa753816648L,0x677b98ab29bbd5abL, + 0x492513b8221f17f2L,0x828fa4baa5eca5aaL,0x15b65b2b560d8196L, + 0x737cdf4377fe2f2dL,0xcb1bc918c044cc80L,0x0000000000000138L } }, + /* 33 << 497 */ + { { 0x0c11966e4353dc79L,0xf39bf5ae84b26b86L,0x9b07118314d565e4L, + 0xaa9f8c5664977731L,0x12340d1b899b8932L,0x81b43cd0222df038L, + 0xebacd57fa96f4f7dL,0xd38b93c8e704f11aL,0x000000000000019bL }, + { 0x485e1ba6836402e1L,0x6f34201901bd178aL,0xf342b57ed7c7d3d6L, + 0xceb499c57de7c78bL,0xf3be5233a4ad7926L,0x217081dd11cc5b51L, + 0xc0f92f053c9437d8L,0x38b2b045c0e420bfL,0x0000000000000124L } }, + /* 34 << 497 */ + { { 0x82c834ecced14849L,0x35a0e1c96e2ad702L,0x9bce8fc925afa4afL, + 0x0c78ec0dff85e7caL,0x754f5c1a18db20caL,0xf112798679e6b204L, + 0xb5148832e6a42b51L,0x9db5ac23ecbdf889L,0x0000000000000180L }, + { 0x4d07cefb65b7214cL,0x7ef0f7670a8db016L,0x2f7db6ecc9f8d9c4L, + 
0x754299113c619d3bL,0xf8c6e1ea36619891L,0xc016f4c409d50a59L, + 0xe36545e182b52f78L,0xed17d9dbbacaf6d2L,0x0000000000000052L } }, + /* 35 << 497 */ + { { 0x5d1c2189b589ab22L,0xd05f29db8f8c11bbL,0x445edffb8eff8657L, + 0x3fdad8e54cbc6627L,0x1ee7e60dd2a3383cL,0x7e31b910991ad078L, + 0x65bd80d5b1a278fbL,0x352d10928e03512cL,0x000000000000000aL }, + { 0xc726086490b1b600L,0x4d4152ece639a6ddL,0xbd35547f41c4017bL, + 0xa1d24f1cf1c023eaL,0x8a536e83e7d8082bL,0x8689dd3fbf567013L, + 0x3c3538ae5cea1147L,0x9ab19fa10c5438c3L,0x0000000000000136L } }, + /* 36 << 497 */ + { { 0xe364461feceb36f5L,0xa2c2b4407c5572d3L,0x30e788e4db92e5a2L, + 0xf3efe8fa6d4ad73cL,0x621f4fd368d3b7b1L,0x0bd9693c3fb46615L, + 0x52da482fd34f2a58L,0x888f46b79880fd28L,0x00000000000000d0L }, + { 0x8a0ec5cf1a628d9aL,0x7205caf0c8e29f23L,0x274d619a16f112beL, + 0x79041e561bf69ac1L,0x5b17ee9cc5ef616cL,0xa66c9d83bf671d10L, + 0x7bf826d41abdf2ddL,0xfb3391f2135cc6d5L,0x00000000000000e5L } }, + /* 37 << 497 */ + { { 0xe7bddcc19fcd4762L,0xb029f4697c5390ddL,0x98ef31963fac7209L, + 0x4840f8f3513e4762L,0xb6e1f35152698b01L,0x699aba9e4bc640dcL, + 0x9ea16ea460648961L,0xe6a218fc9a46a0a2L,0x00000000000001c1L }, + { 0x27371dd1727b38f8L,0x338bed65e0b7a0ecL,0x2e333657c0ba0c0fL, + 0xc6e7f81340196865L,0x87158160bb8bbb34L,0x179fbf852dce1880L, + 0xe3b4f4ef9f2c7923L,0x73795470316d5fa5L,0x0000000000000020L } }, + /* 38 << 497 */ + { { 0x19712908cce1861fL,0x0359fd743717f4efL,0x9a092879bd3a53e2L, + 0x97aa8533f6854d5cL,0xc4163cb6d12cf988L,0x80025aec03691f95L, + 0xb62cf9e5276fcea3L,0xf1ed5103e50af5b5L,0x0000000000000114L }, + { 0xcc7c15357ba70e60L,0xdbc285f899e1824dL,0x2bb06046666906d3L, + 0xefdb9e5ecbd68ec3L,0xaccfb9c45061c150L,0xc0107ff714434399L, + 0x3ee3636f3e9f6b0aL,0x23b13a8fc2a10c8cL,0x0000000000000162L } }, + /* 39 << 497 */ + { { 0x995ac8a670b17352L,0x7cf059b2d9d4f826L,0xa82b0722bf98b787L, + 0x1532410d612d9a79L,0x75668bc2b44ae70fL,0x63cc756cbaf63a0fL, + 0x5499dfcf97a52695L,0x541e7a68c40906e7L,0x000000000000002bL }, + { 
0xc02c4aac17f7b128L,0x9c456b93a171c241L,0x931d09919b48b980L, + 0x52cccfad3e0c0ed6L,0xf23004085968a6e5L,0x93a8b420bd3d48a6L, + 0xb3ca08bda2bd79d3L,0xe02f0782d018ad6fL,0x000000000000014fL } }, + /* 40 << 497 */ + { { 0xf0fa90d83b50a230L,0xdd15885ffd95910fL,0x4105a4e148038a64L, + 0x2f69d817ae07fcd1L,0x0536353fd0484f76L,0xb3beb1eeb8d64baaL, + 0x2ff37ff558adc510L,0x5677b99149f78e1fL,0x00000000000001b0L }, + { 0xe22bf1ce43b8275eL,0x5ec18f04b7bc2931L,0xcb53d3e0f01fa620L, + 0x49db793f9587e525L,0x7f1e4ca35c884f2dL,0x5ad4e4c459437624L, + 0xa1e3329f4994207aL,0xe4cb938f88594aefL,0x00000000000001ceL } }, + /* 41 << 497 */ + { { 0x68609efdc4fbbccbL,0x55975ca04dc6e8ccL,0x54f541b358d63323L, + 0x6564edb1bc8620afL,0xbe0e1fb6bc6a6d58L,0x78d76d4cc87ed075L, + 0xe4942b1c177dcb48L,0x382da425ff82d99bL,0x000000000000001eL }, + { 0xe06e7855568fded1L,0x560b408d0a5c74deL,0x7b77bbb3945fc598L, + 0xa8c08370cfe47a88L,0xb0616cd18fb375ecL,0x2a0b5ad9ad76d748L, + 0xd30413703f493011L,0xa69e11003e55d3cbL,0x00000000000000a4L } }, + /* 42 << 497 */ + { { 0x7fecf0a771bdf609L,0x3c77f17427388d48L,0xd8a52ea544c38321L, + 0x5132ddff7ff0262eL,0xdd5b822ebe8e4bcdL,0x2e4ab4a7c6632ea0L, + 0x58056a95d5597f97L,0x99b300e1b6f7252fL,0x000000000000005bL }, + { 0x2bb664009a2f2222L,0xe57e826211c55fc9L,0xd9b16efe6529c0aaL, + 0x2d86e6845811abd9L,0xcaf181305148e5ecL,0xc049a71a5bdfb7b4L, + 0xa11cd09454df8687L,0xd598e94d69b7f051L,0x00000000000000b7L } }, + /* 43 << 497 */ + { { 0x74d7cb89a20d45d0L,0x88bcd5d4b12515adL,0x1faeb941f345ea8dL, + 0x999006026ef7c319L,0xdeec886fb57ca7e1L,0x8db761df7067b2d9L, + 0xb2e6ae1ab260579cL,0x64e68265cb5a69eaL,0x00000000000001c7L }, + { 0x6f3bb54959212b50L,0x821d528591a39e68L,0x1e218e8723df1378L, + 0xc9ea11eed526496cL,0xca9a1e7dcd9e2d41L,0x3204aad9b1d36251L, + 0x99e0255efb37b606L,0xd1284e4d5a48ed5dL,0x0000000000000191L } }, + /* 44 << 497 */ + { { 0xf1c9da6a27ad2ec5L,0xfd11d5bedb7fb650L,0xa0ed1c2d92df8304L, + 0x5b74a2fff7e7e732L,0x6cc6fae21e467b7dL,0x8f630f200311f271L, + 
0xcfeaa597c30da635L,0xeef29453958354c7L,0x000000000000014fL }, + { 0xb228c7be4e6c4c35L,0x4f8071a6ec861995L,0xec449886cad11591L, + 0x521916a7d12c188fL,0xfe72b9341387b0bcL,0x5c68a3203b91d2aeL, + 0xf5726b473e702af5L,0x07251e98f461f30bL,0x00000000000000a9L } }, + /* 45 << 497 */ + { { 0x535fb2d6ca7b48ceL,0x002b9584f7d6cc4fL,0x52f45ad33a95248cL, + 0x7501b7a02ea66d7eL,0x9feb38ab7345a8acL,0x4a90561945016598L, + 0x1e0b2082228eb380L,0xcfacf4979a84068fL,0x0000000000000148L }, + { 0xe41b7cb46b84c609L,0x8acb9f4520274dcdL,0xa965f88974d7d8a1L, + 0x9143c0be2ea1e86dL,0x8efcc7e8ec08a1b0L,0xa10dceafa60bea94L, + 0x38a2118b077b69f8L,0x9f3b1129ec56b981L,0x000000000000009bL } }, + /* 46 << 497 */ + { { 0xcc62a4fd61016fa8L,0x34ba576fbcf14171L,0x4f276a77b50a1ac1L, + 0x55b7c1f3dbf3ec85L,0xddf585ff68176817L,0x09f83a7f463b7850L, + 0x23001f87d980c554L,0xd119b8c6d12ea1f6L,0x000000000000015bL }, + { 0x3ca9eef95c1b985dL,0x11c46468ec7fa0d8L,0xc91bf9bb7b7e67e5L, + 0x2f745795894a4c24L,0xe927075a2d6f2a3cL,0x16334965df569634L, + 0x95e8b1156cf5f8d2L,0xb1498e3675502a1cL,0x0000000000000147L } }, + /* 47 << 497 */ + { { 0x8bbde2c84b59597aL,0x2cf819ee0ecf18f3L,0x98960e16463a1dd5L, + 0x69ac53598837c406L,0x375c963416a0917bL,0x5688d330d9b9a5b7L, + 0xc204410d1cbcef2cL,0xaa89d49bc57e3a50L,0x00000000000001c4L }, + { 0x314671e4520b4cddL,0xae039a6c4844a7f3L,0x8504d47e7244c720L, + 0x23761b887653a33bL,0xa019334b492e2ce9L,0x238325d889d81fd3L, + 0xab0d048b2a6d5e42L,0xa279aee301f953d3L,0x0000000000000143L } }, + /* 48 << 497 */ + { { 0x00a2ad7eaafd5467L,0xa8290d94f9741f30L,0x6173f2e7a3d3adc7L, + 0x0ad6ab5468875a74L,0x48e49f0324328133L,0x2d2d704a19e27e19L, + 0x3bd63d36de1ed374L,0xdebc4384c0ac7774L,0x0000000000000195L }, + { 0xeabafce86fd61140L,0xef8c38af22938df5L,0x407c6d243c98d287L, + 0x75bc698685564dd7L,0x240a470c4c9d6c14L,0x9cdc41ff8aeca79eL, + 0xcf08acaee1dd8f22L,0x2d99cbe50d69225aL,0x00000000000000dfL } }, + /* 49 << 497 */ + { { 0xa7c51b21072d7fdeL,0x527140744830f224L,0xf2bcee38e86c8630L, + 
0x2ae48a3607a11ce1L,0x5d88a9a479f6305bL,0xe2cc312d9e125713L, + 0x90af98b3735bbaaeL,0xf5c2758b2a9a7a92L,0x000000000000008dL }, + { 0xd0b7dcbbca0b4267L,0x462becdeb843a792L,0x5f0837cabbfd7278L, + 0x4033b03b9db29327L,0x19050f0d9a038b59L,0x920cf7fbbcad1171L, + 0x2b588f590ae5890fL,0xa0d6a15e8c69264dL,0x000000000000006eL } }, + /* 50 << 497 */ + { { 0x256d92b449692cd8L,0x16f7806ac8753022L,0xdfc975c9ad8910a9L, + 0x291e020a38d4c11eL,0xa2bf3aafcd6bc9dfL,0x93188d8b6859ced2L, + 0xf9a0f5634267273aL,0x1f02826bd0aca382L,0x0000000000000037L }, + { 0x7d4e7d11495e0018L,0x69a51736af7093d8L,0x9cb0839c29b16fecL, + 0x2bd6421bed5e6623L,0x29fb37de288b50d3L,0x046755a28942d297L, + 0x7b68173a922a5964L,0x13ae2a14617887a4L,0x00000000000000ffL } }, + /* 51 << 497 */ + { { 0x37f2f1bd1008aa59L,0xebbe4ee8d7877405L,0x6534b8205e1dfa69L, + 0xffd83302ab461e03L,0x220833bf46370ac6L,0x0c809ad6ed94ac0fL, + 0xe9b985e776abecf9L,0xcb9f63a464052dd1L,0x000000000000008aL }, + { 0x16a0502cc177e4f5L,0xb2021209c8a5268eL,0x89d764aeccb9dc38L, + 0xc0971fe940f39ed6L,0x1692bb66210ff610L,0x7c43540c6b860ad0L, + 0xc27a870f92c90811L,0x0af5c7b08874ba9aL,0x000000000000000bL } }, + /* 52 << 497 */ + { { 0x0c160909c0fddb5bL,0x20b497a4723ded94L,0x5798938ff2559f5fL, + 0x1f08e0b26fd8f7c6L,0x91734f0ea9ea8a83L,0x190d5cd5fd9ec96fL, + 0x0039942a156842a1L,0xf2dd3eeb57f5b843L,0x00000000000001ddL }, + { 0x8fbf191fc432fde3L,0xccc105d79b2e7247L,0x13e19408d412c84dL, + 0xd109e1b8955eb313L,0x32248ed7d485a078L,0x6f869288f7d20534L, + 0x16335bb41aadb56dL,0x156ee1e9c93ea83bL,0x0000000000000079L } }, + /* 53 << 497 */ + { { 0x6b27600514d5cf52L,0x26d3dd2557bf242dL,0xfcb2ad25e77b65aeL, + 0xa2696c484496d9d8L,0x40fb884bbed1711aL,0x5ab56199af6676e3L, + 0x3489a42d1d50bc5dL,0x267f4b1a5dd94b04L,0x000000000000000dL }, + { 0xd0cb67206cd4c268L,0x7a304e3241bd7e32L,0xe9e0393e3385b3b0L, + 0xc5af909585f2bf82L,0x5e6518cd54b9fa71L,0xb0a424955fea5182L, + 0x51454d1a5db5447cL,0x78d932d27470d4e9L,0x000000000000013bL } }, + /* 54 << 497 */ + { { 
0x4fa2b894e601946aL,0x7fe34aecc58937c0L,0x0ece9764fb14315bL, + 0xe99a9966ad9c094cL,0x1b6c9df4c2636283L,0xab8675e311af1698L, + 0x5089ae244644775fL,0xf61c9b6ad97a729eL,0x0000000000000146L }, + { 0x9708305696c187d2L,0xce73fe18ef43b92fL,0x9430722d10aa21e7L, + 0xe6678192d9d6566fL,0x20774c5d8c1b282dL,0x18d835b6a31caaa9L, + 0x5f0210647dc2591cL,0xe78685554b424eeaL,0x00000000000000c8L } }, + /* 55 << 497 */ + { { 0x5af375ed01d68e46L,0xaa1a3ae3693092efL,0x0fb33f563388045dL, + 0xe142b8de023fbb2fL,0x20abf7183d17e8a8L,0xbe65e5a0c20f91fbL, + 0x387c242d053bc18cL,0x7aeef64a0e658156L,0x000000000000014fL }, + { 0x8f4db305913dcd07L,0xdb17ccfdc527b72cL,0x5cb063552d09ad07L, + 0x9af5f2abf21ac13fL,0x76960c0cb12dfa2dL,0x0091281b7edbfb4cL, + 0xe6bb97f288dbd682L,0xfb0ec5e6d6444d47L,0x000000000000012cL } }, + /* 56 << 497 */ + { { 0xcd2cdd2e734c5662L,0x086508dd52fcd5a2L,0xd1789e1f65545779L, + 0xa3e5a37a3ebf0e2eL,0xa648b51121f33614L,0xc1b816d4000154beL, + 0x7d9c4292a8f82a71L,0xf0f81cb86f43867bL,0x000000000000001bL }, + { 0x1fe1f9962b51c7d5L,0x5c77e78006a54f1cL,0x79dc60e2dcfa901bL, + 0x2c548507bbd1244eL,0x59cce9c315ca7601L,0x2ec825718f106b96L, + 0xc1ebcf8df5dc4e24L,0xb785ee5f1448c529L,0x00000000000000d5L } }, + /* 57 << 497 */ + { { 0x3bf38b30bdd410d9L,0x1418a30dfbac23c4L,0x9bd796936f78ea59L, + 0xc439273c741242f5L,0xb82a4e2943963c86L,0x7da09c8b0c12438dL, + 0xc4f95cbc28340db6L,0xb5d5f1b32ee06cc3L,0x00000000000000c3L }, + { 0x68715c7d4916e9f8L,0x762df94538b674f1L,0xd2cd99ba52df5f98L, + 0xbdc945e1a4ed2f3dL,0x2470084a3226802cL,0xf2198726336db006L, + 0x281c597cb7580370L,0x152de2640a85f991L,0x0000000000000076L } }, + /* 58 << 497 */ + { { 0x6a2a065186b12001L,0x85e1b44db31c50f1L,0xb529629443af7c4bL, + 0xb17bcd8ff5f5d300L,0x886b618135999ea7L,0x1e7b5f91221fab4dL, + 0x5e542fe3e0933741L,0x8480aafe1c2cf270L,0x000000000000014cL }, + { 0xd0d34bb30eb3e31aL,0xdba2b2f352cefa30L,0xdccdf0c8be98f160L, + 0x06a9e3f7c6700364L,0x751a1bf37b35ae43L,0xef2b6388718b6c06L, + 
0xcae98c2a20384ab3L,0x6d888903192eea76L,0x0000000000000017L } }, + /* 59 << 497 */ + { { 0xdaed4f22f9c98384L,0x54a1d279de9d7aa7L,0xdad853c369e40ef9L, + 0xed2d7bcc72c325b2L,0xd4917c6ccac877c1L,0x8b1ab23c20238c1eL, + 0x82238d3d304f3c1bL,0x0c40a679c838bc69L,0x00000000000000d9L }, + { 0xd8c865944347dc9fL,0x7b5ec837488b47f6L,0x4e13c2451871b524L, + 0xfcc994a13ab456ebL,0x25d937b72a177a59L,0x149f53800e48d935L, + 0x27a91603d014aa99L,0x0da2ba0f7132ac64L,0x00000000000001b8L } }, + /* 60 << 497 */ + { { 0x0e040aa82121e195L,0xc5c6b8ebe60295fbL,0x6373b8e834e041adL, + 0xee6f882377096ac9L,0x9725bc9eb85d2c70L,0x72adf47e6c4bc9e7L, + 0xd4927a8002819598L,0x6888aba6a37d446bL,0x00000000000001dbL }, + { 0x06bf993746fc5462L,0x301d1031e41eaeb5L,0xf7db3c5128914430L, + 0x0fcdd9a2b00293b3L,0xb2cbbcf0a1adf14fL,0xc5da67fe89c9b883L, + 0x2f06cac6303fb018L,0x3b56905f892a426eL,0x00000000000001e6L } }, + /* 61 << 497 */ + { { 0x7351dfd061af0e72L,0x8c0280a7c4ce075fL,0xdc7bcf53d2b4d750L, + 0x302914d4a13c3af3L,0xad93d8e6eb3856e3L,0xac0833fd710879ddL, + 0xa18ecda6b53eade5L,0x8857fc9d4a68e884L,0x00000000000001b7L }, + { 0xcf85f1cdaa81a0ddL,0x115857a3492a9569L,0x44815c1fd4e35cd7L, + 0x00ea5f0f3910e239L,0x526bf994ab96b0ceL,0x73e9c0578a5314e2L, + 0xa77a5d00a17ebb49L,0xfaa340bba606f00cL,0x0000000000000032L } }, + /* 62 << 497 */ + { { 0xecd039c8b79b0b50L,0x3e979b0d7669260dL,0x9f4a0105a2946901L, + 0x9108708a15e67e45L,0x947bb04c64fd7c28L,0xa5575a2d0b583d41L, + 0x8e63253dd6b1f55fL,0xa61f3c5f117fc622L,0x000000000000013bL }, + { 0x499d5c08dca3362cL,0x1e6712a4655bc057L,0xcee49de8531b83bcL, + 0x26be0a09a3cce725L,0xe3a9b79f0de01b60L,0xcd70dd0de7e3f04fL, + 0xd7d6cb72b1a3e8aaL,0x9eef0dc3addcaaf9L,0x00000000000001e9L } }, + /* 63 << 497 */ + { { 0xc272ae2e41f1a555L,0x60d289f28ec78c74L,0xec1ec46cc2733581L, + 0x9b5e503e1072df26L,0x20ef9df8dd04b6c1L,0x3f912a7e098645abL, + 0xaa73fe18f541328cL,0x15319a5c09b4185cL,0x0000000000000031L }, + { 0x0ed3c388229ef248L,0x73fd903ccc1aac19L,0x4fd6dab3c863e1efL, + 
0x9dd6b78894ceda3bL,0x8056b1eaf09fc3c1L,0x4c69a9ca6985a660L, + 0x7054de2ef0c22c86L,0x6ab6c43a9688cf95L,0x00000000000001a7L } }, + /* 64 << 497 */ + { { 0xbf2b204cdb0e34f0L,0x177ee402b55297cfL,0x1008722a0905589dL, + 0x92be4754965ef825L,0x4ce902bdeeef76a6L,0x55b910714f318009L, + 0x96beae611484b98fL,0x5d84b61535b88659L,0x000000000000015aL }, + { 0x43c74ce31d78d96aL,0x131fbc58a17753baL,0x4e430af6dc789aacL, + 0x31f38805e9af4951L,0x834de4aaa61cbb24L,0x8437617ef678c74aL, + 0x411e5009e030372eL,0xbe15903ca2047bcfL,0x000000000000001aL } }, + /* 0 << 504 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 504 */ + { { 0xfc5ba8d59da7bb54L,0x29f3e817a7c7fbcbL,0x31a34412a40ad3fcL, + 0xe17b93692d1711b0L,0x41c7bc1716ee6505L,0x5feecffd3d1db8caL, + 0xcbb9337802f9e04eL,0xaeb67b8b091db41eL,0x0000000000000187L }, + { 0x611fe09d4c695615L,0x087334f6323b6dccL,0xf7bd51bbd53bcfdcL, + 0x98ab5c925a8bcdb4L,0xe7e9d273cccd6f6aL,0x2582b21924b4a3eaL, + 0xf8fea0e3f22c4405L,0x42c0f700d3e5a701L,0x000000000000006dL } }, + /* 2 << 504 */ + { { 0xe9f67439cf6368a6L,0xf570101a4c1c8628L,0xf3cce189c652a95dL, + 0x85a1af6b75cedbf5L,0x8b7f513d68d0ffbcL,0xc71778fb136394beL, + 0xd0e7f981d99c9f74L,0x6a4ae0f46471443bL,0x000000000000018eL }, + { 0x702bf6631f3f2c6dL,0xc1233cdecedad859L,0xa8ea4a8398c674eaL, + 0x338b3334f3c423c5L,0xbea2bbf7dc103dc3L,0xd49c4a620fbd8a4cL, + 0xa73566de57b93da1L,0x0ad24597f56838b4L,0x0000000000000127L } }, + /* 3 << 504 */ + { { 0x38e5b06f76f468b8L,0xeab47690c79fa86dL,0x990e59de6ef8cd4aL, + 0x826c78e86884d059L,0x951732fe341b20f7L,0xf249fae4449ddae6L, + 0x32c4b1e5a30a08afL,0x79fb2cab15be777bL,0x0000000000000027L }, + { 0x1c65411bc3f609d4L,0x4a8ec1376194fa7eL,0x025dd9caa41c5f89L, + 0x878f8d01e9d5aa4bL,0x34c9e12e0f364d3bL,0x5f4403d0344cf712L, + 0xe66bb7825bea60c3L,0x4dc50e44d40e673aL,0x0000000000000125L } }, + /* 4 << 504 */ + { { 0x104e0c37f8cb0cdfL,0xba344bc4b934f03fL,0xf963fc250a5aa9d6L, + 
0x8bbf6e9e662e69cbL,0x5b9945c916fca9b1L,0x6805abd3682c739fL, + 0x3d1ee371c3f18432L,0x4708b55c05e69189L,0x00000000000001e8L }, + { 0x9682d9d3143ec89cL,0x519a8b7ab2b5914fL,0x7fb197f1fa9d63c6L, + 0x6c4d61eed19fd1d8L,0x3a2a488ed637793dL,0x1dbb1b1a1ffc620dL, + 0x66eab0524b551244L,0x290e50fd434c6cc1L,0x00000000000000cfL } }, + /* 5 << 504 */ + { { 0x4515da106221c6f8L,0xbf5fd8e5bd34caf6L,0x9174ad19c7a869a1L, + 0x3e327c1357152ef1L,0x58656b31222cbbe4L,0x111892141c4191efL, + 0xa5e5ad3eb7143ec8L,0xe0646afa4536e2fdL,0x00000000000000e9L }, + { 0xb13d24486d16bae5L,0x42431b34af57b380L,0xcafe41015e097fd6L, + 0x5f794cd6e5b0937fL,0x4de1813d55270a50L,0xca0a5c09b660bc2bL, + 0x08f20f6b76579886L,0x21268e682ada1c30L,0x00000000000000b1L } }, + /* 6 << 504 */ + { { 0x9793778b98c439d0L,0x79788f592af8ce86L,0xc1520a86146d0c4fL, + 0x541063da596b5b59L,0x47823a1e9eba6471L,0x7f66a3891d23923fL, + 0x4b23c930452ecbdbL,0x40c7d3dde30657beL,0x00000000000000f0L }, + { 0xb63d974a77d535c7L,0x2e16ac32ca31932fL,0x68da3cd12b8664b1L, + 0xc895a61fcee5478dL,0x34bbac9042c0ac31L,0x5cdbd70da3267f5eL, + 0x8b4d75beb4a7ae31L,0x1555caf3f43c9c86L,0x0000000000000039L } }, + /* 7 << 504 */ + { { 0x50714fb3c0d037f7L,0x071bfa5b8c050618L,0x7f1798269d15e2f1L, + 0x2081039729e730faL,0x7948c181b45794d1L,0xc8fbdee3171227acL, + 0x1faa467045da4e9cL,0x565405c00d3fe74bL,0x000000000000007cL }, + { 0x353fc94a3f42236aL,0x684a11c27bc40d6cL,0x182ca3af52826e75L, + 0x7b24ecccd4058f7fL,0x6cadb9ba82386f7bL,0xeebcdeb53567d0eaL, + 0x904fd55c1e8c8ce2L,0x43fd20db6b578b37L,0x00000000000000a5L } }, + /* 8 << 504 */ + { { 0x842e5742310be81bL,0x9cd6a5ac815531f2L,0x1c8179b837f9f02fL, + 0x009cfa294fac713bL,0x091f5101d2aa2c79L,0x70360695ef9256c6L, + 0xd8d52d11fad0a22aL,0xf395fef240ff5ee7L,0x00000000000001abL }, + { 0xfbd0e2d746cccde9L,0x8e8a81f7f8d82fe8L,0x63bb8ce1c0766bf2L, + 0x917ba75ac70e96e5L,0x2baf9fa493d4d0feL,0xa203f382c28324d4L, + 0x36385f59b1b2c7d3L,0x746bce122c80ee37L,0x00000000000001e1L } }, + /* 9 << 504 */ + { { 
0xec99f07e8b313476L,0x9856c5b352530de1L,0x08ca437cf2693956L, + 0x990f6a868f4122aaL,0xa36c9d84d76bb2abL,0x6004fbbf1e657ef9L, + 0x6c64f52f4108d873L,0xe3e589a670224f93L,0x00000000000001a4L }, + { 0xae219487d6132691L,0xb26711505bb911e5L,0xcdd0bc5f15177196L, + 0xb093aae02605fd89L,0x26d1c416c319a26eL,0xe8916e3d6ef59cb6L, + 0xb05a13d56e79807dL,0x8a178214c039cd95L,0x00000000000000b6L } }, + /* 10 << 504 */ + { { 0x7282c91615fead87L,0xbc1aaf596565472fL,0x5a7bbfc458b387f5L, + 0x665eb3b16defa8e5L,0x1944cfc86da1ca94L,0x41aaff4d2a2de565L, + 0x4497cdbab7311f8cL,0x129ecc29f2264d67L,0x00000000000000e5L }, + { 0xa0e20df230229efbL,0x7c4e6966bf174745L,0xe96f0ab07c0177fcL, + 0x221d8cb684fd2c4fL,0xccf21ec330689b70L,0xd7daa585f24cf84cL, + 0xccc2582b438b0f34L,0x6940385ed02cce0bL,0x000000000000011fL } }, + /* 11 << 504 */ + { { 0x6e985fdcf7286539L,0xbf44ae0526699adfL,0xbfe83f9d751da127L, + 0x7df2f5c0d22871d2L,0x61a184da09f210a7L,0x69b4001a9e56e9bcL, + 0x3f48dc50f5801782L,0x114fb0a2e0bc2f08L,0x000000000000002bL }, + { 0x44eb8f92011d6de8L,0x3e0fe24320df8124L,0x494b83e071b8153aL, + 0x6b50c1e31cbc91e6L,0x260d1694b8f14373L,0xf12e1b9298782cd5L, + 0xb8bc2afe7211a9deL,0x3604c2396ebbad8bL,0x0000000000000123L } }, + /* 12 << 504 */ + { { 0x5f9a4772e44f4ddbL,0xff089398a0479feeL,0xc7984a9a309c362dL, + 0xc8858f6c2ecf134dL,0xfc9c56401a56d029L,0xd77e773207b47628L, + 0x36bb84d6d5d61144L,0x8d6e8e0df628bb09L,0x0000000000000171L }, + { 0xfd69c84a1b585b76L,0x2fe3fc195bfe6558L,0x9365c43c978729f7L, + 0x3bea9b49b368b24aL,0xb6fef7db69a7b390L,0xc85c1d51848bd710L, + 0x7b3a49fa90bbc984L,0xd65e2e2d651df377L,0x00000000000001bbL } }, + /* 13 << 504 */ + { { 0xe8eaa2b093ee1688L,0xab51392f1bd7b754L,0x70faaf847f0113f0L, + 0x0b68566cccdfa903L,0xbd54a08c9114d07dL,0x4370af4a2eb3d473L, + 0x6f5596b430650b8eL,0x1d9d998ebdaeebbbL,0x000000000000009bL }, + { 0x1e07aab3eb570ca0L,0x6cc5717883d78d65L,0x5c13b03927e07ba1L, + 0x4a8db7258593acf1L,0xf2fbba8b4560970bL,0xa7ee45fb79fe5510L, + 
0x8defa4ec7df14edaL,0x664e0d14f6249cacL,0x000000000000005aL } }, + /* 14 << 504 */ + { { 0xfe6bc737ba913d04L,0x5356a7c0e8d667f3L,0x71d72789c6a981fdL, + 0x7b33cccc1d45cea8L,0xec69e42ad2f757c8L,0x3e228959a5590d96L, + 0xc04189aa63be779eL,0x2649054dc984b85aL,0x0000000000000188L }, + { 0x332af6f228fb3abdL,0xe8de53e45031c4cfL,0x881f78f7363bb9f8L, + 0xffd28eaa31d76d0fL,0xf618633502723c25L,0xcc4d0074e497dd32L, + 0x7c24a1c114be4804L,0xfefeea3e5150ca3dL,0x00000000000000b7L } }, + /* 15 << 504 */ + { { 0x93d8a6cc1cba39aeL,0xc09391f078b306ebL,0xc50192796bbe7209L, + 0xae87581b69c4000fL,0xaaca7f02e29cde0cL,0xf495236787d2f7b9L, + 0x831f4d68d01d00d5L,0x17c135acbae32d64L,0x000000000000002aL }, + { 0xe83e6bbefa7a973eL,0xaa3d68978ed040daL,0xb2915bb0b7726a22L, + 0x84c84815c39a8a47L,0x42fb407eb29908b3L,0x9a5be91841b90531L, + 0xd534b5ce6025ec50L,0x56f74c30bd08106dL,0x00000000000000acL } }, + /* 16 << 504 */ + { { 0x312dc896a3a2be76L,0x9e77c0c361da109bL,0xb1c5379dbf0993beL, + 0x9272b9e8b7bcc4ddL,0x5437f62509337d54L,0x5c897a61cf735302L, + 0x368e21ca002db288L,0x33197cc1184aee4aL,0x00000000000001afL }, + { 0x76ec7312e5c35f69L,0x6832a65ac07874d4L,0x398dab5ac121e8c9L, + 0xf88aa2c897b47c38L,0xfc77e1c007678cf3L,0x7327a90f345a9b89L, + 0x267417a049240b3eL,0x3aa3ea97e3912921L,0x0000000000000116L } }, + /* 17 << 504 */ + { { 0x16517bb959607b44L,0x58608704772ed61bL,0x32fef9718443087dL, + 0x9f12aefdc264831eL,0x2c40d8ec3f264eaeL,0x958ef2e1822e53fdL, + 0xb4538e5fa48195deL,0x5af48f9e914ed410L,0x000000000000019dL }, + { 0x6495c1c2ac7152deL,0xd4f608df5b546ddeL,0x91ae22e465f6c27dL, + 0xf35827bfd2b14d0fL,0xdd6bb7ba3ef928d6L,0xcdb8ffc3fe3f1e15L, + 0x7d86649006bf8de0L,0x23bccaed54c4f18bL,0x00000000000000faL } }, + /* 18 << 504 */ + { { 0x34a26ac92efd1575L,0xd3cdf9b63ddc81daL,0x54f503deb2b82effL, + 0x76470a9ae9dd4f78L,0x4cdda269f255e55fL,0x4a41053e48c647ffL, + 0x2034a1a932ab5322L,0xad82c6c2573c0345L,0x0000000000000083L }, + { 0x9bb0cb5cc498d833L,0x7593917eee4055e6L,0xe3d6061ad2555ed9L, + 
0x16a5ed5c5fedc448L,0x6ce815f86f54029dL,0x844b1cb950a4ef2dL, + 0x80f5a634cd8ee69fL,0x94429987738e45afL,0x000000000000009aL } }, + /* 19 << 504 */ + { { 0x6b26293c55373725L,0x44d86d44d9b7d4d2L,0x63bb25dd8c19dcdfL, + 0xed80dfdf15e7ee04L,0x0d41bb216778dd90L,0x2247af2ff3835136L, + 0x4661b4c1c2b2770dL,0x97c716acebf37b12L,0x00000000000000ffL }, + { 0xbce5ea8d1257f979L,0xef5854b4cc612fabL,0x484669d15cf5f330L, + 0x036ff03ac916d5c7L,0x199255965a0316e8L,0x527fa32eef2b4e43L, + 0x35b5691a24cc4432L,0xbc3f392ba525aa00L,0x00000000000000e9L } }, + /* 20 << 504 */ + { { 0x6be6e36a4e67687dL,0x769be7472011695bL,0x7874176fbdc16d7dL, + 0xe1d4d7112ecfd4d9L,0xcf6d7df697d9c412L,0x55f8bd12161eeafeL, + 0x2771d1072a331bceL,0x8f5e36f2560bc2d8L,0x00000000000001edL }, + { 0xda8a3b0ae614739aL,0x5a6beb773bbf6fa2L,0x9998ec8de200b58bL, + 0x35b43df935407009L,0xe1417a07ede69a33L,0x655b564015528691L, + 0xe358cd067124064aL,0xf9274d0801093150L,0x0000000000000165L } }, + /* 21 << 504 */ + { { 0x0833c14b828446b5L,0xb5eb590f3df9e196L,0x1ecd9d2eaa99ebc2L, + 0xf09914d0f3d5ee35L,0xed839373970b266fL,0x47cdf5303cbbe599L, + 0x53aae86c3c9d6be5L,0x48397114d01a4f2fL,0x00000000000000bbL }, + { 0x49a7d8895bd1e0edL,0x7529cb4958c666c4L,0x93dd25d1d5ede656L, + 0x6e221d6536276585L,0xcd09546ffa0d3db0L,0x4d4eaa62fe48abeaL, + 0xf0ec27945bc60c2aL,0x2955763602052899L,0x00000000000000adL } }, + /* 22 << 504 */ + { { 0xea3bdb8c7886dba1L,0x59aa2a6c8d15be37L,0x17527731f8bc12e3L, + 0xd61af3db387439e9L,0xc21f6203738ae818L,0x13d3ed72d0333648L, + 0xcec655779c9e07c4L,0x58d6b0692a992bddL,0x000000000000006dL }, + { 0x81abbd9c660424f8L,0x38d2ee2d29992dd6L,0x483a59bbe9f85330L, + 0x5efa6313f18f1b3dL,0x93a79facf698990dL,0xf9ed5f5b3c66a3e5L, + 0x99a61ee05582b82dL,0x5630acce3ba9602dL,0x00000000000000c8L } }, + /* 23 << 504 */ + { { 0x6711c3b83bc0738bL,0x1a48588a747bb955L,0x64cd7a5d64fa3ea8L, + 0xd63b165af7ac67e6L,0x019bad5ab2a7d054L,0x13d53c68a775a2bbL, + 0xac13f9da74aab4e2L,0xbe3d79dd84f166f0L,0x00000000000001a1L }, + { 
0x5d591f141d79ec2aL,0x310a04bcf86e3b5eL,0x3219ccae81f9ba7fL, + 0x3058a5646f69a5d8L,0xba6d658a17b969ebL,0x04d9596f48efab07L, + 0x47608a070cf8472aL,0x443c8a3f122ffa4aL,0x00000000000001b6L } }, + /* 24 << 504 */ + { { 0x372fa55f96c43863L,0x7395007ecbdc87d3L,0x77261a297e9a1418L, + 0x9009f7081fe5569dL,0x43644eecf3186f12L,0xab5348754d413313L, + 0x7d795ade94cdd269L,0xb9c7bcce390cb5baL,0x0000000000000061L }, + { 0x70e879398b2a76c7L,0xcf9a990b4d0a94d8L,0xd8bf89e734eb7c11L, + 0xe2b79ff8f33be326L,0x62f7f2e73ccd3327L,0x6e64a25cb871bc98L, + 0x0fd70684f42448dbL,0x162769cc7f3e6d46L,0x0000000000000098L } }, + /* 25 << 504 */ + { { 0x929dec7dda33404cL,0x68dd0e8c9b6afedeL,0x610dbd629e32f8aaL, + 0xad0a251f30192ce9L,0x6b9aeb4328e01611L,0x9c984eecb9d4a292L, + 0xb53bec5485ebd1a3L,0x765ae5798458714eL,0x0000000000000116L }, + { 0x622071df31ed5307L,0x38945da1f6fa8116L,0xe1e1d1f3ca229eaaL, + 0xf8959af5acf489e5L,0x0816a41eb2abe438L,0xb00f3a2e251aa88eL, + 0x6aae71efe018e553L,0x2cdc74d817357be4L,0x00000000000000aaL } }, + /* 26 << 504 */ + { { 0x9767c342cd3915b8L,0x52206babc8b72893L,0xbe65a1ce6c589a4dL, + 0x69e29cc876ef527bL,0x8d656d54b58e124fL,0x0e645dfdaa0b6624L, + 0x89808ac8e7f38d9dL,0xed9ea95e458a43ffL,0x000000000000001dL }, + { 0xbff236387d49f886L,0x20d403d22a2ed0f3L,0x7755c59da90909d0L, + 0xecd97f616c5555a5L,0x815f269fa6ec5bd8L,0xe2cd2c32d3ec306eL, + 0xee1816903d1a8ddfL,0x94e7e62fecf72211L,0x0000000000000003L } }, + /* 27 << 504 */ + { { 0x5d3d66768d1f4d27L,0x12f0837d7b1cd52eL,0xebc608c48838c280L, + 0x66689d737564013dL,0xe48e496f630cae1bL,0x8693853fef551190L, + 0x9db620a86f7855d3L,0x51bc8e485658469dL,0x00000000000000f3L }, + { 0x55fb173c5ae32677L,0x287c372d09d83dc1L,0x5faea3396768dc7aL, + 0x3f0b476886a95fa8L,0xfd22197742d617b6L,0xe12ab48537728980L, + 0x5f458000fecafdc6L,0x86f0eaa95ba83a8eL,0x0000000000000065L } }, + /* 28 << 504 */ + { { 0x71f1dfa99806b3cdL,0xed41b6d4f784cfbdL,0x1e803f9dd8f8bb61L, + 0x4106986517cb357dL,0x2f389dd4d4040c02L,0x27693585e59684b7L, + 
0xfd400b8e04e1c8cdL,0xff53e1e1a9c47d96L,0x0000000000000193L }, + { 0x358de8b359771fb2L,0x87ec3a6fa7e440eeL,0xde6391d307c30cbeL, + 0xcf9116cc866e2bc3L,0x293676994011f5caL,0x1303ce2c9f52929fL, + 0x85e2c0ab4dedfcfeL,0x612d08a650f8610bL,0x00000000000000f3L } }, + /* 29 << 504 */ + { { 0xa143f0e16750e3aeL,0xda58596363f7e043L,0xc393fb468a82e2dbL, + 0x152e78159c18b33aL,0x07690241c322bfd1L,0xb7d34430f9a7c039L, + 0x30a02eacd91a3b33L,0x0333b1c5fe1f5e15L,0x00000000000001f7L }, + { 0x892ecd1b3a10dff1L,0xecac73e2d7792032L,0xda8792e443089985L, + 0xe921f6f77e8b9e6cL,0xad5ca8d078844200L,0xd08b2c0ec4902474L, + 0xfa5928ee76ecb1c4L,0xa6f32e2284703e74L,0x00000000000001faL } }, + /* 30 << 504 */ + { { 0x6b83ceef1e86021fL,0xc4500e85fe4f727aL,0x45a925374e43e89cL, + 0xf73d7392cc6c3522L,0x3f7d2f0484cbe016L,0x02584521b02203aeL, + 0x901c127550206becL,0x0c55c247de3580efL,0x0000000000000198L }, + { 0x7fd8681dba78f98aL,0x5084bad583d1e7a9L,0xd6024a7bf5fed50cL, + 0x9605ea57ae4b56ddL,0xc2bb29d60451b3e5L,0x339a6c82b1280ec7L, + 0x5711b1280c3cf155L,0x20308f7a86c613f1L,0x0000000000000188L } }, + /* 31 << 504 */ + { { 0xbd43c00d79623d05L,0x8d6020e28c78137aL,0x38c6c2126fb0dd3bL, + 0x9cdf9293666d2d43L,0x0f0d17dd3331b5daL,0x9e3fc10de29bf866L, + 0xdc2b1a910c04d32aL,0x94f8f744755315e1L,0x000000000000009aL }, + { 0x4dce110b54f9068cL,0x2584dfc82e106069L,0xebc73668a694f59eL, + 0x4610b4b2512ecff4L,0x6afdf6064cd507f0L,0xf7b208d6fda16ccbL, + 0xfa72234c123b45ceL,0x474e7152c4e7c78cL,0x000000000000016eL } }, + /* 32 << 504 */ + { { 0x11771bfc92e6cd70L,0xf5bd487f65c65a34L,0x1b3cb720081b98d1L, + 0x32485ec4026b378fL,0xe3c71a99529c1313L,0xc85bde7d3526ef07L, + 0xa85c138b84e3dbebL,0xe6e886afa239b5a7L,0x00000000000000f4L }, + { 0xf65240a56e2040feL,0x1596f10816af1685L,0x20b38d3eb90b3a9cL, + 0x81a30feb9c3fdf7aL,0x04031bb33731c3bfL,0xadfe3fb3ad2b6d6eL, + 0x243b36230fee06e0L,0x2bc6df9225d016e6L,0x0000000000000053L } }, + /* 33 << 504 */ + { { 0x6aacd1b33a694c8eL,0xda4292439920facdL,0x8eeaf582b2d8d80bL, + 
0xa03afa0b32a1c52eL,0xab7822fea3b16c64L,0x415e3dfd85cd40a4L, + 0xba75e4d576958759L,0xec9a95e88e58d2eaL,0x00000000000000a7L }, + { 0xa096ee7bbebf8902L,0x799d6979afd1460dL,0xd1b8d605ebc51254L, + 0xb077aa8e1c2cb180L,0xc31759f4f8fd1e58L,0x704f588bdd1bcd8aL, + 0xc22ab0d67ddf1e1dL,0x8f5d78d3968074beL,0x00000000000001b2L } }, + /* 34 << 504 */ + { { 0xc5daabcd932ff166L,0x7f5eea67a8d77d4aL,0x3d6d04eeb62e5ee7L, + 0x4736983815707ed7L,0x05738964e8183517L,0x3537ccc50255f489L, + 0x156ef32d9b13c129L,0xc2a62d7eb3002c45L,0x00000000000001a2L }, + { 0x37e128bda21b49cbL,0xd2bb496ef8dea102L,0xba43bd9cd8814cbfL, + 0xcc475dfa3707a908L,0xa86675a1ef5c5c74L,0x321544dd25ba90fbL, + 0x65ebc4bbe77d6fd6L,0xdd5e75955fcf3feaL,0x0000000000000070L } }, + /* 35 << 504 */ + { { 0x6c474c2718234b72L,0x3631873d1ba8afc2L,0x4fff947bfc6cd2d0L, + 0x91a95c6aa6187a12L,0xc18812d51a5161ceL,0xf8182c4728ba5591L, + 0xfa08a35ae05abb0bL,0x49a5f3a0f3fc003aL,0x000000000000003cL }, + { 0x1f2fc52f0253b00bL,0xdce041369e3ebd22L,0x6b326cbb0b545102L, + 0xe06e27cfc2b15e4bL,0x96294d881cb480fdL,0x62add48fc24a99c3L, + 0x8b5f2549ebbc3afeL,0xf7108acacd6c9533L,0x000000000000011cL } }, + /* 36 << 504 */ + { { 0xe928f1ace4606931L,0x2fb928a4fc874afeL,0x87ed3480ac730690L, + 0x18c8bac63f3b5593L,0x870d817bba8f50c7L,0x37b17f1e1dc4f816L, + 0x14ac19394828b6d8L,0xb66131579f7594fbL,0x0000000000000055L }, + { 0x59887289d3e0d261L,0x2e38efc348aee80aL,0x8362fe6d1c19065cL, + 0x714d078031d57631L,0xd38e42dd96f0ad85L,0x2ba3990c16bb603eL, + 0xc79e2fcf756396a7L,0x3897420f536c1994L,0x00000000000001ebL } }, + /* 37 << 504 */ + { { 0xa7d5782fa86ba30cL,0xfe671190e309d5a5L,0x2b8d391effa437d8L, + 0xccf1231d4ed20668L,0xcb2e725ba841c8ccL,0x8551c6a6d12c0043L, + 0x40b6efb484751314L,0x4cd15f5ec044b80eL,0x000000000000004bL }, + { 0x3d0f897b73ea15ccL,0xa5d6c44dacae986dL,0x82b035b4e098f68bL, + 0x2e004b3153e48458L,0x4033ced959c997e8L,0x45fb094539c64c7dL, + 0x244ba9c3d1ac61b4L,0xe05fdc81367bdaafL,0x0000000000000083L } }, + /* 38 << 504 */ + { { 
0xc7a406be012b5684L,0x1652b7ff5d60f261L,0xc2d4d16178843bc5L, + 0x3f50380e1a1955bfL,0xb2d8f5d2368f254bL,0x14b0b3f931c0c910L, + 0x7587bd7a099232e9L,0xf29fea7c7a379369L,0x0000000000000014L }, + { 0x92c5844ae54bc692L,0x0798175cf2c3171fL,0x177ef51601a94811L, + 0xe5632d4aaad297a0L,0x7069e9fe58584a58L,0x6eb4de9187428d96L, + 0x8354ca18d1679a12L,0xd296dc08a7660684L,0x0000000000000025L } }, + /* 39 << 504 */ + { { 0x1a42f1615e99315eL,0x84762c55407122fbL,0xc46bc953c8d0f1a5L, + 0x2523a0261be38aa8L,0x274145a1b36d1ab8L,0xc0d9a5f585c407a8L, + 0xa8b0e1afd8dfec41L,0x8c5bb30851100703L,0x00000000000000dbL }, + { 0x843a2657aae55c46L,0xc92b3449d905af3aL,0x6ac9ef1d9775d9ddL, + 0xf6c822000e3b1212L,0x85f51f8cdce6fe66L,0x5349a598c92594b4L, + 0x6f61700302270118L,0x70f2c99de38d8200L,0x000000000000006dL } }, + /* 40 << 504 */ + { { 0xd636d4c4da43d267L,0xa3b87528e75bbb4aL,0xa60189a1f2246ad7L, + 0xe8904e536635c4e7L,0xbf1af52d8a2dcd98L,0x7b93a05344fac70aL, + 0x8bb6fc09d73a81a4L,0x05536fb265f165fbL,0x00000000000000cfL }, + { 0xe366e5dacb409af6L,0x755696bfff43ee63L,0x3a97d43e3709ed1cL, + 0x644438f85e58f866L,0xdebd5495e8fe5577L,0x6e84a3b5375bfa1bL, + 0x987b5dd1202c78b0L,0xfc3e4908b822d808L,0x0000000000000150L } }, + /* 41 << 504 */ + { { 0x73ecabcf3acbb512L,0x9533fc1e8997573cL,0x5d5c5e92f163c949L, + 0x525a58a51ee35ab5L,0xbbd9e4986db9fa29L,0xf3233214954015cdL, + 0x710635f82ae6ad57L,0xe2dc148513b93e37L,0x00000000000001b6L }, + { 0x0b972426c062c228L,0xf68356f3c831fedaL,0xdbca88e0294d3418L, + 0xa0b2ccfb74891390L,0x15a50844452848bcL,0x3d40a30bf020c354L, + 0x78654b100f743e38L,0x5830b682071e5c54L,0x0000000000000024L } }, + /* 42 << 504 */ + { { 0xdc7e268751b64c15L,0xd64e9963851661d3L,0x6dd6e61155c36ceaL, + 0xc9dda78f95af04eaL,0x0e36204b717529d6L,0x18307d7b1a934b4dL, + 0x9a2d9ccab18f8a3eL,0xbef73bc2a39cfa59L,0x0000000000000014L }, + { 0x0ea74fcba1a4ca75L,0xea6233eedca12c9aL,0xd189f92afec9fb84L, + 0x4b60da2d2d6bc935L,0xec09409fae1a492bL,0x2eaf3c6a96ecef1aL, + 
0xbd5744de79cc9c96L,0x137f9e124b817ef3L,0x00000000000001b5L } }, + /* 43 << 504 */ + { { 0x501c4b77a206d5d0L,0xeb9e35026823bdb1L,0x9c630625e12d89b0L, + 0xe907a0599c9efd7cL,0x2fd286f722eacdd9L,0x6816d3af1bbf7f70L, + 0xf28d01cd148540fcL,0x494b69af35e1db97L,0x00000000000000b5L }, + { 0x27a04d983ee774f8L,0x7429986edfe44dc9L,0xe9ced611c5d67f13L, + 0x3064c8278e8f1883L,0xe355a38fc6690faaL,0x8f46002a56ae4f60L, + 0x2a02daedda54740cL,0xb7c3b9dad082c12eL,0x0000000000000182L } }, + /* 44 << 504 */ + { { 0x0a6349315830d60dL,0xd8eb73b8bbe24eeaL,0x0e5c491d01d78f8fL, + 0x6a64025fc3741449L,0x3cd080c70e2ed20dL,0x2daabc21d2898b84L, + 0x66dc0235f4c36524L,0x9327c374780d6ea5L,0x0000000000000180L }, + { 0x3d37d86133febf4dL,0xc14534786988a93cL,0x3f74774eec0ab171L, + 0xa3404c1e3cfac694L,0x045db8c32f3b022eL,0x8eabe9f35c01b893L, + 0x61d83f12a7ed9fa8L,0x99bcee45a35d2628L,0x0000000000000005L } }, + /* 45 << 504 */ + { { 0x762ef77032a8e116L,0xd143a6c5fdcb778bL,0xda97c9de66b5309aL, + 0x30a842f58e90a23eL,0x49c70a68e1a6b687L,0xc075044167230ac7L, + 0x95a702baf0cd790aL,0x40a23752b7e6f3f6L,0x00000000000000efL }, + { 0x7ad3f0231b216453L,0x8848cf889478adf8L,0xb59a0b8a73ffc8a6L, + 0x3890a4b18886ef88L,0x01f563b7721c750aL,0x27200ccef586d6a6L, + 0x3a00ef84acb996e1L,0xaf308a0d87ae84faL,0x0000000000000082L } }, + /* 46 << 504 */ + { { 0xed73a960cecaf2c6L,0xe3e2fc9f72f3e432L,0xe342c3c4b43440a3L, + 0xcffcd0b38eb23a04L,0x97c0e69ae0230013L,0x8240dd50988f4aa3L, + 0x450801c0934df0e7L,0x2ae8732d8b5e1134L,0x0000000000000185L }, + { 0x88e7c72895bdb2feL,0x8b89c3a481110a0aL,0xd5c325dc38403b6dL, + 0x8c9981cf462fb4caL,0x8d8ee77b4bc4127eL,0x66f0849209dd6dd8L, + 0xae3c2cf2156f42e3L,0x05a9a2f7624bcdd3L,0x0000000000000188L } }, + /* 47 << 504 */ + { { 0x4b2cede9736d1080L,0xf3493b40f7a20cedL,0xd2ae2b575506f48aL, + 0xbf15b5f1b26852fdL,0xbabf030c296d9bdeL,0xca059522434de93fL, + 0x05e285648bfbf1f2L,0x6f3025ec7358ef2aL,0x00000000000000f6L }, + { 0xf58301b7edfb2019L,0x66fd8abe39bc999dL,0xde69e5c9fa32955fL, + 
0xa238f72ae59bc937L,0x895f843786635c79L,0x26699c76bfaf8f08L, + 0xc17214b9136b91b5L,0x362d2e7465ecc70cL,0x0000000000000113L } }, + /* 48 << 504 */ + { { 0x29d30ccfdba3263aL,0x3ddec6f458f319d0L,0x6859be59aa8889deL, + 0xd21b5a3f37e57788L,0x0bc2f50b8e56e5f0L,0xb76d24e42b8c2b6dL, + 0xd7b4c5575731d66fL,0x59d33a6e8a0e50a1L,0x00000000000001e3L }, + { 0x4decdb8e57dc5e0bL,0xe1fd10aed8e33978L,0xbc70d10b17f26c32L, + 0x15026720a9d2255dL,0x68a1e644fb625ffaL,0xdd3dca9f0b24bb44L, + 0x3a192d90f8932bc0L,0x0f7120b476268713L,0x00000000000000d5L } }, + /* 49 << 504 */ + { { 0x40b536383a312500L,0x3583ad2393dd5ce9L,0x9edc8f2c2a6c32bfL, + 0x331d9f888a7af41aL,0x8c2e24be679248b8L,0xfaaddcec55d3d6baL, + 0x5389eeb3d7f4b6f2L,0xcfba884a6dbc261eL,0x000000000000009bL }, + { 0xd297f9d412c86f42L,0xb1efdde1bf731999L,0x83e2c8ec1c74c90dL, + 0x25172953e1a86c88L,0xe085e66d3ad7c757L,0x6a4205165f2264acL, + 0xfec41f5ae94c09d2L,0x7b04d8889fc2520eL,0x00000000000000d3L } }, + /* 50 << 504 */ + { { 0x780729c7cc378344L,0x57e5492132481eabL,0xed30489faeff7f7fL, + 0x915fbf7ce5f1c7e5L,0xa447efaf62b8497eL,0xfa764a3273aa8012L, + 0xb1a75f1ec5f79a73L,0x904bbface55412abL,0x00000000000000feL }, + { 0xedf27e0035d70f3dL,0x7403d8160c686661L,0x780f88caf8226941L, + 0x0f3dba77374750dbL,0x22f036ec463ef54bL,0x05c98a11b4bcd349L, + 0x0b9c1e1066c6c48cL,0xf97542e3aa4d23d8L,0x00000000000001ddL } }, + /* 51 << 504 */ + { { 0x8eb6298bd1c2340fL,0xa69b2a078c93effbL,0xa0fff41717e5cac5L, + 0x4dbda6416788219cL,0x67ff5f5b77ee2712L,0x798d318757e8c5e9L, + 0xcac3a4211adcedfaL,0xb77a995db508dd1dL,0x00000000000000ecL }, + { 0xf5fabda250f13f9eL,0x2d3248164ab98a01L,0xef848f30862ec877L, + 0xdccc661edb74997aL,0x6f1f9efcccc7a331L,0x03d6e862bc776e7aL, + 0xd178f910bdea3158L,0xb6608ef07b9f535eL,0x00000000000000c6L } }, + /* 52 << 504 */ + { { 0xea95917775247610L,0xc03f4326d5528775L,0x8d46289b5b48a156L, + 0x9447bc35cdcd8d8eL,0xc5688f26210bbfa1L,0x539bc76734f83af5L, + 0x371c1bf9804413d9L,0xd38c342187de862eL,0x000000000000001bL }, + { 
0xb2a00040b183085eL,0x43f284af2524738cL,0x361c7e8eb6834e75L, + 0xfe4ea9ff720b8af1L,0x87c7eac60299f9b7L,0x7f6f668ea0c54c54L, + 0xe4e14938275b843eL,0xbf786cd0274de49fL,0x000000000000009bL } }, + /* 53 << 504 */ + { { 0x7b47c51e6e8c29baL,0xe8b5828112556021L,0x7b9996154cccd41eL, + 0x80829cb5a6bd4f11L,0x8165e02b4261a0a9L,0x94c3b0b10516943aL, + 0xb81de83d0a6c224dL,0x91d8d20a87625a2bL,0x00000000000001b6L }, + { 0x3f36fec901bee5b2L,0x9f393611070292c9L,0xd54c6d895f498ab5L, + 0x398e1f9042264500L,0xbbfaaf64ca95ca7cL,0x6cd5062414ab92a7L, + 0x44ac4b76fb688987L,0x7bd8192888a78c22L,0x0000000000000077L } }, + /* 54 << 504 */ + { { 0x90ff1fe8d54b0174L,0xb2ebfbb8684f403aL,0x30d99502f405689fL, + 0xd5ccc821baa34800L,0xdf3b996ca272984bL,0xe27f1378e9d1ef86L, + 0x0a411e3fadf082d3L,0x54a704968bbf2c2aL,0x00000000000001d7L }, + { 0x08cd18eaba4c82e3L,0x5487015aafe09e1fL,0x4036ec0feb49e0edL, + 0xe5c93ba052d7820bL,0x6abbbbf084b8e6c0L,0x9303cf2644072ab0L, + 0xe680096617b73ea2L,0xd0910a6b92c47ac1L,0x000000000000001dL } }, + /* 55 << 504 */ + { { 0x07c321cd72888b9dL,0xffdbfd2032dfac5fL,0x18e938cb5e246911L, + 0x01e54403d22c20d9L,0x8a61ba0678040df9L,0x29b01d78b68c8c8fL, + 0x52a16c436254a93bL,0xfe54503959f8cef2L,0x0000000000000133L }, + { 0x9af34c9a1c33772aL,0x0c558a079c3fbb98L,0x91c66b9709f6ec94L, + 0xdd67fd474a98dcddL,0xe6e210db396eacafL,0x14b35f752518c8d3L, + 0x5094c31a411c5a07L,0x80aa1b8a6623c16dL,0x00000000000001bfL } }, + /* 56 << 504 */ + { { 0x9aa63d02cdcf2ceaL,0x5723ddf40c02861fL,0xe2642a0fb5f899a2L, + 0x2066e595ff9a8295L,0x09a1e790cab91b8dL,0x01ce51d5dd4de2dfL, + 0x661e1d8ba91a7c90L,0xad71031cabcc182aL,0x0000000000000002L }, + { 0x374339b7290a7058L,0x42e89d09a15f4303L,0x8ec108f35cf10478L, + 0xc13a2d048f10111eL,0x98a8386ba87259eaL,0x2039e70ef38794edL, + 0x5d142439e8b922faL,0xf9af7e06caa6f96fL,0x000000000000010aL } }, + /* 57 << 504 */ + { { 0x04879dfe6a2ed236L,0x77038ae59d6d8c40L,0x2bd91ed47b88661bL, + 0x7d66fdd1efec46eaL,0xc25adcbe1c315781L,0xc26b39782b2936f5L, + 
0xe88f1265bd9c530bL,0xdf6dc2bfadd1306eL,0x00000000000000ddL }, + { 0xb105f92eb6bf1010L,0xff5971d686b2fb4eL,0x57dcbd9c796d21d9L, + 0xca5e8768c6ca78a2L,0xd45c6f483a77d271L,0x87510561077977e3L, + 0x50a4d3b1f03a3df7L,0x6d6487051a921d24L,0x00000000000001d8L } }, + /* 58 << 504 */ + { { 0xeefdb850013f564dL,0x10005fb12555de7fL,0xa2974fb059b20bc5L, + 0x12544629137bfcb1L,0x8825f36f940b7f02L,0x628241621e47d20dL, + 0xcf8c26e06e353253L,0x9abafc166e5e0754L,0x00000000000000b1L }, + { 0xbde39c9a360d199fL,0x51dbeeca981c811fL,0x221f8008941f52acL, + 0xf807c3dd0759984aL,0xa8fe7157b30a65e2L,0x4e3a799cd1894cacL, + 0xf4b935eb2164eb00L,0x6c3792c0bdce96feL,0x0000000000000014L } }, + /* 59 << 504 */ + { { 0x1fd42bbda9198a11L,0xacddf8f2753687f0L,0x43d36ae6608ff4a8L, + 0x216457684c59d30bL,0x93bcac7770397bbdL,0x8e90bc116c01a663L, + 0x95771938531cc0e4L,0x2075e6e5ba8ec166L,0x0000000000000115L }, + { 0x06c9883d44df0f72L,0xcba38ada86240567L,0x8b1d6a7a6765cca4L, + 0xba76644c5013d624L,0x4e1261cb40b8f5fdL,0x0d0f9598c47fbe89L, + 0x6ab7a704f44977e6L,0xd07aa31acc08c06bL,0x000000000000006aL } }, + /* 60 << 504 */ + { { 0x7b8418f213402889L,0xc8b3b1fbffc1d05bL,0xc3746149da384c02L, + 0x3ea1be4b2491f86eL,0x694ebec1df96386bL,0x16e114752f4f39abL, + 0xa042cdd70b357d4cL,0xc769a1b67fa71919L,0x0000000000000077L }, + { 0xa51c483cabd2c768L,0x1cf05c6c7b0db8d0L,0x8ae60ab9eb16bb67L, + 0xd2d75ed8531d5cd3L,0x5e9c20a46c476eceL,0x794a41fc8209e94fL, + 0x0da3bd97c01ab9f8L,0x83682c72b3a18c74L,0x000000000000013bL } }, + /* 61 << 504 */ + { { 0x6dfdecfaed5deee9L,0x09adb9fae0897b6aL,0x9291b38089b471d0L, + 0x2f2df6bfd95ba0b4L,0xbc44d08fb5564882L,0x2217b76cda298b3bL, + 0x3cd6025f972f3b35L,0x9f6daa7db04ed8c2L,0x000000000000005dL }, + { 0xb08ea52022feb51aL,0x40dde4545059e8f9L,0x47d16db04a9e1b07L, + 0x47d4d1d19faa1affL,0xcc720686bd48f06dL,0xce3ffd573ce5f368L, + 0x03789475048a878cL,0x163c7421eb4ac6c4L,0x0000000000000088L } }, + /* 62 << 504 */ + { { 0x097575c0b0812157L,0xb6c24f4ccdf6b760L,0x8ae073f297c5e905L, + 
0xfde1488d6ca3a648L,0xb758040f8987147cL,0x6f894fdd869e28f3L, + 0x123cc1be7110e0c8L,0x039fa2bee9e9a359L,0x000000000000010dL }, + { 0x92986cb50a63a55cL,0xb7669237de5e1cfdL,0xd6a1c865196de727L, + 0x10345506215d79beL,0xa398a1557075f3ccL,0x82fdd1af7bfa5d76L, + 0xe8e47652a81532acL,0xe5a95521bff4cc65L,0x000000000000003eL } }, + /* 63 << 504 */ + { { 0x9b578eaff318749eL,0xf68770442b6052c7L,0xb3d507b51cb82483L, + 0x25504c1ca62c3df6L,0x57caf5be0a74d81eL,0x09fa69a1666c57ecL, + 0xdf66521cb8e4c36bL,0x02a68a2f0dd04c8eL,0x00000000000000b0L }, + { 0x928aa60db5b35d53L,0x10e8be306c50bae7L,0x361aaed1f0adf725L, + 0x01168db479a6b055L,0xb9e4511c0bc87e60L,0xbae0e9bff45879e4L, + 0xe55823837ab6e8e5L,0x4bcb4072658c5b05L,0x00000000000001dcL } }, + /* 64 << 504 */ + { { 0xfa5944257d7d3912L,0x208af690f57dc595L,0xa277e9808287e9a5L, + 0xfe15c6e2d556fa0bL,0x4b4612ee81a8260cL,0x6e171bd7cda05ab2L, + 0xc417d3d1de584e28L,0xaff5e9b98eff9ff5L,0x0000000000000188L }, + { 0xb0700568c6c3151aL,0xccd55dc01f049293L,0x7c6e61ab443d10a0L, + 0x9c2c964b0a4546a9L,0xf470d97e7fa4474dL,0x378d416f74b749fdL, + 0xc821766f1d9fea8fL,0x71c9fab02e3d8dd9L,0x0000000000000027L } }, + /* 0 << 511 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 511 */ + { { 0xbba0d1f988468571L,0x4b15325d14edf415L,0xba40ad2e5b315437L, + 0x4cce24b45214deb0L,0x33ccc948f3217ca3L,0xc9a387c88d2d7d56L, + 0x19c908921569445eL,0x6b925ff0c50a9417L,0x00000000000001afL }, + { 0x55fcfc767fa33215L,0x2b7b66383e2b25b5L,0xb765a15b602d0860L, + 0xefdebace1a252b65L,0x8117e367f315947eL,0xbdd3a76a501fa6a2L, + 0x0f224474f4b0cc16L,0x694e2254515f0dcbL,0x000000000000013eL } }, + /* 2 << 511 */ + { { 0x22b8eccb56b092a2L,0x0a47d70e736ea9aaL,0x3eefc772d64a7d08L, + 0x85408cecbb2e0f14L,0x73047afd285cb70eL,0x1732f01663297a2aL, + 0x4dfe656a4cf37eacL,0x147d6d66c0ec357bL,0x00000000000000c7L }, + { 0x1f3e59103c115125L,0x0d2d18fcbfb07241L,0x5d6aa860ba4b009dL, + 
0x8d0d9368806ff8edL,0xcc5ca3701a05c049L,0x7bf21caee9ab4c41L, + 0x6632c3e0001d5ccaL,0x3a4f99b6b60e5593L,0x0000000000000008L } }, + /* 3 << 511 */ + { { 0x67f918a4e83a74c7L,0xb5b93d318aeeb8daL,0x04f033bddf8b5ab2L, + 0x8625cb2864df3389L,0xeeec07936b2c8470L,0x7af4facc85a3edf6L, + 0x3c591c09ce38c9fdL,0x56714b52dc84e5aaL,0x000000000000000dL }, + { 0xa6fa0f8d93455dfcL,0x9436150e130b8d9fL,0xf87d15ea9c4e016cL, + 0x9845852f510467aaL,0xe512bdcfb63d9a99L,0x820a8b737c6c01ffL, + 0x30dcf512698b9654L,0x3917e6a8bc52d8e3L,0x0000000000000062L } }, + /* 4 << 511 */ + { { 0xaed7666ae6b2853fL,0xa1fc0332be96140bL,0xb99a5dc81c1e38b1L, + 0x56194675f38c5243L,0x16c6f80e80d38681L,0x1117841314205f46L, + 0xd2dc67bc6ee03089L,0x5e18245ed3dad965L,0x00000000000001e3L }, + { 0x389e59cafda7741eL,0xf341d649259e4dcbL,0xec15d27a260b32b0L, + 0xf3a1a08207640ceaL,0x15ac31456e811313L,0xb9027ce47cf68896L, + 0xb43e4fd77008a356L,0x541aa5b4640c2c60L,0x00000000000001c3L } }, + /* 5 << 511 */ + { { 0x9a29c770d75994f6L,0x324345412f6c1df7L,0x8339d6aef2b285a4L, + 0x8bb1ff0a6feaab2dL,0x8f9137cbcc3a0a42L,0x19e09b7c1e385985L, + 0x8ca610ef45e2f771L,0x8482833a4b37ae8fL,0x000000000000003eL }, + { 0xa305da774a619736L,0x9f3bb1d47475f158L,0x5b30ac0741aa2ff4L, + 0x19963057a9bfc2f6L,0x89a3d94d1f12dadbL,0xb5da354359d81991L, + 0xcaa559b88668161fL,0x2cbb1a1af8ddae34L,0x000000000000006cL } }, + /* 6 << 511 */ + { { 0x3c32f4e34cb99695L,0x0db56646c1241e0cL,0xa4cea13b28515d66L, + 0x1c8f63975f3ab5dcL,0xd466a1b34defd3ebL,0xcdbb66b5e6d860d2L, + 0x26fc2fd5fe87183eL,0x02d81a03d18233a9L,0x00000000000000d2L }, + { 0xdc570644caa08959L,0x7791cc35cf763c20L,0x3c0dc132272a1068L, + 0xd7ad702cc8a74578L,0x7689c46eada1d72cL,0xbfc2e06f2b8b4369L, + 0xde85e36ec0bbd6c4L,0xd879e086f9849084L,0x0000000000000077L } }, + /* 7 << 511 */ + { { 0x35702db34e0b5199L,0xbb02636c273c5fdeL,0xf73cabb563fc329fL, + 0x1a280b288c1addefL,0x4844dcd4422f07d6L,0xdd6851167518acd7L, + 0x9ff8c1efd2bd73e5L,0x9465a3b26afb5416L,0x0000000000000194L }, + { 
0x1d68a6e413b729ccL,0x10758206a54e8db6L,0x20f8fbba9708de79L, + 0x1c5eefd2c1abd968L,0x3c7742c2525f1fe4L,0x3175190b2c612f17L, + 0x4020af7742207e24L,0xde4bed37fa5fa6c2L,0x00000000000001a9L } }, + /* 8 << 511 */ + { { 0xef192aeddba16581L,0xff1342112e8db0fdL,0x32551c61bdca8d0dL, + 0x96e1ff06c55b9aa3L,0xa211b1107e4c635fL,0x2708e5427c4b6b4aL, + 0x70ff7c3fa6575526L,0xd26e2db636092ec8L,0x00000000000001aeL }, + { 0x1fafd3c2533251d0L,0x1602462d8a9a439dL,0xc057595a1cc5af27L, + 0xa27197f3be839c9aL,0x03c1cc4bbfc1318aL,0xb78c7b7f34d0a378L, + 0x0f9d754e161b4257L,0x8dcf43cbf355d916L,0x0000000000000194L } }, + /* 9 << 511 */ + { { 0x9181244f3e0d4347L,0xb189acf795b45d19L,0x8877efde70a55c91L, + 0x924b0d70c1d9c777L,0x7c3a5a49b703ad70L,0x72c4fad3e44f1b1eL, + 0xc1efa48525544d96L,0x1f6ee5617af50683L,0x000000000000000eL }, + { 0x518630dd1fa2e8dcL,0x3507130b38be0359L,0x989190f52993ccfcL, + 0x793361df738a29bbL,0x44700a46d43eebf6L,0x03622bdca60b214bL, + 0x956992cddd6703c9L,0xd85935a3d7ee66dcL,0x0000000000000148L } }, + /* 10 << 511 */ + { { 0x51df490ddc3577ecL,0x6d8a70e97e28be3eL,0xd87a7ed31de95e73L, + 0xf97c968da6c7086eL,0xc47b64dc6b4afe00L,0x489f1d988b432676L, + 0x9f2b7996f72c2153L,0x4acd5ae8f820af70L,0x00000000000001a9L }, + { 0xa5cc854bd7263fcfL,0x7bd94b2f864febf7L,0xb3def0ff9460c5d4L, + 0x5f0766b5042d47b5L,0x9e8484f28ae51b32L,0x26ec4ea6f8c729e7L, + 0xf279d7336c2ad315L,0xb2d22e6edac3e8d0L,0x00000000000000f6L } }, + /* 11 << 511 */ + { { 0xc9b6366728cbdea1L,0x6920862b9e8756dcL,0xd522608028c90b04L, + 0x52f9c421f5bde803L,0xe075ad1141b5ce80L,0x2aeee7e51c04e56dL, + 0xcc6053a428eb7657L,0xb523d16d4cfcb113L,0x0000000000000041L }, + { 0x33028ceb6e812f6eL,0x654b5ffcc2cc46ceL,0xc9d28de56f143379L, + 0x6ce0fd8343a626dbL,0xbcc0a10be9fdcf7eL,0x50b2f0747119a936L, + 0x91b1782d11f4af5cL,0x7472e053ed0b5481L,0x00000000000001edL } }, + /* 12 << 511 */ + { { 0xb2b4d4085967b43cL,0xc660c6c1f35923d2L,0x696d9140a27fde01L, + 0xfcd766eb259432a9L,0x6831fde1cad2a4e0L,0xa157be88928838eaL, + 
0x4b31c1d6791d2740L,0xf1958bba850a1760L,0x0000000000000085L }, + { 0x23a6800ed777e0d8L,0x84d2780d71cb04d2L,0x691076f7d7a8f688L, + 0xcff1a453e2237b7fL,0xb1d9dd8f0a9037d4L,0xface3a31d9199814L, + 0xff7bfc81da563993L,0xa0f655abc61dd36eL,0x0000000000000111L } }, + /* 13 << 511 */ + { { 0x9199785a30677670L,0xc74d3404c0ade42eL,0x8ce0d96b0613519fL, + 0x3c7c64533e40200cL,0x02cd19101dbe0935L,0x0c37218968cc3a24L, + 0x51887d02a1588133L,0x33757613c105adcfL,0x0000000000000032L }, + { 0x74a955d103f1fa25L,0x518b2de6fb22b0d7L,0xfa48229fdfe08961L, + 0x47df7eebb4cb58d9L,0x5b9c683f4601e92aL,0x4a3a2c3f81d5dfd8L, + 0x15d6b44f4a5c99edL,0x6646f77e020eb968L,0x00000000000001c5L } }, + /* 14 << 511 */ + { { 0x43c5bdf15d73ed9aL,0xe3766738e71c6e7fL,0x625cb8c43947ea97L, + 0xadf7ccc082241ba7L,0x629f45b307329130L,0x9cfac1748d0c281aL, + 0x92069e577d14ca06L,0xca121495dc9f83a5L,0x00000000000000cfL }, + { 0xf4e6da377de97fccL,0xd3a271b7025d39dfL,0xbf2ebfe8389e12eaL, + 0xf434f29932f184f2L,0xfbc3ccb9366ef539L,0x4e159cdacdef4a8fL, + 0xc311bd82b0f0bb8eL,0x380b2eb1779669adL,0x00000000000000b8L } }, + /* 15 << 511 */ + { { 0xfdaa86c09b3aac40L,0x7dad3911133885d4L,0x09897075e7c6b586L, + 0x79f816ab78daf275L,0x914786aab75eab7aL,0x7713bfd59584eaa4L, + 0xa7e01a7440496a4bL,0x165c6601831641c8L,0x000000000000010cL }, + { 0x73be34b6f05c5a20L,0x7ef7738886b6d280L,0x872cfe9eaa2fc4c6L, + 0xa56cb3513c57c990L,0x7b533f43964fd18fL,0x036822e86d1c2d7eL, + 0xf59ffb19ddce594aL,0x823a71fd3d012d0dL,0x00000000000001f3L } }, + /* 16 << 511 */ + { { 0x9a0062d31c2df559L,0xc130a85d2d4e9996L,0xb226e6cf370a02f6L, + 0x3fa94ade7386b10fL,0xddec1e75d4576547L,0x0251109d1dba7f8fL, + 0x9a3f5313c3a003a9L,0x4742778bab26e65eL,0x0000000000000094L }, + { 0x828ca5adfb997e14L,0x3826b0be65001ed7L,0x53fb9cd3cfec55a5L, + 0x53e96d2831a1b7e7L,0xb5cbfde924f2dfb5L,0x3a26a665e944f4a4L, + 0xd637639834e527b2L,0x709e156dedadf5b8L,0x000000000000018aL } }, + /* 17 << 511 */ + { { 0x35cf82a447dc8eecL,0x321037e6a09222b5L,0x27a2eded92b90121L, + 
0x27f5b4de081ba487L,0xc7855daee753515dL,0x4454e808f3591d41L, + 0xb0b1bac6d009559eL,0x89b3ca825e2e4cd7L,0x00000000000001e9L }, + { 0xb84d611afe9e0356L,0x1c90bc63e04ffc14L,0xf6a8250546133950L, + 0x95c570d8599722d8L,0x6bda993379908046L,0xde91eb54417a3742L, + 0xb87c0e1adedebb29L,0xe1a3b0370fc4207fL,0x0000000000000166L } }, + /* 18 << 511 */ + { { 0x412eb9d00d3825aeL,0xed91e11c20b658ebL,0x0da403aee6918918L, + 0x8ec67ea3b084b920L,0xa213aa3a1f3d3ff2L,0x21dfb5519efc3cadL, + 0x30ee2a8a19845c68L,0x07b47d281835550aL,0x00000000000000b6L }, + { 0xb3ef46192657bec2L,0x607d10122cc2c528L,0xa86d31edc07db93eL, + 0xc07b3da980c25cf2L,0x5c56cd075c83c1f7L,0xf32a98b3a0d7ea59L, + 0x5f1e6fe10cef77c1L,0xfad8a85ec5b5a9b7L,0x00000000000001d5L } }, + /* 19 << 511 */ + { { 0x31a101bb4067b430L,0x5c825d7d1311d821L,0xcd7b3b4589274960L, + 0x3d1a68f1f9774bd2L,0xb0807f2674397634L,0xa36f44f5f95f9f93L, + 0x55f6d9fbd1943507L,0x2f86208dec85260fL,0x00000000000001c9L }, + { 0x3dc94ef8f634d6fdL,0xb924b8297d89b934L,0x2479d0aa6b8f4da3L, + 0xd6c651f63d5f8200L,0x43215c18aa04ae4aL,0x76dcb5b5408f7727L, + 0xae66f252907a0f8eL,0xf528e4222387bab1L,0x0000000000000046L } }, + /* 20 << 511 */ + { { 0xab146802bc0d4745L,0xf767df8ffe603917L,0x2bbae80c1988e88dL, + 0x69e9c7e1cfde1857L,0x25a609f58a7c8c60L,0x46bcbd1b57cf84d4L, + 0x89ae0be245c52763L,0x13482d1d98558305L,0x000000000000003aL }, + { 0x66ac9ee936a3d944L,0x6822ce1171ec895fL,0x8c5f2551938d1792L, + 0x7d5157bcdc481d3bL,0x0aab532d63fa5debL,0xad1fb55b99adbfe7L, + 0x5338d6ca9fcc46deL,0x84b2a63bae3aa998L,0x000000000000018eL } }, + /* 21 << 511 */ + { { 0xb2dcb83ac5a10aa8L,0x93a449f901878026L,0x91f2d986f475e7a7L, + 0xc6a6be5fdd461273L,0x8dcbd0524f0e65daL,0x48d908800b3a8247L, + 0xd0306f8630fc4481L,0xa4ed5cbd7f05a36aL,0x00000000000001edL }, + { 0xe6a4688114185d74L,0xd20cfe6e0d17aaa9L,0x48b5ebf4b67cbf53L, + 0x00ed88d0eca0b0e5L,0xa184ea8a01795a9eL,0x2a26f41a8b1e4ab4L, + 0x1640fbfcf7e86bf4L,0x49379fd35b08a012L,0x0000000000000160L } }, + /* 22 << 511 */ + { { 
0x220024890128270fL,0x60df1294574584f3L,0x80da07d1339afaa1L, + 0x49cb3fb54873bbe5L,0x5e274ce99e32907eL,0x2ab695c69c2bd23bL, + 0x63a53145084084b6L,0x881ea559b7c43b59L,0x00000000000000faL }, + { 0x3d9de6ffec7f7938L,0x1b40e323c53223bdL,0x2b6f33785d3fce2bL, + 0x5a16083f94bf2ff0L,0x4da0ae8fc30458b5L,0x743dfc12f0d0b1c2L, + 0xffe8b8591863d312L,0x1fcba995114da460L,0x0000000000000065L } }, + /* 23 << 511 */ + { { 0x0ef498c2fda0bc58L,0x84f03c51e18d1f4eL,0xada00b832b89b834L, + 0x2ac43953cbf7d8fdL,0x8a7a37ddec0eba45L,0x24ed7dcfadafee4dL, + 0x2e900677c0a554d5L,0xc961ce69f179b0c6L,0x00000000000000c5L }, + { 0x21769ee979d32556L,0x14c417fb317b715dL,0xa2848ec69b034410L, + 0x651c9465f29126dcL,0x08446de16d88dbd4L,0x3b03622149b26d96L, + 0x2d9102293b1512a3L,0xc909f04560ab613aL,0x0000000000000032L } }, + /* 24 << 511 */ + { { 0x119638ae44d87d78L,0x8980cddeff77f891L,0xb001aa0f2e3c9f32L, + 0x8cc959d96c93b9a3L,0x3889ddd42516f042L,0x390fb5c1b2d34007L, + 0xa39b106f093e4a57L,0xcb1856652168bca7L,0x0000000000000015L }, + { 0x85adb26d33e39fbfL,0x68b6c5946ed3932cL,0x320813886ff4fb12L, + 0x24b50dda14c7e9f7L,0x3e84edee6eb8db2cL,0x0d0879a304222282L, + 0xd091b141db79827fL,0x41c1ea1c63d30563L,0x00000000000000c5L } }, + /* 25 << 511 */ + { { 0x33b7058ab3cb6db3L,0xe247bc85c826ca4dL,0x430cecac23018f67L, + 0x2d633897b9e1fe93L,0x93de63d283b8bac6L,0x84deea4de2294567L, + 0x9116e7bfd9d60cf1L,0x32715396bb9627e7L,0x0000000000000020L }, + { 0x49452782bbea6240L,0xa0247a9b29a23527L,0x7576093c9e6ad610L, + 0x3d7cce9ba983807cL,0x3cae59f6e5b737c7L,0xec383fbf5090c8caL, + 0x3c09262f7fb72ae4L,0x3cd1a9d116ad7cceL,0x00000000000000a7L } }, + /* 26 << 511 */ + { { 0x394547b643684ad6L,0x27c93a87a4efc0b7L,0x97914b633832c7ecL, + 0x89291edd995801b4L,0x1b7e633b9ed8dfb4L,0xa2a21e650cf5637cL, + 0x4afebef08428be9aL,0x527fc65f42269b08L,0x000000000000011bL }, + { 0x30099375278302baL,0x0b15cc9a6b1ade38L,0x6b6ad14ea4bb5288L, + 0xc0dbbe3f6425657eL,0xa611247f7bc91657L,0x0ff0aa026ba54b82L, + 
0xa695a6941a44fa38L,0x7ff5f88e2e491f15L,0x000000000000005fL } }, + /* 27 << 511 */ + { { 0x98fbc213d8430bb6L,0x785226503a768732L,0xc44c1313c77b0847L, + 0x61de63d2a21a8f5cL,0xc57d7ad64ee27d3cL,0x0d70a8414f19405fL, + 0x696647aa7f6721ecL,0x067d6891c409e1e9L,0x0000000000000025L }, + { 0x4188632f2b8f5085L,0x8e9b1a3789693e29L,0xd12668d218a2eb06L, + 0xf3aed630596b2ef8L,0x1f8368bbc0589c86L,0x4722d8261eb0b305L, + 0x797cf073488c1be0L,0x835aba08032ba315L,0x0000000000000154L } }, + /* 28 << 511 */ + { { 0x6994d68d7acdb128L,0xea6c1bcaf3e42194L,0x45b4e1e5c14a3782L, + 0xc1d3ff5ca5089907L,0xc7d8b8bab2ecc0c8L,0x5756897b8af09517L, + 0x54248bc42d37631bL,0x4ec2b153748dc34dL,0x0000000000000018L }, + { 0x0be76102bcd4b8f5L,0xf59b6d2fae1fbc73L,0x3a82d42614de6312L, + 0x603fd1ba99d221deL,0x31371c44349ada8dL,0xecb2cfab22e3eea0L, + 0x5134413e744a9e46L,0x1d91fe459e0de97eL,0x0000000000000042L } }, + /* 29 << 511 */ + { { 0xbe0d267fabe7ca12L,0x00aa0e5de45c1440L,0x3f943a133936bcebL, + 0xbe7e6ee2c10a5afaL,0x9a05de6433854a57L,0xd2635265bd538c38L, + 0x2d8b1ee1bc23b270L,0x1b817b956662d16dL,0x000000000000005bL }, + { 0x0b34b0d6993f32abL,0xd29e841ca594d824L,0x659f13d59ae853cdL, + 0x50bd99b1f134a1ebL,0x96185d6777657f3bL,0x9866e7a14b16d264L, + 0xd6f7f2ac20d2efc8L,0x27030aa51ea5a842L,0x00000000000000c1L } }, + /* 30 << 511 */ + { { 0xca2a69328e4c1bb9L,0x5406d95b89a810adL,0x181690a007059605L, + 0xf6b80571db36c201L,0xcdb7ceefc0da0fb1L,0x0e0f2c08382dd238L, + 0x530c9a2e943fd3d9L,0xc14d7c55a5f3e1f4L,0x000000000000005fL }, + { 0x38c5afdfccf6ebc7L,0x9f2e2d25f0d0c1b8L,0xcb96d5873af9bf69L, + 0x150f940c89391daaL,0x5d80c9765901c338L,0xca2c7c5f7134a0b6L, + 0x615abd199a37b224L,0x03504959e2b8790aL,0x0000000000000067L } }, + /* 31 << 511 */ + { { 0x99d98ce9c151c763L,0x45d75a156c9f8403L,0x398ca5858ad4d96cL, + 0x5aea77ac7ab5a6e2L,0x31daacac60ca3cd8L,0x0be70a0c33089a45L, + 0x37d11b417ae57ec8L,0x0daa79b92b36c7a1L,0x00000000000000fcL }, + { 0x86fe4b66e6bfbeedL,0x471e1284dbc755aeL,0x93f985d974ad1658L, + 
0xfc9301d7c5cfe717L,0xcdfec01a08b28faeL,0x38a8a97d15684b4cL, + 0xef3c8e4da413cd92L,0xa3a9ea581f848825L,0x0000000000000038L } }, + /* 32 << 511 */ + { { 0x1d45c5c8dd04afceL,0xce141a50bbd9c392L,0x44ca5fa5c9e0fc05L, + 0x4b9496ff60ccfb3cL,0xbe260bc6db5552a6L,0xa0eabfdd7d3a7a09L, + 0xe9cc06ae1c77c1f9L,0xfa792d042aabee9eL,0x00000000000001dbL }, + { 0xf2224e6f72765171L,0x3f9ea54811b89e04L,0x1e07c6eb138f1a43L, + 0xd9b61817621f1f7eL,0x08e851aa2f69af1aL,0x3e75da6fc0d63f32L, + 0x49a72d31f0f0d74dL,0xfccfc561c516a10cL,0x000000000000009fL } }, + /* 33 << 511 */ + { { 0x50f7e4f10a927e25L,0x21a09f957b043982L,0xe57eb5d0a7f4ae19L, + 0xaa5f21c688c23fa0L,0x345eb906b671a416L,0x3f75a771e1cd246eL, + 0xdcb1400211d9833bL,0xcf9f9effd4757fadL,0x0000000000000027L }, + { 0x8cc47957a51fbfd0L,0xf2f7514db21b49dcL,0x342c1525b167a27fL, + 0xd9189df73bfe544bL,0xf1e992d72ef945cfL,0xbdb74deff88ba1f9L, + 0xfd978291b82afc9fL,0x64c2d06b8072b2ccL,0x000000000000004eL } }, + /* 34 << 511 */ + { { 0x02b8ce6504071b42L,0x846c0eca3f395d37L,0x5f02aa27aa3c2f7dL, + 0x6812ff735c5b86fdL,0x617425a2fed24a25L,0x8c4aa371a388dc78L, + 0x866aabc850041de7L,0x8634b3cbd1ff9bdeL,0x0000000000000193L }, + { 0x0b64b43ea717f98aL,0xfc2f70a8c9b0fe04L,0x00811a41e3545cc7L, + 0xe073c45bc396da28L,0x56fe07f4905ade48L,0xb123dab359ca3c9bL, + 0x155282356f0803e0L,0xd66e80e281b40eb2L,0x0000000000000141L } }, + /* 35 << 511 */ + { { 0xe6df057a87307af0L,0x57673587e4a49774L,0xb4626535243828f5L, + 0x74f829480d6b51b3L,0xcb1c59f43de3f59dL,0x34e8ac234df85c57L, + 0x363c7f11a6d0c631L,0x050904e0163f8845L,0x00000000000000ecL }, + { 0xa2d31e7ae522c580L,0x2192828a1418a8e4L,0x8bf7419f81f5c03bL, + 0x610522a6cf674c6eL,0xd038897d6b1d1b0cL,0x3ecb81e6b9a1192aL, + 0x0e88a8d53e80564bL,0x395f9bf85d76c33cL,0x0000000000000032L } }, + /* 36 << 511 */ + { { 0x5390df114314f744L,0x545733637f8ff5eeL,0x22442fa33849bbf4L, + 0x1c462aadb46045f4L,0x2b0758050817d8a3L,0xffd73fc3f5669e27L, + 0x546917fa1aa20f00L,0xb7143ce6713fb9a7L,0x00000000000000b5L }, + { 
0xa4c372206dd0bf11L,0x52731f73af632b4bL,0x7c309663118ea369L, + 0x076a35880a8a687bL,0x07e93992e0a835c4L,0x7e5f1bbd4bea801cL, + 0x3b3f6fdc83bc661aL,0xdb55c7e2364cf3a1L,0x0000000000000039L } }, + /* 37 << 511 */ + { { 0x4f6b908de90be946L,0xe1f9650ffb405564L,0x1db8139fdd3a7d6bL, + 0x71d6a50705d77cd1L,0x3b858f748f73a748L,0x217b6c7d9298eb99L, + 0xbb713b613fca2518L,0x676e6f89d0dea256L,0x000000000000016cL }, + { 0x05c293ef29c8372aL,0xd3f26db950326885L,0x66177b402cea076cL, + 0x5e590103c94a3e60L,0x12f5268c5f24c0e7L,0x79cb7296cd30051dL, + 0xc7c9b49b4b77f8fdL,0x8a19a8c92fa04a11L,0x0000000000000002L } }, + /* 38 << 511 */ + { { 0x22401bc6535dfb08L,0x893c44e95b4abfddL,0x3ea62c91a4e38edbL, + 0x5b2bc290ca12f3ffL,0xec707dd69051cbf2L,0xb07a24f8e229c022L, + 0x30ef2af96883d74eL,0x2ff30fbdcb8b1f4aL,0x0000000000000108L }, + { 0xdeda6c35fb49ddd0L,0xcbd14fcfe75c49cfL,0x1e4bc372ab2fbcccL, + 0x9b3a3efac145242fL,0x6ce05f8162c39e2bL,0xa5bc8d92adbfae6aL, + 0x171424988855b100L,0x29892ee2b0f012b1L,0x0000000000000056L } }, + /* 39 << 511 */ + { { 0xeeb9875d19d809c6L,0x8dd24196e2064977L,0xabf1f5c25807f8cdL, + 0x317fcb9d75d9af79L,0x0bd27847641a0dd7L,0x52fdf8fddfc25539L, + 0x0bb20880b72d79b4L,0x7403cab9b050f06dL,0x000000000000008cL }, + { 0x39ac8805f83224eeL,0xe74625294c06d65cL,0x9dd00df370d5aae3L, + 0x8ddaf294a460289eL,0xaad00c0b8c2b12c8L,0x897fb3d5c570bcabL, + 0x7f069c7b7d06c691L,0x40f5a2d2f44b375fL,0x0000000000000147L } }, + /* 40 << 511 */ + { { 0x66fba7b2e7441cb4L,0x9703fa4d0edf3396L,0xff524baf4f3a5b75L, + 0x9a8f25db1d173b34L,0x29d81a1a1cdb278aL,0x4cffa124a410cdeeL, + 0x5388c4036feec1d2L,0xcd9411a470cc643fL,0x00000000000000cfL }, + { 0x7b229e6550e05667L,0x3175d10dbbdddf46L,0x9dfefe2390b31338L, + 0xf97db78253a89eeaL,0xcb8acd29da67068cL,0x61c34b5725f19838L, + 0x856cf76a6c605d31L,0xf4ae77b201effc73L,0x00000000000000d0L } }, + /* 41 << 511 */ + { { 0xaf3cf777a26b8056L,0x5c43dd61241360a6L,0x58b083c67fdb2f60L, + 0xd2e7c4c8d351c4e3L,0x3dd61d56f94892e7L,0x195876a7e8995a49L, + 
0x01a0a17dabe083d9L,0x52beb3136552590eL,0x0000000000000066L }, + { 0x1a3347effc405688L,0x26bd6de805ecb62cL,0x857d636da52f40baL, + 0x58a784783188a7a7L,0xe0ea2b9cfce84abfL,0x2fdf6d3e5a673decL, + 0xa18d4112bde7405cL,0x4271c5f9c015e112L,0x000000000000010bL } }, + /* 42 << 511 */ + { { 0xf370ed52a459ca74L,0x7cf21ea4025cd3f2L,0xf925a200a1bbac0aL, + 0x3abcad9afbc36076L,0x10ff56a08e382f60L,0x2f72b3e29adb35bfL, + 0x57e668316c2a7131L,0x6201c63b1cfb3b53L,0x00000000000001e6L }, + { 0x6fc57e654e3919cdL,0x5d34bc0e789ab429L,0x0f1401b897c4f56bL, + 0x1ab6888a8678c37cL,0x90c07e9d168f4c19L,0x9c4fbaadf2f7efa0L, + 0xd917c926142b5340L,0xd92f2667a5ca3f5dL,0x0000000000000173L } }, + /* 43 << 511 */ + { { 0xc8625d648fa89b66L,0xd7a2e7ef484c0085L,0x296d5cec57a26972L, + 0x624e03edc087e675L,0x3eaedbd4bcc8ccf4L,0x24267aa04f5cab98L, + 0x494f2a3554d6a0c3L,0x4782932db4554a33L,0x00000000000001deL }, + { 0xf2ea212c17c4784aL,0xdbfc5551c152619cL,0x2643df61118f875dL, + 0x6577ff540fb8865dL,0x2653fc8fabd035f6L,0xc1ae1c669ed7ef7bL, + 0x4d602fb24a25ccd2L,0x6391a7332afe8120L,0x00000000000001e8L } }, + /* 44 << 511 */ + { { 0x04b2f9f2ccbe7de0L,0x8870c0dae6c58facL,0x69fceb5b23174377L, + 0x8f1ad157f12131c8L,0x3e925501ee3048cdL,0x96a0b31e386d5003L, + 0x586a530340e9c76eL,0x517d83e314857a31L,0x0000000000000115L }, + { 0x4d02d42902d8ecbbL,0x68b811f62e9c8238L,0xe213c3949310f7b5L, + 0x200ec52ba8d8187eL,0xd80c192da0fb4110L,0x35c985ff7c450678L, + 0x43577470db1e8f13L,0x6810ef036900807cL,0x000000000000012eL } }, + /* 45 << 511 */ + { { 0x379ee09f103bd6f9L,0xc6455764cd4b3d01L,0x785ac6571d53dda2L, + 0xb6c194c93eba3017L,0x6aec6839ee84180fL,0xc94dff5ad09c51bdL, + 0x933c64be4038c3caL,0xfb3e230565c87b9fL,0x0000000000000077L }, + { 0x04b71ef8ac8ab240L,0x6a16d0e5193fd7e0L,0x86fcef220c40a10dL, + 0x28228cc910f211c4L,0x5ede88e706ae9d8fL,0x67067909a5f0e8feL, + 0xad79f0c8df338ba8L,0xe39836048ee0c62eL,0x00000000000001f7L } }, + /* 46 << 511 */ + { { 0x7d703295f5e4f7f8L,0xc3fdc161143af599L,0x25ae8c6d10a6b4ecL, + 
0x8f3ffce44eda9576L,0xa2383338f649674fL,0x38642f550eb3892eL, + 0xeabb7046f0cb8779L,0x57dabaa2dee84d91L,0x0000000000000154L }, + { 0xbb850ab36173a285L,0xbca5655928b6fb38L,0x8e3428380df79eeeL, + 0xe52853dffecc7940L,0x792e753ad9dd12adL,0x7e2cb3406420ebd6L, + 0xb585ab566d827017L,0x2dc2202c5a41aa40L,0x000000000000008eL } }, + /* 47 << 511 */ + { { 0x23845c7c715a4da2L,0xb337c0ba6e0aff7aL,0x41b68812f9a9f520L, + 0x4a10ab9bad1a5c83L,0xd8fcea40ccc1a37cL,0x51d69fdfb25a1b3eL, + 0x83ce07a3022b27b1L,0x762e56c9f29621f0L,0x0000000000000017L }, + { 0xc193077f19482f39L,0xcd5eeec001d3bf64L,0xb2d5f8a51d7e8890L, + 0xf63d9476672f223eL,0x6cee15df7c3f98abL,0x17e3a150da97838eL, + 0xcedd10e73c50762eL,0x22d5f479795cd480L,0x000000000000018cL } }, + /* 48 << 511 */ + { { 0xf854e6e695c41349L,0xe7d4dcb0476ad721L,0xcaf79afb764dcaecL, + 0x99a065a2e85595acL,0xcb613dd111beed86L,0x1ec36351b5a4d300L, + 0xa1acae740f8b5580L,0x6d6bd5a7708071a2L,0x00000000000001a9L }, + { 0xfcac06c11f4f15a1L,0x75a9003c1050adf6L,0x61b71ef7ea874ec8L, + 0x0de589f5ba499bebL,0xc1470e300d93fee4L,0xc981a814d175ce74L, + 0x2549b4a3bc96e422L,0x14d44f006728ed6aL,0x0000000000000085L } }, + /* 49 << 511 */ + { { 0x798a73bec4e4028bL,0x6388051a3353c08cL,0x028b0677e9ede279L, + 0xcc3d488cf6339bcdL,0x87c818187e5ccd8cL,0x4e7186598c7b25c7L, + 0x4196e50586f7ef16L,0x9b107e634d57b458L,0x0000000000000061L }, + { 0x4d6c0c1fef82fb6eL,0x8c244a96888b56dcL,0x8b464d425577066bL, + 0xf7ecc8ca8261ddb0L,0x3570d8a77002aa15L,0x8e694bd46197aea4L, + 0xff32a72d301d8b31L,0x8e7f4f8ce64abfd9L,0x000000000000008cL } }, + /* 50 << 511 */ + { { 0x4192297482a6c06dL,0x5d74d871af00d803L,0x8d08becbbb060085L, + 0x73c0f98f88c68d5fL,0x57ab930e1eee2808L,0x58bce0b0994c1dedL, + 0x84b44834720f64a7L,0x026e1e6cb2fbd380L,0x0000000000000001L }, + { 0x80b288943d2bd6f1L,0xdecf8fc035fca89dL,0xae9673adc3dbebccL, + 0x52b2a6114c7205c0L,0xbf9efb9567a057eeL,0x55e70e9e8da648dcL, + 0x4497e043e39ba91dL,0x12b586111fccb6d0L,0x0000000000000143L } }, + /* 51 << 511 */ + { { 
0x1375591eb29c6942L,0x558b9028695298ffL,0xd72d549d7f26580fL, + 0x499eb5853b5c56a0L,0x2f077d8fb9ce1359L,0x1262a69d0d7ba25cL, + 0xbf740566f77d49ddL,0xb61e1de4089024f2L,0x0000000000000031L }, + { 0x9600de6a5cea3678L,0x369af7f9e5016941L,0x4ed8550d502f6ecbL, + 0x6dc1c264ad1f1bbbL,0x9f0c69ee1f11dae6L,0xb5f7f9e25a6587b8L, + 0x4213ef97bc54eabeL,0xbe9a1fda336d54a4L,0x0000000000000079L } }, + /* 52 << 511 */ + { { 0xbc149ee5e49ef413L,0x39b60fa4d360cf79L,0x2e8d787ba5ac8562L, + 0x49c84eb7f42102ebL,0x648e9db745aaaa29L,0xc425b3d214ab53acL, + 0x76ae3baa7dc413f2L,0xd2d1545197d30a00L,0x00000000000001fbL }, + { 0xdc8dee9ab8b66612L,0xa4158d3140baa079L,0xad72d22df0cba4fcL, + 0x8086cf0dbf8743a7L,0xd3f991da2520bd39L,0x97506e140f96e56cL, + 0x552b394be00ac04fL,0x51565a258066c1ffL,0x0000000000000046L } }, + /* 53 << 511 */ + { { 0x6c3d617801a96295L,0x43814219fd3c708aL,0x5c897caed13c8708L, + 0x9f5aeb293f89dd2eL,0x9b939a34a97dfc4eL,0x6ab7bf493c11ac17L, + 0xe4361a842a64d18aL,0x4e403a2ce2e79fe2L,0x000000000000018eL }, + { 0x799b8d70aa46cf1cL,0x6c2ea52272cbe39fL,0xbe454c9d7f37d359L, + 0x4d467bcf8dc10196L,0xb2ff7e3b3592d9c8L,0xeea5653d9533501fL, + 0xd5ffd9b567a4b6f9L,0xfde6e9228a137ffeL,0x000000000000001aL } }, + /* 54 << 511 */ + { { 0x1e3ce488a047b061L,0xbc63c1934dd0b80bL,0x215eb936521502deL, + 0x9783e9936f19aa62L,0x619021eb9f16efd8L,0xec8ec3c5369a3bf0L, + 0x1676e68515ae4400L,0xe53e62515b175359L,0x00000000000001a7L }, + { 0xdfa54a3b97edcc3aL,0x09008a0bf05833c1L,0x977fd92b8a8da5aaL, + 0xf7fad1857cfe5d6dL,0xe2fa47a6dc11ca11L,0x24bd7530d9ebf4bfL, + 0x7cebf5a35680963fL,0x4fb7ab14c8665ef7L,0x0000000000000005L } }, + /* 55 << 511 */ + { { 0x7e11d9fb45a8dc00L,0x3d5367616a5d6065L,0xe6e07b82e29c3187L, + 0x4bec2dd98770e1b7L,0x68475bdfcd079a33L,0x16b08e0e6e3f02d2L, + 0xed11161910a457f2L,0xb0e36f686f05051aL,0x000000000000005fL }, + { 0xc0029dee068f8671L,0xc4040ef2ff0a6398L,0x87250bb6bc66c114L, + 0x76a6bb7afcc07b67L,0x8bc7c85758b6b874L,0x83aac6467921d89cL, + 
0x134f5777e7990ddbL,0xb1b5da6baf81c59fL,0x0000000000000024L } }, + /* 56 << 511 */ + { { 0x49e0ca1c42892d9aL,0x5b0338a5f1dd866dL,0x9f891dc9ebce0673L, + 0x9956c1820b302606L,0x2957d2bb088fc616L,0x1ac5570417b75077L, + 0x5f5910069b67b533L,0x1bf9b4bd69acf0dbL,0x00000000000001dcL }, + { 0x46de5fe669353a7cL,0xc4ad8307318c6934L,0x155bb477e235ca27L, + 0xea4bdc8302c92a8dL,0x71120f8c44755470L,0x84db9f10d607e3f1L, + 0xaa26e77d2f3256c9L,0x33a5a253d015112dL,0x000000000000007aL } }, + /* 57 << 511 */ + { { 0x20aec1362116051eL,0xffe2bc551b94c19dL,0xd91e5b74fb20a2f4L, + 0x08e0eb8f1f337f79L,0x74358d40020a9b71L,0x6c116399a54a0ff8L, + 0xe6090ccb58499d38L,0x52b4fbfff1be7a36L,0x00000000000001f2L }, + { 0xe6139d12f1cb27f3L,0x5320639cc84a439eL,0xb9d97a6f588c93dcL, + 0x47fe72529320567cL,0x978d888735525e0eL,0x9c71616fab85c178L, + 0x97b0f1a987dd691fL,0x8000752a1ea5c4d1L,0x000000000000017aL } }, + /* 58 << 511 */ + { { 0x018d394f82f47082L,0x5c3822d7194a5bcbL,0xcbaa36177f696c2eL, + 0xa479f542c7ee38baL,0x02ffb8f005e0bee0L,0x96e989ab54c7af79L, + 0xdc095377118fec7eL,0x1889c6bf473a5d75L,0x00000000000001bbL }, + { 0xc23dff30186f12dfL,0x2c1e5b533938ea78L,0xf84dec6083271f8bL, + 0x400b2677699a9bf1L,0x71e646bf1763cb95L,0x049bd5253dc8bd13L, + 0xca6987502cebc746L,0xe5faa8e720df2537L,0x000000000000011bL } }, + /* 59 << 511 */ + { { 0x5795aaa1d72a699eL,0x6ff9310c36c48097L,0xef3f26293739ff45L, + 0x6963009f1b69c613L,0xe589566bf41adbc2L,0x8519ded7ad44ff6aL, + 0x962ea751d15b222bL,0xe6fa540cfcca0dddL,0x0000000000000070L }, + { 0xedab4d678df09693L,0x764b38f4a3f32f76L,0x2acd5a3b31b914ceL, + 0x1233395075acb244L,0xced00b66c3e58eb8L,0xe3689850bc11b028L, + 0x94f3e782819636a8L,0x8c95e7bb036dd494L,0x00000000000000c7L } }, + /* 60 << 511 */ + { { 0x37439521cea7af22L,0x74ab19d02610baf9L,0xef7e24aa2c3ac33dL, + 0x3b42932bc1eae083L,0x91478ba263b52bceL,0x125d1d70532e30b3L, + 0x67ceccc80ae01dddL,0x74c6bb6376afb79fL,0x00000000000001edL }, + { 0xfc4cc8c83a4adb6bL,0x2e4071bef2fc358aL,0xd81bdde4d9c36d33L, + 
0xd2a33e71c35dc1ccL,0x24eba4f21c1a3d49L,0x04f116a9177f3f5eL, + 0xd63a542d1eab2502L,0x2fc261c6ba83beb5L,0x00000000000000c5L } }, + /* 61 << 511 */ + { { 0xdb557ff48802382fL,0xeae38177f1567c96L,0xa9dbea24f20baedcL, + 0xce9397baee7504ccL,0xa4442b0a42df93b4L,0x3d7fc49deee5363eL, + 0x9ee9c35c98da97b1L,0x5d01da6517432d14L,0x0000000000000036L }, + { 0xf420abf84f2df80eL,0x99f61a31dd55807fL,0x3fc70210891bdc0cL, + 0x6c61798be047b64dL,0xc154a33bf7ac51e8L,0x5f3b12819fdfc99aL, + 0x43046ba63397a668L,0xe140dbe79e0355fcL,0x0000000000000156L } }, + /* 62 << 511 */ + { { 0xa0c1806642c1f473L,0xbcf0d250eda4e7d2L,0x1cd8ef91b57c4d80L, + 0x16d95785698e4de7L,0x9239a410825f5b62L,0xba5862c29c45add9L, + 0xf7f39ffcc844573dL,0xbc4bb8be6d3c4492L,0x00000000000001d2L }, + { 0x918841a7413d3afaL,0xd46614bf35d3c789L,0xd31164d43ae4637fL, + 0x62ec57268c2d1641L,0x54745b91bd87cd4fL,0xe3a32720b8ed340eL, + 0xfffbaeaff90a1288L,0xc61ae40a5c850d7fL,0x00000000000000a9L } }, + /* 63 << 511 */ + { { 0x77ecdd788ded8d98L,0x5c4f7da4f3a901caL,0x3f93175b91c2a241L, + 0x4c0b8664749141d4L,0x6961df53fc600db2L,0x6aee2d3b899f23e4L, + 0x4396c817fc67b9a2L,0x71043f9a1c33877aL,0x000000000000017dL }, + { 0x67d33824044cb54dL,0x8623a36b7b85049cL,0x88bfdcb338c9d75bL, + 0xb861f7b06c3dbb25L,0xd539f69f52a24d72L,0x60428e5b5dc11fa1L, + 0x3ee93ffb4f13f4ffL,0x04018a96e18516d9L,0x0000000000000101L } }, + /* 64 << 511 */ + { { 0x41b3eb86bc1bd2eeL,0x71e8430c50368484L,0xb214e9cbf2b93ad3L, + 0x13f89335138e0ebfL,0xec96c9c25e690118L,0xde786d182c22cd4cL, + 0x5b1554abe819e310L,0x399bfab4d01892d1L,0x00000000000000c5L }, + { 0x2efaee912005a9f2L,0x9cb43e2eb39471d0L,0x0b0d1b565ceb35c2L, + 0x3bff194056825120L,0xad49b05739a0dd54L,0x35dec8af0e83d425L, + 0x50b91634687d1ac8L,0x2fb0d9e42f25a98dL,0x000000000000007dL } }, + /* 0 << 518 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 << 518 */ + { { 0x58333c8c9c13146eL,0x213c8896d5d37a5dL,0x18e2132d8fd07282L, + 
0x606e8133cd7d66c7L,0x21c19a128743874bL,0xe375d1f56dd96309L, + 0xe0c48d6d3e1d629cL,0xed6ada6cdee22453L,0x000000000000015cL }, + { 0xea1c974547b194a6L,0xdd549898850241e9L,0x0e225368407cb796L, + 0x17b59edb427ad8d3L,0x08d4629ec43f0a43L,0x27f4063d5201d0a4L, + 0xf6e1518fa5003274L,0xb6196e3ea3286e2cL,0x00000000000001b8L } }, + /* 2 << 518 */ + { { 0x593cf4108e41277fL,0x3ae7db6e925c9d91L,0x68519ca7a37a388eL, + 0xa93a2e815bd94e48L,0x2b8cdbac00ef816fL,0x6fc3cae6eb2796f5L, + 0x253fe0d4f8dedde4L,0x38405b3095ecc14fL,0x0000000000000092L }, + { 0x27a7d0f67882df12L,0x14b3264246332549L,0xb8d8c3e6eda10498L, + 0x6861e53c2aa1cc1aL,0xf2bd03e3a37d7254L,0x175c04f6d78b5623L, + 0xb3d5202f0b18c14fL,0x30b9b0f6967f2b7bL,0x000000000000012fL } }, + /* 3 << 518 */ + { { 0x3b770280731e2b92L,0xd8c7870c7ee974f8L,0xd6fe928343da91b3L, + 0x563cad012b7968b3L,0x48333ac2d845b648L,0xf1753bdbc02af5d4L, + 0x48cf506f04a647afL,0xac9f1d5bddcd3a9aL,0x00000000000000fcL }, + { 0xf9b4124bcb40d52dL,0xc39a85654cb0ab5bL,0xeee9291114c56558L, + 0xd5182186ff5602fbL,0x32d3441623f351ffL,0xde81de053e387e04L, + 0x19b6666948aa0ac1L,0x4cd3b987782c2ca2L,0x000000000000018cL } }, + /* 4 << 518 */ + { { 0x61829972ce8f9a17L,0x2d21913ce0fda47eL,0x84cb9da252466c05L, + 0xfcc4eaab9fd85710L,0xab1ec5412f56fb90L,0xbb435bec6a231d4aL, + 0xe1fffeaee16d3c47L,0xa7db02840f023a36L,0x000000000000002dL }, + { 0x7259e71938d77a9eL,0xcd2278b1a3661199L,0xcf7fdf77bf6dfc59L, + 0x6265f9cb96a2b632L,0x1365e44a4eb09c8aL,0xc1ac30542b23bf87L, + 0x3161a2c05aaec208L,0x1af3a61455db8ae9L,0x0000000000000098L } }, + /* 5 << 518 */ + { { 0x60e8e4d3ea396a57L,0xa56c78e0378519fdL,0x783fd1661fd3ee35L, + 0xf465f7d7174f5564L,0x6e3d2bd397a58ecaL,0x9ddc5d888ed910a7L, + 0x46b9f89cc0b9158cL,0x1d6995655fae4fa7L,0x0000000000000033L }, + { 0x8475d7fc10440e6dL,0x048d912b8c257698L,0x536c9062e7c97954L, + 0x067523a8d0f61df0L,0xfd233e353e97f9fbL,0x38b8db0ed3613d1aL, + 0xd071e57d61810f87L,0x1f4d1b2e97415a56L,0x000000000000018bL } }, + /* 6 << 518 */ + { { 
0x129f3f88a1e73627L,0x4d4f1e3abc64f2ebL,0x365c51c94a97ebd4L, + 0xa681acfc1c60b2b8L,0x98bda6049c22ac9fL,0xc337336f7a3c06d8L, + 0xb706c685153282dfL,0x4217adc8eb1ce0feL,0x00000000000001f7L }, + { 0x1d147cd08ca52a15L,0x2fb7fa8e6c45139bL,0x36db6e625c7ec355L, + 0x603450e0f1b9c840L,0x46181b26f6f43cf9L,0xbe2fa18c68c6844bL, + 0xc6eb676678305319L,0x47ccf36ff7f05252L,0x00000000000000f4L } }, + /* 7 << 518 */ + { { 0x765a6a6afc884a2eL,0xb079d52bbabdf029L,0x9441e6f06f3b9515L, + 0x524cc31db56d96ddL,0xd9129ecef245efaaL,0x48c3b03bc906f1d2L, + 0xb2f379fff6334489L,0xea50ce556c3d05beL,0x00000000000001daL }, + { 0xa23e9095a8b173f3L,0x1163236c0cae0acaL,0xbad7a4ced2fe7ed3L, + 0xc96b59a64397acc1L,0xa330281002b1f51fL,0xabe9c10cf3c0800fL, + 0xc0480be858f37470L,0xbc4b63ae0678686fL,0x000000000000000dL } }, + /* 8 << 518 */ + { { 0x82e051426b4c3f67L,0x830492593fc34315L,0x2b17027d972d1c60L, + 0x650bd0df06941699L,0xc960bca9bf06dea4L,0xc9b131eef6bf6453L, + 0xc7865c906e2a0bd0L,0xffb964e05d5f6799L,0x0000000000000033L }, + { 0x32db85f6e9fa9189L,0x3f0deb0eb5544031L,0x848456bd704e30bdL, + 0xade1bb0e862032ccL,0x8deae33afcf305adL,0x918cea68c089cd96L, + 0xea115fb805a4a146L,0x978f3a2f3633d8a2L,0x0000000000000083L } }, + /* 9 << 518 */ + { { 0x62982ababf590757L,0x112fefd670d2e25eL,0xd6f63c34ed861469L, + 0x626af4f7f78223d9L,0xd26df9887581ae71L,0x884869f820b93f27L, + 0x5308341576b8e53bL,0x42c0e46f9225dabdL,0x000000000000005dL }, + { 0xe6610850fd4f594dL,0xe4be8fa1fd6f9d30L,0x5f5f5c2d554a978dL, + 0x91ba562503eff7b1L,0x278af3e2c0b61678L,0xdcec232f4bc1b63aL, + 0x05d3b83ce67e86e2L,0x2e7cfb29b15ea746L,0x0000000000000120L } }, + /* 10 << 518 */ + { { 0x868efb5c1517a09bL,0x20bfbd8722919dbcL,0x11bf475b6e776274L, + 0xef04a4135ade4e48L,0xe5b721d41702befaL,0x6613ef46c010811aL, + 0x8a70cf720286df5bL,0x31a364139961089fL,0x00000000000000cdL }, + { 0x0ff8f787313062e8L,0x6a9c58a1c7c7876bL,0x3767da6e73cfd9bfL, + 0x59569e705f92c89fL,0xd2b5ae8f16d2a77eL,0xe287323d9756a091L, + 
0x4f40fefcb0114e2eL,0x5a51851d8f4adeb4L,0x00000000000000f0L } }, + /* 11 << 518 */ + { { 0x5037561bac8ebe24L,0xe7ee25348366bd2aL,0x772837d6841bae82L, + 0x08eb7b46e0ff68c5L,0x8c92b3f60c31b9dcL,0xf87d90b768dc21e4L, + 0xa6c7c9d70770b025L,0x53c3c5f384510727L,0x0000000000000047L }, + { 0xde80f1b74569f57bL,0xf3d6318e62b8842dL,0xfa2eb7e2c819f5f8L, + 0x0faa86691d96d0daL,0xf45e324d3751f6f7L,0xb12f32807a8f2951L, + 0xc455c3d540edba34L,0xc4624175f5c78269L,0x00000000000000fcL } }, + /* 12 << 518 */ + { { 0x93012bbdb3f18230L,0x69b59c81be563374L,0x0fccc9418fbd8070L, + 0xb2e3e5fddd353c6dL,0x290f2d4065dce413L,0xb79d87416598620eL, + 0x855eca8c0cfa916cL,0x45029ea197efebb1L,0x0000000000000051L }, + { 0x7c09eef390438baeL,0x898440c59ec6217dL,0x89f1331330d90670L, + 0x1588728726ae644cL,0x353e5bfae83d85f0L,0xe8f21860456b1253L, + 0xeb0fedeacc75e379L,0x215ac25219348cecL,0x0000000000000057L } }, + /* 13 << 518 */ + { { 0xc4fc152cdf7adcf8L,0xf1731d853aac8ef5L,0x755b049dd1463ed9L, + 0xb2584ead794d041dL,0xc6cbad77a4598004L,0x1baf02531eb28df8L, + 0x972ea8693d442d21L,0xcf1de4ab9d17f0c7L,0x000000000000002fL }, + { 0x175ee1ac772076f6L,0x8afe0a0b98a219bfL,0x23350b21cf7072b6L, + 0x255372ec66d6bfcdL,0xc0e878e4ca347854L,0x45943a9fc266b8c5L, + 0x8e0b70f802304611L,0x7c1914054c1a0ad8L,0x0000000000000129L } }, + /* 14 << 518 */ + { { 0xb009865603ff1b77L,0x0c3e56c816d480dfL,0x03cb7430b25b7ef4L, + 0xc1a1d84f29530ba1L,0x91ea3813a687e8a3L,0x36d765625e955cf5L, + 0x9791cbeb9871c033L,0x25ed155b29271a7eL,0x0000000000000046L }, + { 0x0201ae83b863fe15L,0x577c28d5533caf59L,0xa000a53862886d5eL, + 0xe400a338683c5322L,0x4dbecfe561107162L,0xaa3a88bcbb5603f4L, + 0x48714504ec53c8aaL,0xc5c8b13a63f8c036L,0x00000000000001beL } }, + /* 15 << 518 */ + { { 0xa1ba19f667cf8ca1L,0x71b5032e6aefd0b4L,0xeee3be6cc1649bc2L, + 0x630e7b8664949a27L,0x5c9b815b9f8d970aL,0x582c63862439a25aL, + 0xe2acb5564443a504L,0x8ce90b34e50d00ffL,0x0000000000000004L }, + { 0xd96814912a56ca70L,0x39d0c795ed552ae7L,0xf666288a8d6c3cefL, + 
0x71fb44903f030a8dL,0x20a8dbdf22b5414dL,0xcfc31a6ac244c07aL, + 0xf6cba0448ef206e9L,0xe2987b8b8f2d974aL,0x000000000000004eL } }, + /* 16 << 518 */ + { { 0xfd4abdb4f6f04de3L,0x2d3399bb2708b32fL,0xe3f7a82fae7ea987L, + 0x63f0093e11c3b0b2L,0x4e771ccb50e1dc6cL,0xed3655479b91c6a3L, + 0x97c1fd0b332028a9L,0xe8199fe24a65acb3L,0x0000000000000137L }, + { 0xff2f102fed4f5410L,0xe2ec543cbed8c3c0L,0x7c79a798d4bbff64L, + 0x5baa5d12bd61bebaL,0xa4766859accb68ddL,0x79fe67260f18b724L, + 0xfbaa1890c404245dL,0xebae71de1e4be8eeL,0x000000000000016fL } }, + /* 17 << 518 */ + { { 0x7a581bdc7e2afe12L,0x8cff83cf14fb3fd1L,0xcba840cc429779e8L, + 0xf7bd183f9e5201e8L,0xc2c4a1a9fd6d9790L,0x9da08423ef26b1bfL, + 0x5291a5eee224c1f9L,0xd2fef1565468dfdfL,0x00000000000000ceL }, + { 0xccae68d1f8e2c083L,0x2b020fcdb6a2be21L,0x0369059c53f70413L, + 0x1e7d130d80ff6c07L,0x3a0bcee3ae669b29L,0x2f8501f7a3d96b7aL, + 0x37a337a485e110a0L,0x5496623a30441bd0L,0x0000000000000022L } }, + /* 18 << 518 */ + { { 0x66a32a2ff9805932L,0x2ec549c5bef0740bL,0x4e144029127ed87fL, + 0x34f547eab1728cafL,0x89a0c9026066be92L,0x2acf5a05f15277a9L, + 0x78de46f40120d365L,0xe9b87d4a61f5eed1L,0x0000000000000163L }, + { 0x2f0c42289ff30d07L,0xc2f06ed849a8b586L,0x08d431a31b47a0e0L, + 0x9b88a9bb1e3ef583L,0xc9ca97883a415e81L,0x321cd03e6b091374L, + 0xb176a99a65091a1bL,0x1be3de61e7dc0e5aL,0x0000000000000120L } }, + /* 19 << 518 */ + { { 0xedda4f501a0ce34eL,0x92df6948bf73ec51L,0xc28a8e4c1a72e03cL, + 0x2af289b610e4eb04L,0x07c600e6b91a74a4L,0x4bb54ad4f0b6a952L, + 0x13c1236dd1de5b10L,0xb8e9c19bf544bbbbL,0x00000000000001c7L }, + { 0xe7b10240b7f3dff6L,0xb54fa2387ef42552L,0x928e15bbde030c69L, + 0x86839d49beeb87f9L,0xc72c04e8fe6a4e35L,0x0bd28b2333a42b3dL, + 0x78c05d19cf75a3b7L,0x57db32cb05bedd16L,0x00000000000001b1L } }, + /* 20 << 518 */ + { { 0x895a36231bdd31fcL,0x3ff4b814a499d99fL,0x3b0b3bc1ae8418daL, + 0x3f6e8a895eaaa723L,0xf4f87e1303adbdf0L,0x989fcecd01852f42L, + 0x6dcaf5eda4426bc1L,0x36f55efea43a2bbcL,0x00000000000000ffL }, + { 
0xc7180d5818832a17L,0x47d76d9329c58f84L,0xe54eca936eb5288aL, + 0x4e7a0ae6621c57b6L,0x475c9919cdad70afL,0x5e0d5c8613d73c3cL, + 0xee24c9f315db8108L,0x80ff5e2c286ddbdbL,0x000000000000002bL } }, + /* 21 << 518 */ + { { 0xe6bb9caf41166535L,0xee910574729bbb65L,0x196bc7188a30dbcbL, + 0xf5c06afb6b669532L,0x95a6445352798e9dL,0x54310e3293028bb3L, + 0x2c980460a192c5e2L,0x3e71aa09f1d44ba5L,0x000000000000004fL }, + { 0x0189f4680f956b70L,0x5d67b04a8f6b209bL,0xf82daa27b3529f92L, + 0xc4e8d5d18d611d16L,0xc0824d25adcbbbc4L,0xa3151a908a4b37a5L, + 0x570c39a5eb1b3b77L,0x4e04c57294a2bcddL,0x00000000000000c4L } }, + /* 22 << 518 */ + { { 0x817eebab56b577a7L,0x0a56039bdf679d07L,0x66bf1a96f0d319e5L, + 0xfe007b76ba26cee6L,0x55836ee4d1a82640L,0x2b123a6f568d16a7L, + 0xc55d3c2bd51ce671L,0x1e0eece0277b1fd7L,0x0000000000000074L }, + { 0x32629cda26a51b9eL,0xff08209c84140e1eL,0x6b6333578da237ebL, + 0x33c6765e901d98e4L,0xa3e1409a633fa3b5L,0xa44024d2365a82c2L, + 0x1f90df3aed9a4771L,0x22a587e4ab00783fL,0x00000000000001d3L } }, + /* 23 << 518 */ + { { 0x013713ea9be05096L,0x3843a8fdf3f844efL,0x1e5b0c16555986bdL, + 0x873570a4fe2ddef4L,0xdbd38a931534c642L,0x2cfb55c0dd37641bL, + 0xa022f3cf4c9cc92cL,0x8e68f725fec4ae5dL,0x000000000000011aL }, + { 0x174fd4dfda88abadL,0x471a712bbd3638c8L,0x78120e3fc36f3380L, + 0x0d306f6e4f257b7cL,0x3b5d78fcc2c7e110L,0xce9f9636a954a62cL, + 0xaa64c8d8bbf8bb14L,0x11f083636854bbd1L,0x00000000000001f8L } }, + /* 24 << 518 */ + { { 0x2e6ad1a9871f197cL,0x0af63210b4a58092L,0xcb9252c242a3b0faL, + 0x06dbc3149d528c5bL,0xdc5216150601bb88L,0x9db07c32aa3178b4L, + 0xbb13186bc4212f12L,0x18bdfe06eef4e9b1L,0x000000000000011fL }, + { 0xf7e64b81713bbfa1L,0x26344818b932b514L,0x3316dd2ae7ca294aL, + 0x4b266d272f30f1cbL,0xa2642af20de790f7L,0xcb11421279d5abfeL, + 0x780790487785c36eL,0x8b8a691d9db40ab5L,0x00000000000001bbL } }, + /* 25 << 518 */ + { { 0xa6f7435bfa4bffebL,0xd6e291f42bfa469aL,0x58f691c576f06844L, + 0x46c100d0ebbb4c34L,0x9c11ffde0a546878L,0x0236adb05accddeeL, + 
0x2b5adb70a872847cL,0x75592f462320b5c4L,0x0000000000000173L }, + { 0xc0a19f24fddf6f87L,0x6abf006e83e9fbd2L,0x0af2d4414bf49193L, + 0xe97288f52a6dcd30L,0xb13a5e18156a6b6eL,0x250d944ec97de15fL, + 0x542ce0281d12789dL,0x7f18a27398252875L,0x0000000000000084L } }, + /* 26 << 518 */ + { { 0xe082423c26ee9683L,0x26a715041594a61fL,0x137bbd6e6ef85629L, + 0x2e75b833bf8e7d4aL,0x8912d175a0bf520fL,0x197a7b40d35ab75dL, + 0x55a51a2208213620L,0x9319af9f0a9a003eL,0x00000000000000a3L }, + { 0x82740b1ad685067bL,0xd727f51e66fec58eL,0x7f230a384522eac5L, + 0x9b2472ae2422eea7L,0xacb24446168e9049L,0x995eaf82f8b24133L, + 0xd4d63051f8b37f7bL,0xb36021bc4d003ccaL,0x00000000000000e5L } }, + /* 27 << 518 */ + { { 0x0ea8d0af43da1485L,0x229bd2eee356a58eL,0x766352201eae3818L, + 0xb1cc532299644ed3L,0x2b29d1889fe0cc0eL,0xbce5b5e6303000ffL, + 0xb035f61989e0f826L,0x510c51d80e591febL,0x0000000000000187L }, + { 0x7b2b6f57c1e35d21L,0xbe7100b1b78bcf87L,0x7a4dcd2b1764611eL, + 0xc3abd1efe0659593L,0xdee37f042bd73310L,0x83d81872a65875a4L, + 0x324574a77b7a1222L,0xcb0d36670b9b276aL,0x0000000000000108L } }, + /* 28 << 518 */ + { { 0x971696469a44620fL,0x3523058dfcfa2ea4L,0x8c3e30d1167c4c8dL, + 0xf3de3d61e93a3a6aL,0x56e42c97642614f1L,0x3c9c8d2480cc3dceL, + 0x68d62a528ce9a9d8L,0x482041d2df7be32aL,0x0000000000000127L }, + { 0xeb4439ea67c7fd4bL,0x350117320a384db6L,0x19f9a3382046f5d4L, + 0x5dbf9fcc72cacc46L,0x732caa319bb26921L,0xcd7c2088ca05b118L, + 0x6c815494dbc0ca1bL,0x5334afab825890f6L,0x0000000000000053L } }, + /* 29 << 518 */ + { { 0xb38b3455cf0b627cL,0x9e5ecaf020af8835L,0x5e15ceeeea2dd64eL, + 0xb99b86fad1f8ee12L,0x87af57750227a645L,0x0ee3dab2f604b581L, + 0x473b2504e5187026L,0x00b2e9d3e92eae4dL,0x000000000000013eL }, + { 0x609dd618331d29eeL,0x38cd2b0b4b99e2fbL,0xde71fb3f9242cff0L, + 0x67ac8f5862f6689eL,0x0a99984caefeabbeL,0x6f5dfca538b281acL, + 0x4a2a46d24dcbc8a0L,0xd595b07e6fed4a26L,0x000000000000016dL } }, + /* 30 << 518 */ + { { 0xff258a5138d1847dL,0x713ef4bd7f5e74f6L,0xe56ce9e3e0a0f09aL, + 
0x88f07760772a9159L,0xbc42bf932fb37fc9L,0xb8d9041cf0f2cba3L, + 0x59dae6249a50cf7cL,0xb7546cb5920f1729L,0x0000000000000145L }, + { 0xe310812c92011458L,0x9ef42383f158e9beL,0xb3b060fe5311f175L, + 0x29af47ec20c5da5aL,0xd75668ccc0381df5L,0x6cdb1a0cf1bf133eL, + 0x6abfb17042c02d7cL,0xf5984d2fd856c84bL,0x0000000000000169L } }, + /* 31 << 518 */ + { { 0xbaf9ebcc00544abaL,0x250f8366fe960dcfL,0xa0d3d15d76c4500aL, + 0x858ea0e985f1322eL,0x443d0aadeaffd468L,0xda9043a709256191L, + 0x4b83a0035aa2afa6L,0xed05f5bffe5f1395L,0x0000000000000004L }, + { 0x50934ea9608abffcL,0x1299ef11a883495cL,0xb72333689792ef29L, + 0xc7074a39f05483b7L,0x06ef2784274fa92bL,0x64e7feb67c864a48L, + 0xf5fac1f878ca9ce3L,0x595b60d700b935dbL,0x0000000000000033L } }, + /* 32 << 518 */ + { { 0xdcd089e5c0f8abecL,0x2d11032081f91ec5L,0x244cef5d8d76055eL, + 0x7202652d1c9bdd43L,0x396ebf25643f3ca7L,0x811a3695c983ac57L, + 0x7d1f29a41e6e64caL,0x7cb13cec80827ae1L,0x00000000000000c5L }, + { 0x16b59e9c2d5c560bL,0x18b069a09dd105e4L,0xb7fc826bb6bc0c3fL, + 0xf6ab6c7485ea8193L,0x9c768ea01c4cdc7eL,0xa7a149b4d223cca5L, + 0xa54dafdc8789b618L,0x2656cb46b17328c0L,0x0000000000000100L } }, + /* 33 << 518 */ + { { 0xe6ce0ca2ccc0b2a1L,0x3d569ba1cd0023f7L,0x41aecfdb81bb3a84L, + 0x51d0e514bc37ca47L,0xcd8ea6a433b95775L,0x4bc287d36a213c69L, + 0x25543e3133ecd721L,0x73b370fd37ccf58aL,0x00000000000001f8L }, + { 0xc5f14160b6d87edcL,0xd2ad1fce324d39f3L,0x252c0755b8f7bbf5L, + 0x9a0f47dec97f5775L,0x1d026aebd617bfd4L,0x63ba7c6abc086d63L, + 0x6dc68bf907845053L,0x5a1ba87296198bb4L,0x00000000000000efL } }, + /* 34 << 518 */ + { { 0x71710160519269efL,0xf17dec367f1d6361L,0xf6dd6e7fcf9de894L, + 0x9f85f14c98a3c278L,0xbf7a86440e5e618bL,0xfbbea240f4f1a389L, + 0xc762aca8fedf3526L,0xec54e3323a3a1b77L,0x00000000000001daL }, + { 0x1852de066a518668L,0xd05e9eef6cfab15dL,0xb13386cd6b10958bL, + 0x793f12b5144bb551L,0x3ec0f36514ff524cL,0xc642095686e2cb96L, + 0x926456f12381598bL,0xc5508ab0e8c1e2ddL,0x000000000000008dL } }, + /* 35 << 518 */ + { { 
0x308b59c8e3990529L,0x58f149af2d350a55L,0x6b5f70129bf20ff0L, + 0x07c324620658a155L,0xcace2e408f753462L,0xf9616b76e31fc89bL, + 0xad31cff4b6013839L,0xdf2c921ab244102bL,0x000000000000015eL }, + { 0xb8b3e02b30f6f32aL,0x072935e1835853fbL,0xd7d687c87364fa89L, + 0x5109c823998e7aceL,0x9e821b0ba8d3f444L,0x75d6211efe652482L, + 0x690314094b3f5025L,0xf8bf959ff42e9c5cL,0x000000000000002aL } }, + /* 36 << 518 */ + { { 0x7ea34b836dd01befL,0xc36e2120effab7a5L,0x1357672fb8845894L, + 0x7f462c580162aa61L,0xb00958815bae0512L,0x55e27b8ef30be373L, + 0x3b0b6882a1a367bfL,0x9177f7351f66d95fL,0x00000000000001c0L }, + { 0xeef0eba1bfbe7524L,0x0cf73ed68025aa0dL,0xae531a884d9267fcL, + 0x29042ea6a78a045dL,0xecebf42adb2582cfL,0x041babdc566d1be1L, + 0x55c1d466cfe83774L,0x751c932b40e305abL,0x0000000000000162L } }, + /* 37 << 518 */ + { { 0x59d2d2af2193cc13L,0x0321329b5b8746b1L,0x5afef594d22686f7L, + 0xb66fa579bf1807c2L,0xe3233a19a636faa9L,0x7e39da2f7c82dc5cL, + 0x24967121715dbaddL,0x0010e24a686dd74cL,0x0000000000000055L }, + { 0xaf5aa87ac1ab62b3L,0xdaec05ece1a7d955L,0x87e93b656fadda7cL, + 0x12393f01825da2a4L,0x08ee17429d2b4552L,0xd83408c0f9e092fdL, + 0x17e78cccf594ebb8L,0x26a3963bff074050L,0x000000000000012cL } }, + /* 38 << 518 */ + { { 0x43015ef7328a8d9aL,0xff26649d8c81d304L,0xfeba7112e6c1da31L, + 0x0303b542f80a8273L,0xaff964a4d5d19085L,0x4060a313fce30f5bL, + 0x85a6401a3f081f0eL,0x7aa4d2dacc3420a0L,0x000000000000008eL }, + { 0x3d81528e43a2e0baL,0x749c5e9b3b58a1ceL,0x16e5014642f8c0e0L, + 0x82223528dacd54e0L,0xe40c5e2bac8d0aabL,0x2e9c3c61230895d8L, + 0x1644c4f2646b2a74L,0x2eafbc8ea5310192L,0x0000000000000159L } }, + /* 39 << 518 */ + { { 0xc16ec56246c1b5b8L,0x677befb255de0e93L,0x764c01d2d18a1543L, + 0x4b979f483926716aL,0x9be5e4238be326dfL,0x147f78a5b3a2af6eL, + 0x75629177f36fef7bL,0x3a96d62f01823e00L,0x00000000000000e8L }, + { 0x286128a697b62907L,0x48c2f245fdb9850bL,0xe233637ed229d254L, + 0xfb4df7ec5ae54770L,0x387dc9882a0a3a7cL,0x604f12b1f105eb2eL, + 
0x398f6905bef07679L,0x5083751a2d7f18b0L,0x0000000000000012L } }, + /* 40 << 518 */ + { { 0xad32e836a56a26ebL,0xad1e697d62485f0eL,0x4cd629607caa8694L, + 0xdbab5d8129e36b0dL,0x6588f4fb48b7bd27L,0x19c33f2c0923c25fL, + 0x0a630f121df8f0cfL,0xfeefce53333c63e9L,0x0000000000000045L }, + { 0x241237a74ae15886L,0x5d11a892c355d430L,0x67ceab2fba945fa6L, + 0xdb2b516730eaa436L,0x6f9cb6cd2cc960f3L,0x3419806b09e68210L, + 0x52059822c4cd6e7fL,0xd0bdda660229c0c9L,0x000000000000018dL } }, + /* 41 << 518 */ + { { 0xec861da3ab5d5cf0L,0xc7af1e4ad923888dL,0xe0c32502daab79acL, + 0xf178cf648a4086bbL,0x9a99b8ffd5c7079fL,0x0a2df19d05819dc9L, + 0x12e792933db59087L,0xc125b64b6ce54c4bL,0x0000000000000102L }, + { 0xf1961f714feacf50L,0xf969a84656d8ccd6L,0xa1efae3d911b043eL, + 0xf62c80e1692f7287L,0xc117caea48a190a4L,0x0777c46d14839864L, + 0x2899245a5a1d7208L,0x489b93c85e329852L,0x0000000000000105L } }, + /* 42 << 518 */ + { { 0x67d06d04789228a4L,0x51788e82bdc230afL,0xb7dcd90b32e890c4L, + 0x1e5d0a7db5d4c8f4L,0x0a9e15ce595e67fdL,0x90b3c15dcf4e9fd4L, + 0x49753038242e113dL,0xebadd5b8a2416549L,0x0000000000000118L }, + { 0x45821571dfe7fc21L,0x1363988e803172ffL,0xc5a06b7ae06df609L, + 0x24aaabec5b40db00L,0x6871707a9340097cL,0x47b919267335f997L, + 0x785da39e65bde875L,0xf47f662e7fef2231L,0x0000000000000047L } }, + /* 43 << 518 */ + { { 0x84be109c1190402cL,0x66ecab5ef20a1cafL,0x1a4e611f0fa6f66cL, + 0x195629f2aaf23b97L,0xd813525d7ebe9a01L,0x8f628e487ee7212eL, + 0xb12fdfb811fce1e9L,0x94ed86ec2d2f395bL,0x00000000000001b6L }, + { 0xaa6a3e5e3b167bccL,0xd70639961f5d7a11L,0xe4017cb2f17395a8L, + 0xeaff0282476d9f46L,0x1831263f6eb0ac26L,0x04ac0c75bd7aec11L, + 0x5e0d613a027c01fcL,0x2f4d1bd35bb6ae43L,0x00000000000001caL } }, + /* 44 << 518 */ + { { 0x08a92760d317c60cL,0x730d73839529b4b8L,0xfcf81c9d1a050f05L, + 0x6b2cb8059f9e8bf5L,0xd7d7c79e30ff7124L,0x572a3a7f7206c662L, + 0xe2052b8c4915d988L,0xa685a31e3d6cf04cL,0x0000000000000183L }, + { 0x14408f423e71c584L,0x8f24d13415998073L,0x1ec3ad1995f62f3cL, + 
0xf2d8ea1803f06c8dL,0x8ddc0fd6087730a5L,0x7755ed4b280b7d59L, + 0x26c885dbba67938cL,0x2559753627e81374L,0x000000000000013cL } }, + /* 45 << 518 */ + { { 0xfaff0f70b8eed132L,0x45e36c7d284499edL,0x0cb980c3774b8605L, + 0x217eb3dc7c70ffa7L,0xc0f46f96c3c4a55bL,0x88d3b5cf8c437e4fL, + 0x7cfcbf0e650eaf40L,0xad4698af733e59daL,0x000000000000017fL }, + { 0x8b8c1f42717f2813L,0xaf5b8b163b4c78a2L,0x88db88f5c15667ffL, + 0xf9c0433c0e0eff6cL,0xb9aab9524af85387L,0xfdc7d7932bde3019L, + 0xdceb1293eb5d10baL,0xa7ee9901c53209e9L,0x00000000000001e2L } }, + /* 46 << 518 */ + { { 0x02c71a57c628f1c9L,0xd6b1756a6a8b91c6L,0xc5bec6323a95cbaeL, + 0x788e2eb0d9409c68L,0x086d6859d7c3fbabL,0x3be49bdb1b498ad5L, + 0x63f6e76e47bc20aaL,0x3b8aaea41745e23aL,0x0000000000000126L }, + { 0xbdc9b39f76962e66L,0xad6828ee49abdb3dL,0x6fcb091222df1a3dL, + 0x1efa4c062f1370b5L,0xff596ee1630b67e7L,0x03e4620cc21c76ffL, + 0x2cb83ef480982394L,0x65937797baa10144L,0x000000000000002cL } }, + /* 47 << 518 */ + { { 0xd09c1c996956b64dL,0xa7fd39df54e71d6dL,0xb63b0c76d813eb3cL, + 0xe6da21e09a7ccfe9L,0x1d15005aa5502e51L,0xd5358b4110306e2fL, + 0x6abc6b4667ef79a8L,0x9b5b0ad67c7376efL,0x0000000000000055L }, + { 0x31bf44b672e5d085L,0x561698e245ee9faeL,0x207d7fe17f532f76L, + 0xde53af5d389577bbL,0x931929f2a9530967L,0x428e13ee8099f9c1L, + 0xa20e4bfeabb6b4e6L,0x87d77ad792e6c4b7L,0x0000000000000165L } }, + /* 48 << 518 */ + { { 0xa779463d63ea6586L,0x055ac074f5dd2077L,0x5e63875b2be5d611L, + 0x8261be993b1c53bbL,0x86137b7ba28e14b3L,0xf706d3c77e729ef5L, + 0xb30413db74902555L,0xed91b88c4e15d0feL,0x00000000000000edL }, + { 0x0cbbfb03cee5f36bL,0xb210122bb6bab7f4L,0xffa46c283d0cf2fcL, + 0xa42293e28c05ff89L,0x014f499fa5642b7eL,0x80db63809c6e0d8aL, + 0xa819787c51d901c9L,0x17662fa8a3201327L,0x0000000000000154L } }, + /* 49 << 518 */ + { { 0xdb65de61d1446dc2L,0xa3bdc9d05d8a4c19L,0x2701196f5c624bafL, + 0x9bda0a41ef24ce3dL,0x9fb34528041c3b08L,0x7b5dbbf4242a51caL, + 0x336ce5613766cfd8L,0x172c43d2641e0b85L,0x000000000000002eL }, + { 
0x996f146778312bf1L,0x3b4e1fd7426ce268L,0xcbb357014b673861L, + 0x30c2caa09146cb8eL,0x9b4a4587e6a8d053L,0x6b1a1c3f252a928cL, + 0xcce378f9f163f641L,0x82a416d80ce8fe1dL,0x00000000000000b2L } }, + /* 50 << 518 */ + { { 0x09dc0454add144caL,0x281f6fd9cd8ab4aeL,0xaf4cfdfcb0f1c86aL, + 0x439e1747e494b168L,0x3e49a5b3e4c400b5L,0x9757873935ded33aL, + 0xab59dd9ec2a6a22aL,0x4922cb7ceac1e123L,0x00000000000001b7L }, + { 0xccd8982904d455c4L,0xffafdce83923bdd5L,0xd2a6a8fcbf1c6558L, + 0xd17a45eaf484c27bL,0x2c39c953d970815fL,0x8fb6670af00d8f91L, + 0x62182f42a31b339cL,0x5cc40112d38c0768L,0x000000000000012aL } }, + /* 51 << 518 */ + { { 0xb308f945331cb928L,0x602999521379ba9fL,0x20f27b4cd0fa31adL, + 0x88eb30063b553ecfL,0x5c19d8c07231e4bbL,0x9c5844dbc4b5961eL, + 0x9a5e28be582da2d3L,0x4d9a6de0e85fbb1bL,0x00000000000001dfL }, + { 0xc22621adab9cc6e0L,0x4679d909c4d8e5e2L,0x917e8cff3e8c3c9eL, + 0xd9b13e821a1925d6L,0x0c173503c81d5d3bL,0x8cb10799e7e3931eL, + 0xa1e340b7315c7107L,0x0337254e99f39258L,0x000000000000013dL } }, + /* 52 << 518 */ + { { 0x525c70348e080aa6L,0xfb65650b703a31d0L,0xb3c9fc909708778bL, + 0x712babdd5c0de950L,0x2336b614cf922a9cL,0xc9c01fdb2d1541ccL, + 0x3b0b5acdda984124L,0xc16edb3e5ca5237eL,0x0000000000000031L }, + { 0x16d195dcba96a3b1L,0xaeb795135b2d839eL,0xff7ca75c3f468379L, + 0x195ebf452f7a4eccL,0x368bf1ac2cba9fc8L,0x282778c5ad79da65L, + 0x1ae709a382204582L,0x60da306180e162fcL,0x00000000000000a7L } }, + /* 53 << 518 */ + { { 0xc5c076fd98c1993dL,0x7c963fe6af6e50a2L,0x39921d140591b832L, + 0xaf817e598e6e78b6L,0x7d20b47053141d98L,0x80209dde2675b748L, + 0x9784d850f911da2eL,0x10d14b67cc679952L,0x0000000000000024L }, + { 0xf6f7b8551a80d7cdL,0x68fb06ce0d84480fL,0x9d0df3bf450048d6L, + 0x14848bbe6d416a5aL,0x3191edc563436e69L,0x53daff59f90fac77L, + 0x331714fb6c2043f8L,0xb67695bf641cb68fL,0x00000000000001bbL } }, + /* 54 << 518 */ + { { 0x2568730c4e5e6dbdL,0x81765faf80c64f5cL,0xbf19f28c6c1e36c7L, + 0x0530e9a6294bd1c2L,0x80e2e102e4043658L,0x0af1e3845ac951ddL, + 
0x37d8ca04a7cc44b8L,0xf767884a3d138475L,0x0000000000000189L }, + { 0x8ab296a48ae43c93L,0xc0bda5e841f22f63L,0xb61e12430abf701fL, + 0xa2d6b5360284b234L,0xb417271d9c8e1f29L,0x9377f53ad346e17bL, + 0xc9410ca7a8843d62L,0x1d304904fe0dd739L,0x0000000000000118L } }, + /* 55 << 518 */ + { { 0x42e9239b9ab2b245L,0x2d966c1a1f9e9cddL,0xa2798c7ef19af9a5L, + 0xde254ef83c79337eL,0x07c5da22f1db6f95L,0x50875be600a3c98cL, + 0x2bfa5c26b48438f3L,0xb0ad90e5ab0a8365L,0x00000000000000c1L }, + { 0x83161fcd3bd1626eL,0xa7863bf855e209d5L,0x9d6ecef0d103b98bL, + 0x885bdf21f0c55498L,0xbe3cc09af953d02cL,0x7fe7985326d91204L, + 0x9016e8e4f2f0e08aL,0x051fa6822376d502L,0x0000000000000001L } }, + /* 56 << 518 */ + { { 0xcab77140311cc347L,0xab8125d2c64bb9ebL,0x834317210e447719L, + 0x1b072b94ad7f9bc5L,0xdb70c295ccff1aa9L,0x174b731e8969d354L, + 0x70c0a462f5761c43L,0x14790895dff9a408L,0x000000000000006eL }, + { 0xefc566d58f909123L,0x2775b7a810896ca6L,0xfe6927ca0b71973bL, + 0x40babac2b4c9929aL,0x9610d5214bd6f041L,0x82b6d9da8f075949L, + 0xcb2129e21e7a406cL,0x85389f05106af2acL,0x00000000000000cbL } }, + /* 57 << 518 */ + { { 0x30525170711ceca4L,0x754f1f1fba0c3956L,0x5d7049a8c91b73f8L, + 0xda62d9a77e22f0b1L,0xcd64a052ed6bda8eL,0x174eca531b83dc5dL, + 0xb4d7576bd3a8c49bL,0x172d6c7526d15902L,0x00000000000000f7L }, + { 0x88267b378585b54eL,0x8715a8b010b85121L,0xc944f85d6db0be9eL, + 0x6ca6dec0e92656b0L,0xa141e7cb217c6ed3L,0xd12f342527240ab2L, + 0x8529f959942ffa5dL,0x4a1a975915792f32L,0x00000000000001c4L } }, + /* 58 << 518 */ + { { 0xfb2ad1ae78825be2L,0xb9c0cfcb5f9f377eL,0xb4cd68b823ec81dfL, + 0xe48d71227620fa13L,0xf0292c9ccfeafd26L,0xd9e5e727625273a6L, + 0x7aaf3da42a9aafd3L,0x072f0f48a3df15deL,0x000000000000009eL }, + { 0x623b2ce93036834dL,0x85e00d655518c1c9L,0xcdb55dcfda8ec9c0L, + 0xe4cfb058df8e18e1L,0xb10fe1db3f381211L,0x26a05c013c5387ddL, + 0x6860765d2cd4e482L,0xada5d4823d37a01fL,0x0000000000000018L } }, + /* 59 << 518 */ + { { 0xe08a1d582f54610aL,0x0aa2efe66ea27082L,0xf32250e8475f591aL, + 
0x38be597af2bdfed6L,0xf13a28e9717532d1L,0xc01feb0de7c1df76L, + 0xdb9be38e8b96c944L,0xf0700bfe26ea3f6dL,0x000000000000016dL }, + { 0x963cd0aa82274bcdL,0xc1ed66e6de2ab836L,0x81c6f2e2cc08c42dL, + 0xed9224a1e00d896eL,0x0ebe4ff069747a38L,0x2e70f9a40130a3a7L, + 0xeb9cc3eb55451096L,0xa84de8cfa12cec75L,0x0000000000000145L } }, + /* 60 << 518 */ + { { 0x28b801584810ce1dL,0xa18f01cf9cd2a530L,0x3f882662bbacc56bL, + 0x25a09082c8de9031L,0xee333f92c75a3487L,0xc3f231d78c93dc3cL, + 0x6d53379ad94c4907L,0x1176368211f2d227L,0x00000000000001acL }, + { 0x137ae216ab84de42L,0xbc00a7c1e4ee4c73L,0x8a42aeb8fc12b03cL, + 0x9a717ff1cf6a0950L,0x18cbebe30158b96cL,0x0184646550953719L, + 0x2c73bd13194fdb1eL,0xbd52a1cf3b0d2c34L,0x0000000000000022L } }, + /* 61 << 518 */ + { { 0x5da94915f61f7493L,0x661d3e6efe554d0dL,0x1853f3fd93d6395eL, + 0x917faaca248a6a8fL,0x31e81318e038ae34L,0xd2cf0cefafe38491L, + 0x49a2e5083c999b79L,0xd5ba94f8a42b9ea7L,0x000000000000000cL }, + { 0x0bf5a0fd34c430a3L,0x4cb5013ee01f0d5cL,0x7ea690e6337f036cL, + 0x128d92118d55e026L,0x1d2e25df6ab8ed2dL,0x95e530878237cd22L, + 0xba3649ec2875bcd7L,0x6b2be67f654dcff2L,0x00000000000001f2L } }, + /* 62 << 518 */ + { { 0x421f3e959e2146f0L,0x3a7b0161b2e052acL,0xefd12db1a6cb7e61L, + 0x887566316bd4bed8L,0x8bc563448c568736L,0xd0ae1019a26bac6bL, + 0x678f366193311795L,0x11bc3650f625f4a2L,0x00000000000001a8L }, + { 0x50c943cc23ab8aa1L,0x6aaba8bf2d3b3339L,0x6727f7ffac79f93eL, + 0x130c2153cba525a4L,0x6759f8f842058fc0L,0x356b182901c30b3bL, + 0x0bd3aa809078f323L,0x70cd052134621f20L,0x0000000000000071L } }, + /* 63 << 518 */ + { { 0xf124aa8bfa466654L,0xf48e0a111882da37L,0x57ea7b2bd0aa1f09L, + 0x1936fe19589bd6a7L,0xe9f2fb054bd7d150L,0x390250f71df9add6L, + 0xd69670f24f8651e8L,0xbc7b1bf3e03566c0L,0x00000000000000c4L }, + { 0xc3b757b4b30e6c6bL,0xdb2c75c2e95c319bL,0x2790396f3aab64b1L, + 0x9709df8df6768599L,0xc2a91d3204ced567L,0x561cef3c43ead6e4L, + 0xea56fb57983d1f3eL,0x2f15dfabc0b397f3L,0x0000000000000076L } }, + /* 64 << 518 */ + { { 
0xcd6151c20e6a4eceL,0x884e5e75a2840752L,0xa6752672c41b64b7L, + 0x7585f7cbd5cd2b79L,0xac8f7a1c892fb84dL,0xfdb20c7432a80f6fL, + 0xaec2531339c7ecfeL,0xc84d7c5c93b1e75bL,0x000000000000011bL }, + { 0x30ab00a3bc363066L,0xd9a0ab44d371d11cL,0x13697c6aab047490L, + 0x1adffb54a57574cdL,0x753eafa0e548b99bL,0x51abf774a7f0df39L, + 0xe8fab02c903eaee9L,0xdf2f5e8af7542020L,0x00000000000000faL } }, +}; + +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Pre-computed table containing multiples of g times powers of 2. + * Width between powers is 7 bits. + * Accumulate into the result. + * + * r Resulting point. + * g Point to scalar multiply. + * k Scalar to multiply by. + * table Pre-computed table of points. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_add_only_9(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 rt[2]; + sp_digit tmp[2 * 9 * 6]; +#endif + sp_point_521* p = NULL; + sp_digit* negy = NULL; + int i; + ecc_recode_521 v[75]; + int err = MP_OKAY; + + (void)g; + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + negy = tmp; + p = rt + 1; + } + + if (err == MP_OKAY) { + sp_521_ecc_recode_7_9(k, v); + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + i = 74; + 
#ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_65_9(rt, &table[i * 65], v[i].i); + } + else + #endif + { + XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x)); + XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y)); + } + rt->infinity = !v[i].i; + for (--i; i>=0; i--) { + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_65_9(p, &table[i * 65], v[i].i); + } + else + #endif + { + XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x)); + XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y)); + } + p->infinity = !v[i].i; + sp_521_sub_9(negy, p521_mod, p->y); + sp_521_norm_9(negy); + sp_521_cond_copy_9(p->y, negy, 0 - v[i].neg); + sp_521_proj_point_add_qz1_9(rt, rt, p, tmp); + } + if (map != 0) { + sp_521_map_9(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 9 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (rt != NULL) + XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_9(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + return sp_521_ecc_mulmod_add_only_9(r, NULL, p521_table, + k, map, ct, heap); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * r Resulting point. 
+ * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_base_521(const mp_int* km, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[9]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 9, km); + + err = sp_521_ecc_mulmod_base_9(point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the base point of P521 by the scalar, add point a and return + * the result. If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, + int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[9 + 9 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (9 + 9 * 2 * 6), + heap, DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 9; + + sp_521_from_mp(k, 9, km); + sp_521_point_from_ecc_point_9(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_9(point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_9(point, point, addP, tmp); + + if (map) { + sp_521_map_9(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +/* Add 1 to a. (a = a + 1) + * + * a A single precision integer. 
 */
static void sp_521_add_one_9(sp_digit* a)
{
    /* Ripple-carry increment across the nine 64-bit limbs of a.
     * Loads/stores are interleaved with the adcs chain; note the flags are
     * live across the whole sequence, so the instruction order must not be
     * changed. No carry-out is returned: for P-521 values (521 bits in
     * 9 limbs) the top limb cannot overflow on +1. */
    __asm__ __volatile__ (
        "ldp x1, x2, [%[a], 0]\n\t"
        "adds x1, x1, #1\n\t"
        "ldr x3, [%[a], 16]\n\t"
        "adcs x2, x2, xzr\n\t"
        "ldr x4, [%[a], 24]\n\t"
        "adcs x3, x3, xzr\n\t"
        "stp x1, x2, [%[a], 0]\n\t"
        "adcs x4, x4, xzr\n\t"
        "stp x3, x4, [%[a], 16]\n\t"
        "ldp x1, x2, [%[a], 32]\n\t"
        "adcs x1, x1, xzr\n\t"
        "ldr x3, [%[a], 48]\n\t"
        "adcs x2, x2, xzr\n\t"
        "ldr x4, [%[a], 56]\n\t"
        "adcs x3, x3, xzr\n\t"
        "stp x1, x2, [%[a], 32]\n\t"
        "adcs x4, x4, xzr\n\t"
        "stp x3, x4, [%[a], 48]\n\t"
        "ldr x1, [%[a], 64]\n\t"
        "adcs x1, x1, xzr\n\t"
        "str x1, [%[a], 64]\n\t"
        :
        : [a] "r" (a)
        : "memory", "x1", "x2", "x3", "x4", "cc"
    );
}

/* Read big endian unsigned byte array into r.
 *
 * r A single precision integer.
 * size Maximum number of bytes to convert
 * a Byte array.
 * n Number of bytes in array to read.
 */
static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    sp_int64 nl = n;
    sp_int64 bytes = size * 8;

    /* Walks the input backwards from its last byte (x4) while filling the
     * digit array forwards (x5), byte-reversing each 8-byte word on
     * little-endian targets. x7 accumulates bytes written so the tail of
     * the loop (label 21, beyond this fragment) can zero the remaining
     * digits up to 'size' words. */
    __asm__ __volatile__ (
        "add x4, %[a], %[n]\n\t"
        "mov x5, %[r]\n\t"
        "sub x4, x4, 8\n\t"
        "subs x6, %[n], 8\n\t"
        "mov x7, xzr\n\t"
        "blt 2f\n\t"
        /* Put in multiples of 8 bytes. */
        "1:\n\t"
        "ldr x8, [x4], -8\n\t"
        "subs x6, x6, 8\n\t"
    #ifdef LITTLE_ENDIAN_ORDER
        "rev x8, x8\n\t"
    #endif
        "str x8, [x5], 8\n\t"
        "add x7, x7, 8\n\t"
        "b.ge 1b\n\t"
        "2:\n\t"
        "cmp x6, -7\n\t"
        "b.lt 20f\n\t"
        /* Put in less than 8 bytes.
*/ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); +} + +/* Generates a scalar that is in the range 1..order-1. + * + * rng Random number generator. + * k Scalar value. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. 
+ */ +static int sp_521_ecc_gen_k_9(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[66]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + buf[0] &= 0x1; + sp_521_from_bin(k, 9, buf, (int)sizeof(buf)); + if (sp_521_cmp_9(k, p521_order2) <= 0) { + sp_521_add_one_9(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521 point[2]; + #else + sp_point_521 point[1]; + #endif + sp_digit k[9]; +#endif +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521* infinity = NULL; +#endif + int err = MP_OKAY; + + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, DYNAMIC_TYPE_ECC); + #else + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, DYNAMIC_TYPE_ECC); + #endif + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + infinity = point + 1; + #endif + + err = sp_521_ecc_gen_k_9(rng, k); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_9(point, k, 1, 1, NULL); + } + +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_9(infinity, point, p521_order, 1, 1, NULL); + } + if (err == MP_OKAY) { + if (sp_521_iszero_9(point->x) || sp_521_iszero_9(point->y)) { + err = 
ECC_INF_E; + } + } +#endif + + if (err == MP_OKAY) { + err = sp_521_to_mp(k, priv); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_9(point, pub); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) { + /* point is not sensitive, so no need to zeroize */ + XFREE(point, heap, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_521_ctx { + int state; + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + sp_digit k[9]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521 point[2]; +#else + sp_point_521 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_521_ctx; + +int sp_ecc_make_key_521_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_521_ctx* ctx = (sp_ecc_key_gen_521_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_521_ctx) + >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_521_ecc_gen_k_9(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_521_ecc_mulmod_base_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p521_order, 1, 1); + if (err == MP_OKAY) { + if (sp_521_iszero_9(ctx->point->x) || + sp_521_iszero_9(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_521_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_9(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +#ifdef HAVE_ECC_DHE +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 66 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_521_to_bin_9(sp_digit* r, byte* a) +{ + int i; + int j = 0; + + a[j++] = r[8] >> 8; + a[j++] = r[8] >> 0; + for (i = 7; i >= 0; i--, j += 8) { + __asm__ __volatile__ ( + "ldr x4, [%[r]]\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x4, x4\n\t" + #endif + "str x4, [%[a]]\n\t" + : + : [r] "r" (r + i), [a] "r" (a + j) + : "memory", "x4" + ); + } +} + +/* Multiply the point by the scalar and serialize the X ordinate. + * The number is 0 padded to maximum size on output. + * + * priv Scalar to multiply the point by. + * pub Point to multiply. + * out Buffer to hold X ordinate. + * outLen On entry, size of the buffer in bytes. 
+ * On exit, length of data in buffer in bytes. + * heap Heap to use for allocation. + * returns BUFFER_E if the buffer is to small for output size, + * MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_secret_gen_521(const mp_int* priv, const ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[9]; +#endif + int err = MP_OKAY; + + if (*outLen < 65U) { + err = BUFFER_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 9, priv); + sp_521_point_from_ecc_point_9(point, pub); + err = sp_521_ecc_mulmod_9(point, point, k, 1, 1, heap); + } + if (err == MP_OKAY) { + sp_521_to_bin_9(point->x, out); + *outLen = 66; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + }; + sp_digit k[9]; + sp_point_521 point; +} sp_ecc_sec_gen_521_ctx; + +int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_521_ctx* ctx = (sp_ecc_sec_gen_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_521_from_mp(ctx->k, 9, priv); + sp_521_point_from_ecc_point_9(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_521_to_bin_9(ctx->point.x, out); + *outLen = 66; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P521 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_521_mont_mul_order_9(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_521_mul_9(r, a, b); + sp_521_mont_reduce_order_9(r, p521_order, p521_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P521 curve. */ +static const uint64_t p521_order_minus_2[9] = { + 0xbb6fb71e91386407U,0x3bb5c9b8899c47aeU,0x7fcc0148f709a5d0U, + 0x51868783bf2f966bU,0xfffffffffffffffaU,0xffffffffffffffffU, + 0xffffffffffffffffU,0xffffffffffffffffU,0x00000000000001ffU +}; +#else +/* The low half of the order-2 of the P521 curve. */ +static const uint64_t p521_order_low[5] = { + 0xbb6fb71e91386407U,0x3bb5c9b8899c47aeU,0x7fcc0148f709a5d0U, + 0x51868783bf2f966bU,0xfffffffffffffffaU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Square number mod the order of P521 curve. 
(r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_order_9(sp_digit* r, const sp_digit* a) +{ + sp_521_sqr_9(r, a); + sp_521_mont_reduce_order_9(r, p521_order, p521_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P521 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_n_order_9(sp_digit* r, const sp_digit* a, int n) +{ + int i; + + sp_521_mont_sqr_order_9(r, a); + for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + XMEMCPY(t, a, sizeof(sp_digit) * 9); + ctx->i = 519; + ctx->state = 1; + break; + case 1: + sp_521_mont_sqr_order_9(t, t); + ctx->state = 2; + break; + case 2: + if ((p521_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) { + sp_521_mont_mul_order_9(t, t, a); + } + ctx->i--; + ctx->state = (ctx->i == 0) ? 3 : 1; + break; + case 3: + XMEMCPY(r, t, sizeof(sp_digit) * 9U); + err = MP_OKAY; + break; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +static void sp_521_mont_inv_order_9(sp_digit* r, const sp_digit* a, + sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 9); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_order_9(t, t); + if ((p521_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { + sp_521_mont_mul_order_9(t, t, a); + } + } + XMEMCPY(r, t, sizeof(sp_digit) * 9U); +#else + sp_digit* t = td; + sp_digit* t2 = td + 2 * 9; + sp_digit* t3 = td + 4 * 9; + int i; + + /* t = a^2 */ + sp_521_mont_sqr_order_9(t, a); + /* t = a^3 = t * a */ + sp_521_mont_mul_order_9(t, t, a); + /* t= a^c = t ^ 2 ^ 2 */ + sp_521_mont_sqr_n_order_9(t2, t, 2); + /* t = a^f = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + + /* t3 = a^1e */ + sp_521_mont_sqr_order_9(t3, t); + /* t3 = a^1f = t3 * a */ + sp_521_mont_mul_order_9(t3, t3, a); + + /* 
t2= a^f0 = t ^ 2 ^ 4 */ + sp_521_mont_sqr_n_order_9(t2, t, 4); + /* t = a^ff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ff00 = t ^ 2 ^ 8 */ + sp_521_mont_sqr_n_order_9(t2, t, 8); + /* t3= a^ffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffff0000 = t ^ 2 ^ 16 */ + sp_521_mont_sqr_n_order_9(t2, t, 16); + /* t = a^ffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + + /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */ + sp_521_mont_sqr_n_order_9(t2, t, 32); + /* t = a^ffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffffffffffffffff0000000000000000 = t ^ 2 ^ 64 */ + sp_521_mont_sqr_n_order_9(t2, t, 64); + /* t = a^ffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffffffffffffffffffffffffffffffff00000000000000000000000000000000 = t ^ 2 ^ 128 */ + sp_521_mont_sqr_n_order_9(t2, t, 128); + /* t = a^ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 */ + sp_521_mont_sqr_n_order_9(t2, t, 5); + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t * t3 */ + sp_521_mont_mul_order_9(t2, t2, t3); + + for (i=259; i>=1; i--) { + sp_521_mont_sqr_order_9(t2, t2); + if ((p521_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { + sp_521_mont_mul_order_9(t2, t2, a); + } + } + sp_521_mont_sqr_order_9(t2, t2); + sp_521_mont_mul_order_9(r, t2, a); +#endif /* WOLFSSL_SP_SMALL */ +} + +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ +#endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +#ifdef HAVE_ECC_SIGN +#ifndef SP_ECC_MAX_SIG_GEN +#define SP_ECC_MAX_SIG_GEN 64 +#endif + +/* Calculate second signature value S from R, k and private value. + * + * s = (r * x + e) / k + * + * s Signature value. + * r First signature value. + * k Ephemeral private key. + * x Private key as a number. 
 * e Hash of message as a number.
 * tmp Temporary storage for intermediate numbers.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k,
    sp_digit* x, const sp_digit* e, sp_digit* tmp)
{
    int err;
    sp_digit carry;
    sp_int64 c;
    /* k is inverted in place: kInv aliases k for the rest of the routine. */
    sp_digit* kInv = k;

    /* Conv k to Montgomery form (mod order) */
    sp_521_mul_9(k, k, p521_norm_order);
    err = sp_521_mod_9(k, k, p521_order);
    if (err == MP_OKAY) {
        sp_521_norm_9(k);

        /* kInv = 1/k mod order */
        sp_521_mont_inv_order_9(kInv, k, tmp);
        sp_521_norm_9(kInv);

        /* s = r * x + e */
        sp_521_mul_9(x, x, r);
        err = sp_521_mod_9(x, x, p521_order);
    }
    if (err == MP_OKAY) {
        sp_521_norm_9(x);
        carry = sp_521_add_9(s, e, x);
        /* Masked (branch-free) reductions: subtract the order when the add
         * carried out, then once more if s is still >= order. */
        sp_521_cond_sub_9(s, s, p521_order, 0 - carry);
        sp_521_norm_9(s);
        c = sp_521_cmp_9(s, p521_order);
        sp_521_cond_sub_9(s, s, p521_order,
            (sp_digit)0 - (sp_digit)(c >= 0));
        sp_521_norm_9(s);

        /* s = s * k^-1 mod order */
        sp_521_mont_mul_order_9(s, s, kInv);
        sp_521_norm_9(s);
    }

    return err;
}

/* Sign the hash using the private key.
 * e = [hash, 521 bits] from binary
 * r = (k.G)->x mod order
 * s = (r * x + e) / k mod order
 * The hash is truncated to the first 521 bits.
 *
 * hash Hash to sign.
 * hashLen Length of the hash data.
 * rng Random number generator.
 * priv Private part of key - scalar.
 * rm First part of result as an mp_int.
 * sm Second part of result as an mp_int.
 * heap Heap to use for allocation.
 * returns RNG failures, MEMORY_E when memory allocation fails and
 * MP_OKAY on success.
+ */ +int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_521* point = NULL; +#else + sp_digit e[7 * 2 * 9]; + sp_point_521 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 9, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 9; + k = e + 4 * 9; + r = e + 6 * 9; + tmp = e + 8 * 9; + s = e; + + if (hashLen > 66U) { + hashLen = 66U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_9(rng, k); + } + else { + sp_521_from_mp(k, 9, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_9(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 9U); + sp_521_norm_9(r); + c = sp_521_cmp_9(r, p521_order); + sp_521_cond_sub_9(r, r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(r); + + if (!sp_521_iszero_9(r)) { + /* x is modified in calculation of s. */ + sp_521_from_mp(x, 9, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_521_from_bin(e, 9, hash, (int)hashLen); + + /* Take 521 leftmost bits of hash. */ + if (hashLen == 66U) { + sp_521_rshift_9(e, e, 7); + } + + err = sp_521_calc_s_9(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_521_iszero_9(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 9); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_521)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sign_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + sp_521_mont_inv_order_9_ctx mont_inv_order_ctx; + }; + sp_digit e[2*9]; + sp_digit x[2*9]; + sp_digit k[2*9]; + sp_digit r[2*9]; + sp_digit tmp[3 * 2*9]; + sp_point_521 point; + sp_digit* s; + sp_digit* kInv; + int i; +} sp_ecc_sign_521_ctx; + +int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, + mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sign_521_ctx* ctx = (sp_ecc_sign_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sign_521_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->s = ctx->e; + ctx->kInv = ctx->k; + + ctx->i = SP_ECC_MAX_SIG_GEN; + ctx->state = 1; + break; + case 1: /* GEN */ + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_9(rng, ctx->k); + } + else { + sp_521_from_mp(ctx->k, 9, km); + mp_zero(km); + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + break; + case 2: /* MULMOD */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &p521_base, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + ctx->state = 3; + } + break; + case 3: /* MODORDER */ + { + sp_int64 c; + /* r = point->x mod order */ + XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 9U); + sp_521_norm_9(ctx->r); + c = sp_521_cmp_9(ctx->r, p521_order); + sp_521_cond_sub_9(ctx->r, ctx->r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(ctx->r); + + if (hashLen > 66U) { + hashLen = 66U; + } + sp_521_from_mp(ctx->x, 9, priv); + sp_521_from_bin(ctx->e, 9, hash, (int)hashLen); + if (hashLen == 66U) { + sp_521_rshift_9(ctx->e, ctx->e, 7); + } + ctx->state = 4; + break; + } + case 4: /* KMODORDER */ + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_9(ctx->k, ctx->k, p521_norm_order); + err = sp_521_mod_9(ctx->k, ctx->k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_9(ctx->k); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 5; + } + break; + case 5: /* KINV */ + /* kInv = 1/k mod order */ + err = sp_521_mont_inv_order_9_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp); + if (err == MP_OKAY) { + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 6; + } + break; + case 6: /* KINVNORM */ + sp_521_norm_9(ctx->kInv); + ctx->state = 7; + break; + case 7: /* R */ + /* s = r * x + e */ + sp_521_mul_9(ctx->x, ctx->x, ctx->r); + ctx->state = 8; + break; + case 8: /* S1 */ + err = sp_521_mod_9(ctx->x, ctx->x, p521_order); + if (err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* S2 */ + { + sp_digit carry; + sp_int64 c; + sp_521_norm_9(ctx->x); + carry = sp_521_add_9(ctx->s, ctx->e, ctx->x); + 
sp_521_cond_sub_9(ctx->s, ctx->s, + p521_order, 0 - carry); + sp_521_norm_9(ctx->s); + c = sp_521_cmp_9(ctx->s, p521_order); + sp_521_cond_sub_9(ctx->s, ctx->s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(ctx->s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_9(ctx->s, ctx->s, ctx->kInv); + sp_521_norm_9(ctx->s); + + /* Check that signature is usable. */ + if (sp_521_iszero_9(ctx->s) == 0) { + ctx->state = 10; + break; + } + #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + ctx->i = 1; + #endif + + /* not usable gen, try again */ + ctx->i--; + if (ctx->i == 0) { + err = RNG_FAILURE_E; + } + ctx->state = 1; + break; + } + case 10: /* RES */ + err = sp_521_to_mp(ctx->r, rm); + if (err == MP_OKAY) { + err = sp_521_to_mp(ctx->s, sm); + } + break; + } + + if (err == MP_OKAY && ctx->state != 10) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 9U); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_SIGN */ + +#ifndef WOLFSSL_SP_SMALL +/* Divide the number by 2 mod the modulus. (r = a / 2 % m) + * + * r Result of division by 2. + * a Number to divide. + * m Modulus. 
+ */ +static void sp_521_div2_mod_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) +{ + __asm__ __volatile__ ( + "ldr x3, [%[a], 0]\n\t" + "ldr x4, [%[a], 8]\n\t" + "ldr x5, [%[a], 16]\n\t" + "ldr x6, [%[a], 24]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[a], 40]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 56]\n\t" + "ldr x11, [%[a], 64]\n\t" + "ldr x12, [%[m], 0]\n\t" + "ldr x13, [%[m], 8]\n\t" + "ldr x14, [%[m], 16]\n\t" + "ldr x15, [%[m], 24]\n\t" + "ldr x16, [%[m], 32]\n\t" + "ldr x17, [%[m], 40]\n\t" + "ldr x19, [%[m], 48]\n\t" + "ldr x20, [%[m], 56]\n\t" + "ldr x21, [%[m], 64]\n\t" + "ands x22, x3, 1\n\t" + "b.eq 1f\n\t" + "adds x3, x3, x12\n\t" + "adcs x4, x4, x13\n\t" + "adcs x5, x5, x14\n\t" + "adcs x6, x6, x15\n\t" + "adcs x7, x7, x16\n\t" + "adcs x8, x8, x17\n\t" + "adcs x9, x9, x19\n\t" + "adcs x10, x10, x20\n\t" + "adcs x11, x11, x21\n\t" + "cset x22, cs\n\t" + "\n1:\n\t" + "extr x3, x4, x3, 1\n\t" + "extr x4, x5, x4, 1\n\t" + "extr x5, x6, x5, 1\n\t" + "extr x6, x7, x6, 1\n\t" + "extr x7, x8, x7, 1\n\t" + "extr x8, x9, x8, 1\n\t" + "extr x9, x10, x9, 1\n\t" + "extr x10, x11, x10, 1\n\t" + "extr x11, x22, x11, 1\n\t" + "str x3, [%[r], 0]\n\t" + "str x4, [%[r], 8]\n\t" + "str x5, [%[r], 16]\n\t" + "str x6, [%[r], 24]\n\t" + "str x7, [%[r], 32]\n\t" + "str x8, [%[r], 40]\n\t" + "str x9, [%[r], 48]\n\t" + "str x10, [%[r], 56]\n\t" + "str x11, [%[r], 64]\n\t" + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc" + ); +} + +static int sp_521_num_bits_64_9(sp_digit n) +{ + int64_t r = -1; + + __asm__ __volatile__ ( + "mov x1, 64\n\t" + "clz %[r], %[n]\n\t" + "sub %[r], x1, %[r]" + : [r] "+r" (r) + : [n] "r" (n) + : "x1" + ); + + return (int)(r + 1); +} + +static int sp_521_num_bits_9(const sp_digit* a) +{ + int i; + int r = 0; + + for (i=8; i>=0; i--) { + if (a[i] != 0) { + r = sp_521_num_bits_64_9(a[i]); + r 
+= i * 64; + break; + } + } + + return r; +} + +/* Non-constant time modular inversion. + * + * @param [out] r Resulting number. + * @param [in] a Number to invert. + * @param [in] m Modulus. + * @return MP_OKAY on success. + */ +static int sp_521_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit u[9]; + sp_digit v[9]; + sp_digit b[9]; + sp_digit d[9]; + int ut, vt; + sp_digit o; + + + XMEMCPY(u, m, sizeof(u)); + XMEMCPY(v, a, sizeof(v)); + + ut = sp_521_num_bits_9(u); + vt = sp_521_num_bits_9(v); + + XMEMSET(b, 0, sizeof(b)); + if ((v[0] & 1) == 0) { + sp_521_rshift1_9(v, v); + XMEMCPY(d, m, sizeof(u)); + d[0] += 1; + sp_521_rshift1_9(d, d); + vt--; + + while ((v[0] & 1) == 0) { + sp_521_rshift1_9(v, v); + sp_521_div2_mod_9(d, d, m); + vt--; + } + } + else { + XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit)); + d[0] = 1; + } + + while (ut > 1 && vt > 1) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_9(u, v) >= 0))) { + sp_521_sub_9(u, u, v); + o = sp_521_sub_9(b, b, d); + if (o != 0) + sp_521_add_9(b, b, m); + ut = sp_521_num_bits_9(u); + + do { + sp_521_rshift1_9(u, u); + sp_521_div2_mod_9(b, b, m); + ut--; + } + while (ut > 0 && (u[0] & 1) == 0); + } + else { + sp_521_sub_9(v, v, u); + o = sp_521_sub_9(d, d, b); + if (o != 0) + sp_521_add_9(d, d, m); + vt = sp_521_num_bits_9(v); + + do { + sp_521_rshift1_9(v, v); + sp_521_div2_mod_9(d, d, m); + vt--; + } + while (vt > 0 && (v[0] & 1) == 0); + } + } + + if (ut == 1) + XMEMCPY(r, b, sizeof(b)); + else + XMEMCPY(r, d, sizeof(d)); + + + return MP_OKAY; +} + +#endif /* WOLFSSL_SP_SMALL */ + +/* Add point p1 into point p2. Handles p1 == p2 and result at infinity. + * + * p1 First point to add and holds result. + * p2 Second point to add. + * tmp Temporary storage for intermediate numbers. 
+ */ +static void sp_521_add_points_9(sp_point_521* p1, const sp_point_521* p2, + sp_digit* tmp) +{ + + sp_521_proj_point_add_9(p1, p1, p2, tmp); + if (sp_521_iszero_9(p1->z)) { + if (sp_521_iszero_9(p1->x) && sp_521_iszero_9(p1->y)) { + sp_521_proj_point_dbl_9(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + p1->x[4] = 0; + p1->x[5] = 0; + p1->x[6] = 0; + p1->x[7] = 0; + p1->x[8] = 0; + XMEMCPY(p1->z, p521_norm_mod, sizeof(p521_norm_mod)); + } + } +} + +/* Calculate the verification point: [e/s]G + [r/s]Q + * + * p1 Calculated point. + * p2 Public point and temporary. + * s Second part of signature as a number. + * u1 Temporary number. + * u2 Temporary number. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_calc_vfy_point_9(sp_point_521* p1, sp_point_521* p2, + sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap) +{ + int err; + +#ifndef WOLFSSL_SP_SMALL + err = sp_521_mod_inv_9(s, s, p521_order); + if (err == MP_OKAY) +#endif /* !WOLFSSL_SP_SMALL */ + { + sp_521_mul_9(s, s, p521_norm_order); + err = sp_521_mod_9(s, s, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_9(s); +#ifdef WOLFSSL_SP_SMALL + { + sp_521_mont_inv_order_9(s, s, tmp); + sp_521_mont_mul_order_9(u1, u1, s); + sp_521_mont_mul_order_9(u2, u2, s); + } +#else + { + sp_521_mont_mul_order_9(u1, u1, s); + sp_521_mont_mul_order_9(u2, u2, s); + } +#endif /* WOLFSSL_SP_SMALL */ + { + err = sp_521_ecc_mulmod_base_9(p1, u1, 0, 0, heap); + } + } + if ((err == MP_OKAY) && sp_521_iszero_9(p1->z)) { + p1->infinity = 1; + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_9(p2, p2, u2, 0, 0, heap); + } + if ((err == MP_OKAY) && sp_521_iszero_9(p2->z)) { + p2->infinity = 1; + } + + if (err == MP_OKAY) { + sp_521_add_points_9(p1, p2, tmp); + } + + return err; +} + +#ifdef HAVE_ECC_VERIFY +/* Verify the 
signature values with the hash and public key. + * e = Truncate(hash, 521) + * u1 = e/s mod order + * u2 = r/s mod order + * r == (u1.G + u2.Q)->x mod order + * Optimization: Leave point in projective form. + * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z') + * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' + * The hash is truncated to the first 521 bits. + * + * hash Hash to sign. + * hashLen Length of the hash data. + * rng Random number generator. + * priv Private part of key - scalar. + * rm First part of result as an mp_int. + * sm Sirst part of result as an mp_int. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_verify_521(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_521* p1 = NULL; +#else + sp_digit u1[18 * 9]; + sp_point_521 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_521* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 9, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 9; + s = u1 + 4 * 9; + tmp = u1 + 6 * 9; + p2 = p1 + 1; + + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(u1, 9, hash, (int)hashLen); + sp_521_from_mp(u2, 9, rm); + sp_521_from_mp(s, 9, sm); + sp_521_from_mp(p2->x, 9, pX); + sp_521_from_mp(p2->y, 9, pY); + sp_521_from_mp(p2->z, 9, pZ); + + if (hashLen == 66U) { + sp_521_rshift_9(u1, u1, 7); + } + + err = sp_521_calc_vfy_point_9(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) 
{ + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(u2, 9, rm); + err = sp_521_mod_mul_norm_9(u2, u2, p521_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_9(p1->z, p1->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(u1, u2, p1->z, p521_mod, p521_mp_mod); + *res = (int)(sp_521_cmp_9(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_521_from_mp(u2, 9, rm); + carry = sp_521_add_9(u2, u2, p521_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_521_norm_9(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_9(u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_9(u2, u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_521_mont_mul_9(u1, u2, p1->z, p521_mod, p521_mp_mod); + } + *res = (sp_521_cmp_9(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_verify_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + sp_521_mont_inv_order_9_ctx mont_inv_order_ctx; + sp_521_proj_point_dbl_9_ctx dbl_ctx; + sp_521_proj_point_add_9_ctx add_ctx; + }; + sp_digit u1[2*9]; + sp_digit u2[2*9]; + sp_digit s[2*9]; + sp_digit tmp[2*9 * 6]; + sp_point_521 p1; + sp_point_521 p2; +} sp_ecc_verify_521_ctx; + +int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, + word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ, + const mp_int* rm, const mp_int* sm, int* res, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_verify_521_ctx* ctx = (sp_ecc_verify_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_verify_521_ctx) >= 
sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(ctx->u1, 9, hash, (int)hashLen); + sp_521_from_mp(ctx->u2, 9, rm); + sp_521_from_mp(ctx->s, 9, sm); + sp_521_from_mp(ctx->p2.x, 9, pX); + sp_521_from_mp(ctx->p2.y, 9, pY); + sp_521_from_mp(ctx->p2.z, 9, pZ); + if (hashLen == 66U) { + sp_521_rshift_9(ctx->u1, ctx->u1, 7); + } + ctx->state = 1; + break; + case 1: /* NORMS0 */ + sp_521_mul_9(ctx->s, ctx->s, p521_norm_order); + err = sp_521_mod_9(ctx->s, ctx->s, p521_order); + if (err == MP_OKAY) + ctx->state = 2; + break; + case 2: /* NORMS1 */ + sp_521_norm_9(ctx->s); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 3; + break; + case 3: /* NORMS2 */ + err = sp_521_mont_inv_order_9_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp); + if (err == MP_OKAY) { + ctx->state = 4; + } + break; + case 4: /* NORMS3 */ + sp_521_mont_mul_order_9(ctx->u1, ctx->u1, ctx->s); + ctx->state = 5; + break; + case 5: /* NORMS4 */ + sp_521_mont_mul_order_9(ctx->u2, ctx->u2, ctx->s); + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 6; + break; + case 6: /* MULBASE */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p521_base, ctx->u1, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_9(ctx->p1.z)) { + ctx->p1.infinity = 1; + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 7; + } + break; + case 7: /* MULMOD */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_9(ctx->p2.z)) { + ctx->p2.infinity = 1; + } + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 8; + } + break; + case 8: /* ADD */ + err = sp_521_proj_point_add_9_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp); + if (err == MP_OKAY) + 
ctx->state = 9; + break; + case 9: /* MONT */ + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(ctx->u2, 9, rm); + err = sp_521_mod_mul_norm_9(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) + ctx->state = 10; + break; + case 10: /* SQR */ + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_9(ctx->p1.z, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: /* MUL */ + sp_521_mont_mul_9(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 12; + break; + case 12: /* RES */ + { + sp_int64 c = 0; + err = MP_OKAY; /* math okay, now check result */ + *res = (int)(sp_521_cmp_9(ctx->p1.x, ctx->u1) == 0); + if (*res == 0) { + sp_digit carry; + + /* Reload r and add order. */ + sp_521_from_mp(ctx->u2, 9, rm); + carry = sp_521_add_9(ctx->u2, ctx->u2, p521_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_521_norm_9(ctx->u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_9(ctx->u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_9(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + sp_521_mont_mul_9(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, + p521_mp_mod); + *res = (int)(sp_521_cmp_9(ctx->p1.x, ctx->u1) == 0); + } + } + break; + } + } /* switch */ + + if (err == MP_OKAY && ctx->state != 12) { + err = FP_WOULDBLOCK; + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_VERIFY */ + +#ifdef HAVE_ECC_CHECK_KEY +/* Check that the x and y ordinates are a valid point on the curve. + * + * point EC point. + * heap Heap to use if dynamically allocating. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +static int sp_521_ecc_is_point_9(const sp_point_521* point, + void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[9 * 4]; +#endif + sp_digit* t2 = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 4, heap, DYNAMIC_TYPE_ECC); + if (t1 == NULL) + err = MEMORY_E; +#endif + (void)heap; + + if (err == MP_OKAY) { + t2 = t1 + 2 * 9; + + /* y^2 - x^3 - a.x = b */ + sp_521_sqr_9(t1, point->y); + (void)sp_521_mod_9(t1, t1, p521_mod); + sp_521_sqr_9(t2, point->x); + (void)sp_521_mod_9(t2, t2, p521_mod); + sp_521_mul_9(t2, t2, point->x); + (void)sp_521_mod_9(t2, t2, p521_mod); + sp_521_mont_sub_9(t1, t1, t2, p521_mod); + + /* y^2 - x^3 + 3.x = b, when a = -3 */ + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + + + if (sp_521_cmp_9(t1, p521_b) != 0) { + err = MP_VAL; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the x and y ordinates are a valid point on the curve. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +int sp_ecc_is_point_521(const mp_int* pX, const mp_int* pY) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* pub = NULL; +#else + sp_point_521 pub[1]; +#endif + const byte one[1] = { 1 }; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(pub->x, 9, pX); + sp_521_from_mp(pub->y, 9, pY); + sp_521_from_bin(pub->z, 9, one, (int)sizeof(one)); + + err = sp_521_ecc_is_point_9(pub, NULL); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the private scalar generates the EC point (px, py), the point is + * on the curve and the point has the correct order. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * privm Private scalar that generates EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve, ECC_INF_E if the point does not have the correct order, + * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and + * MP_OKAY otherwise. + */ +int sp_ecc_check_key_521(const mp_int* pX, const mp_int* pY, + const mp_int* privm, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* priv = NULL; + sp_point_521* pub = NULL; +#else + sp_digit priv[9]; + sp_point_521 pub[2]; +#endif + sp_point_521* p = NULL; + const byte one[1] = { 1 }; + int err = MP_OKAY; + + + /* Quick check the lengs of public key ordinates and private key are in + * range. Proper check later. 
+ */ + if (((mp_count_bits(pX) > 521) || + (mp_count_bits(pY) > 521) || + ((privm != NULL) && (mp_count_bits(privm) > 521)))) { + err = ECC_OUT_OF_RANGE_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY && privm) { + priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (priv == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = pub + 1; + + sp_521_from_mp(pub->x, 9, pX); + sp_521_from_mp(pub->y, 9, pY); + sp_521_from_bin(pub->z, 9, one, (int)sizeof(one)); + if (privm) + sp_521_from_mp(priv, 9, privm); + + /* Check point at infinitiy. */ + if ((sp_521_iszero_9(pub->x) != 0) && + (sp_521_iszero_9(pub->y) != 0)) { + err = ECC_INF_E; + } + } + + /* Check range of X and Y */ + if ((err == MP_OKAY) && + ((sp_521_cmp_9(pub->x, p521_mod) >= 0) || + (sp_521_cmp_9(pub->y, p521_mod) >= 0))) { + err = ECC_OUT_OF_RANGE_E; + } + + if (err == MP_OKAY) { + /* Check point is on curve */ + err = sp_521_ecc_is_point_9(pub, heap); + } + + if (err == MP_OKAY) { + /* Point * order = infinity */ + err = sp_521_ecc_mulmod_9(p, pub, p521_order, 1, 1, heap); + } + /* Check result is infinity */ + if ((err == MP_OKAY) && ((sp_521_iszero_9(p->x) == 0) || + (sp_521_iszero_9(p->y) == 0))) { + err = ECC_INF_E; + } + + if (privm) { + if (err == MP_OKAY) { + /* Base * private = point */ + err = sp_521_ecc_mulmod_base_9(p, priv, 1, 1, heap); + } + /* Check result is public key */ + if ((err == MP_OKAY) && + ((sp_521_cmp_9(p->x, pub->x) != 0) || + (sp_521_cmp_9(p->y, pub->y) != 0))) { + err = ECC_PRIV_KEY_E; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, heap, DYNAMIC_TYPE_ECC); + if (priv != NULL) + XFREE(priv, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL +/* Add two projective EC points together. 
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 6]; + sp_point_521 p[2]; +#endif + sp_point_521* q = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { + q = p + 1; + + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + sp_521_from_mp(q->x, 9, qX); + sp_521_from_mp(q->y, 9, qY); + sp_521_from_mp(q->z, 9, qZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + q->infinity = sp_521_iszero_9(q->x) & + sp_521_iszero_9(q->y); + + sp_521_proj_point_add_9(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Double a projective EC point. 
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 2]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + + sp_521_proj_point_dbl_9(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_map_521(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 5]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + + sp_521_map_9(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Square root power for the P521 curve. */ +static const uint64_t p521_sqrt_power[9] = { + 0x0000000000000000,0x0000000000000000,0x0000000000000000, + 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000, + 0x0000000000000080 +}; + +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_521_mont_sqrt_9(sp_digit* y) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t = NULL; +#else + sp_digit t[2 * 9]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + + { + int i; + + XMEMCPY(t, y, sizeof(sp_digit) * 9); + for (i=518; i>=0; i--) { + sp_521_mont_sqr_9(t, t, p521_mod, p521_mp_mod); + if (p521_sqrt_power[i / 64] & ((sp_digit)1 << (i % 64))) + sp_521_mont_mul_9(t, t, y, p521_mod, p521_mp_mod); + } + XMEMCPY(y, t, sizeof(sp_digit) * 9); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_521(mp_int* xm, int odd, mp_int* ym) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* x = NULL; +#else + sp_digit x[4 * 9]; +#endif + sp_digit* y = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 9, NULL, DYNAMIC_TYPE_ECC); + if (x == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + y = x + 2 * 9; + + sp_521_from_mp(x, 9, xm); + err = sp_521_mod_mul_norm_9(x, x, p521_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_521_mont_sqr_9(y, x, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, y, x, p521_mod, p521_mp_mod); + } + /* y = x^3 - 3x */ + sp_521_mont_sub_9(y, y, x, p521_mod); + sp_521_mont_sub_9(y, y, x, p521_mod); + sp_521_mont_sub_9(y, y, x, p521_mod); + /* y = x^3 - 3x + b */ + err = sp_521_mod_mul_norm_9(x, p521_b, p521_mod); + } + if (err == MP_OKAY) { + sp_521_mont_add_9(y, y, x, p521_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_521_mont_sqrt_9(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 9, 0, 9U * 
sizeof(sp_digit)); + sp_521_mont_reduce_9(y, p521_mod, p521_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_521_mont_sub_9(y, p521_mod, y, p521_mod); + } + + err = sp_521_to_mp(y, ym); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (x != NULL) + XFREE(x, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_521 */ +#ifdef WOLFCRYPT_HAVE_SAKKE #ifdef WOLFSSL_SP_1024 /* Point structure to use. */ @@ -65475,7 +114109,7 @@ static void sp_1024_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "stp x5, x3, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "cc" ); } @@ -65494,165 +114128,165 @@ static void sp_1024_sqr_8(sp_digit* r, const sp_digit* a) "ldp x25, x26, [%[a], 32]\n\t" "ldp x27, x28, [%[a], 48]\n\t" "# A[0] * A[1]\n\t" - "mul x6, x21, x22\n\t" - "umulh x7, x21, x22\n\t" + "mul x6, x21, x22\n\t" + "umulh x7, x21, x22\n\t" "# A[0] * A[2]\n\t" - "mul x4, x21, x23\n\t" - "umulh x5, x21, x23\n\t" - "adds x7, x7, x4\n\t" + "mul x4, x21, x23\n\t" + "umulh x5, x21, x23\n\t" + "adds x7, x7, x4\n\t" "# A[0] * A[3]\n\t" - "mul x4, x21, x24\n\t" - "adc x8, xzr, x5\n\t" - "umulh x5, x21, x24\n\t" - "adds x8, x8, x4\n\t" + "mul x4, x21, x24\n\t" + "adc x8, xzr, x5\n\t" + "umulh x5, x21, x24\n\t" + "adds x8, x8, x4\n\t" "# A[1] * A[2]\n\t" - "mul x4, x22, x23\n\t" - "adc x9, xzr, x5\n\t" - "umulh x5, x22, x23\n\t" - "adds x8, x8, x4\n\t" + "mul x4, x22, x23\n\t" + "adc x9, xzr, x5\n\t" + "umulh x5, x22, x23\n\t" + "adds x8, x8, x4\n\t" "# A[0] * A[4]\n\t" - "mul x4, x21, x25\n\t" - "adcs x9, x9, x5\n\t" - "umulh x5, x21, x25\n\t" - "adc x10, xzr, xzr\n\t" - "adds x9, x9, x4\n\t" + "mul x4, x21, x25\n\t" + 
"adcs x9, x9, x5\n\t" + "umulh x5, x21, x25\n\t" + "adc x10, xzr, xzr\n\t" + "adds x9, x9, x4\n\t" "# A[1] * A[3]\n\t" - "mul x4, x22, x24\n\t" - "adc x10, x10, x5\n\t" - "umulh x5, x22, x24\n\t" - "adds x9, x9, x4\n\t" + "mul x4, x22, x24\n\t" + "adc x10, x10, x5\n\t" + "umulh x5, x22, x24\n\t" + "adds x9, x9, x4\n\t" "# A[0] * A[5]\n\t" - "mul x4, x21, x26\n\t" - "adcs x10, x10, x5\n\t" - "umulh x5, x21, x26\n\t" - "adc x11, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x21, x26\n\t" + "adcs x10, x10, x5\n\t" + "umulh x5, x21, x26\n\t" + "adc x11, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" "# A[1] * A[4]\n\t" - "mul x4, x22, x25\n\t" - "adc x11, x11, x5\n\t" - "umulh x5, x22, x25\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x22, x25\n\t" + "adc x11, x11, x5\n\t" + "umulh x5, x22, x25\n\t" + "adds x10, x10, x4\n\t" "# A[2] * A[3]\n\t" - "mul x4, x23, x24\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x23, x24\n\t" - "adc x12, xzr, xzr\n\t" - "adds x10, x10, x4\n\t" + "mul x4, x23, x24\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x23, x24\n\t" + "adc x12, xzr, xzr\n\t" + "adds x10, x10, x4\n\t" "# A[0] * A[6]\n\t" - "mul x4, x21, x27\n\t" - "adcs x11, x11, x5\n\t" - "umulh x5, x21, x27\n\t" - "adc x12, x12, xzr\n\t" - "adds x11, x11, x4\n\t" + "mul x4, x21, x27\n\t" + "adcs x11, x11, x5\n\t" + "umulh x5, x21, x27\n\t" + "adc x12, x12, xzr\n\t" + "adds x11, x11, x4\n\t" "# A[1] * A[5]\n\t" - "mul x4, x22, x26\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x22, x26\n\t" - "adc x13, xzr, xzr\n\t" - "adds x11, x11, x4\n\t" + "mul x4, x22, x26\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x22, x26\n\t" + "adc x13, xzr, xzr\n\t" + "adds x11, x11, x4\n\t" "# A[2] * A[4]\n\t" - "mul x4, x23, x25\n\t" - "adcs x12, x12, x5\n\t" - "umulh x5, x23, x25\n\t" - "adc x13, x13, xzr\n\t" - "adds x11, x11, x4\n\t" + "mul x4, x23, x25\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x23, x25\n\t" + "adc x13, x13, xzr\n\t" + "adds x11, x11, x4\n\t" "# A[0] * A[7]\n\t" - "mul x4, x21, x28\n\t" - "adcs 
x12, x12, x5\n\t" - "umulh x5, x21, x28\n\t" - "adc x13, x13, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x21, x28\n\t" + "adcs x12, x12, x5\n\t" + "umulh x5, x21, x28\n\t" + "adc x13, x13, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[1] * A[6]\n\t" - "mul x4, x22, x27\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x22, x27\n\t" - "adc x14, xzr, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x22, x27\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x22, x27\n\t" + "adc x14, xzr, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[2] * A[5]\n\t" - "mul x4, x23, x26\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x23, x26\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x23, x26\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x23, x26\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[3] * A[4]\n\t" - "mul x4, x24, x25\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x24, x25\n\t" - "adc x14, x14, xzr\n\t" - "adds x12, x12, x4\n\t" + "mul x4, x24, x25\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x24, x25\n\t" + "adc x14, x14, xzr\n\t" + "adds x12, x12, x4\n\t" "# A[1] * A[7]\n\t" - "mul x4, x22, x28\n\t" - "adcs x13, x13, x5\n\t" - "umulh x5, x22, x28\n\t" - "adc x14, x14, xzr\n\t" - "adds x13, x13, x4\n\t" + "mul x4, x22, x28\n\t" + "adcs x13, x13, x5\n\t" + "umulh x5, x22, x28\n\t" + "adc x14, x14, xzr\n\t" + "adds x13, x13, x4\n\t" "# A[2] * A[6]\n\t" - "mul x4, x23, x27\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x23, x27\n\t" - "adc x15, xzr, xzr\n\t" - "adds x13, x13, x4\n\t" + "mul x4, x23, x27\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x23, x27\n\t" + "adc x15, xzr, xzr\n\t" + "adds x13, x13, x4\n\t" "# A[3] * A[5]\n\t" - "mul x4, x24, x26\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x24, x26\n\t" - "adc x15, x15, xzr\n\t" - "adds x13, x13, x4\n\t" + "mul x4, x24, x26\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x24, x26\n\t" + "adc x15, x15, xzr\n\t" + "adds x13, x13, x4\n\t" "# A[2] * A[7]\n\t" - "mul x4, x23, x28\n\t" - "adcs x14, x14, x5\n\t" - "umulh x5, x23, 
x28\n\t" - "adc x15, x15, xzr\n\t" - "adds x14, x14, x4\n\t" + "mul x4, x23, x28\n\t" + "adcs x14, x14, x5\n\t" + "umulh x5, x23, x28\n\t" + "adc x15, x15, xzr\n\t" + "adds x14, x14, x4\n\t" "# A[3] * A[6]\n\t" - "mul x4, x24, x27\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x24, x27\n\t" - "adc x16, xzr, xzr\n\t" - "adds x14, x14, x4\n\t" + "mul x4, x24, x27\n\t" + "adcs x15, x15, x5\n\t" + "umulh x5, x24, x27\n\t" + "adc x16, xzr, xzr\n\t" + "adds x14, x14, x4\n\t" "# A[4] * A[5]\n\t" - "mul x4, x25, x26\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x25, x26\n\t" - "adc x16, x16, xzr\n\t" - "adds x14, x14, x4\n\t" + "mul x4, x25, x26\n\t" + "adcs x15, x15, x5\n\t" + "umulh x5, x25, x26\n\t" + "adc x16, x16, xzr\n\t" + "adds x14, x14, x4\n\t" "# A[3] * A[7]\n\t" - "mul x4, x24, x28\n\t" - "adcs x15, x15, x5\n\t" - "umulh x5, x24, x28\n\t" - "adc x16, x16, xzr\n\t" - "adds x15, x15, x4\n\t" + "mul x4, x24, x28\n\t" + "adcs x15, x15, x5\n\t" + "umulh x5, x24, x28\n\t" + "adc x16, x16, xzr\n\t" + "adds x15, x15, x4\n\t" "# A[4] * A[6]\n\t" - "mul x4, x25, x27\n\t" - "adcs x16, x16, x5\n\t" - "umulh x5, x25, x27\n\t" - "adc x17, xzr, xzr\n\t" - "adds x15, x15, x4\n\t" + "mul x4, x25, x27\n\t" + "adcs x16, x16, x5\n\t" + "umulh x5, x25, x27\n\t" + "adc x17, xzr, xzr\n\t" + "adds x15, x15, x4\n\t" "# A[4] * A[7]\n\t" - "mul x4, x25, x28\n\t" - "adcs x16, x16, x5\n\t" - "umulh x5, x25, x28\n\t" - "adc x17, x17, xzr\n\t" - "adds x16, x16, x4\n\t" + "mul x4, x25, x28\n\t" + "adcs x16, x16, x5\n\t" + "umulh x5, x25, x28\n\t" + "adc x17, x17, xzr\n\t" + "adds x16, x16, x4\n\t" "# A[5] * A[6]\n\t" - "mul x4, x26, x27\n\t" - "adcs x17, x17, x5\n\t" - "umulh x5, x26, x27\n\t" - "adc x19, xzr, xzr\n\t" - "adds x16, x16, x4\n\t" + "mul x4, x26, x27\n\t" + "adcs x17, x17, x5\n\t" + "umulh x5, x26, x27\n\t" + "adc x19, xzr, xzr\n\t" + "adds x16, x16, x4\n\t" "# A[5] * A[7]\n\t" - "mul x4, x26, x28\n\t" - "adcs x17, x17, x5\n\t" - "umulh x5, x26, x28\n\t" - "adc x19, x19, xzr\n\t" - 
"adds x17, x17, x4\n\t" + "mul x4, x26, x28\n\t" + "adcs x17, x17, x5\n\t" + "umulh x5, x26, x28\n\t" + "adc x19, x19, xzr\n\t" + "adds x17, x17, x4\n\t" "# A[6] * A[7]\n\t" - "mul x4, x27, x28\n\t" - "adcs x19, x19, x5\n\t" - "umulh x5, x27, x28\n\t" - "adc x20, xzr, xzr\n\t" - "adds x19, x19, x4\n\t" - "adc x20, x20, x5\n\t" + "mul x4, x27, x28\n\t" + "adcs x19, x19, x5\n\t" + "umulh x5, x27, x28\n\t" + "adc x20, xzr, xzr\n\t" + "adds x19, x19, x4\n\t" + "adc x20, x20, x5\n\t" "# Double\n\t" "adds x6, x6, x6\n\t" "adcs x7, x7, x7\n\t" @@ -65668,44 +114302,44 @@ static void sp_1024_sqr_8(sp_digit* r, const sp_digit* a) "adcs x17, x17, x17\n\t" "adcs x19, x19, x19\n\t" "# A[0] * A[0]\n\t" - "mul x5, x21, x21\n\t" + "mul x5, x21, x21\n\t" "adcs x20, x20, x20\n\t" - "umulh x2, x21, x21\n\t" + "umulh x2, x21, x21\n\t" "cset x21, cs\n\t" "# A[1] * A[1]\n\t" - "mul x3, x22, x22\n\t" + "mul x3, x22, x22\n\t" "adds x6, x6, x2\n\t" - "umulh x4, x22, x22\n\t" + "umulh x4, x22, x22\n\t" "adcs x7, x7, x3\n\t" "# A[2] * A[2]\n\t" - "mul x2, x23, x23\n\t" + "mul x2, x23, x23\n\t" "adcs x8, x8, x4\n\t" - "umulh x3, x23, x23\n\t" + "umulh x3, x23, x23\n\t" "adcs x9, x9, x2\n\t" "# A[3] * A[3]\n\t" - "mul x4, x24, x24\n\t" + "mul x4, x24, x24\n\t" "adcs x10, x10, x3\n\t" - "umulh x2, x24, x24\n\t" + "umulh x2, x24, x24\n\t" "adcs x11, x11, x4\n\t" "# A[4] * A[4]\n\t" - "mul x3, x25, x25\n\t" + "mul x3, x25, x25\n\t" "adcs x12, x12, x2\n\t" - "umulh x4, x25, x25\n\t" + "umulh x4, x25, x25\n\t" "adcs x13, x13, x3\n\t" "# A[5] * A[5]\n\t" - "mul x2, x26, x26\n\t" + "mul x2, x26, x26\n\t" "adcs x14, x14, x4\n\t" - "umulh x3, x26, x26\n\t" + "umulh x3, x26, x26\n\t" "adcs x15, x15, x2\n\t" "# A[6] * A[6]\n\t" - "mul x4, x27, x27\n\t" + "mul x4, x27, x27\n\t" "adcs x16, x16, x3\n\t" - "umulh x2, x27, x27\n\t" + "umulh x2, x27, x27\n\t" "adcs x17, x17, x4\n\t" "# A[7] * A[7]\n\t" - "mul x3, x28, x28\n\t" + "mul x3, x28, x28\n\t" "adcs x19, x19, x2\n\t" - "umulh x4, x28, x28\n\t" + "umulh 
x4, x28, x28\n\t" "adcs x20, x20, x3\n\t" "stp x5, x6, [%[r], 0]\n\t" "adc x21, x21, x4\n\t" @@ -65718,7 +114352,7 @@ static void sp_1024_sqr_8(sp_digit* r, const sp_digit* a) "stp x20, x21, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" ); } @@ -65752,15 +114386,47 @@ static sp_digit sp_1024_add_8(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 32]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 48]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } +/* Add digit to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_1024_add_word_8(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "adds x3, x3, %[b]\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "adcs x3, x3, xzr\n\t" + "adcs x4, x4, xzr\n\t" + "adcs x5, x5, xzr\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "adcs x6, x6, xzr\n\t" + "stp x5, x6, [%[r], 48]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "cc" + ); +} + /* Sub b from a into a. (a -= b) * * a A single precision integer and result. 
@@ -65812,7 +114478,7 @@ static sp_digit sp_1024_sub_in_place_16(sp_digit* a, const sp_digit* b) "csetm %[a], cc\n\t" : [a] "+r" (a) : [b] "r" (b) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); return (sp_digit)a; @@ -65868,72 +114534,66 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r], 96]\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 112]\n\t" - "cset %[r], cs\n\t" + "adc %[r], xzr, xzr\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -/* AND m into each word of a and store in r. +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. */ -static void sp_1024_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<8; i++) { - r[i] = a[i] & m; - } -#else - r[0] = a[0] & m; - r[1] = a[1] & m; - r[2] = a[2] & m; - r[3] = a[3] & m; - r[4] = a[4] & m; - r[5] = a[5] & m; - r[6] = a[6] & m; - r[7] = a[7] & m; -#endif -} - -/* Add digit to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static void sp_1024_add_zero_8(sp_digit* r, const sp_digit* a, - const sp_digit d) +static sp_digit sp_1024_cond_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "adds x3, x3, %[d]\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "adcs x3, x3, xzr\n\t" - "adcs x4, x4, xzr\n\t" - "adcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, xzr\n\t" - "stp x5, x6, [%[r], 48]\n\t" - : - : [r] "r" (r), [a] "r" (a), [d] "r" (d) - : "memory", "x3", "x4", "x5", "x6" + + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" + "and x11, x11, %[m]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "cset %[r], cs\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); + + return (sp_digit)r; } +#endif /* !WOLFSSL_SP_SMALL */ /* Multiply a and b into r. 
(r = a * b) * @@ -65948,95 +114608,67 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, sp_digit z1[16]; sp_digit a1[8]; sp_digit b1[8]; - sp_digit z2[16]; - sp_digit u, ca, cb; + sp_digit* z2 = r + 16; + sp_digit u; + sp_digit ca; + sp_digit cb; ca = sp_1024_add_8(a1, a, &a[8]); cb = sp_1024_add_8(b1, b, &b[8]); u = ca & cb; - sp_1024_mul_8(z1, a1, b1); + sp_1024_mul_8(z2, &a[8], &b[8]); sp_1024_mul_8(z0, a, b); - sp_1024_mask_8(r + 16, a1, 0 - cb); - sp_1024_mask_8(b1, b1, 0 - ca); - u += sp_1024_add_8(r + 16, r + 16, b1); - u += sp_1024_sub_in_place_16(z1, z2); + sp_1024_mul_8(z1, a1, b1); + u += sp_1024_sub_in_place_16(z1, z0); + u += sp_1024_sub_in_place_16(z1, z2); + u += sp_1024_cond_add_8(z1 + 8, z1 + 8, a1, 0 - cb); + u += sp_1024_cond_add_8(z1 + 8, z1 + 8, b1, 0 - ca); + u += sp_1024_add_16(r + 8, r + 8, z1); - u += sp_1024_add_8(r + 16, r + 16, z2); - sp_1024_add_zero_8(r + 24, z2 + 8, u); + (void)sp_1024_add_word_8(r + 24, r + 24, u); } -#ifdef WOLFSSL_SP_SMALL -/* Double a into r. (r = a + a) +/* Sub b from a into r. (r = a - b) * * r A single precision integer. * a A single precision integer. + * b A single precision integer. */ -static sp_digit sp_1024_dbl_8(sp_digit* r, const sp_digit* a) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 64\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "adcs x3, x3, x3\n\t" - "adcs x4, x4, x4\n\t" - "adcs x5, x5, x5\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "adcs x6, x6, x6\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a) - : - : "memory", "x3", "x4", "x5", "x6", "x11" - ); - - return c; -} - -#else -/* Double a into r. (r = a + a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -static sp_digit sp_1024_dbl_8(sp_digit* r, const sp_digit* a) +static sp_digit sp_1024_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) { __asm__ __volatile__ ( "ldp x3, x4, [%[a], 0]\n\t" - "adds x3, x3, x3\n\t" - "ldr x5, [%[a], 16]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 24]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "subs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 0]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 16]\n\t" "ldp x3, x4, [%[a], 32]\n\t" - "adcs x3, x3, x3\n\t" - "ldr x5, [%[a], 48]\n\t" - "adcs x4, x4, x4\n\t" - "ldr x6, [%[a], 56]\n\t" - "adcs x5, x5, x5\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "sbcs x3, x3, x7\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "sbcs x4, x4, x8\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "sbcs x5, x5, x9\n\t" "stp x3, x4, [%[r], 32]\n\t" - "adcs x6, x6, x6\n\t" + "sbcs x6, x6, x10\n\t" "stp x5, x6, [%[r], 48]\n\t" - "cset %[r], cs\n\t" + "csetm %[r], cc\n\t" : [r] "+r" (r) - : [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6" + : [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return (sp_digit)r; } -#endif /* WOLFSSL_SP_SMALL */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. 
@@ -66045,22 +114677,31 @@ static sp_digit sp_1024_dbl_8(sp_digit* r, const sp_digit* a) SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z2[16]; + sp_digit* z2 = r + 16; sp_digit z1[16]; - sp_digit a1[8]; + sp_digit* a1 = z1; + sp_digit* zero = z1 + 8; sp_digit u; + sp_digit mask; + sp_digit* p1; + sp_digit* p2; + + XMEMSET(zero, 0, sizeof(sp_digit) * 8); + + mask = sp_1024_sub_8(a1, a, &a[8]); + p1 = (sp_digit*)(((sp_digit)zero & mask ) | ((sp_digit)a1 & (~mask))); + p2 = (sp_digit*)(((sp_digit)zero & (~mask)) | ((sp_digit)a1 & mask )); + (void)sp_1024_sub_8(a1, p1, p2); - u = sp_1024_add_8(a1, a, &a[8]); - sp_1024_sqr_8(z1, a1); sp_1024_sqr_8(z2, &a[8]); sp_1024_sqr_8(z0, a); - sp_1024_mask_8(r + 16, a1, 0 - u); - u += sp_1024_dbl_8(r + 16, r + 16); - u += sp_1024_sub_in_place_16(z1, z2); - u += sp_1024_sub_in_place_16(z1, z0); - u += sp_1024_add_16(r + 8, r + 8, z1); - u += sp_1024_add_8(r + 16, r + 16, z2); - sp_1024_add_zero_8(r + 24, z2 + 8, u); + sp_1024_sqr_8(z1, a1); + + u = 0; + u -= sp_1024_sub_in_place_16(z1, z2); + u -= sp_1024_sub_in_place_16(z1, z0); + u += sp_1024_sub_in_place_16(r + 8, z1); + sp_1024_add_word_8(r + 24, r + 24, u); } #else @@ -66075,10 +114716,10 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) sp_digit tmp[32]; __asm__ __volatile__ ( - "mov x5, 0\n\t" - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" + "mov x5, xzr\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" "\n1:\n\t" "subs x3, x5, 120\n\t" "csel x3, xzr, x3, cc\n\t" @@ -66108,7 +114749,7 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -66124,10 +114765,10 @@ static void sp_1024_sqr_16(sp_digit* 
r, const sp_digit* a) sp_digit tmp[32]; __asm__ __volatile__ ( - "mov x6, 0\n\t" - "mov x7, 0\n\t" - "mov x8, 0\n\t" - "mov x5, 0\n\t" + "mov x6, xzr\n\t" + "mov x7, xzr\n\t" + "mov x8, xzr\n\t" + "mov x5, xzr\n\t" "\n1:\n\t" "subs x3, x5, 120\n\t" "csel x3, xzr, x3, cc\n\t" @@ -66173,7 +114814,7 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "str x6, [%[r], x5]\n\t" : : [r] "r" (tmp), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -66285,7 +114926,7 @@ static sp_digit sp_1024_sub_in_place_16(sp_digit* a, const sp_digit* b) "b.ne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); return c; @@ -66321,7 +114962,7 @@ static sp_digit sp_1024_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; @@ -66387,7 +115028,7 @@ static sp_digit sp_1024_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_dig "csetm %[r], cc\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12", "cc" ); return (sp_digit)r; @@ -66420,12 +115061,12 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, "stp x3, x4, [%[r]], #16\n\t" "adcs x6, x6, x10\n\t" "stp x5, x6, [%[r]], #16\n\t" - "cset %[c], cs\n\t" + "adc %[c], xzr, xzr\n\t" "cmp %[a], x11\n\t" "b.ne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + : "memory", 
"x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return c; @@ -66447,9 +115088,9 @@ static void sp_1024_mul_d_16(sp_digit* r, const sp_digit* a, "ldr x8, [%[a]]\n\t" "mul x5, %[b], x8\n\t" "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" + "mov x4, xzr\n\t" "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" + "mov x5, xzr\n\t" "mov x9, #8\n\t" "1:\n\t" "ldr x8, [%[a], x9]\n\t" @@ -66468,180 +115109,183 @@ static void sp_1024_mul_d_16(sp_digit* r, const sp_digit* a, "str x3, [%[r], 128]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #else __asm__ __volatile__ ( "# A[0] * B\n\t" - "ldp x8, x9, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" + "ldp x9, x10, [%[a]]\n\t" + "mul x3, %[b], x9\n\t" + "umulh x4, %[b], x9\n\t" + "mov x5, xzr\n\t" "# A[1] * B\n\t" "str x3, [%[r]]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adds x4, x4, x6\n\t" "# A[2] * B\n\t" - "ldp x8, x9, [%[a], 16]\n\t" + "ldp x9, x10, [%[a], 16]\n\t" "str x4, [%[r], 8]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[3] * B\n\t" "str x5, [%[r], 16]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[4] * B\n\t" - "ldp x8, x9, [%[a], 32]\n\t" + "ldp x9, x10, [%[a], 32]\n\t" "str x3, [%[r], 24]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[5] * B\n\t" 
"str x4, [%[r], 32]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[6] * B\n\t" - "ldp x8, x9, [%[a], 48]\n\t" + "ldp x9, x10, [%[a], 48]\n\t" "str x5, [%[r], 40]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[7] * B\n\t" "str x3, [%[r], 48]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[8] * B\n\t" - "ldp x8, x9, [%[a], 64]\n\t" + "ldp x9, x10, [%[a], 64]\n\t" "str x4, [%[r], 56]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[9] * B\n\t" "str x5, [%[r], 64]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[10] * B\n\t" - "ldp x8, x9, [%[a], 80]\n\t" + "ldp x9, x10, [%[a], 80]\n\t" "str x3, [%[r], 72]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[11] * B\n\t" "str x4, [%[r], 80]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[12] * B\n\t" - "ldp x8, x9, [%[a], 96]\n\t" + "ldp x9, 
x10, [%[a], 96]\n\t" "str x5, [%[r], 88]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x5, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "# A[13] * B\n\t" "str x3, [%[r], 96]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x9\n\t" "adcs x4, x4, x7\n\t" - "umulh x7, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" + "mov x3, xzr\n\t" + "umulh x7, %[b], x10\n\t" "adc x5, xzr, xzr\n\t" "adds x4, x4, x6\n\t" "# A[14] * B\n\t" - "ldp x8, x9, [%[a], 112]\n\t" + "ldp x9, x10, [%[a], 112]\n\t" "str x4, [%[r], 104]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" "adcs x5, x5, x7\n\t" - "umulh x7, %[b], x8\n\t" + "mul x6, %[b], x9\n\t" + "mov x4, xzr\n\t" + "umulh x7, %[b], x9\n\t" "adc x3, xzr, xzr\n\t" "adds x5, x5, x6\n\t" "# A[15] * B\n\t" "str x5, [%[r], 112]\n\t" - "mul x6, %[b], x9\n\t" + "mul x6, %[b], x10\n\t" "adcs x3, x3, x7\n\t" - "umulh x7, %[b], x9\n\t" + "umulh x7, %[b], x10\n\t" "adc x4, xzr, xzr\n\t" "adds x3, x3, x6\n\t" "adc x4, x4, x7\n\t" "stp x3, x4, [%[r], 120]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #endif } -/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * Assumes divisor has highest bit set. * * d1 The high order half of the number to divide. * d0 The low order half of the number to divide. - * div The dividend. + * div The divisor. * returns the result of the division. 
*/ static sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r; - __asm__ __volatile__ ( - "lsr x5, %[div], 32\n\t" - "add x5, x5, 1\n\t" + "lsr x8, %[div], 32\n\t" + "add x5, x8, 1\n\t" "udiv x3, %[d1], x5\n\t" + "lsl x7, %[div], 32\n\t" + "movz x9, #1, lsl 32\n\t" "lsl x6, x3, 32\n\t" "mul x4, %[div], x6\n\t" "umulh x3, %[div], x6\n\t" "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "udiv x3, %[d1], x5\n\t" - "lsl x3, x3, 32\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "umulh x3, %[div], x3\n\t" - "subs %[d0], %[d0], x4\n\t" - "sbc %[d1], %[d1], x3\n\t" + "cmp %[d1], x5\n\t" + "cset x9, ge\n\t" + "csetm x10, ge\n\t" + "lsl x9, x9, #32\n\t" + "and x7, x7, x10\n\t" + "and x8, x8, x10\n\t" + "subs %[d0], %[d0], x7\n\t" + "add x6, x6, x9\n\t" + "sbc %[d1], %[d1], x8\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" "udiv x3, x3, x5\n\t" "add x6, x6, x3\n\t" @@ -66650,23 +115294,22 @@ static sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) "subs %[d0], %[d0], x4\n\t" "sbc %[d1], %[d1], x3\n\t" - "lsr x3, %[d0], 32\n\t" - "orr x3, x3, %[d1], lsl 32\n\t" + "extr x3, %[d1], %[d0], 32\n\t" - "udiv x3, x3, x5\n\t" - "add x6, x6, x3\n\t" - "mul x4, %[div], x3\n\t" - "sub %[d0], %[d0], x4\n\t" + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "sub %[d0], %[d0], x4\n\t" "udiv x3, %[d0], %[div]\n\t" - "add %[r], x6, x3\n\t" + "add %[d1], x6, x3\n\t" - : [r] "=r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "x3", "x4", "x5", "x6" + : [d1] "+r" (d1), [d0] "+r" (d0) + : [div] "r" (div) + : "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); - return r; + return d1; } /* AND m into each word of a and store in r. 
@@ -66710,147 +115353,139 @@ static sp_int64 sp_1024_cmp_16(const sp_digit* a, const sp_digit* b) { #ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "mov x5, 120\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "mov x10, #16\n\t" + "add %[a], %[a], #112\n\t" + "add %[b], %[b], #112\n\t" "1:\n\t" - "ldr x6, [%[a], x5]\n\t" - "ldr x7, [%[b], x5]\n\t" - "and x6, x6, x4\n\t" - "and x7, x7, x4\n\t" - "subs x6, x6, x7\n\t" - "csel x2, x3, x2, hi\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "subs x5, x5, #8\n\t" - "b.cs 1b\n\t" - "eor %[a], x2, x4\n\t" - : [a] "+r" (a) - : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + "ldp x6, x7, [%[a]], -16\n\t" + "ldp x8, x9, [%[b]], -16\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x6, x6, x8\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "subs x10, x10, #2\n\t" + "b.ne 1b\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" ); #else __asm__ __volatile__ ( - "mov x2, -1\n\t" - "mov x3, 1\n\t" - "mov x4, -1\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "ldp x7, x8, [%[b], 112]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "mov x3, #0\n\t" + "mov x2, #-1\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, 
x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 96]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "ldp x7, x8, [%[b], 80]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 64]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "ldp x7, x8, [%[b], 48]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, 
eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 32]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "ldp x7, x8, [%[b], 16]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "ldp x5, x6, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "and x6, x6, x4\n\t" - "and x8, x8, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + 
"ldp x6, x7, [%[a], 0]\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "subs x7, x7, x9\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" "subs x6, x6, x8\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "and x5, x5, x4\n\t" - "and x7, x7, x4\n\t" - "subs x5, x5, x7\n\t" - "csel x2, x4, x2, lo\n\t" - "csel x4, x4, xzr, eq\n\t" - "csel x2, x3, x2, hi\n\t" - "eor %[a], x2, x4\n\t" + "csel x4, x2, xzr, lo\n\t" + "csetm x5, eq\n\t" + "orr x3, x3, x4\n\t" + "and x2, x2, x5\n\t" + "cmp x2, #0\n\t" + "cset %[a], eq\n\t" + "orr %[a], %[a], x3\n\t" : [a] "+r" (a) : [b] "r" (b) - : "x2", "x3", "x4", "x5", "x6", "x7", "x8" + : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" ); #endif @@ -66866,8 +115501,8 @@ static sp_int64 sp_1024_cmp_16(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[32], t2[17]; sp_digit div, r1; @@ -66877,9 +115512,13 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig div = d[15]; XMEMCPY(t1, a, sizeof(*t1) * 2 * 16); - for (i=15; i>=0; i--) { - sp_digit hi = t1[16 + i] - (t1[16 + i] == div); + r1 = sp_1024_cmp_16(&t1[16], d) >= 0; + sp_1024_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); + for (i = 15; i >= 0; i--) { + volatile sp_digit mask = (sp_digit)0 - (t1[16 + i] == div); + sp_digit hi = t1[16 + i] + mask; r1 = div_1024_word_16(hi, t1[16 + i - 1], div); + r1 |= mask; sp_1024_mul_d_16(t2, d, r1); t1[16 + i] += sp_1024_sub_in_place_16(&t1[i], t2); @@ -66936,7 +115575,8 @@ static int sp_1024_point_new_ex_16(void* heap, sp_point_1024* sp, { int ret = MP_OKAY; (void)heap; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) (void)sp; *p = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); #else @@ -66948,7 +115588,8 @@ static int sp_1024_point_new_ex_16(void* heap, sp_point_1024* sp, return ret; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* Allocate memory for point and return error. */ #define sp_1024_point_new_16(heap, sp, p) sp_1024_point_new_ex_16((heap), NULL, &(p)) #else @@ -66965,7 +115606,8 @@ static int sp_1024_point_new_ex_16(void* heap, sp_point_1024* sp, */ static void sp_1024_point_free_16(sp_point_1024* p, int clear, void* heap) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* If valid pointer then clear point data if requested and free data. 
*/ if (p != NULL) { if (clear != 0) { @@ -66992,20 +115634,23 @@ static void sp_1024_point_free_16(sp_point_1024* p, int clear, void* heap) static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 64 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 63); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 63); } #elif DIGIT_BIT > 64 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xffffffffffffffffl; s = 64U - s; @@ -67035,12 +115680,12 @@ static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 64) { r[j] &= 0xffffffffffffffffl; @@ -67259,7 +115904,7 @@ static void sp_1024_cond_copy_16(sp_digit* r, const sp_digit* a, sp_digit m) "stp x3, x4, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x3", "x4", "x5", "x6" + : "memory", "x3", "x4", "x5", "x6", "cc" ); } @@ -67272,230 +115917,205 @@ static void sp_1024_cond_copy_16(sp_digit* r, const sp_digit* a, sp_digit m) SP_NOINLINE static void sp_1024_mont_reduce_16(sp_digit* a, const sp_digit* m, sp_digit mp) { - __asm__ __volatile__ ( - "ldp x14, x15, [%[m], 0]\n\t" - "ldp x16, x17, [%[m], 16]\n\t" - "ldp x19, x20, [%[m], 32]\n\t" - "ldp x21, x22, [%[m], 48]\n\t" - "ldp x23, x24, [%[m], 64]\n\t" - "ldp x25, x26, [%[m], 80]\n\t" - "ldp x27, x28, [%[m], 96]\n\t" - "mov x3, xzr\n\t" - "# i = 16\n\t" - "mov x4, 16\n\t" "ldp x12, x13, [%[a], 0]\n\t" + "ldp x14, x15, [%[a], 
16]\n\t" + "ldp x16, x17, [%[a], 32]\n\t" + "ldp x19, x20, [%[a], 48]\n\t" + "ldp x21, x22, [%[a], 64]\n\t" + "ldp x23, x24, [%[a], 80]\n\t" + "ldp x25, x26, [%[a], 96]\n\t" + "ldp x27, x28, [%[a], 112]\n\t" + "mov x3, xzr\n\t" + "# i = 0..15\n\t" + "mov x4, 16\n\t" "\n1:\n\t" "# mu = a[i] * mp\n\t" "mul x9, %[mp], x12\n\t" "# a[i+0] += m[0] * mu\n\t" - "mul x7, x14, x9\n\t" - "umulh x8, x14, x9\n\t" + "ldp x10, x11, [%[m], 0]\n\t" + "mul x7, x10, x9\n\t" + "umulh x8, x10, x9\n\t" "adds x12, x12, x7\n\t" "# a[i+1] += m[1] * mu\n\t" - "mul x7, x15, x9\n\t" "adc x6, x8, xzr\n\t" - "umulh x8, x15, x9\n\t" + "mul x7, x11, x9\n\t" + "umulh x8, x11, x9\n\t" "adds x12, x13, x7\n\t" "# a[i+2] += m[2] * mu\n\t" - "ldr x13, [%[a], 16]\n\t" + "ldp x11, x10, [%[m], 16]\n\t" "adc x5, x8, xzr\n\t" - "mul x7, x16, x9\n\t" "adds x12, x12, x6\n\t" - "umulh x8, x16, x9\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "adds x13, x13, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x13, x14, x7\n\t" "# a[i+3] += m[3] * mu\n\t" - "ldr x10, [%[a], 24]\n\t" "adc x6, x8, xzr\n\t" - "mul x7, x17, x9\n\t" "adds x13, x13, x5\n\t" - "umulh x8, x17, x9\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x14, x15, x7\n\t" "# a[i+4] += m[4] * mu\n\t" - "ldr x11, [%[a], 32]\n\t" + "ldp x11, x10, [%[m], 32]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x19, x9\n\t" + "adds x14, x14, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x19, x9\n\t" - "str x10, [%[a], 24]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x15, x16, x7\n\t" "# a[i+5] += m[5] * mu\n\t" - "ldr x10, [%[a], 40]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x20, x9\n\t" + "adds x15, x15, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x20, x9\n\t" - "str x11, [%[a], 32]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x16, x17, x7\n\t" "# a[i+6] += m[6] * mu\n\t" - 
"ldr x11, [%[a], 48]\n\t" + "ldp x11, x10, [%[m], 48]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x21, x9\n\t" + "adds x16, x16, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x21, x9\n\t" - "str x10, [%[a], 40]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x17, x19, x7\n\t" "# a[i+7] += m[7] * mu\n\t" - "ldr x10, [%[a], 56]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x22, x9\n\t" + "adds x17, x17, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x22, x9\n\t" - "str x11, [%[a], 48]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x19, x20, x7\n\t" "# a[i+8] += m[8] * mu\n\t" - "ldr x11, [%[a], 64]\n\t" + "ldp x11, x10, [%[m], 64]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x23, x9\n\t" + "adds x19, x19, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x23, x9\n\t" - "str x10, [%[a], 56]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x20, x21, x7\n\t" "# a[i+9] += m[9] * mu\n\t" - "ldr x10, [%[a], 72]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x24, x9\n\t" + "adds x20, x20, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x24, x9\n\t" - "str x11, [%[a], 64]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x21, x22, x7\n\t" "# a[i+10] += m[10] * mu\n\t" - "ldr x11, [%[a], 80]\n\t" + "ldp x11, x10, [%[m], 80]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x25, x9\n\t" + "adds x21, x21, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x25, x9\n\t" - "str x10, [%[a], 72]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x22, x23, x7\n\t" "# a[i+11] += m[11] * mu\n\t" - "ldr x10, [%[a], 88]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x26, x9\n\t" + "adds x22, x22, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x26, x9\n\t" - "str x11, [%[a], 80]\n\t" - "adds x10, 
x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x23, x24, x7\n\t" "# a[i+12] += m[12] * mu\n\t" - "ldr x11, [%[a], 96]\n\t" + "ldp x11, x10, [%[m], 96]\n\t" "adc x5, x8, xzr\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x27, x9\n\t" + "adds x23, x23, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x27, x9\n\t" - "str x10, [%[a], 88]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x24, x25, x7\n\t" "# a[i+13] += m[13] * mu\n\t" - "ldr x10, [%[a], 104]\n\t" "adc x6, x8, xzr\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x28, x9\n\t" + "adds x24, x24, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x28, x9\n\t" - "str x11, [%[a], 96]\n\t" - "adds x10, x10, x7\n\t" + "umulh x8, x10, x9\n\t" + "adds x25, x26, x7\n\t" "# a[i+14] += m[14] * mu\n\t" - "ldr x11, [%[a], 112]\n\t" + "ldp x11, x10, [%[m], 112]\n\t" "adc x5, x8, xzr\n\t" - "ldr x8, [%[m], 112]\n\t" - "adds x10, x10, x6\n\t" - "mul x7, x8, x9\n\t" + "adds x25, x25, x6\n\t" + "mul x7, x11, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x8, x9\n\t" - "str x10, [%[a], 104]\n\t" - "adds x11, x11, x7\n\t" + "umulh x8, x11, x9\n\t" + "adds x26, x27, x7\n\t" "# a[i+15] += m[15] * mu\n\t" - "ldr x10, [%[a], 120]\n\t" + "ldr x10, [%[m], 120]\n\t" "adc x6, x8, xzr\n\t" - "ldr x8, [%[m], 120]\n\t" - "adds x11, x11, x5\n\t" - "mul x7, x8, x9\n\t" + "adds x26, x26, x5\n\t" + "mul x7, x10, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x8, x9\n\t" + "umulh x8, x10, x9\n\t" "adds x6, x6, x7\n\t" "adcs x8, x8, x3\n\t" - "str x11, [%[a], 112]\n\t" - "cset x3, cs\n\t" - "adds x10, x10, x6\n\t" - "ldr x11, [%[a], 128]\n\t" - "str x10, [%[a], 120]\n\t" - "adcs x11, x11, x8\n\t" - "str x11, [%[a], 128]\n\t" + "adc x3, xzr, xzr\n\t" + "adds x27, x28, x6\n\t" + "ldr x28, [%[a], 128]\n\t" + "adcs x28, x28, x8\n\t" "adc x3, x3, xzr\n\t" "subs x4, x4, 1\n\t" "add %[a], %[a], 8\n\t" "bne 1b\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" "# Create mask\n\t" - "ldr x9, [%[m], 120]\n\t" - "subs x11, x9, 
x11\n\t" - "neg x3, x3\n\t" - "sbc x11, x11, x11\n\t" - "orr x3, x3, x11\n\t" - "mov x9, %[a]\n\t" - "sub %[a], %[a], 128\n\t" + "subs x11, x10, x28\n\t" + "neg x3, x3\n\t" + "sbc x11, x11, x11\n\t" + "orr x3, x3, x11\n\t" + "mov x9, %[a]\n\t" + "sub %[a], %[a], 128\n\t" "# Subtract masked modulus\n\t" - "# x12 and x13 hold a[0] and a[1]\n\t" - "and x14, x14, x3\n\t" - "ldp x11, x10, [x9, 16]\n\t" - "and x15, x15, x3\n\t" - "subs x12, x12, x14\n\t" - "and x16, x16, x3\n\t" - "sbcs x13, x13, x15\n\t" - "and x17, x17, x3\n\t" - "sbcs x11, x11, x16\n\t" - "stp x12, x13, [%[a], 0]\n\t" - "sbcs x10, x10, x17\n\t" - "stp x11, x10, [%[a], 16]\n\t" - "ldp x12, x13, [x9, 32]\n\t" - "and x19, x19, x3\n\t" - "ldp x11, x10, [x9, 48]\n\t" - "and x20, x20, x3\n\t" - "sbcs x12, x12, x19\n\t" - "and x21, x21, x3\n\t" - "sbcs x13, x13, x20\n\t" - "and x22, x22, x3\n\t" - "sbcs x11, x11, x21\n\t" - "stp x12, x13, [%[a], 32]\n\t" - "sbcs x10, x10, x22\n\t" - "stp x11, x10, [%[a], 48]\n\t" - "ldp x12, x13, [x9, 64]\n\t" - "and x23, x23, x3\n\t" - "ldp x11, x10, [x9, 80]\n\t" - "and x24, x24, x3\n\t" - "sbcs x12, x12, x23\n\t" - "and x25, x25, x3\n\t" - "sbcs x13, x13, x24\n\t" - "and x26, x26, x3\n\t" - "sbcs x11, x11, x25\n\t" - "stp x12, x13, [%[a], 64]\n\t" - "sbcs x10, x10, x26\n\t" - "stp x11, x10, [%[a], 80]\n\t" - "ldp x7, x8, [%[m], 112]\n\t" - "ldp x12, x13, [x9, 96]\n\t" - "and x27, x27, x3\n\t" - "ldp x11, x10, [x9, 112]\n\t" - "and x28, x28, x3\n\t" - "sbcs x12, x12, x27\n\t" - "and x7, x7, x3\n\t" - "sbcs x13, x13, x28\n\t" - "and x8, x8, x3\n\t" - "sbcs x11, x11, x7\n\t" - "stp x12, x13, [%[a], 96]\n\t" - "sbcs x10, x10, x8\n\t" - "stp x11, x10, [%[a], 112]\n\t" - : [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + "ldp x4, x5, [%[m], 0]\n\t" + "ldp x6, x7, [%[m], 16]\n\t" + "and x4, x4, x3\n\t" + "and 
x5, x5, x3\n\t" + "subs x12, x12, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x13, x13, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x14, x14, x6\n\t" + "stp x12, x13, [%[a], 0]\n\t" + "sbcs x15, x15, x7\n\t" + "stp x14, x15, [%[a], 16]\n\t" + "ldp x4, x5, [%[m], 32]\n\t" + "ldp x6, x7, [%[m], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x16, x16, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x17, x17, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x19, x19, x6\n\t" + "stp x16, x17, [%[a], 32]\n\t" + "sbcs x20, x20, x7\n\t" + "stp x19, x20, [%[a], 48]\n\t" + "ldp x4, x5, [%[m], 64]\n\t" + "ldp x6, x7, [%[m], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x21, x21, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x22, x22, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x23, x23, x6\n\t" + "stp x21, x22, [%[a], 64]\n\t" + "sbcs x24, x24, x7\n\t" + "stp x23, x24, [%[a], 80]\n\t" + "ldp x4, x5, [%[m], 96]\n\t" + "ldp x6, x7, [%[m], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "sbcs x25, x25, x4\n\t" + "and x6, x6, x3\n\t" + "sbcs x26, x26, x5\n\t" + "and x7, x7, x3\n\t" + "sbcs x27, x27, x6\n\t" + "stp x25, x26, [%[a], 96]\n\t" + "sbcs x28, x28, x7\n\t" + "stp x27, x28, [%[a], 112]\n\t" + : [a] "+r" (a), [mp] "+r" (mp) + : [m] "r" (m) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" ); } @@ -67507,9 +116127,9 @@ SP_NOINLINE static void sp_1024_mont_reduce_16(sp_digit* a, const sp_digit* m, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_1024_mont_mul_16(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_1024_mul_16(r, a, b); @@ -67521,9 +116141,9 @@ static void sp_1024_mont_mul_16(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_1024_mont_sqr_16(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_1024_sqr_16(r, a); @@ -67563,11 +116183,14 @@ static const uint8_t p1024_mod_minus_2[] = { static void sp_1024_mont_inv_16(sp_digit* r, const sp_digit* a, sp_digit* td) { - sp_digit* t = td; + sp_digit* t = &td[32 * 2 * 16]; int i; int j; - sp_digit table[32][2 * 16]; + sp_digit* table[32]; + for (i = 0; i < 32; i++) { + table[i] = &td[2 * 16 * i]; + } XMEMCPY(table[0], a, sizeof(sp_digit) * 16); for (i = 1; i < 6; i++) { sp_1024_mont_sqr_16(table[0], table[0], p1024_mod, p1024_mp_mod); @@ -67614,27 +116237,24 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_16(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 16, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); - sp_1024_cond_sub_16(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->x, r->x, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->x); /* y /= z^3 */ sp_1024_mont_mul_16(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 16, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); - sp_1024_cond_sub_16(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->y, r->y, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -67644,8 +116264,8 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -67735,7 +116355,7 @@ static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, const sp_digit* "stp x19, x20, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "cc" ); } @@ -67745,7 +116365,8 @@ static void sp_1024_mont_add_16(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to double in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -67827,7 +116448,7 @@ static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, const sp_digit* "stp x19, x20, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "cc" ); } @@ -67837,7 +116458,8 @@ static void sp_1024_mont_dbl_16(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -67988,7 +116610,7 @@ static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, const sp_digit* "stp x19, x20, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "cc" ); } @@ -67999,8 +116621,8 @@ static void sp_1024_mont_tpl_16(sp_digit* r, const sp_digit* a, const sp_digit* * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( "ldp x4, x5, [%[a], 0]\n\t" @@ -68086,10 +116708,11 @@ static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* "stp x19, x20, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "cc" ); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -68101,7 +116724,6 @@ static void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* static sp_digit sp_1024_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { -#ifdef WOLFSSL_SP_SMALL sp_digit c = 0; __asm__ __volatile__ ( @@ -68119,148 +116741,146 @@ static sp_digit sp_1024_cond_add_16(sp_digit* r, const sp_digit* a, const sp_dig "b.lt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x8", "x9", "x10", "x11", "x12", "cc" ); return c; -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_1024_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ __asm__ __volatile__ ( - "ldp x5, x7, [%[b], 0]\n\t" - "ldp x11, x12, [%[b], 16]\n\t" - "ldp x4, x6, [%[a], 0]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 16]\n\t" - "and x7, x7, %[m]\n\t" - "adds x4, x4, x5\n\t" + "ldp x8, x9, [%[b], 0]\n\t" + "ldp x10, x11, [%[b], 16]\n\t" + "ldp x4, x5, [%[a], 0]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 16]\n\t" + "and x9, x9, %[m]\n\t" + "adds x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 0]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 16]\n\t" - "ldp x5, x7, [%[b], 32]\n\t" - "ldp x11, x12, [%[b], 48]\n\t" - "ldp x4, x6, [%[a], 32]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 48]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 0]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 16]\n\t" + "ldp x8, x9, [%[b], 32]\n\t" + "ldp x10, x11, [%[b], 48]\n\t" + "ldp x4, x5, [%[a], 32]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 48]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 32]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 48]\n\t" - "ldp x5, x7, [%[b], 64]\n\t" - "ldp x11, x12, [%[b], 80]\n\t" - "ldp x4, x6, [%[a], 64]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 80]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 32]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 48]\n\t" + "ldp x8, x9, [%[b], 64]\n\t" + "ldp x10, x11, [%[b], 80]\n\t" + "ldp x4, x5, [%[a], 64]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 80]\n\t" + "and x9, 
x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 64]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 80]\n\t" - "ldp x5, x7, [%[b], 96]\n\t" - "ldp x11, x12, [%[b], 112]\n\t" - "ldp x4, x6, [%[a], 96]\n\t" - "and x5, x5, %[m]\n\t" - "ldp x9, x10, [%[a], 112]\n\t" - "and x7, x7, %[m]\n\t" - "adcs x4, x4, x5\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 64]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 80]\n\t" + "ldp x8, x9, [%[b], 96]\n\t" + "ldp x10, x11, [%[b], 112]\n\t" + "ldp x4, x5, [%[a], 96]\n\t" + "and x8, x8, %[m]\n\t" + "ldp x6, x7, [%[a], 112]\n\t" + "and x9, x9, %[m]\n\t" + "adcs x4, x4, x8\n\t" + "and x10, x10, %[m]\n\t" + "adcs x5, x5, x9\n\t" "and x11, x11, %[m]\n\t" - "adcs x6, x6, x7\n\t" - "and x12, x12, %[m]\n\t" - "adcs x9, x9, x11\n\t" - "stp x4, x6, [%[r], 96]\n\t" - "adcs x10, x10, x12\n\t" - "stp x9, x10, [%[r], 112]\n\t" + "adcs x6, x6, x10\n\t" + "stp x4, x5, [%[r], 96]\n\t" + "adcs x7, x7, x11\n\t" + "stp x6, x7, [%[r], 112]\n\t" "cset %[r], cs\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" ); return (sp_digit)r; -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ -static void sp_1024_rshift1_16(sp_digit* r, sp_digit* a) +static void sp_1024_rshift1_16(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "ldr x2, [%[a]]\n\t" + "ldp x2, x3, [%[a]]\n\t" "ldr x3, [%[a], 8]\n\t" - "lsr x2, x2, 1\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "lsr x3, x3, 1\n\t" + "extr x2, x3, x2, #1\n\t" "ldr x4, [%[a], 16]\n\t" "str x2, [%[r], 0]\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "lsr x4, x4, 1\n\t" + "extr x3, x4, x3, #1\n\t" "ldr x2, [%[a], 24]\n\t" "str x3, [%[r], 8]\n\t" - "orr x4, x4, x2, lsl 63\n\t" - "lsr x2, 
x2, 1\n\t" + "extr x4, x2, x4, #1\n\t" "ldr x3, [%[a], 32]\n\t" "str x4, [%[r], 16]\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "lsr x3, x3, 1\n\t" + "extr x2, x3, x2, #1\n\t" "ldr x4, [%[a], 40]\n\t" "str x2, [%[r], 24]\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "lsr x4, x4, 1\n\t" + "extr x3, x4, x3, #1\n\t" "ldr x2, [%[a], 48]\n\t" "str x3, [%[r], 32]\n\t" - "orr x4, x4, x2, lsl 63\n\t" - "lsr x2, x2, 1\n\t" + "extr x4, x2, x4, #1\n\t" "ldr x3, [%[a], 56]\n\t" "str x4, [%[r], 40]\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "lsr x3, x3, 1\n\t" + "extr x2, x3, x2, #1\n\t" "ldr x4, [%[a], 64]\n\t" "str x2, [%[r], 48]\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "lsr x4, x4, 1\n\t" + "extr x3, x4, x3, #1\n\t" "ldr x2, [%[a], 72]\n\t" "str x3, [%[r], 56]\n\t" - "orr x4, x4, x2, lsl 63\n\t" - "lsr x2, x2, 1\n\t" + "extr x4, x2, x4, #1\n\t" "ldr x3, [%[a], 80]\n\t" "str x4, [%[r], 64]\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "lsr x3, x3, 1\n\t" + "extr x2, x3, x2, #1\n\t" "ldr x4, [%[a], 88]\n\t" "str x2, [%[r], 72]\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "lsr x4, x4, 1\n\t" + "extr x3, x4, x3, #1\n\t" "ldr x2, [%[a], 96]\n\t" "str x3, [%[r], 80]\n\t" - "orr x4, x4, x2, lsl 63\n\t" - "lsr x2, x2, 1\n\t" + "extr x4, x2, x4, #1\n\t" "ldr x3, [%[a], 104]\n\t" "str x4, [%[r], 88]\n\t" - "orr x2, x2, x3, lsl 63\n\t" - "lsr x3, x3, 1\n\t" + "extr x2, x3, x2, #1\n\t" "ldr x4, [%[a], 112]\n\t" "str x2, [%[r], 96]\n\t" - "orr x3, x3, x4, lsl 63\n\t" - "lsr x4, x4, 1\n\t" + "extr x3, x4, x3, #1\n\t" "ldr x2, [%[a], 120]\n\t" "str x3, [%[r], 104]\n\t" - "orr x4, x4, x2, lsl 63\n\t" - "lsr x2, x2, 1\n\t" - "str x4, [%[r], 112]\n\t" - "str x2, [%[r], 120]\n\t" + "extr x4, x2, x4, #1\n\t" + "lsr x2, x2, #1\n\t" + "stp x4, x2, [%[r], 112]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x2", "x3", "x4" + : "memory", "x2", "x3", "x4", "cc" ); } @@ -68270,7 +116890,8 @@ static void sp_1024_rshift1_16(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_div2_16(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -68285,6 +116906,61 @@ static void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_16(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_16(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_16(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_mont_div2_16(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_16(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_16(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_16_ctx { int state; @@ -68295,7 
+116971,14 @@ typedef struct sp_1024_proj_point_dbl_16_ctx { sp_digit* z; } sp_1024_proj_point_dbl_16_ctx; -static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_16_ctx* ctx = (sp_1024_proj_point_dbl_16_ctx*)sp_ctx->data; @@ -68369,7 +117052,7 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_16(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_16(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -68419,61 +117102,6 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. 
*/ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_16(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_16(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_16(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_16(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_16(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_16(y, y, t2, p1024_mod); -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -68481,7 +117109,7 @@ static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, * n Number of times to double * t Temporary ordinate data. 
*/ -static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, +static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_digit* t) { sp_digit* w = t; @@ -68492,6 +117120,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -68502,7 +117131,6 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, /* W = Z^4 */ sp_1024_mont_sqr_16(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_16(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -68520,9 +117148,12 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_16(t2, b, x, p1024_mod); + sp_1024_mont_dbl_16(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_16(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_16(t1, t1, p1024_mod, p1024_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -68532,9 +117163,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, sp_1024_mont_mul_16(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_16(y, b, x, p1024_mod); - sp_1024_mont_mul_16(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(y, y, p1024_mod); + sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); } #ifndef WOLFSSL_SP_SMALL @@ -68549,118 +117178,21 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int n, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_16(t2, b, x, p1024_mod); + sp_1024_mont_dbl_16(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_16(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ 
sp_1024_mont_sqr_16(t1, t1, p1024_mod, p1024_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_16(y, b, x, p1024_mod); - sp_1024_mont_mul_16(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(y, y, p1024_mod); + sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); -#endif - /* Y = Y/2 */ - sp_1024_div2_16(y, y, p1024_mod); -} - -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 128\n\t" - "\n1:\n\t" - "subs %[c], xzr, %[c]\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "ldp x7, x8, [%[b]], #16\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x9, x10, [%[b]], #16\n\t" - "sbcs x4, x4, x8\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "csetm %[c], cc\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" - ); - - return c; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "subs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 16]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 48]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 80]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 112]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 112]\n\t" - "csetm %[r], cc\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" - ); - - return (sp_digit)r; -} - #endif /* WOLFSSL_SP_SMALL */ + /* Y = Y/2 */ + sp_1024_mont_div2_16(y, y, p1024_mod); +} + /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -68678,6 +117210,19 @@ static int sp_1024_cmp_equal_16(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_1024_iszero_16(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -68685,6 +117230,84 @@ static int sp_1024_cmp_equal_16(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*16; + sp_digit* t2 = t + 4*16; + sp_digit* t3 = t + 6*16; + sp_digit* t4 = t + 8*16; + sp_digit* t5 = t + 10*16; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(t2, t1) & + sp_1024_cmp_equal_16(t4, t3)) { + sp_1024_proj_point_dbl_16(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(z, z, q->z, p1024_mod, 
p1024_mp_mod); + sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_16(t3, y, p1024_mod); + sp_1024_mont_sub_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_16(y, y, x, p1024_mod); + sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t5, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_16_ctx { @@ -68697,11 +117320,19 @@ typedef struct sp_1024_proj_point_add_16_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_1024_proj_point_add_16_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -68720,261 +117351,168 @@ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*16; - ctx->t3 = t + 4*16; - ctx->t4 = t + 6*16; - ctx->t5 = t + 8*16; + ctx->t6 = t; + ctx->t1 = t + 2*16; + ctx->t2 = t + 4*16; + ctx->t3 = t + 6*16; + ctx->t4 = t + 8*16; + ctx->t5 = t + 10*16; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_16(ctx->t1, p1024_mod, q->y); - sp_1024_norm_16(ctx->t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_1024_proj_point_dbl_16_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_1024)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<16; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<16; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<16; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; + break; + case 2: + sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, 
p1024_mp_mod); + ctx->state = 3; + break; + case 3: + sp_1024_mont_mul_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_16(ctx->t1, ctx->t1, ctx->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_16(ctx->t4, ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_16(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_16(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(ctx->t3, ctx->t3, ctx->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(ctx->t5, 
ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_16(ctx->z, ctx->z, ctx->t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - sp_1024_mont_sqr_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_dbl_16(ctx->t1, ctx->y, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t1, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->x, 
p1024_mod); + sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_16(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* t3 = t + 4*16; - sp_digit* t4 = t + 6*16; - sp_digit* t5 = t + 8*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_1024* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_16(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<16; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<16; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<16; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t1, t1, x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(t3, t3, y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, t2, p1024_mod, p1024_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, 
p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); - sp_1024_mont_dbl_16(t1, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t1, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -69020,30 +117558,30 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, sp_1024_mont_sub_16(t1, t1, w, p1024_mod); sp_1024_mont_tpl_16(a, t1, p1024_mod); /* B = X*Y^2 */ - sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(b, t2, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(t1, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(b, t1, x, p1024_mod, p1024_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(t1, b, p1024_mod); - sp_1024_mont_sub_16(x, x, t1, p1024_mod); + sp_1024_mont_dbl_16(t2, b, p1024_mod); + sp_1024_mont_sub_16(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_16(t2, b, x, p1024_mod); + sp_1024_mont_dbl_16(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_16(r[j].z, z, y, p1024_mod, p1024_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_1024_mont_sqr_16(t2, t2, p1024_mod, p1024_mp_mod); + /* t1 = Y^4 */ + sp_1024_mont_sqr_16(t1, t1, p1024_mod, p1024_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_1024_mont_mul_16(w, w, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_16(y, b, x, p1024_mod); - sp_1024_mont_mul_16(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(y, y, p1024_mod); - sp_1024_mont_sub_16(y, y, t2, p1024_mod); - + sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t1, p1024_mod); /* Y = Y/2 
*/ - sp_1024_div2_16(r[j].y, y, p1024_mod); + sp_1024_mont_div2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -69066,30 +117604,30 @@ static void sp_1024_proj_point_add_sub_16(sp_point_1024* ra, sp_digit* t4 = t + 6*16; sp_digit* t5 = t + 8*16; sp_digit* t6 = t + 10*16; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t1, t1, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t1, t1, xa, p1024_mod, p1024_mp_mod); /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(t2, za, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, za, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(t3, t3, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t3, t3, ya, p1024_mod, p1024_mp_mod); /* S2 = Y2*Z1^3 */ sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - U1 */ @@ -69100,30 +117638,30 @@ static void sp_1024_proj_point_add_sub_16(sp_point_1024* ra, sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, t2, p1024_mod, p1024_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_1024_mont_mul_16(za, za, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(za, za, t2, p1024_mod, p1024_mp_mod); + 
XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(xa, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_16(xs, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ya, t1, t5, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_sub_16(xa, xa, t5, p1024_mod); sp_1024_mont_sub_16(xs, xs, t5, p1024_mod); - sp_1024_mont_dbl_16(t1, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t1, p1024_mod); + sp_1024_mont_dbl_16(t1, ya, p1024_mod); + sp_1024_mont_sub_16(xa, xa, t1, p1024_mod); sp_1024_mont_sub_16(xs, xs, t1, p1024_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_1024_mont_sub_16(ys, y, xs, p1024_mod); - sp_1024_mont_sub_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(ys, ya, xs, p1024_mod); + sp_1024_mont_sub_16(ya, ya, xa, p1024_mod); + sp_1024_mont_mul_16(ya, ya, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(t6, p1024_mod, t6, p1024_mod); sp_1024_mont_mul_16(ys, ys, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); + sp_1024_mont_sub_16(ya, ya, t5, p1024_mod); sp_1024_mont_sub_16(ys, ys, t5, p1024_mod); } @@ -69227,12 +117765,12 @@ static void sp_1024_ecc_recode_7_16(const sp_digit* k, ecc_recode_1024* v) static int sp_1024_ecc_mulmod_win_add_sub_16(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; sp_digit* tmp = NULL; #else sp_point_1024 t[65+2]; - sp_digit tmp[2 * 16 * 6]; + sp_digit tmp[2 * 16 * 37]; #endif 
sp_point_1024* rt = NULL; sp_point_1024* p = NULL; @@ -69245,13 +117783,13 @@ static int sp_1024_ecc_mulmod_win_add_sub_16(sp_point_1024* r, const sp_point_10 (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * (65+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 16 * 6, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 16 * 37, heap, DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; @@ -69345,7 +117883,7 @@ static int sp_1024_ecc_mulmod_win_add_sub_16(sp_point_1024* r, const sp_point_10 } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -69372,76 +117910,75 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* t3 = t + 4*16; - sp_digit* t4 = t + 6*16; - sp_digit* t5 = t + 8*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*16; + sp_digit* t6 = t + 4*16; + sp_digit* t1 = t + 6*16; + sp_digit* t4 = t + 8*16; + sp_digit* t5 = t + 10*16; - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(p->x, t2) & + sp_1024_cmp_equal_16(p->y, t4)) { sp_1024_proj_point_dbl_16(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<16; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<16; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<16; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = 
ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ - sp_1024_mont_sub_16(t2, t2, x, p1024_mod); + sp_1024_mont_sub_16(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ - sp_1024_mont_sub_16(t4, t4, y, p1024_mod); + sp_1024_mont_sub_16(t4, t4, p->y, p1024_mod); /* Z3 = H*Z1 */ - sp_1024_mont_mul_16(z, z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_1024_mont_sqr_16(t1, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t3, x, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, t1, t5, p1024_mod); - sp_1024_mont_dbl_16(t1, t3, p1024_mod); - sp_1024_mont_sub_16(x, x, t1, p1024_mod); + sp_1024_mont_sqr_16(t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t3, p->x, t1, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t1, t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(t2, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); + sp_1024_mont_dbl_16(t5, t3, p1024_mod); + sp_1024_mont_sub_16(x, t2, t5, p1024_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_1024_mont_sub_16(t3, t3, x, p1024_mod); sp_1024_mont_mul_16(t3, t3, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, t3, t5, p1024_mod); + sp_1024_mont_mul_16(t1, t1, p->y, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, t3, t1, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for 
(i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -69482,7 +118019,7 @@ static void sp_1024_proj_to_affine_16(sp_point_1024* a, sp_digit* t) static int sp_1024_gen_stripe_table_16(const sp_point_1024* a, sp_table_entry_1024* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; #else sp_point_1024 t[3]; @@ -69495,7 +118032,7 @@ static int sp_1024_gen_stripe_table_16(const sp_point_1024* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -69550,7 +118087,7 @@ static int sp_1024_gen_stripe_table_16(const sp_point_1024* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -69579,12 +118116,12 @@ static int sp_1024_ecc_mulmod_stripe_16(sp_point_1024* r, const sp_point_1024* g const sp_table_entry_1024* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* rt = NULL; sp_digit* t = NULL; #else sp_point_1024 rt[2]; - sp_digit t[2 * 16 * 5]; + sp_digit t[2 * 16 * 37]; #endif sp_point_1024* p = NULL; int i; @@ -69599,13 +118136,13 @@ static int sp_1024_ecc_mulmod_stripe_16(sp_point_1024* r, const sp_point_1024* g (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, 
DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 16 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 16 * 37, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -69650,7 +118187,7 @@ static int sp_1024_ecc_mulmod_stripe_16(sp_point_1024* r, const sp_point_1024* g } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -69693,7 +118230,7 @@ static THREAD_LS_T int sp_cache_1024_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache) @@ -69764,23 +118301,36 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_16(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 16 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 16 * 38]; +#endif sp_cache_1024_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_1024 == 0) { - wc_InitMutex(&sp_cache_1024_lock); - initCacheMutex_1024 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 16 * 38, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_1024 == 0) { + wc_InitMutex(&sp_cache_1024_lock); + initCacheMutex_1024 = 1; + } + if (wc_LockMutex(&sp_cache_1024_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_1024_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -69801,6 +118351,9 @@ static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -69818,7 +118371,7 @@ static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -69827,7 +118380,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, 
DYNAMIC_TYPE_ECC); if (point == NULL) @@ -69850,7 +118403,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_1024_point_to_ecc_point_16(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -73220,7 +121773,7 @@ static int sp_1024_ecc_mulmod_base_16(sp_point_1024* r, const sp_digit* k, */ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -73229,7 +121782,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -73251,7 +121804,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_1024_point_to_ecc_point_16(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -73265,7 +121818,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -73275,25 +121828,25 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else sp_point_1024 point[2]; - sp_digit k[16 + 16 * 2 * 5]; + sp_digit k[16 + 16 * 2 * 37]; #endif sp_point_1024* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (16 + 16 * 2 * 5), + sizeof(sp_digit) * (16 + 16 * 2 * 37), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -73329,7 +121882,7 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, err = sp_1024_point_to_ecc_point_16(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -73352,12 +121905,12 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* t = NULL; #else sp_point_1024 point[1]; - sp_digit t[5 * 2 * 16]; + sp_digit t[38 * 2 * 16]; #endif int err = MP_OKAY; @@ -73373,7 +121926,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { 
point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); @@ -73381,7 +121934,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = MEMORY_E; } if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 16, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 2 * 16, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -73397,7 +121950,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, *len = sizeof(sp_table_entry_1024) * 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -73455,7 +122008,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -73464,7 +122017,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) { @@ -73493,7 +122046,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, err = sp_1024_point_to_ecc_point_16(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -73503,7 +122056,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, return err; } -/* Multiply p* in projective co-ordinates by q*. +/* Multiply p* in projective coordinates by q*. 
* * r.x = p.x - (p.y * q.y) * r.y = (p.x * q.y) + p.y @@ -73529,7 +122082,7 @@ static void sp_1024_proj_mul_qx1_16(sp_digit* px, sp_digit* py, sp_1024_mont_add_16(py, t1, py, p1024_mod); } -/* Square p* in projective co-ordinates. +/* Square p* in projective coordinates. * * px' = (p.x + p.y) * (p.x - p.y) = p.x^2 - p.y^2 * py' = 2 * p.x * p.y @@ -73568,8 +122121,8 @@ static void sp_1024_proj_sqr_16(sp_digit* px, sp_digit* py, sp_digit* t) */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; @@ -73577,7 +122130,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) sp_digit* b; sp_digit* e; #else - sp_digit t[4 * 2 * 16]; + sp_digit t[36 * 2 * 16]; sp_digit tx[2 * 16]; sp_digit ty[2 * 16]; sp_digit b[2 * 16]; @@ -73588,9 +122141,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) int bits; int i; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 16 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 40 * 16 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -73598,13 +122151,13 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 16 * 2; - ty = td + 5 * 16 * 2; - b = td + 6 * 16 * 2; - e = td + 7 * 16 * 2; + tx = td + 36 * 16 * 2; + ty = 
td + 37 * 16 * 2; + b = td + 38 * 16 * 2; + e = td + 39 * 16 * 2; #endif r = ty; @@ -73642,8 +122195,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -75211,14 +123764,14 @@ static const sp_digit sp_1024_g_table[256][16] = { */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; sp_digit* ty; #else - sp_digit t[4 * 2 * 16]; + sp_digit t[36 * 2 * 16]; sp_digit tx[2 * 16]; sp_digit ty[2 * 16]; #endif @@ -75230,9 +123783,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) (void)base; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 16 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 16 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -75240,11 +123793,11 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 16 * 2; - ty = td + 5 * 16 * 2; + tx = td + 36 * 16 * 2; + ty = td + 37 * 16 * 2; #endif r = ty; @@ -75284,8 +123837,8 @@ 
int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -75294,7 +123847,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) } #endif /* WOLFSSL_SP_SMALL */ -/* Multiply p* by q* in projective co-ordinates. +/* Multiply p* by q* in projective coordinates. * * p.x' = (p.x * q.x) - (p.y * q.y) * p.y' = (p.x * q.y) + (p.y * q.x) @@ -75409,7 +123962,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_16(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -75429,7 +123982,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_16(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_16(t1, t1, p1024_mod); + sp_1024_mont_div2_16(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_16(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -75555,15 +124108,15 @@ static void sp_1024_accumulate_line_add_one_16(sp_digit* vx, sp_digit* vy, int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err = MP_OKAY; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; sp_digit* vy; sp_digit* qx_px; #else - sp_digit t[6 * 2 * 16]; + sp_digit 
t[36 * 2 * 16]; sp_digit vx[2 * 16]; sp_digit vy[2 * 16]; sp_digit qx_px[2 * 16]; @@ -75585,10 +124138,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_16(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 16 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 16 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -75597,12 +124150,12 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 16 * 2; - vy = td + 7 * 16 * 2; - qx_px = td + 8 * 16 * 2; + vx = td + 36 * 16 * 2; + vy = td + 37 * 16 * 2; + qx_px = td + 38 * 16 * 2; #endif r = vy; @@ -75654,8 +124207,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -75847,7 +124400,7 @@ static void sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_16(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -75885,7 +124438,7 @@ static void 
sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_16(p->y, p->y, p1024_mod); + sp_1024_mont_div2_16(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -75933,8 +124486,8 @@ static const signed char sp_1024_order_op[] = { int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -75944,7 +124497,7 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) sp_digit (*pre_nvy)[32]; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 16]; + sp_digit t[36 * 2 * 16]; sp_digit vx[2 * 16]; sp_digit vy[2 * 16]; sp_digit pre_vx[16][32]; @@ -75970,10 +124523,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_16(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 16 * 2 + 16 * sizeof(sp_point_1024), NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 16 * 2 + 16 * sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -75982,15 +124535,15 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 16 * 2; - vy = td + 7 * 16 * 2; - pre_vx = (sp_digit(*)[32])(td + 8 * 16 * 2); - pre_vy = 
(sp_digit(*)[32])(td + 24 * 16 * 2); - pre_nvy = (sp_digit(*)[32])(td + 40 * 16 * 2); - pre_p = (sp_point_1024*)(td + 56 * 16 * 2); + vx = td + 36 * 16 * 2; + vy = td + 37 * 16 * 2; + pre_vx = (sp_digit(*)[32])(td + 38 * 16 * 2); + pre_vy = (sp_digit(*)[32])(td + 54 * 16 * 2); + pre_nvy = (sp_digit(*)[32])(td + 70 * 16 * 2); + pre_p = (sp_point_1024*)(td + 86 * 16 * 2); #endif r = vy; @@ -76081,8 +124634,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -76165,10 +124718,9 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, static void sp_1024_accum_dbl_calc_lc_16(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 16; - sp_digit* t2 = t + 2 * 2 * 16; - sp_digit* l = t + 4 * 2 * 16; - + sp_digit* t1 = t + 33 * 2 * 16; + sp_digit* t2 = t + 34 * 2 * 16; + sp_digit* l = t + 35 * 2 * 16; /* l = 1 / 2 * p.y */ sp_1024_mont_dbl_16(l, py, p1024_mod); @@ -76210,10 +124762,9 @@ static void sp_1024_accum_add_calc_lc_16(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, const sp_digit* cx, const sp_digit* cy, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 16; - sp_digit* c = t + 2 * 2 * 16; - sp_digit* l = t + 4 * 2 * 16; - + sp_digit* t1 = t + 33 * 2 * 16; + sp_digit* c = t + 34 * 2 * 16; + sp_digit* l = t + 35 * 2 * 16; /* l = 1 / (c.x - p.x) */ sp_1024_mont_sub_16(l, cx, px, p1024_mod); @@ -76324,13 +124875,13 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && 
!defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 16]; + sp_digit t[36 * 2 * 16]; sp_point_1024 pre_p[16]; sp_point_1024 pd; sp_point_1024 cd; @@ -76364,11 +124915,11 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, err = sp_1024_point_new_16(NULL, negd, neg); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 16 * 2 + 16 * sizeof(sp_point_1024), NULL, - DYNAMIC_TYPE_TMP_BUFFER); + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 16 * 2 + 16 * + sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; } @@ -76376,10 +124927,10 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - pre_p = (sp_point_1024*)(td + 6 * 16 * 2); + pre_p = (sp_point_1024*)(td + 36 * 16 * 2); #endif sp_1024_point_from_ecc_point_16(p, pm); @@ -76410,7 +124961,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024)); for (j = 0; j < sp_1024_order_op_pre[1]; j++) { - sp_1024_accum_dbl_calc_lc_16(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_16(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_16(c, c, t); sp_1024_mont_map_16(c, t); @@ -76439,7 +124991,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, } for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) { - sp_1024_accum_dbl_calc_lc_16(precomp[k].x, precomp[k].y, c->x, c->y, t); + 
sp_1024_accum_dbl_calc_lc_16(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_16(c, c, t); sp_1024_mont_map_16(c, t); @@ -76449,8 +125002,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, *len = sizeof(sp_table_entry_1024) * 1167; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -76484,8 +125037,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res, const byte* table, word32 len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -76494,7 +125047,7 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, sp_digit (*pre_vy)[32]; sp_digit (*pre_nvy)[32]; #else - sp_digit t[6 * 2 * 16]; + sp_digit t[36 * 2 * 16]; sp_digit vx[2 * 16]; sp_digit vy[2 * 16]; sp_digit pre_vx[16][32]; @@ -76527,10 +125080,10 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_point_new_16(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 16 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 16 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -76539,14 +125092,14 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - 
!defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 16 * 2; - vy = td + 7 * 16 * 2; - pre_vx = (sp_digit(*)[32])(td + 8 * 16 * 2); - pre_vy = (sp_digit(*)[32])(td + 24 * 16 * 2); - pre_nvy = (sp_digit(*)[32])(td + 40 * 16 * 2); + vx = td + 36 * 16 * 2; + vy = td + 37 * 16 * 2; + pre_vx = (sp_digit(*)[32])(td + 38 * 16 * 2); + pre_vy = (sp_digit(*)[32])(td + 54 * 16 * 2); + pre_nvy = (sp_digit(*)[32])(td + 70 * 16 * 2); #endif r = vy; @@ -76644,8 +125197,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -76657,18 +125210,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_16(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* @@ -76679,44 +125220,111 @@ static int sp_1024_iszero_16(const sp_digit* a) */ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) { - int i; - int j; - byte* d; + sp_int64 nl = n; + sp_int64 bytes = size * 8; - for (i = n - 1,j = 0; i >= 7; i -= 8) { - r[j] = ((sp_digit)a[i - 0] << 0) | - ((sp_digit)a[i - 1] << 8) | - ((sp_digit)a[i - 2] << 16) | - ((sp_digit)a[i - 3] << 24) | - ((sp_digit)a[i - 4] << 32) | - ((sp_digit)a[i - 5] << 40) | - ((sp_digit)a[i - 6] << 48) | - ((sp_digit)a[i - 7] << 56); - j++; - } - - if (i >= 0) { - r[j] = 0; - - d = (byte*)r; - switch (i) { - case 6: d[n - 1 - 6] = a[6]; //fallthrough - case 5: d[n - 1 - 5] = a[5]; //fallthrough - case 4: d[n - 1 - 4] = a[4]; //fallthrough - case 3: d[n - 1 - 3] = a[3]; //fallthrough - case 2: d[n - 1 - 2] = a[2]; //fallthrough - case 1: d[n - 1 - 1] = a[1]; //fallthrough - case 0: d[n - 1 - 0] = a[0]; //fallthrough - } - j++; - } - - for (; j < size; j++) { - r[j] = 0; - } + __asm__ __volatile__ ( + "add x4, %[a], %[n]\n\t" + "mov x5, %[r]\n\t" + "sub x4, x4, 8\n\t" + "subs x6, %[n], 8\n\t" + "mov x7, xzr\n\t" + "blt 2f\n\t" + /* Put in multiples of 8 bytes. */ + "1:\n\t" + "ldr x8, [x4], -8\n\t" + "subs x6, x6, 8\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "rev x8, x8\n\t" + #endif + "str x8, [x5], 8\n\t" + "add x7, x7, 8\n\t" + "b.ge 1b\n\t" + "2:\n\t" + "cmp x6, -7\n\t" + "b.lt 20f\n\t" + /* Put in less than 8 bytes. 
*/ + #ifdef LITTLE_ENDIAN_ORDER + "str xzr, [x5]\n\t" + #else + "str xzr, [x5], 7\n\t" + #endif + "add x7, x7, 8\n\t" + "add x4, x4, 7\n\t" + "b.eq 17f\n\t" + "cmp x6, -5\n\t" + "b.lt 16f\n\t" + "b.eq 15f\n\t" + "cmp x6, -3\n\t" + "b.lt 14f\n\t" + "b.eq 13f\n\t" + "cmp x6, -2\n\t" + "b.eq 12f\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "12:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "13:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "14:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "15:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "16:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "17:\n\t" + "ldrb w8, [x4], -1\n\t" + #ifdef LITTLE_ENDIAN_ORDER + "strb w8, [x5], 1\n\t" + #else + "strb w8, [x5], -1\n\t" + #endif + "20:\n\t" + "add x5, %[r], x7\n\t" + "subs x7, %[size], x7\n\t" + "b.eq 30f\n\t" + /* Zero out remaining words. */ + "21:\n\t" + "subs x7, x7, 8\n\t" + "str xzr, [x5], 8\n\t" + "b.gt 21b\n\t" + "30:\n\t" + : + : [r] "r" (r), [size] "r" (bytes), [a] "r" (a), [n] "r" (nl) + : "memory", "x4", "x5", "x6", "x7", "x8", "cc" + ); } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -76726,7 +125334,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) static int sp_1024_ecc_is_point_16(const sp_point_1024* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[16 * 4]; @@ -76735,7 +125343,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, sp_int64 n; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -76745,29 +125353,30 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 16; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_16(t1, point->y); (void)sp_1024_mod_16(t1, t1, p1024_mod); sp_1024_sqr_16(t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); sp_1024_mul_16(t2, t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); - (void)sp_1024_sub_16(t2, p1024_mod, t2); - sp_1024_mont_add_16(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_16(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_16(t1, p1024_mod); - sp_1024_cond_sub_16(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(t1, t1, p1024_mod, ~(n >> 63)); sp_1024_norm_16(t1); if (!sp_1024_iszero_16(t1)) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -76775,7 +125384,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. 
* * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -76784,7 +125393,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, */ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* pub = NULL; #else sp_point_1024 pub[1]; @@ -76792,7 +125401,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -76807,7 +125416,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) err = sp_1024_ecc_is_point_16(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -76829,7 +125438,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_1024* pub = NULL; #else @@ -76850,7 +125459,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); @@ -76916,7 +125525,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -76927,6 +125536,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } #endif 
#endif /* WOLFSSL_SP_1024 */ +#endif /* WOLFCRYPT_HAVE_SAKKE */ #endif /* WOLFSSL_HAVE_SP_ECC */ #endif /* WOLFSSL_SP_ARM64_ASM */ #endif /* WOLFSSL_HAVE_SP_RSA | WOLFSSL_HAVE_SP_DH | WOLFSSL_HAVE_SP_ECC */ diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 31a6d14aa..2720b63a4 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -1,22 +1,12 @@ /* sp.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* Implementation by Sean Parkinson. 
*/ @@ -49,37 +39,59 @@ #endif #endif +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#undef WOLFSSL_SP_SMALL_STACK +#define WOLFSSL_SP_SMALL_STACK +#endif + #include +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif + #ifndef WOLFSSL_SP_ASM #if SP_WORD_SIZE == 32 #define SP_PRINT_NUM(var, name, total, words, bits) \ do { \ int ii; \ - byte n[bits / 8]; \ - sp_digit s[words]; \ - XMEMCPY(s, var, sizeof(s)); \ - sp_##total##_norm_##words(s); \ - sp_##total##_to_bin_##words(s, n); \ + byte nb[(bits + 7) / 8]; \ + sp_digit _s[words]; \ + XMEMCPY(_s, var, sizeof(_s)); \ + sp_##total##_norm_##words(_s); \ + sp_##total##_to_bin_##words(_s, nb); \ fprintf(stderr, name "=0x"); \ - for (ii=0; iiused; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 28); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 28); } #elif DIGIT_BIT > 29 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffff; s = 29U - s; @@ -172,12 +187,12 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 29) { r[j] &= 0x1fffffff; @@ -222,9 +237,9 @@ static void sp_2048_to_bin_72(sp_digit* r, byte* a) r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - j = 2048 / 8 - 1; + j = 2055 / 8 - 1; a[j] = 0; - for (i=0; i<72 && j>=0; i++) { + for (i=0; i<71 && j>=0; i++) { b = 0; /* 
lint allow cast of mismatch sp_digit and int */ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ @@ -325,287 +340,179 @@ static void sp_2048_norm_72(sp_digit* a) SP_NOINLINE static void sp_2048_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint64 t0 = ((sp_uint64)a[ 0]) * b[ 0]; - sp_uint64 t1 = ((sp_uint64)a[ 0]) * b[ 1] - + ((sp_uint64)a[ 1]) * b[ 0]; - sp_uint64 t2 = ((sp_uint64)a[ 0]) * b[ 2] - + ((sp_uint64)a[ 1]) * b[ 1] - + ((sp_uint64)a[ 2]) * b[ 0]; - sp_uint64 t3 = ((sp_uint64)a[ 0]) * b[ 3] - + ((sp_uint64)a[ 1]) * b[ 2] - + ((sp_uint64)a[ 2]) * b[ 1] - + ((sp_uint64)a[ 3]) * b[ 0]; - sp_uint64 t4 = ((sp_uint64)a[ 0]) * b[ 4] - + ((sp_uint64)a[ 1]) * b[ 3] - + ((sp_uint64)a[ 2]) * b[ 2] - + ((sp_uint64)a[ 3]) * b[ 1] - + ((sp_uint64)a[ 4]) * b[ 0]; - sp_uint64 t5 = ((sp_uint64)a[ 0]) * b[ 5] - + ((sp_uint64)a[ 1]) * b[ 4] - + ((sp_uint64)a[ 2]) * b[ 3] - + ((sp_uint64)a[ 3]) * b[ 2] - + ((sp_uint64)a[ 4]) * b[ 1] - + ((sp_uint64)a[ 5]) * b[ 0]; - sp_uint64 t6 = ((sp_uint64)a[ 0]) * b[ 6] - + ((sp_uint64)a[ 1]) * b[ 5] - + ((sp_uint64)a[ 2]) * b[ 4] - + ((sp_uint64)a[ 3]) * b[ 3] - + ((sp_uint64)a[ 4]) * b[ 2] - + ((sp_uint64)a[ 5]) * b[ 1] - + ((sp_uint64)a[ 6]) * b[ 0]; - sp_uint64 t7 = ((sp_uint64)a[ 0]) * b[ 7] - + ((sp_uint64)a[ 1]) * b[ 6] - + ((sp_uint64)a[ 2]) * b[ 5] - + ((sp_uint64)a[ 3]) * b[ 4] - + ((sp_uint64)a[ 4]) * b[ 3] - + ((sp_uint64)a[ 5]) * b[ 2] - + ((sp_uint64)a[ 6]) * b[ 1] - + ((sp_uint64)a[ 7]) * b[ 0]; - sp_uint64 t8 = ((sp_uint64)a[ 0]) * b[ 8] - + ((sp_uint64)a[ 1]) * b[ 7] - + ((sp_uint64)a[ 2]) * b[ 6] - + ((sp_uint64)a[ 3]) * b[ 5] - + ((sp_uint64)a[ 4]) * b[ 4] - + ((sp_uint64)a[ 5]) * b[ 3] - + ((sp_uint64)a[ 6]) * b[ 2] - + ((sp_uint64)a[ 7]) * b[ 1] - + ((sp_uint64)a[ 8]) * b[ 0]; - sp_uint64 t9 = ((sp_uint64)a[ 0]) * b[ 9] - + ((sp_uint64)a[ 1]) * b[ 8] - + ((sp_uint64)a[ 2]) * b[ 7] - + ((sp_uint64)a[ 3]) * b[ 6] - + ((sp_uint64)a[ 4]) * b[ 5] - + ((sp_uint64)a[ 5]) * b[ 4] - + ((sp_uint64)a[ 6]) * b[ 
3] - + ((sp_uint64)a[ 7]) * b[ 2] - + ((sp_uint64)a[ 8]) * b[ 1] - + ((sp_uint64)a[ 9]) * b[ 0]; - sp_uint64 t10 = ((sp_uint64)a[ 0]) * b[10] - + ((sp_uint64)a[ 1]) * b[ 9] - + ((sp_uint64)a[ 2]) * b[ 8] - + ((sp_uint64)a[ 3]) * b[ 7] - + ((sp_uint64)a[ 4]) * b[ 6] - + ((sp_uint64)a[ 5]) * b[ 5] - + ((sp_uint64)a[ 6]) * b[ 4] - + ((sp_uint64)a[ 7]) * b[ 3] - + ((sp_uint64)a[ 8]) * b[ 2] - + ((sp_uint64)a[ 9]) * b[ 1] - + ((sp_uint64)a[10]) * b[ 0]; - sp_uint64 t11 = ((sp_uint64)a[ 0]) * b[11] - + ((sp_uint64)a[ 1]) * b[10] - + ((sp_uint64)a[ 2]) * b[ 9] - + ((sp_uint64)a[ 3]) * b[ 8] - + ((sp_uint64)a[ 4]) * b[ 7] - + ((sp_uint64)a[ 5]) * b[ 6] - + ((sp_uint64)a[ 6]) * b[ 5] - + ((sp_uint64)a[ 7]) * b[ 4] - + ((sp_uint64)a[ 8]) * b[ 3] - + ((sp_uint64)a[ 9]) * b[ 2] - + ((sp_uint64)a[10]) * b[ 1] - + ((sp_uint64)a[11]) * b[ 0]; - sp_uint64 t12 = ((sp_uint64)a[ 1]) * b[11] - + ((sp_uint64)a[ 2]) * b[10] - + ((sp_uint64)a[ 3]) * b[ 9] - + ((sp_uint64)a[ 4]) * b[ 8] - + ((sp_uint64)a[ 5]) * b[ 7] - + ((sp_uint64)a[ 6]) * b[ 6] - + ((sp_uint64)a[ 7]) * b[ 5] - + ((sp_uint64)a[ 8]) * b[ 4] - + ((sp_uint64)a[ 9]) * b[ 3] - + ((sp_uint64)a[10]) * b[ 2] - + ((sp_uint64)a[11]) * b[ 1]; - sp_uint64 t13 = ((sp_uint64)a[ 2]) * b[11] - + ((sp_uint64)a[ 3]) * b[10] - + ((sp_uint64)a[ 4]) * b[ 9] - + ((sp_uint64)a[ 5]) * b[ 8] - + ((sp_uint64)a[ 6]) * b[ 7] - + ((sp_uint64)a[ 7]) * b[ 6] - + ((sp_uint64)a[ 8]) * b[ 5] - + ((sp_uint64)a[ 9]) * b[ 4] - + ((sp_uint64)a[10]) * b[ 3] - + ((sp_uint64)a[11]) * b[ 2]; - sp_uint64 t14 = ((sp_uint64)a[ 3]) * b[11] - + ((sp_uint64)a[ 4]) * b[10] - + ((sp_uint64)a[ 5]) * b[ 9] - + ((sp_uint64)a[ 6]) * b[ 8] - + ((sp_uint64)a[ 7]) * b[ 7] - + ((sp_uint64)a[ 8]) * b[ 6] - + ((sp_uint64)a[ 9]) * b[ 5] - + ((sp_uint64)a[10]) * b[ 4] - + ((sp_uint64)a[11]) * b[ 3]; - sp_uint64 t15 = ((sp_uint64)a[ 4]) * b[11] - + ((sp_uint64)a[ 5]) * b[10] - + ((sp_uint64)a[ 6]) * b[ 9] - + ((sp_uint64)a[ 7]) * b[ 8] - + ((sp_uint64)a[ 8]) * b[ 7] - + 
((sp_uint64)a[ 9]) * b[ 6] - + ((sp_uint64)a[10]) * b[ 5] - + ((sp_uint64)a[11]) * b[ 4]; - sp_uint64 t16 = ((sp_uint64)a[ 5]) * b[11] - + ((sp_uint64)a[ 6]) * b[10] - + ((sp_uint64)a[ 7]) * b[ 9] - + ((sp_uint64)a[ 8]) * b[ 8] - + ((sp_uint64)a[ 9]) * b[ 7] - + ((sp_uint64)a[10]) * b[ 6] - + ((sp_uint64)a[11]) * b[ 5]; - sp_uint64 t17 = ((sp_uint64)a[ 6]) * b[11] - + ((sp_uint64)a[ 7]) * b[10] - + ((sp_uint64)a[ 8]) * b[ 9] - + ((sp_uint64)a[ 9]) * b[ 8] - + ((sp_uint64)a[10]) * b[ 7] - + ((sp_uint64)a[11]) * b[ 6]; - sp_uint64 t18 = ((sp_uint64)a[ 7]) * b[11] - + ((sp_uint64)a[ 8]) * b[10] - + ((sp_uint64)a[ 9]) * b[ 9] - + ((sp_uint64)a[10]) * b[ 8] - + ((sp_uint64)a[11]) * b[ 7]; - sp_uint64 t19 = ((sp_uint64)a[ 8]) * b[11] - + ((sp_uint64)a[ 9]) * b[10] - + ((sp_uint64)a[10]) * b[ 9] - + ((sp_uint64)a[11]) * b[ 8]; - sp_uint64 t20 = ((sp_uint64)a[ 9]) * b[11] - + ((sp_uint64)a[10]) * b[10] - + ((sp_uint64)a[11]) * b[ 9]; - sp_uint64 t21 = ((sp_uint64)a[10]) * b[11] - + ((sp_uint64)a[11]) * b[10]; - sp_uint64 t22 = ((sp_uint64)a[11]) * b[11]; + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[12]; - t1 += t0 >> 29; r[ 0] = t0 & 0x1fffffff; - t2 += t1 >> 29; r[ 1] = t1 & 0x1fffffff; - t3 += t2 >> 29; r[ 2] = t2 & 0x1fffffff; - t4 += t3 >> 29; r[ 3] = t3 & 0x1fffffff; - t5 += t4 >> 29; r[ 4] = t4 & 0x1fffffff; - t6 += t5 >> 29; r[ 5] = t5 & 0x1fffffff; - t7 += t6 >> 29; r[ 6] = t6 & 0x1fffffff; - t8 += t7 >> 29; r[ 7] = t7 & 0x1fffffff; - t9 += t8 >> 29; r[ 8] = t8 & 0x1fffffff; - t10 += t9 >> 29; r[ 9] = t9 & 0x1fffffff; - t11 += t10 >> 29; r[10] = t10 & 0x1fffffff; - t12 += t11 >> 29; r[11] = t11 & 0x1fffffff; - t13 += t12 >> 29; r[12] = t12 & 0x1fffffff; - t14 += t13 >> 29; r[13] = t13 & 0x1fffffff; - t15 += t14 >> 29; r[14] = t14 & 0x1fffffff; - t16 += t15 >> 29; r[15] = t15 & 0x1fffffff; - t17 += t16 >> 29; r[16] = t16 & 0x1fffffff; - t18 += t17 >> 29; r[17] = t17 & 0x1fffffff; - t19 += t18 >> 29; r[18] = t18 & 0x1fffffff; - t20 += t19 >> 29; r[19] = t19 & 
0x1fffffff; - t21 += t20 >> 29; r[20] = t20 & 0x1fffffff; - t22 += t21 >> 29; r[21] = t21 & 0x1fffffff; - r[23] = (sp_digit)(t22 >> 29); - r[22] = t22 & 0x1fffffff; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_12(sp_digit* r, const sp_digit* a) -{ - sp_uint64 t0 = ((sp_uint64)a[ 0]) * a[ 0]; - sp_uint64 t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; - sp_uint64 t2 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 - + ((sp_uint64)a[ 1]) * a[ 1]; - sp_uint64 t3 = (((sp_uint64)a[ 0]) * a[ 3] - + ((sp_uint64)a[ 1]) * a[ 2]) * 2; - sp_uint64 t4 = (((sp_uint64)a[ 0]) * a[ 4] - + ((sp_uint64)a[ 1]) * a[ 3]) * 2 - + ((sp_uint64)a[ 2]) * a[ 2]; - sp_uint64 t5 = (((sp_uint64)a[ 0]) * a[ 5] - + ((sp_uint64)a[ 1]) * a[ 4] - + ((sp_uint64)a[ 2]) * a[ 3]) * 2; - sp_uint64 t6 = (((sp_uint64)a[ 0]) * a[ 6] - + ((sp_uint64)a[ 1]) * a[ 5] - + ((sp_uint64)a[ 2]) * a[ 4]) * 2 - + ((sp_uint64)a[ 3]) * a[ 3]; - sp_uint64 t7 = (((sp_uint64)a[ 0]) * a[ 7] - + ((sp_uint64)a[ 1]) * a[ 6] - + ((sp_uint64)a[ 2]) * a[ 5] - + ((sp_uint64)a[ 3]) * a[ 4]) * 2; - sp_uint64 t8 = (((sp_uint64)a[ 0]) * a[ 8] - + ((sp_uint64)a[ 1]) * a[ 7] - + ((sp_uint64)a[ 2]) * a[ 6] - + ((sp_uint64)a[ 3]) * a[ 5]) * 2 - + ((sp_uint64)a[ 4]) * a[ 4]; - sp_uint64 t9 = (((sp_uint64)a[ 0]) * a[ 9] - + ((sp_uint64)a[ 1]) * a[ 8] - + ((sp_uint64)a[ 2]) * a[ 7] - + ((sp_uint64)a[ 3]) * a[ 6] - + ((sp_uint64)a[ 4]) * a[ 5]) * 2; - sp_uint64 t10 = (((sp_uint64)a[ 0]) * a[10] - + ((sp_uint64)a[ 1]) * a[ 9] - + ((sp_uint64)a[ 2]) * a[ 8] - + ((sp_uint64)a[ 3]) * a[ 7] - + ((sp_uint64)a[ 4]) * a[ 6]) * 2 - + ((sp_uint64)a[ 5]) * a[ 5]; - sp_uint64 t11 = (((sp_uint64)a[ 0]) * a[11] - + ((sp_uint64)a[ 1]) * a[10] - + ((sp_uint64)a[ 2]) * a[ 9] - + ((sp_uint64)a[ 3]) * a[ 8] - + ((sp_uint64)a[ 4]) * a[ 7] - + ((sp_uint64)a[ 5]) * a[ 6]) * 2; - sp_uint64 t12 = (((sp_uint64)a[ 1]) * a[11] - + ((sp_uint64)a[ 2]) * a[10] - + 
((sp_uint64)a[ 3]) * a[ 9] - + ((sp_uint64)a[ 4]) * a[ 8] - + ((sp_uint64)a[ 5]) * a[ 7]) * 2 - + ((sp_uint64)a[ 6]) * a[ 6]; - sp_uint64 t13 = (((sp_uint64)a[ 2]) * a[11] - + ((sp_uint64)a[ 3]) * a[10] - + ((sp_uint64)a[ 4]) * a[ 9] - + ((sp_uint64)a[ 5]) * a[ 8] - + ((sp_uint64)a[ 6]) * a[ 7]) * 2; - sp_uint64 t14 = (((sp_uint64)a[ 3]) * a[11] - + ((sp_uint64)a[ 4]) * a[10] - + ((sp_uint64)a[ 5]) * a[ 9] - + ((sp_uint64)a[ 6]) * a[ 8]) * 2 - + ((sp_uint64)a[ 7]) * a[ 7]; - sp_uint64 t15 = (((sp_uint64)a[ 4]) * a[11] - + ((sp_uint64)a[ 5]) * a[10] - + ((sp_uint64)a[ 6]) * a[ 9] - + ((sp_uint64)a[ 7]) * a[ 8]) * 2; - sp_uint64 t16 = (((sp_uint64)a[ 5]) * a[11] - + ((sp_uint64)a[ 6]) * a[10] - + ((sp_uint64)a[ 7]) * a[ 9]) * 2 - + ((sp_uint64)a[ 8]) * a[ 8]; - sp_uint64 t17 = (((sp_uint64)a[ 6]) * a[11] - + ((sp_uint64)a[ 7]) * a[10] - + ((sp_uint64)a[ 8]) * a[ 9]) * 2; - sp_uint64 t18 = (((sp_uint64)a[ 7]) * a[11] - + ((sp_uint64)a[ 8]) * a[10]) * 2 - + ((sp_uint64)a[ 9]) * a[ 9]; - sp_uint64 t19 = (((sp_uint64)a[ 8]) * a[11] - + ((sp_uint64)a[ 9]) * a[10]) * 2; - sp_uint64 t20 = (((sp_uint64)a[ 9]) * a[11]) * 2 - + ((sp_uint64)a[10]) * a[10]; - sp_uint64 t21 = (((sp_uint64)a[10]) * a[11]) * 2; - sp_uint64 t22 = ((sp_uint64)a[11]) * a[11]; - - t1 += t0 >> 29; r[ 0] = t0 & 0x1fffffff; - t2 += t1 >> 29; r[ 1] = t1 & 0x1fffffff; - t3 += t2 >> 29; r[ 2] = t2 & 0x1fffffff; - t4 += t3 >> 29; r[ 3] = t3 & 0x1fffffff; - t5 += t4 >> 29; r[ 4] = t4 & 0x1fffffff; - t6 += t5 >> 29; r[ 5] = t5 & 0x1fffffff; - t7 += t6 >> 29; r[ 6] = t6 & 0x1fffffff; - t8 += t7 >> 29; r[ 7] = t7 & 0x1fffffff; - t9 += t8 >> 29; r[ 8] = t8 & 0x1fffffff; - t10 += t9 >> 29; r[ 9] = t9 & 0x1fffffff; - t11 += t10 >> 29; r[10] = t10 & 0x1fffffff; - t12 += t11 >> 29; r[11] = t11 & 0x1fffffff; - t13 += t12 >> 29; r[12] = t12 & 0x1fffffff; - t14 += t13 >> 29; r[13] = t13 & 0x1fffffff; - t15 += t14 >> 29; r[14] = t14 & 0x1fffffff; - t16 += t15 >> 29; r[15] = t15 & 0x1fffffff; - t17 += t16 >> 29; r[16] = 
t16 & 0x1fffffff; - t18 += t17 >> 29; r[17] = t17 & 0x1fffffff; - t19 += t18 >> 29; r[18] = t18 & 0x1fffffff; - t20 += t19 >> 29; r[19] = t19 & 0x1fffffff; - t21 += t20 >> 29; r[20] = t20 & 0x1fffffff; - t22 += t21 >> 29; r[21] = t21 & 0x1fffffff; - r[23] = (sp_digit)(t22 >> 29); - r[22] = t22 & 0x1fffffff; + t0 = ((sp_uint64)a[ 0]) * b[ 0]; + t1 = ((sp_uint64)a[ 0]) * b[ 1] + + ((sp_uint64)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 0]) * b[ 2] + + ((sp_uint64)a[ 1]) * b[ 1] + + ((sp_uint64)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 0]) * b[ 3] + + ((sp_uint64)a[ 1]) * b[ 2] + + ((sp_uint64)a[ 2]) * b[ 1] + + ((sp_uint64)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 0]) * b[ 4] + + ((sp_uint64)a[ 1]) * b[ 3] + + ((sp_uint64)a[ 2]) * b[ 2] + + ((sp_uint64)a[ 3]) * b[ 1] + + ((sp_uint64)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 0]) * b[ 5] + + ((sp_uint64)a[ 1]) * b[ 4] + + ((sp_uint64)a[ 2]) * b[ 3] + + ((sp_uint64)a[ 3]) * b[ 2] + + ((sp_uint64)a[ 4]) * b[ 1] + + ((sp_uint64)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 0]) * b[ 6] + + ((sp_uint64)a[ 1]) * b[ 5] + + ((sp_uint64)a[ 2]) * b[ 4] + + ((sp_uint64)a[ 3]) * b[ 3] + + ((sp_uint64)a[ 4]) * b[ 2] + + ((sp_uint64)a[ 5]) * b[ 1] + + ((sp_uint64)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 0]) * b[ 7] + + ((sp_uint64)a[ 1]) * b[ 6] + + ((sp_uint64)a[ 2]) * b[ 5] + + ((sp_uint64)a[ 3]) * b[ 4] + + ((sp_uint64)a[ 4]) * b[ 3] + + ((sp_uint64)a[ 5]) * b[ 2] + + ((sp_uint64)a[ 6]) * b[ 1] + + ((sp_uint64)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 0]) * b[ 8] + + ((sp_uint64)a[ 1]) * b[ 7] + + ((sp_uint64)a[ 2]) * b[ 6] + + ((sp_uint64)a[ 3]) * b[ 5] + + ((sp_uint64)a[ 4]) * b[ 4] + + ((sp_uint64)a[ 5]) * b[ 3] + + ((sp_uint64)a[ 6]) * b[ 2] + + ((sp_uint64)a[ 
7]) * b[ 1] + + ((sp_uint64)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 0]) * b[ 9] + + ((sp_uint64)a[ 1]) * b[ 8] + + ((sp_uint64)a[ 2]) * b[ 7] + + ((sp_uint64)a[ 3]) * b[ 6] + + ((sp_uint64)a[ 4]) * b[ 5] + + ((sp_uint64)a[ 5]) * b[ 4] + + ((sp_uint64)a[ 6]) * b[ 3] + + ((sp_uint64)a[ 7]) * b[ 2] + + ((sp_uint64)a[ 8]) * b[ 1] + + ((sp_uint64)a[ 9]) * b[ 0]; + t[ 8] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 0]) * b[10] + + ((sp_uint64)a[ 1]) * b[ 9] + + ((sp_uint64)a[ 2]) * b[ 8] + + ((sp_uint64)a[ 3]) * b[ 7] + + ((sp_uint64)a[ 4]) * b[ 6] + + ((sp_uint64)a[ 5]) * b[ 5] + + ((sp_uint64)a[ 6]) * b[ 4] + + ((sp_uint64)a[ 7]) * b[ 3] + + ((sp_uint64)a[ 8]) * b[ 2] + + ((sp_uint64)a[ 9]) * b[ 1] + + ((sp_uint64)a[10]) * b[ 0]; + t[ 9] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 0]) * b[11] + + ((sp_uint64)a[ 1]) * b[10] + + ((sp_uint64)a[ 2]) * b[ 9] + + ((sp_uint64)a[ 3]) * b[ 8] + + ((sp_uint64)a[ 4]) * b[ 7] + + ((sp_uint64)a[ 5]) * b[ 6] + + ((sp_uint64)a[ 6]) * b[ 5] + + ((sp_uint64)a[ 7]) * b[ 4] + + ((sp_uint64)a[ 8]) * b[ 3] + + ((sp_uint64)a[ 9]) * b[ 2] + + ((sp_uint64)a[10]) * b[ 1] + + ((sp_uint64)a[11]) * b[ 0]; + t[10] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 1]) * b[11] + + ((sp_uint64)a[ 2]) * b[10] + + ((sp_uint64)a[ 3]) * b[ 9] + + ((sp_uint64)a[ 4]) * b[ 8] + + ((sp_uint64)a[ 5]) * b[ 7] + + ((sp_uint64)a[ 6]) * b[ 6] + + ((sp_uint64)a[ 7]) * b[ 5] + + ((sp_uint64)a[ 8]) * b[ 4] + + ((sp_uint64)a[ 9]) * b[ 3] + + ((sp_uint64)a[10]) * b[ 2] + + ((sp_uint64)a[11]) * b[ 1]; + t[11] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 2]) * b[11] + + ((sp_uint64)a[ 3]) * b[10] + + ((sp_uint64)a[ 4]) * b[ 9] + + ((sp_uint64)a[ 5]) * b[ 8] + + ((sp_uint64)a[ 6]) * b[ 7] + + ((sp_uint64)a[ 7]) * b[ 6] + + ((sp_uint64)a[ 8]) * b[ 5] + + ((sp_uint64)a[ 9]) * b[ 4] + + ((sp_uint64)a[10]) * b[ 3] + + ((sp_uint64)a[11]) * b[ 2]; + r[12] = t0 & 0x1fffffff; t1 += t0 >> 29; + 
t0 = ((sp_uint64)a[ 3]) * b[11] + + ((sp_uint64)a[ 4]) * b[10] + + ((sp_uint64)a[ 5]) * b[ 9] + + ((sp_uint64)a[ 6]) * b[ 8] + + ((sp_uint64)a[ 7]) * b[ 7] + + ((sp_uint64)a[ 8]) * b[ 6] + + ((sp_uint64)a[ 9]) * b[ 5] + + ((sp_uint64)a[10]) * b[ 4] + + ((sp_uint64)a[11]) * b[ 3]; + r[13] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 4]) * b[11] + + ((sp_uint64)a[ 5]) * b[10] + + ((sp_uint64)a[ 6]) * b[ 9] + + ((sp_uint64)a[ 7]) * b[ 8] + + ((sp_uint64)a[ 8]) * b[ 7] + + ((sp_uint64)a[ 9]) * b[ 6] + + ((sp_uint64)a[10]) * b[ 5] + + ((sp_uint64)a[11]) * b[ 4]; + r[14] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 5]) * b[11] + + ((sp_uint64)a[ 6]) * b[10] + + ((sp_uint64)a[ 7]) * b[ 9] + + ((sp_uint64)a[ 8]) * b[ 8] + + ((sp_uint64)a[ 9]) * b[ 7] + + ((sp_uint64)a[10]) * b[ 6] + + ((sp_uint64)a[11]) * b[ 5]; + r[15] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 6]) * b[11] + + ((sp_uint64)a[ 7]) * b[10] + + ((sp_uint64)a[ 8]) * b[ 9] + + ((sp_uint64)a[ 9]) * b[ 8] + + ((sp_uint64)a[10]) * b[ 7] + + ((sp_uint64)a[11]) * b[ 6]; + r[16] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 7]) * b[11] + + ((sp_uint64)a[ 8]) * b[10] + + ((sp_uint64)a[ 9]) * b[ 9] + + ((sp_uint64)a[10]) * b[ 8] + + ((sp_uint64)a[11]) * b[ 7]; + r[17] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[ 8]) * b[11] + + ((sp_uint64)a[ 9]) * b[10] + + ((sp_uint64)a[10]) * b[ 9] + + ((sp_uint64)a[11]) * b[ 8]; + r[18] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[ 9]) * b[11] + + ((sp_uint64)a[10]) * b[10] + + ((sp_uint64)a[11]) * b[ 9]; + r[19] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_uint64)a[10]) * b[11] + + ((sp_uint64)a[11]) * b[10]; + r[20] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[11]) * b[11]; + r[21] = t1 & 0x1fffffff; t0 += t1 >> 29; + r[22] = t0 & 0x1fffffff; + r[23] = (sp_digit)(t0 >> 29); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. 
(r = a + b) @@ -804,55 +711,6 @@ SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a, sp_2048_norm_72(r); } -/* Square a into r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) -{ - sp_digit p0[24]; - sp_digit p1[24]; - sp_digit p2[24]; - sp_digit p3[24]; - sp_digit p4[24]; - sp_digit p5[24]; - sp_digit t0[24]; - sp_digit t1[24]; - sp_digit t2[24]; - sp_digit a0[12]; - sp_digit a1[12]; - sp_digit a2[12]; - (void)sp_2048_add_12(a0, a, &a[12]); - sp_2048_norm_12(a0); - (void)sp_2048_add_12(a1, &a[12], &a[24]); - sp_2048_norm_12(a1); - (void)sp_2048_add_12(a2, a0, &a[24]); - sp_2048_norm_12(a2); - sp_2048_sqr_12(p0, a); - sp_2048_sqr_12(p2, &a[12]); - sp_2048_sqr_12(p4, &a[24]); - sp_2048_sqr_12(p1, a0); - sp_2048_sqr_12(p3, a1); - sp_2048_sqr_12(p5, a2); - XMEMSET(r, 0, sizeof(*r)*2U*36U); - (void)sp_2048_sub_24(t0, p3, p2); - (void)sp_2048_sub_24(t1, p1, p2); - (void)sp_2048_sub_24(t2, p5, t0); - (void)sp_2048_sub_24(t2, t2, t1); - sp_2048_norm_24(t2); - (void)sp_2048_sub_24(t0, t0, p4); - sp_2048_norm_24(t0); - (void)sp_2048_sub_24(t1, t1, p0); - sp_2048_norm_24(t1); - (void)sp_2048_add_24(r, r, p0); - (void)sp_2048_add_24(&r[12], &r[12], t1); - (void)sp_2048_add_24(&r[24], &r[24], t2); - (void)sp_2048_add_24(&r[36], &r[36], t0); - (void)sp_2048_add_24(&r[48], &r[48], p4); - sp_2048_norm_72(r); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -993,6 +851,171 @@ SP_NOINLINE static void sp_2048_mul_72(sp_digit* r, const sp_digit* a, sp_2048_norm_144(r); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_12(sp_digit* r, const sp_digit* a) +{ + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[12]; + + t0 = ((sp_uint64)a[ 0]) * a[ 0]; + t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 + + ((sp_uint64)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 0]) * a[ 3] + + ((sp_uint64)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 0]) * a[ 4] + + ((sp_uint64)a[ 1]) * a[ 3]) * 2 + + ((sp_uint64)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 0]) * a[ 5] + + ((sp_uint64)a[ 1]) * a[ 4] + + ((sp_uint64)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 0]) * a[ 6] + + ((sp_uint64)a[ 1]) * a[ 5] + + ((sp_uint64)a[ 2]) * a[ 4]) * 2 + + ((sp_uint64)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 0]) * a[ 7] + + ((sp_uint64)a[ 1]) * a[ 6] + + ((sp_uint64)a[ 2]) * a[ 5] + + ((sp_uint64)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 0]) * a[ 8] + + ((sp_uint64)a[ 1]) * a[ 7] + + ((sp_uint64)a[ 2]) * a[ 6] + + ((sp_uint64)a[ 3]) * a[ 5]) * 2 + + ((sp_uint64)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 0]) * a[ 9] + + ((sp_uint64)a[ 1]) * a[ 8] + + ((sp_uint64)a[ 2]) * a[ 7] + + ((sp_uint64)a[ 3]) * a[ 6] + + ((sp_uint64)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 0]) * a[10] + + ((sp_uint64)a[ 1]) * a[ 9] + + ((sp_uint64)a[ 2]) * a[ 8] + + ((sp_uint64)a[ 3]) * a[ 7] + + ((sp_uint64)a[ 4]) * a[ 6]) * 2 + + ((sp_uint64)a[ 5]) * a[ 5]; + t[ 9] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 0]) * a[11] + + ((sp_uint64)a[ 1]) * a[10] + + ((sp_uint64)a[ 2]) * a[ 9] + + ((sp_uint64)a[ 3]) * a[ 8] + + ((sp_uint64)a[ 4]) * a[ 7] + + ((sp_uint64)a[ 5]) * a[ 6]) * 2; + t[10] = t0 & 
0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 1]) * a[11] + + ((sp_uint64)a[ 2]) * a[10] + + ((sp_uint64)a[ 3]) * a[ 9] + + ((sp_uint64)a[ 4]) * a[ 8] + + ((sp_uint64)a[ 5]) * a[ 7]) * 2 + + ((sp_uint64)a[ 6]) * a[ 6]; + t[11] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 2]) * a[11] + + ((sp_uint64)a[ 3]) * a[10] + + ((sp_uint64)a[ 4]) * a[ 9] + + ((sp_uint64)a[ 5]) * a[ 8] + + ((sp_uint64)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 3]) * a[11] + + ((sp_uint64)a[ 4]) * a[10] + + ((sp_uint64)a[ 5]) * a[ 9] + + ((sp_uint64)a[ 6]) * a[ 8]) * 2 + + ((sp_uint64)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 4]) * a[11] + + ((sp_uint64)a[ 5]) * a[10] + + ((sp_uint64)a[ 6]) * a[ 9] + + ((sp_uint64)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 5]) * a[11] + + ((sp_uint64)a[ 6]) * a[10] + + ((sp_uint64)a[ 7]) * a[ 9]) * 2 + + ((sp_uint64)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 6]) * a[11] + + ((sp_uint64)a[ 7]) * a[10] + + ((sp_uint64)a[ 8]) * a[ 9]) * 2; + r[16] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 7]) * a[11] + + ((sp_uint64)a[ 8]) * a[10]) * 2 + + ((sp_uint64)a[ 9]) * a[ 9]; + r[17] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[ 8]) * a[11] + + ((sp_uint64)a[ 9]) * a[10]) * 2; + r[18] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_uint64)a[ 9]) * a[11]) * 2 + + ((sp_uint64)a[10]) * a[10]; + r[19] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_uint64)a[10]) * a[11]) * 2; + r[20] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_uint64)a[11]) * a[11]; + r[21] = t1 & 0x1fffffff; t0 += t1 >> 29; + r[22] = t0 & 0x1fffffff; + r[23] = (sp_digit)(t0 >> 29); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[24]; + sp_digit p1[24]; + sp_digit p2[24]; + sp_digit p3[24]; + sp_digit p4[24]; + sp_digit p5[24]; + sp_digit t0[24]; + sp_digit t1[24]; + sp_digit t2[24]; + sp_digit a0[12]; + sp_digit a1[12]; + sp_digit a2[12]; + (void)sp_2048_add_12(a0, a, &a[12]); + sp_2048_norm_12(a0); + (void)sp_2048_add_12(a1, &a[12], &a[24]); + sp_2048_norm_12(a1); + (void)sp_2048_add_12(a2, a0, &a[24]); + sp_2048_norm_12(a2); + sp_2048_sqr_12(p0, a); + sp_2048_sqr_12(p2, &a[12]); + sp_2048_sqr_12(p4, &a[24]); + sp_2048_sqr_12(p1, a0); + sp_2048_sqr_12(p3, a1); + sp_2048_sqr_12(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*36U); + (void)sp_2048_sub_24(t0, p3, p2); + (void)sp_2048_sub_24(t1, p1, p2); + (void)sp_2048_sub_24(t2, p5, t0); + (void)sp_2048_sub_24(t2, t2, t1); + sp_2048_norm_24(t2); + (void)sp_2048_sub_24(t0, t0, p4); + sp_2048_norm_24(t0); + (void)sp_2048_sub_24(t1, t1, p0); + sp_2048_norm_24(t1); + (void)sp_2048_add_24(r, r, p0); + (void)sp_2048_add_24(&r[12], &r[12], t1); + (void)sp_2048_add_24(&r[24], &r[24], t2); + (void)sp_2048_add_24(&r[36], &r[36], t0); + (void)sp_2048_add_24(&r[48], &r[48], p4); + sp_2048_norm_72(r); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -1369,7 +1392,7 @@ SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) #endif /* WOLFSSL_SP_SMALL */ #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -1499,24 +1522,24 @@ static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b) int i; for (i=35; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else int i; r |= (a[35] - b[35]) & (0 - (sp_digit)1); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 28); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 28); } #endif /* WOLFSSL_SP_SMALL */ @@ -1720,21 +1743,22 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_36(a + 36); for (i=0; i<35; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x1ffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1ffL; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_2048_mont_shift_36(a, a); - sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] - m[35]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[35] - m[35]; + sp_2048_cond_sub_36(a, a, m, ~((over - 1) >> 31)); sp_2048_norm_36(a); } @@ -1745,9 +1769,9 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_36(r, a, b); @@ -1759,9 +1783,9 @@ static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_36(r, a); @@ -1821,6 +1845,7 @@ SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -1832,13 +1857,26 @@ SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a, static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 36; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ int i; for (i = 0; i < 32; i += 8) { @@ -1855,8 +1893,8 @@ static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a, r[33] = a[33] + (b[33] & m); r[34] = a[34] + (b[34] & m); r[35] = a[35] + (b[35] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, byte n) @@ -1885,135 +1923,96 @@ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, r[35] = a[35] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign 
= (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_36(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -2030,11 +2029,10 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 36 + 3]; @@ -2045,7 +2043,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 36 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -2064,14 +2062,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + 36] += t1[36 + 36 - 1] >> 29; t1[36 + 36 - 1] &= 0x1fffffff; for (i=36; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[36 + i]; - d1 <<= 29; - d1 += t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv); -#endif sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_sub_36(&t1[i], &t1[i], t2); @@ -2079,14 +2070,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + i] -= t2[36]; t1[36 + i] += t1[36 + i - 1] >> 29; t1[36 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[36 + i]; - d1 <<= 29; - d1 -= t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(-t1[36 + i], -t1[36 + i - 1], dv); -#endif r1 -= t1[36 + i]; sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_add_36(&t1[i], &t1[i], t2); @@ -2095,7 +2079,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, } t1[36 - 1] += t1[36 - 2] >> 29; t1[36 - 2] &= 0x1fffffff; - r1 = t1[36 - 1] / dv; + r1 = sp_2048_word_div_word_36(t1[36 - 1], dv); sp_2048_mul_d_36(t2, sd, r1); sp_2048_sub_36(t1, t1, t2); @@ -2104,14 +2088,13 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_2048_cond_add_36(r, r, sd, 0 - ((r[35] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_36(r, r, sd, r[35] >> 31); sp_2048_norm_36(r); sp_2048_rshift_36(r, r, 20); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -2138,13 +2121,15 @@ static int sp_2048_mod_36(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 72]; @@ -2158,11 +2143,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2217,20 +2208,19 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 72]; @@ -2244,11 +2234,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2303,19 +2299,18 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 72) + 72]; @@ -2330,11 +2325,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 72) + 72), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 72) + 72), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2444,12 +2445,11 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(rt, m, mp); n = sp_2048_cmp_36(rt, m); - sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 72); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -2519,20 +2519,20 @@ static sp_digit sp_2048_cmp_72(const sp_digit* a, const sp_digit* b) int i; for (i=71; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else int i; for (i = 64; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 28); } #endif /* WOLFSSL_SP_SMALL */ @@ -2747,17 +2747,18 @@ static void sp_2048_mont_reduce_72(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_72(a + 71); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<70; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_2048_mul_add_72(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x3ffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffL; sp_2048_mul_add_72(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; @@ -2775,18 +2776,18 @@ static void sp_2048_mont_reduce_72(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<70; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_2048_mul_add_72(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x3ffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffL; sp_2048_mul_add_72(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; #endif sp_2048_mont_shift_72(a, a); - sp_2048_cond_sub_72(a, a, m, 0 - (((a[70] - m[70]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[70] - m[70]; + sp_2048_cond_sub_72(a, a, m, ~((over - 1) >> 31)); sp_2048_norm_72(a); } @@ -2797,9 +2798,9 @@ static void sp_2048_mont_reduce_72(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_72(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_72(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_72(r, a, b); @@ -2811,9 +2812,9 @@ static void sp_2048_mont_mul_72(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_72(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_72(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_72(r, a); @@ -2906,6 +2907,7 @@ SP_NOINLINE static void sp_2048_mul_d_144(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -2917,16 +2919,29 @@ SP_NOINLINE static void sp_2048_mul_d_144(sp_digit* r, const sp_digit* a, static void sp_2048_cond_add_72(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 71; i++) { + for (i = 0; i < 72; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_2048_cond_add_72(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ int i; - for (i = 0; i < 64; i += 8) { + for (i = 0; i < 72; i += 8) { r[i + 0] = a[i + 0] + (b[i + 0] & m); r[i + 1] = a[i + 1] + (b[i + 1] & m); r[i + 2] = a[i + 2] + (b[i + 2] & m); @@ -2936,15 +2951,8 @@ static void sp_2048_cond_add_72(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } - r[64] = a[64] + (b[64] & m); - r[65] = a[65] + (b[65] & m); - r[66] = a[66] + (b[66] & m); - r[67] = a[67] + (b[67] & m); - r[68] = a[68] + (b[68] & m); - r[69] = a[69] + (b[69] & m); - r[70] = a[70] + (b[70] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_2048_rshift_72(sp_digit* r, const sp_digit* a, byte n) @@ -2977,135 +2985,96 @@ SP_NOINLINE static void sp_2048_rshift_72(sp_digit* r, const sp_digit* a, r[71] = a[71] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += 
r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_72(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -3122,11 +3091,10 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 72 + 3]; @@ -3137,7 +3105,7 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 72 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -3156,28 +3124,14 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, t1[71 + 71] += t1[71 + 71 - 1] >> 29; t1[71 + 71 - 1] &= 0x1fffffff; for (i=71; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[71 + i]; - d1 <<= 29; - d1 += t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_72(t1[71 + i], t1[71 + i - 1], dv); -#endif sp_2048_mul_d_72(t2, sd, r1); (void)sp_2048_sub_72(&t1[i], &t1[i], t2); sp_2048_norm_71(&t1[i]); t1[71 + i] += t1[71 + i - 1] >> 29; t1[71 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[71 + i]; - d1 <<= 29; - d1 -= t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_72(-t1[71 + i], -t1[71 + i - 1], dv); -#endif r1 -= t1[71 + i]; sp_2048_mul_d_72(t2, sd, r1); (void)sp_2048_add_72(&t1[i], &t1[i], t2); @@ -3186,7 +3140,7 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, } t1[71 - 1] += t1[71 - 2] >> 29; t1[71 - 2] &= 0x1fffffff; - r1 = t1[71 - 1] / dv; + r1 = sp_2048_word_div_word_72(t1[71 - 1], dv); sp_2048_mul_d_72(t2, sd, r1); sp_2048_sub_72(t1, t1, t2); @@ -3195,15 +3149,14 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_2048_cond_add_72(r, r, sd, 0 - ((r[70] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_72(r, r, sd, r[70] >> 31); sp_2048_norm_71(r); sp_2048_rshift_72(r, r, 11); r[71] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -3233,13 +3186,15 @@ static int sp_2048_mod_72(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 144]; @@ -3253,11 +3208,17 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 72 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 72 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3312,20 +3273,19 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(t[0], m, mp); n = sp_2048_cmp_72(t[0], m); - sp_2048_cond_sub_72(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 72 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 144]; @@ -3339,11 +3299,17 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 72 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 72 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3398,19 +3364,18 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(t[0], m, mp); n = sp_2048_cmp_72(t[0], m); - sp_2048_cond_sub_72(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 72 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 144) + 144]; @@ -3425,11 +3390,17 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 144) + 144), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 144) + 144), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3522,12 +3493,11 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(rt, m, mp); n = sp_2048_cmp_72(rt, m); - sp_2048_cond_sub_72(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 144); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -3556,7 +3526,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[72 * 5]; @@ -3564,8 +3534,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -3574,7 +3544,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -3588,7 +3558,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -3603,12 +3573,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_2048_from_bin(a, 72, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -3627,7 +3597,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_72(a, a, m); } if (err == MP_OKAY) { - for (i=28; i>=0; i--) { + 
for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -3643,21 +3613,20 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_72(r, m, mp); mp = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_72(r, r, m, ~(mp >> 31)); sp_2048_to_bin_72(r, out); *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[72 * 5]; @@ -3665,14 +3634,14 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 256U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -3686,7 +3655,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -3701,12 +3670,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, m = r + 72 * 2; sp_2048_from_bin(a, 72, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -3736,7 +3705,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_72(a, a, m); if (err == MP_OKAY) { - for (i=28; i>=0; 
i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -3752,8 +3721,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_72(r, m, mp); mp = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(mp >> 31)); } } } @@ -3763,7 +3731,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -3798,7 +3766,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[72 * 4]; @@ -3832,7 +3800,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -3857,21 +3825,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 72); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[72 * 4]; @@ -3905,7 +3873,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -3930,14 +3898,14 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 72); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -3946,7 +3914,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[36 * 8]; @@ -3976,9 +3944,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -4009,6 +3983,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_36(tmpa); sp_2048_cond_add_36(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[35] >> 31)); sp_2048_cond_add_36(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[35] >> 31)); + sp_2048_norm_36(tmpa); sp_2048_from_mp(qi, 36, qim); sp_2048_mul_36(tmpa, tmpa, qi); @@ -4025,19 +4000,19 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) 
#endif { ForceZero(a, sizeof(sp_digit) * 36 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[36 * 13]; @@ -4068,9 +4043,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -4107,6 +4088,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_36(tmpa); sp_2048_cond_add_36(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[35] >> 31)); sp_2048_cond_add_36(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[35] >> 31)); + sp_2048_norm_36(tmpa); sp_2048_mul_36(tmpa, tmpa, qi); err = sp_2048_mod_36(tmpa, tmpa, p); } @@ -4120,12 +4102,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 36 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -4151,8 +4133,8 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ #if DIGIT_BIT == 29 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 72); - r->used = 72; + XMEMCPY(r->dp, a, sizeof(sp_digit) * 71); + r->used = 71; mp_clamp(r); #elif DIGIT_BIT < 29 int i; @@ -4160,7 +4142,7 @@ static int sp_2048_to_mp(const 
sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 72; i++) { + for (i = 0; i < 71; i++) { r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; s = DIGIT_BIT - s; @@ -4185,7 +4167,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 72; i++) { + for (i = 0; i < 71; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; if (s + 29 >= DIGIT_BIT) { #if DIGIT_BIT != 32 && DIGIT_BIT != 64 @@ -4221,7 +4203,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[72 * 4]; @@ -4244,7 +4226,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 4, NULL, DYNAMIC_TYPE_DH); @@ -4269,20 +4251,20 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 72U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[72 * 4]; @@ -4306,7 +4288,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = 
(sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -4331,14 +4313,14 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 72U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -4518,11 +4500,13 @@ SP_NOINLINE static void sp_2048_lshift_72(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[217]; @@ -4537,11 +4521,17 @@ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 217, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 217, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -4606,17 +4596,15 @@ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_72(r, r, tmp); sp_2048_norm_72(r); o = sp_2048_cmp_72(r, m); - 
sp_2048_cond_sub_72(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(o >> 31)); } sp_2048_mont_reduce_72(r, m, mp); n = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(n >> 31)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -4641,7 +4629,7 @@ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[72 * 4]; @@ -4665,7 +4653,7 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 72 * 4, NULL, DYNAMIC_TYPE_DH); @@ -4706,14 +4694,14 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 72U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -4736,7 +4724,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[36 * 4]; @@ -4759,7 +4747,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, 
const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_DH); @@ -4785,20 +4773,20 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 72U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[36 * 4]; @@ -4822,7 +4810,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -4848,14 +4836,14 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 72U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -4914,20 +4902,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 29 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * 
a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 28); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 28); } #elif DIGIT_BIT > 29 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffff; s = 29U - s; @@ -4957,12 +4948,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 29) { r[j] &= 0x1fffffff; @@ -5007,7 +4998,7 @@ static void sp_3072_to_bin_106(sp_digit* r, byte* a) r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - j = 3072 / 8 - 1; + j = 3079 / 8 - 1; a[j] = 0; for (i=0; i<106 && j>=0; i++) { b = 0; @@ -5181,7 +5172,7 @@ SP_NOINLINE static void sp_3072_sqr_106(sp_digit* r, const sp_digit* a) r[0] = (sp_digit)(c >> 29); } -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -5278,7 +5269,7 @@ static sp_digit sp_3072_cmp_53(const sp_digit* a, const sp_digit* b) int i; for (i=52; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -5379,21 +5370,22 @@ static void sp_3072_mont_reduce_53(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_53(a + 53); for (i=0; i<52; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_3072_mul_add_53(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0xfffffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffffL; sp_3072_mul_add_53(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_3072_mont_shift_53(a, a); - sp_3072_cond_sub_53(a, a, m, 0 - (((a[52] - m[52]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[52] - m[52]; + sp_3072_cond_sub_53(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_53(a); } @@ -5459,9 +5451,9 @@ SP_NOINLINE static void sp_3072_mul_53(sp_digit* r, const sp_digit* a, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_53(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_53(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_53(r, a, b); @@ -5537,9 +5529,9 @@ SP_NOINLINE static void sp_3072_sqr_53(sp_digit* r, const sp_digit* a) * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_53(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_53(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_53(r, a); @@ -5567,6 +5559,7 @@ SP_NOINLINE static void sp_3072_mul_d_53(sp_digit* r, const sp_digit* a, r[53] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -5584,6 +5577,7 @@ static void sp_3072_cond_add_53(sp_digit* r, const sp_digit* a, r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -5614,135 +5608,96 @@ SP_NOINLINE static void sp_3072_rshift_53(sp_digit* r, const sp_digit* a, r[52] = a[52] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and 
top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_53(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -5759,11 +5714,10 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 53 + 3]; @@ -5774,7 +5728,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 53 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -5793,14 +5747,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, t1[53 + 53] += t1[53 + 53 - 1] >> 29; t1[53 + 53 - 1] &= 0x1fffffff; for (i=53; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[53 + i]; - d1 <<= 29; - d1 += t1[53 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_53(t1[53 + i], t1[53 + i - 1], dv); -#endif sp_3072_mul_d_53(t2, sd, r1); (void)sp_3072_sub_53(&t1[i], &t1[i], t2); @@ -5808,14 +5755,7 @@ static int 
sp_3072_div_53(const sp_digit* a, const sp_digit* d, t1[53 + i] -= t2[53]; t1[53 + i] += t1[53 + i - 1] >> 29; t1[53 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[53 + i]; - d1 <<= 29; - d1 -= t1[53 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_53(-t1[53 + i], -t1[53 + i - 1], dv); -#endif r1 -= t1[53 + i]; sp_3072_mul_d_53(t2, sd, r1); (void)sp_3072_add_53(&t1[i], &t1[i], t2); @@ -5824,7 +5764,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, } t1[53 - 1] += t1[53 - 2] >> 29; t1[53 - 2] &= 0x1fffffff; - r1 = t1[53 - 1] / dv; + r1 = sp_3072_word_div_word_53(t1[53 - 1], dv); sp_3072_mul_d_53(t2, sd, r1); sp_3072_sub_53(t1, t1, t2); @@ -5833,14 +5773,13 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_3072_cond_add_53(r, r, sd, 0 - ((r[52] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_53(r, r, sd, r[52] >> 31); sp_3072_norm_53(r); sp_3072_rshift_53(r, r, 1); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -5867,13 +5806,15 @@ static int sp_3072_mod_53(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 106]; @@ -5887,11 +5828,17 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 53 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 53 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5946,20 +5893,19 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(t[0], m, mp); n = sp_3072_cmp_53(t[0], m); - sp_3072_cond_sub_53(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 53 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 106]; @@ -5973,11 +5919,17 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 53 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 53 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -6032,19 +5984,18 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(t[0], m, mp); n = sp_3072_cmp_53(t[0], m); - sp_3072_cond_sub_53(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 53 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 106) + 106]; @@ -6059,11 +6010,17 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 106) + 106), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 106) + 106), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -6173,12 +6130,11 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(rt, m, mp); n = sp_3072_cmp_53(rt, m); - sp_3072_cond_sub_53(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 106); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -6243,7 +6199,7 @@ static sp_digit sp_3072_cmp_106(const sp_digit* a, const sp_digit* b) int i; for (i=105; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -6347,17 +6303,18 @@ static void sp_3072_mont_reduce_106(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_106(a + 106); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<105; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_3072_mul_add_106(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x7ffffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x7ffffffL; sp_3072_mul_add_106(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; @@ -6375,18 +6332,18 @@ static void sp_3072_mont_reduce_106(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<105; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_3072_mul_add_106(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x7ffffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x7ffffffL; sp_3072_mul_add_106(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; #endif sp_3072_mont_shift_106(a, a); - sp_3072_cond_sub_106(a, a, m, 0 - (((a[105] - m[105]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[105] - m[105]; + sp_3072_cond_sub_106(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_106(a); } @@ -6397,9 +6354,9 @@ static void sp_3072_mont_reduce_106(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_106(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_106(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_106(r, a, b); @@ -6411,9 +6368,9 @@ static void sp_3072_mont_mul_106(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). 
- * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_106(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_106(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_106(r, a); @@ -6441,6 +6398,7 @@ SP_NOINLINE static void sp_3072_mul_d_212(sp_digit* r, const sp_digit* a, r[212] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -6454,10 +6412,11 @@ static void sp_3072_cond_add_106(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 53; i++) { + for (i = 0; i < 106; i++) { r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -6488,135 +6447,96 @@ SP_NOINLINE static void sp_3072_rshift_106(sp_digit* r, const sp_digit* a, r[105] = a[105] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d 
- ((sp_int64)r * div); + r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_106(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -6633,11 +6553,10 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 106 + 3]; @@ -6648,7 +6567,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 106 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -6667,14 +6586,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, t1[106 + 106] += t1[106 + 106 - 1] >> 29; t1[106 + 106 - 1] &= 0x1fffffff; for (i=106; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[106 + i]; - d1 <<= 29; - d1 += t1[106 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_106(t1[106 + i], t1[106 + i - 1], dv); -#endif sp_3072_mul_d_106(t2, sd, r1); (void)sp_3072_sub_106(&t1[i], &t1[i], t2); @@ -6682,14 +6594,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, t1[106 + i] -= t2[106]; t1[106 + i] += t1[106 + i - 1] >> 29; t1[106 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[106 + i]; - d1 <<= 29; - d1 -= t1[106 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_106(-t1[106 + i], -t1[106 + i - 1], dv); -#endif r1 -= t1[106 + i]; sp_3072_mul_d_106(t2, sd, r1); (void)sp_3072_add_106(&t1[i], &t1[i], t2); @@ -6698,7 +6603,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, } t1[106 - 1] += t1[106 - 2] >> 29; t1[106 - 2] &= 0x1fffffff; - r1 = t1[106 - 1] / dv; + r1 = sp_3072_word_div_word_106(t1[106 - 1], dv); sp_3072_mul_d_106(t2, sd, r1); sp_3072_sub_106(t1, t1, t2); @@ -6707,14 +6612,13 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_3072_cond_add_106(r, r, sd, 0 - 
((r[105] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_106(r, r, sd, r[105] >> 31); sp_3072_norm_106(r); sp_3072_rshift_106(r, r, 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -6742,13 +6646,15 @@ static int sp_3072_mod_106(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 212]; @@ -6762,11 +6668,17 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 106 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 106 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -6821,20 +6733,19 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(t[0], m, mp); n = sp_3072_cmp_106(t[0], m); - sp_3072_cond_sub_106(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 106 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 212]; @@ -6848,11 +6759,17 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 106 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 106 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -6907,19 +6824,18 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(t[0], m, mp); n = sp_3072_cmp_106(t[0], m); - sp_3072_cond_sub_106(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 106 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 212) + 212]; @@ -6934,11 +6850,17 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 212) + 212), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 212) + 212), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -7031,12 +6953,11 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(rt, m, mp); n = sp_3072_cmp_106(rt, m); - sp_3072_cond_sub_106(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 212); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -7063,7 +6984,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[106 * 5]; @@ -7071,8 +6992,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -7081,7 +7002,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -7095,7 +7016,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -7110,12 +7031,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_3072_from_bin(a, 106, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -7134,7 +7055,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_106(a, a, m); } if (err == MP_OKAY) { - for (i=28; i>=0; i--) { 
+ for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -7150,21 +7071,20 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_106(r, m, mp); mp = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_106(r, r, m, ~(mp >> 31)); sp_3072_to_bin_106(r, out); *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[106 * 5]; @@ -7172,14 +7092,14 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 384U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -7193,7 +7113,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -7208,12 +7128,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, m = r + 106 * 2; sp_3072_from_bin(a, 106, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -7243,7 +7163,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_106(a, a, m); if (err == MP_OKAY) { - for 
(i=28; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -7259,8 +7179,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_106(r, m, mp); mp = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(mp >> 31)); } } } @@ -7270,7 +7189,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -7305,7 +7224,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[106 * 4]; @@ -7339,7 +7258,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -7364,21 +7283,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 106); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[106 * 4]; @@ -7412,7 +7331,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const 
mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -7437,14 +7356,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 106); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -7453,7 +7372,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[53 * 8]; @@ -7483,9 +7402,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 53 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -7516,6 +7441,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_53(tmpa); sp_3072_cond_add_53(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[52] >> 31)); sp_3072_cond_add_53(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[52] >> 31)); + sp_3072_norm_53(tmpa); sp_3072_from_mp(qi, 53, qim); sp_3072_mul_53(tmpa, tmpa, qi); @@ -7532,19 +7458,19 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 53 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[53 * 13]; @@ -7575,9 +7501,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 53 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -7614,6 +7546,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_53(tmpa); sp_3072_cond_add_53(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[52] >> 31)); sp_3072_cond_add_53(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[52] >> 31)); + sp_3072_norm_53(tmpa); sp_3072_mul_53(tmpa, tmpa, qi); err = sp_3072_mod_53(tmpa, tmpa, p); } @@ -7627,12 +7560,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 53 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -7728,7 +7661,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[106 * 4]; @@ -7751,7 +7684,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = 
MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 4, NULL, DYNAMIC_TYPE_DH); @@ -7776,20 +7709,20 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 106U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[106 * 4]; @@ -7813,7 +7746,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -7838,14 +7771,14 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 106U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -7875,11 +7808,13 @@ SP_NOINLINE static void sp_3072_lshift_106(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[319]; @@ -7894,11 +7829,17 @@ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 319, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 319, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -7963,17 +7904,15 @@ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_106(r, r, tmp); sp_3072_norm_106(r); o = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(o >> 31)); } sp_3072_mont_reduce_106(r, m, mp); n = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(n >> 31)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -7998,7 +7937,7 @@ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[106 * 4]; @@ -8022,7 +7961,7 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 106 * 4, NULL, DYNAMIC_TYPE_DH); @@ -8063,14 +8002,14 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 106U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -8093,7 +8032,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[53 * 4]; @@ -8116,7 +8055,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 53 * 4, NULL, 
DYNAMIC_TYPE_DH); @@ -8142,20 +8081,20 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 106U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[53 * 4]; @@ -8179,7 +8118,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 53 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -8205,14 +8144,14 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 106U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -8268,20 +8207,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 28 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 27); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 27); } #elif DIGIT_BIT > 28 - 
int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xfffffff; s = 28U - s; @@ -8311,12 +8253,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 28) { r[j] &= 0xfffffff; @@ -8361,9 +8303,9 @@ static void sp_3072_to_bin_112(sp_digit* r, byte* a) r[i+1] += r[i] >> 28; r[i] &= 0xfffffff; } - j = 3072 / 8 - 1; + j = 3079 / 8 - 1; a[j] = 0; - for (i=0; i<112 && j>=0; i++) { + for (i=0; i<110 && j>=0; i++) { b = 0; /* lint allow cast of mismatch sp_digit and int */ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ @@ -8501,374 +8443,235 @@ static void sp_3072_norm_110(sp_digit* a) SP_NOINLINE static void sp_3072_mul_14(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint64 t0 = ((sp_uint64)a[ 0]) * b[ 0]; - sp_uint64 t1 = ((sp_uint64)a[ 0]) * b[ 1] - + ((sp_uint64)a[ 1]) * b[ 0]; - sp_uint64 t2 = ((sp_uint64)a[ 0]) * b[ 2] - + ((sp_uint64)a[ 1]) * b[ 1] - + ((sp_uint64)a[ 2]) * b[ 0]; - sp_uint64 t3 = ((sp_uint64)a[ 0]) * b[ 3] - + ((sp_uint64)a[ 1]) * b[ 2] - + ((sp_uint64)a[ 2]) * b[ 1] - + ((sp_uint64)a[ 3]) * b[ 0]; - sp_uint64 t4 = ((sp_uint64)a[ 0]) * b[ 4] - + ((sp_uint64)a[ 1]) * b[ 3] - + ((sp_uint64)a[ 2]) * b[ 2] - + ((sp_uint64)a[ 3]) * b[ 1] - + ((sp_uint64)a[ 4]) * b[ 0]; - sp_uint64 t5 = ((sp_uint64)a[ 0]) * b[ 5] - + ((sp_uint64)a[ 1]) * b[ 4] - + ((sp_uint64)a[ 2]) * b[ 3] - + ((sp_uint64)a[ 3]) * b[ 2] - + ((sp_uint64)a[ 4]) * b[ 1] - + ((sp_uint64)a[ 5]) * b[ 0]; - sp_uint64 t6 = ((sp_uint64)a[ 0]) * b[ 6] - + ((sp_uint64)a[ 1]) * b[ 5] - + ((sp_uint64)a[ 2]) * b[ 4] - + ((sp_uint64)a[ 3]) * b[ 3] - + ((sp_uint64)a[ 4]) * b[ 2] - + 
((sp_uint64)a[ 5]) * b[ 1] - + ((sp_uint64)a[ 6]) * b[ 0]; - sp_uint64 t7 = ((sp_uint64)a[ 0]) * b[ 7] - + ((sp_uint64)a[ 1]) * b[ 6] - + ((sp_uint64)a[ 2]) * b[ 5] - + ((sp_uint64)a[ 3]) * b[ 4] - + ((sp_uint64)a[ 4]) * b[ 3] - + ((sp_uint64)a[ 5]) * b[ 2] - + ((sp_uint64)a[ 6]) * b[ 1] - + ((sp_uint64)a[ 7]) * b[ 0]; - sp_uint64 t8 = ((sp_uint64)a[ 0]) * b[ 8] - + ((sp_uint64)a[ 1]) * b[ 7] - + ((sp_uint64)a[ 2]) * b[ 6] - + ((sp_uint64)a[ 3]) * b[ 5] - + ((sp_uint64)a[ 4]) * b[ 4] - + ((sp_uint64)a[ 5]) * b[ 3] - + ((sp_uint64)a[ 6]) * b[ 2] - + ((sp_uint64)a[ 7]) * b[ 1] - + ((sp_uint64)a[ 8]) * b[ 0]; - sp_uint64 t9 = ((sp_uint64)a[ 0]) * b[ 9] - + ((sp_uint64)a[ 1]) * b[ 8] - + ((sp_uint64)a[ 2]) * b[ 7] - + ((sp_uint64)a[ 3]) * b[ 6] - + ((sp_uint64)a[ 4]) * b[ 5] - + ((sp_uint64)a[ 5]) * b[ 4] - + ((sp_uint64)a[ 6]) * b[ 3] - + ((sp_uint64)a[ 7]) * b[ 2] - + ((sp_uint64)a[ 8]) * b[ 1] - + ((sp_uint64)a[ 9]) * b[ 0]; - sp_uint64 t10 = ((sp_uint64)a[ 0]) * b[10] - + ((sp_uint64)a[ 1]) * b[ 9] - + ((sp_uint64)a[ 2]) * b[ 8] - + ((sp_uint64)a[ 3]) * b[ 7] - + ((sp_uint64)a[ 4]) * b[ 6] - + ((sp_uint64)a[ 5]) * b[ 5] - + ((sp_uint64)a[ 6]) * b[ 4] - + ((sp_uint64)a[ 7]) * b[ 3] - + ((sp_uint64)a[ 8]) * b[ 2] - + ((sp_uint64)a[ 9]) * b[ 1] - + ((sp_uint64)a[10]) * b[ 0]; - sp_uint64 t11 = ((sp_uint64)a[ 0]) * b[11] - + ((sp_uint64)a[ 1]) * b[10] - + ((sp_uint64)a[ 2]) * b[ 9] - + ((sp_uint64)a[ 3]) * b[ 8] - + ((sp_uint64)a[ 4]) * b[ 7] - + ((sp_uint64)a[ 5]) * b[ 6] - + ((sp_uint64)a[ 6]) * b[ 5] - + ((sp_uint64)a[ 7]) * b[ 4] - + ((sp_uint64)a[ 8]) * b[ 3] - + ((sp_uint64)a[ 9]) * b[ 2] - + ((sp_uint64)a[10]) * b[ 1] - + ((sp_uint64)a[11]) * b[ 0]; - sp_uint64 t12 = ((sp_uint64)a[ 0]) * b[12] - + ((sp_uint64)a[ 1]) * b[11] - + ((sp_uint64)a[ 2]) * b[10] - + ((sp_uint64)a[ 3]) * b[ 9] - + ((sp_uint64)a[ 4]) * b[ 8] - + ((sp_uint64)a[ 5]) * b[ 7] - + ((sp_uint64)a[ 6]) * b[ 6] - + ((sp_uint64)a[ 7]) * b[ 5] - + ((sp_uint64)a[ 8]) * b[ 4] - + ((sp_uint64)a[ 9]) * 
b[ 3] - + ((sp_uint64)a[10]) * b[ 2] - + ((sp_uint64)a[11]) * b[ 1] - + ((sp_uint64)a[12]) * b[ 0]; - sp_uint64 t13 = ((sp_uint64)a[ 0]) * b[13] - + ((sp_uint64)a[ 1]) * b[12] - + ((sp_uint64)a[ 2]) * b[11] - + ((sp_uint64)a[ 3]) * b[10] - + ((sp_uint64)a[ 4]) * b[ 9] - + ((sp_uint64)a[ 5]) * b[ 8] - + ((sp_uint64)a[ 6]) * b[ 7] - + ((sp_uint64)a[ 7]) * b[ 6] - + ((sp_uint64)a[ 8]) * b[ 5] - + ((sp_uint64)a[ 9]) * b[ 4] - + ((sp_uint64)a[10]) * b[ 3] - + ((sp_uint64)a[11]) * b[ 2] - + ((sp_uint64)a[12]) * b[ 1] - + ((sp_uint64)a[13]) * b[ 0]; - sp_uint64 t14 = ((sp_uint64)a[ 1]) * b[13] - + ((sp_uint64)a[ 2]) * b[12] - + ((sp_uint64)a[ 3]) * b[11] - + ((sp_uint64)a[ 4]) * b[10] - + ((sp_uint64)a[ 5]) * b[ 9] - + ((sp_uint64)a[ 6]) * b[ 8] - + ((sp_uint64)a[ 7]) * b[ 7] - + ((sp_uint64)a[ 8]) * b[ 6] - + ((sp_uint64)a[ 9]) * b[ 5] - + ((sp_uint64)a[10]) * b[ 4] - + ((sp_uint64)a[11]) * b[ 3] - + ((sp_uint64)a[12]) * b[ 2] - + ((sp_uint64)a[13]) * b[ 1]; - sp_uint64 t15 = ((sp_uint64)a[ 2]) * b[13] - + ((sp_uint64)a[ 3]) * b[12] - + ((sp_uint64)a[ 4]) * b[11] - + ((sp_uint64)a[ 5]) * b[10] - + ((sp_uint64)a[ 6]) * b[ 9] - + ((sp_uint64)a[ 7]) * b[ 8] - + ((sp_uint64)a[ 8]) * b[ 7] - + ((sp_uint64)a[ 9]) * b[ 6] - + ((sp_uint64)a[10]) * b[ 5] - + ((sp_uint64)a[11]) * b[ 4] - + ((sp_uint64)a[12]) * b[ 3] - + ((sp_uint64)a[13]) * b[ 2]; - sp_uint64 t16 = ((sp_uint64)a[ 3]) * b[13] - + ((sp_uint64)a[ 4]) * b[12] - + ((sp_uint64)a[ 5]) * b[11] - + ((sp_uint64)a[ 6]) * b[10] - + ((sp_uint64)a[ 7]) * b[ 9] - + ((sp_uint64)a[ 8]) * b[ 8] - + ((sp_uint64)a[ 9]) * b[ 7] - + ((sp_uint64)a[10]) * b[ 6] - + ((sp_uint64)a[11]) * b[ 5] - + ((sp_uint64)a[12]) * b[ 4] - + ((sp_uint64)a[13]) * b[ 3]; - sp_uint64 t17 = ((sp_uint64)a[ 4]) * b[13] - + ((sp_uint64)a[ 5]) * b[12] - + ((sp_uint64)a[ 6]) * b[11] - + ((sp_uint64)a[ 7]) * b[10] - + ((sp_uint64)a[ 8]) * b[ 9] - + ((sp_uint64)a[ 9]) * b[ 8] - + ((sp_uint64)a[10]) * b[ 7] - + ((sp_uint64)a[11]) * b[ 6] - + ((sp_uint64)a[12]) * b[ 
5] - + ((sp_uint64)a[13]) * b[ 4]; - sp_uint64 t18 = ((sp_uint64)a[ 5]) * b[13] - + ((sp_uint64)a[ 6]) * b[12] - + ((sp_uint64)a[ 7]) * b[11] - + ((sp_uint64)a[ 8]) * b[10] - + ((sp_uint64)a[ 9]) * b[ 9] - + ((sp_uint64)a[10]) * b[ 8] - + ((sp_uint64)a[11]) * b[ 7] - + ((sp_uint64)a[12]) * b[ 6] - + ((sp_uint64)a[13]) * b[ 5]; - sp_uint64 t19 = ((sp_uint64)a[ 6]) * b[13] - + ((sp_uint64)a[ 7]) * b[12] - + ((sp_uint64)a[ 8]) * b[11] - + ((sp_uint64)a[ 9]) * b[10] - + ((sp_uint64)a[10]) * b[ 9] - + ((sp_uint64)a[11]) * b[ 8] - + ((sp_uint64)a[12]) * b[ 7] - + ((sp_uint64)a[13]) * b[ 6]; - sp_uint64 t20 = ((sp_uint64)a[ 7]) * b[13] - + ((sp_uint64)a[ 8]) * b[12] - + ((sp_uint64)a[ 9]) * b[11] - + ((sp_uint64)a[10]) * b[10] - + ((sp_uint64)a[11]) * b[ 9] - + ((sp_uint64)a[12]) * b[ 8] - + ((sp_uint64)a[13]) * b[ 7]; - sp_uint64 t21 = ((sp_uint64)a[ 8]) * b[13] - + ((sp_uint64)a[ 9]) * b[12] - + ((sp_uint64)a[10]) * b[11] - + ((sp_uint64)a[11]) * b[10] - + ((sp_uint64)a[12]) * b[ 9] - + ((sp_uint64)a[13]) * b[ 8]; - sp_uint64 t22 = ((sp_uint64)a[ 9]) * b[13] - + ((sp_uint64)a[10]) * b[12] - + ((sp_uint64)a[11]) * b[11] - + ((sp_uint64)a[12]) * b[10] - + ((sp_uint64)a[13]) * b[ 9]; - sp_uint64 t23 = ((sp_uint64)a[10]) * b[13] - + ((sp_uint64)a[11]) * b[12] - + ((sp_uint64)a[12]) * b[11] - + ((sp_uint64)a[13]) * b[10]; - sp_uint64 t24 = ((sp_uint64)a[11]) * b[13] - + ((sp_uint64)a[12]) * b[12] - + ((sp_uint64)a[13]) * b[11]; - sp_uint64 t25 = ((sp_uint64)a[12]) * b[13] - + ((sp_uint64)a[13]) * b[12]; - sp_uint64 t26 = ((sp_uint64)a[13]) * b[13]; + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[14]; - t1 += t0 >> 28; r[ 0] = t0 & 0xfffffff; - t2 += t1 >> 28; r[ 1] = t1 & 0xfffffff; - t3 += t2 >> 28; r[ 2] = t2 & 0xfffffff; - t4 += t3 >> 28; r[ 3] = t3 & 0xfffffff; - t5 += t4 >> 28; r[ 4] = t4 & 0xfffffff; - t6 += t5 >> 28; r[ 5] = t5 & 0xfffffff; - t7 += t6 >> 28; r[ 6] = t6 & 0xfffffff; - t8 += t7 >> 28; r[ 7] = t7 & 0xfffffff; - t9 += t8 >> 28; r[ 8] = t8 & 0xfffffff; - t10 
+= t9 >> 28; r[ 9] = t9 & 0xfffffff; - t11 += t10 >> 28; r[10] = t10 & 0xfffffff; - t12 += t11 >> 28; r[11] = t11 & 0xfffffff; - t13 += t12 >> 28; r[12] = t12 & 0xfffffff; - t14 += t13 >> 28; r[13] = t13 & 0xfffffff; - t15 += t14 >> 28; r[14] = t14 & 0xfffffff; - t16 += t15 >> 28; r[15] = t15 & 0xfffffff; - t17 += t16 >> 28; r[16] = t16 & 0xfffffff; - t18 += t17 >> 28; r[17] = t17 & 0xfffffff; - t19 += t18 >> 28; r[18] = t18 & 0xfffffff; - t20 += t19 >> 28; r[19] = t19 & 0xfffffff; - t21 += t20 >> 28; r[20] = t20 & 0xfffffff; - t22 += t21 >> 28; r[21] = t21 & 0xfffffff; - t23 += t22 >> 28; r[22] = t22 & 0xfffffff; - t24 += t23 >> 28; r[23] = t23 & 0xfffffff; - t25 += t24 >> 28; r[24] = t24 & 0xfffffff; - t26 += t25 >> 28; r[25] = t25 & 0xfffffff; - r[27] = (sp_digit)(t26 >> 28); - r[26] = t26 & 0xfffffff; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_14(sp_digit* r, const sp_digit* a) -{ - sp_uint64 t0 = ((sp_uint64)a[ 0]) * a[ 0]; - sp_uint64 t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; - sp_uint64 t2 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 - + ((sp_uint64)a[ 1]) * a[ 1]; - sp_uint64 t3 = (((sp_uint64)a[ 0]) * a[ 3] - + ((sp_uint64)a[ 1]) * a[ 2]) * 2; - sp_uint64 t4 = (((sp_uint64)a[ 0]) * a[ 4] - + ((sp_uint64)a[ 1]) * a[ 3]) * 2 - + ((sp_uint64)a[ 2]) * a[ 2]; - sp_uint64 t5 = (((sp_uint64)a[ 0]) * a[ 5] - + ((sp_uint64)a[ 1]) * a[ 4] - + ((sp_uint64)a[ 2]) * a[ 3]) * 2; - sp_uint64 t6 = (((sp_uint64)a[ 0]) * a[ 6] - + ((sp_uint64)a[ 1]) * a[ 5] - + ((sp_uint64)a[ 2]) * a[ 4]) * 2 - + ((sp_uint64)a[ 3]) * a[ 3]; - sp_uint64 t7 = (((sp_uint64)a[ 0]) * a[ 7] - + ((sp_uint64)a[ 1]) * a[ 6] - + ((sp_uint64)a[ 2]) * a[ 5] - + ((sp_uint64)a[ 3]) * a[ 4]) * 2; - sp_uint64 t8 = (((sp_uint64)a[ 0]) * a[ 8] - + ((sp_uint64)a[ 1]) * a[ 7] - + ((sp_uint64)a[ 2]) * a[ 6] - + ((sp_uint64)a[ 3]) * a[ 5]) * 2 - + ((sp_uint64)a[ 4]) * a[ 4]; - sp_uint64 t9 = 
(((sp_uint64)a[ 0]) * a[ 9] - + ((sp_uint64)a[ 1]) * a[ 8] - + ((sp_uint64)a[ 2]) * a[ 7] - + ((sp_uint64)a[ 3]) * a[ 6] - + ((sp_uint64)a[ 4]) * a[ 5]) * 2; - sp_uint64 t10 = (((sp_uint64)a[ 0]) * a[10] - + ((sp_uint64)a[ 1]) * a[ 9] - + ((sp_uint64)a[ 2]) * a[ 8] - + ((sp_uint64)a[ 3]) * a[ 7] - + ((sp_uint64)a[ 4]) * a[ 6]) * 2 - + ((sp_uint64)a[ 5]) * a[ 5]; - sp_uint64 t11 = (((sp_uint64)a[ 0]) * a[11] - + ((sp_uint64)a[ 1]) * a[10] - + ((sp_uint64)a[ 2]) * a[ 9] - + ((sp_uint64)a[ 3]) * a[ 8] - + ((sp_uint64)a[ 4]) * a[ 7] - + ((sp_uint64)a[ 5]) * a[ 6]) * 2; - sp_uint64 t12 = (((sp_uint64)a[ 0]) * a[12] - + ((sp_uint64)a[ 1]) * a[11] - + ((sp_uint64)a[ 2]) * a[10] - + ((sp_uint64)a[ 3]) * a[ 9] - + ((sp_uint64)a[ 4]) * a[ 8] - + ((sp_uint64)a[ 5]) * a[ 7]) * 2 - + ((sp_uint64)a[ 6]) * a[ 6]; - sp_uint64 t13 = (((sp_uint64)a[ 0]) * a[13] - + ((sp_uint64)a[ 1]) * a[12] - + ((sp_uint64)a[ 2]) * a[11] - + ((sp_uint64)a[ 3]) * a[10] - + ((sp_uint64)a[ 4]) * a[ 9] - + ((sp_uint64)a[ 5]) * a[ 8] - + ((sp_uint64)a[ 6]) * a[ 7]) * 2; - sp_uint64 t14 = (((sp_uint64)a[ 1]) * a[13] - + ((sp_uint64)a[ 2]) * a[12] - + ((sp_uint64)a[ 3]) * a[11] - + ((sp_uint64)a[ 4]) * a[10] - + ((sp_uint64)a[ 5]) * a[ 9] - + ((sp_uint64)a[ 6]) * a[ 8]) * 2 - + ((sp_uint64)a[ 7]) * a[ 7]; - sp_uint64 t15 = (((sp_uint64)a[ 2]) * a[13] - + ((sp_uint64)a[ 3]) * a[12] - + ((sp_uint64)a[ 4]) * a[11] - + ((sp_uint64)a[ 5]) * a[10] - + ((sp_uint64)a[ 6]) * a[ 9] - + ((sp_uint64)a[ 7]) * a[ 8]) * 2; - sp_uint64 t16 = (((sp_uint64)a[ 3]) * a[13] - + ((sp_uint64)a[ 4]) * a[12] - + ((sp_uint64)a[ 5]) * a[11] - + ((sp_uint64)a[ 6]) * a[10] - + ((sp_uint64)a[ 7]) * a[ 9]) * 2 - + ((sp_uint64)a[ 8]) * a[ 8]; - sp_uint64 t17 = (((sp_uint64)a[ 4]) * a[13] - + ((sp_uint64)a[ 5]) * a[12] - + ((sp_uint64)a[ 6]) * a[11] - + ((sp_uint64)a[ 7]) * a[10] - + ((sp_uint64)a[ 8]) * a[ 9]) * 2; - sp_uint64 t18 = (((sp_uint64)a[ 5]) * a[13] - + ((sp_uint64)a[ 6]) * a[12] - + ((sp_uint64)a[ 7]) * a[11] - + 
((sp_uint64)a[ 8]) * a[10]) * 2 - + ((sp_uint64)a[ 9]) * a[ 9]; - sp_uint64 t19 = (((sp_uint64)a[ 6]) * a[13] - + ((sp_uint64)a[ 7]) * a[12] - + ((sp_uint64)a[ 8]) * a[11] - + ((sp_uint64)a[ 9]) * a[10]) * 2; - sp_uint64 t20 = (((sp_uint64)a[ 7]) * a[13] - + ((sp_uint64)a[ 8]) * a[12] - + ((sp_uint64)a[ 9]) * a[11]) * 2 - + ((sp_uint64)a[10]) * a[10]; - sp_uint64 t21 = (((sp_uint64)a[ 8]) * a[13] - + ((sp_uint64)a[ 9]) * a[12] - + ((sp_uint64)a[10]) * a[11]) * 2; - sp_uint64 t22 = (((sp_uint64)a[ 9]) * a[13] - + ((sp_uint64)a[10]) * a[12]) * 2 - + ((sp_uint64)a[11]) * a[11]; - sp_uint64 t23 = (((sp_uint64)a[10]) * a[13] - + ((sp_uint64)a[11]) * a[12]) * 2; - sp_uint64 t24 = (((sp_uint64)a[11]) * a[13]) * 2 - + ((sp_uint64)a[12]) * a[12]; - sp_uint64 t25 = (((sp_uint64)a[12]) * a[13]) * 2; - sp_uint64 t26 = ((sp_uint64)a[13]) * a[13]; - - t1 += t0 >> 28; r[ 0] = t0 & 0xfffffff; - t2 += t1 >> 28; r[ 1] = t1 & 0xfffffff; - t3 += t2 >> 28; r[ 2] = t2 & 0xfffffff; - t4 += t3 >> 28; r[ 3] = t3 & 0xfffffff; - t5 += t4 >> 28; r[ 4] = t4 & 0xfffffff; - t6 += t5 >> 28; r[ 5] = t5 & 0xfffffff; - t7 += t6 >> 28; r[ 6] = t6 & 0xfffffff; - t8 += t7 >> 28; r[ 7] = t7 & 0xfffffff; - t9 += t8 >> 28; r[ 8] = t8 & 0xfffffff; - t10 += t9 >> 28; r[ 9] = t9 & 0xfffffff; - t11 += t10 >> 28; r[10] = t10 & 0xfffffff; - t12 += t11 >> 28; r[11] = t11 & 0xfffffff; - t13 += t12 >> 28; r[12] = t12 & 0xfffffff; - t14 += t13 >> 28; r[13] = t13 & 0xfffffff; - t15 += t14 >> 28; r[14] = t14 & 0xfffffff; - t16 += t15 >> 28; r[15] = t15 & 0xfffffff; - t17 += t16 >> 28; r[16] = t16 & 0xfffffff; - t18 += t17 >> 28; r[17] = t17 & 0xfffffff; - t19 += t18 >> 28; r[18] = t18 & 0xfffffff; - t20 += t19 >> 28; r[19] = t19 & 0xfffffff; - t21 += t20 >> 28; r[20] = t20 & 0xfffffff; - t22 += t21 >> 28; r[21] = t21 & 0xfffffff; - t23 += t22 >> 28; r[22] = t22 & 0xfffffff; - t24 += t23 >> 28; r[23] = t23 & 0xfffffff; - t25 += t24 >> 28; r[24] = t24 & 0xfffffff; - t26 += t25 >> 28; r[25] = t25 & 0xfffffff; - r[27] = 
(sp_digit)(t26 >> 28); - r[26] = t26 & 0xfffffff; + t0 = ((sp_uint64)a[ 0]) * b[ 0]; + t1 = ((sp_uint64)a[ 0]) * b[ 1] + + ((sp_uint64)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[ 2] + + ((sp_uint64)a[ 1]) * b[ 1] + + ((sp_uint64)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[ 3] + + ((sp_uint64)a[ 1]) * b[ 2] + + ((sp_uint64)a[ 2]) * b[ 1] + + ((sp_uint64)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[ 4] + + ((sp_uint64)a[ 1]) * b[ 3] + + ((sp_uint64)a[ 2]) * b[ 2] + + ((sp_uint64)a[ 3]) * b[ 1] + + ((sp_uint64)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[ 5] + + ((sp_uint64)a[ 1]) * b[ 4] + + ((sp_uint64)a[ 2]) * b[ 3] + + ((sp_uint64)a[ 3]) * b[ 2] + + ((sp_uint64)a[ 4]) * b[ 1] + + ((sp_uint64)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[ 6] + + ((sp_uint64)a[ 1]) * b[ 5] + + ((sp_uint64)a[ 2]) * b[ 4] + + ((sp_uint64)a[ 3]) * b[ 3] + + ((sp_uint64)a[ 4]) * b[ 2] + + ((sp_uint64)a[ 5]) * b[ 1] + + ((sp_uint64)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[ 7] + + ((sp_uint64)a[ 1]) * b[ 6] + + ((sp_uint64)a[ 2]) * b[ 5] + + ((sp_uint64)a[ 3]) * b[ 4] + + ((sp_uint64)a[ 4]) * b[ 3] + + ((sp_uint64)a[ 5]) * b[ 2] + + ((sp_uint64)a[ 6]) * b[ 1] + + ((sp_uint64)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[ 8] + + ((sp_uint64)a[ 1]) * b[ 7] + + ((sp_uint64)a[ 2]) * b[ 6] + + ((sp_uint64)a[ 3]) * b[ 5] + + ((sp_uint64)a[ 4]) * b[ 4] + + ((sp_uint64)a[ 5]) * b[ 3] + + ((sp_uint64)a[ 6]) * b[ 2] + + ((sp_uint64)a[ 7]) * b[ 1] + + ((sp_uint64)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[ 9] + + ((sp_uint64)a[ 1]) * b[ 8] + + ((sp_uint64)a[ 2]) * b[ 7] + + ((sp_uint64)a[ 3]) * b[ 6] + + ((sp_uint64)a[ 4]) * b[ 5] + + ((sp_uint64)a[ 5]) * 
b[ 4] + + ((sp_uint64)a[ 6]) * b[ 3] + + ((sp_uint64)a[ 7]) * b[ 2] + + ((sp_uint64)a[ 8]) * b[ 1] + + ((sp_uint64)a[ 9]) * b[ 0]; + t[ 8] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[10] + + ((sp_uint64)a[ 1]) * b[ 9] + + ((sp_uint64)a[ 2]) * b[ 8] + + ((sp_uint64)a[ 3]) * b[ 7] + + ((sp_uint64)a[ 4]) * b[ 6] + + ((sp_uint64)a[ 5]) * b[ 5] + + ((sp_uint64)a[ 6]) * b[ 4] + + ((sp_uint64)a[ 7]) * b[ 3] + + ((sp_uint64)a[ 8]) * b[ 2] + + ((sp_uint64)a[ 9]) * b[ 1] + + ((sp_uint64)a[10]) * b[ 0]; + t[ 9] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[11] + + ((sp_uint64)a[ 1]) * b[10] + + ((sp_uint64)a[ 2]) * b[ 9] + + ((sp_uint64)a[ 3]) * b[ 8] + + ((sp_uint64)a[ 4]) * b[ 7] + + ((sp_uint64)a[ 5]) * b[ 6] + + ((sp_uint64)a[ 6]) * b[ 5] + + ((sp_uint64)a[ 7]) * b[ 4] + + ((sp_uint64)a[ 8]) * b[ 3] + + ((sp_uint64)a[ 9]) * b[ 2] + + ((sp_uint64)a[10]) * b[ 1] + + ((sp_uint64)a[11]) * b[ 0]; + t[10] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 0]) * b[12] + + ((sp_uint64)a[ 1]) * b[11] + + ((sp_uint64)a[ 2]) * b[10] + + ((sp_uint64)a[ 3]) * b[ 9] + + ((sp_uint64)a[ 4]) * b[ 8] + + ((sp_uint64)a[ 5]) * b[ 7] + + ((sp_uint64)a[ 6]) * b[ 6] + + ((sp_uint64)a[ 7]) * b[ 5] + + ((sp_uint64)a[ 8]) * b[ 4] + + ((sp_uint64)a[ 9]) * b[ 3] + + ((sp_uint64)a[10]) * b[ 2] + + ((sp_uint64)a[11]) * b[ 1] + + ((sp_uint64)a[12]) * b[ 0]; + t[11] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 0]) * b[13] + + ((sp_uint64)a[ 1]) * b[12] + + ((sp_uint64)a[ 2]) * b[11] + + ((sp_uint64)a[ 3]) * b[10] + + ((sp_uint64)a[ 4]) * b[ 9] + + ((sp_uint64)a[ 5]) * b[ 8] + + ((sp_uint64)a[ 6]) * b[ 7] + + ((sp_uint64)a[ 7]) * b[ 6] + + ((sp_uint64)a[ 8]) * b[ 5] + + ((sp_uint64)a[ 9]) * b[ 4] + + ((sp_uint64)a[10]) * b[ 3] + + ((sp_uint64)a[11]) * b[ 2] + + ((sp_uint64)a[12]) * b[ 1] + + ((sp_uint64)a[13]) * b[ 0]; + t[12] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 1]) * b[13] + + ((sp_uint64)a[ 2]) * b[12] + + ((sp_uint64)a[ 3]) * 
b[11] + + ((sp_uint64)a[ 4]) * b[10] + + ((sp_uint64)a[ 5]) * b[ 9] + + ((sp_uint64)a[ 6]) * b[ 8] + + ((sp_uint64)a[ 7]) * b[ 7] + + ((sp_uint64)a[ 8]) * b[ 6] + + ((sp_uint64)a[ 9]) * b[ 5] + + ((sp_uint64)a[10]) * b[ 4] + + ((sp_uint64)a[11]) * b[ 3] + + ((sp_uint64)a[12]) * b[ 2] + + ((sp_uint64)a[13]) * b[ 1]; + t[13] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 2]) * b[13] + + ((sp_uint64)a[ 3]) * b[12] + + ((sp_uint64)a[ 4]) * b[11] + + ((sp_uint64)a[ 5]) * b[10] + + ((sp_uint64)a[ 6]) * b[ 9] + + ((sp_uint64)a[ 7]) * b[ 8] + + ((sp_uint64)a[ 8]) * b[ 7] + + ((sp_uint64)a[ 9]) * b[ 6] + + ((sp_uint64)a[10]) * b[ 5] + + ((sp_uint64)a[11]) * b[ 4] + + ((sp_uint64)a[12]) * b[ 3] + + ((sp_uint64)a[13]) * b[ 2]; + r[14] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 3]) * b[13] + + ((sp_uint64)a[ 4]) * b[12] + + ((sp_uint64)a[ 5]) * b[11] + + ((sp_uint64)a[ 6]) * b[10] + + ((sp_uint64)a[ 7]) * b[ 9] + + ((sp_uint64)a[ 8]) * b[ 8] + + ((sp_uint64)a[ 9]) * b[ 7] + + ((sp_uint64)a[10]) * b[ 6] + + ((sp_uint64)a[11]) * b[ 5] + + ((sp_uint64)a[12]) * b[ 4] + + ((sp_uint64)a[13]) * b[ 3]; + r[15] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 4]) * b[13] + + ((sp_uint64)a[ 5]) * b[12] + + ((sp_uint64)a[ 6]) * b[11] + + ((sp_uint64)a[ 7]) * b[10] + + ((sp_uint64)a[ 8]) * b[ 9] + + ((sp_uint64)a[ 9]) * b[ 8] + + ((sp_uint64)a[10]) * b[ 7] + + ((sp_uint64)a[11]) * b[ 6] + + ((sp_uint64)a[12]) * b[ 5] + + ((sp_uint64)a[13]) * b[ 4]; + r[16] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 5]) * b[13] + + ((sp_uint64)a[ 6]) * b[12] + + ((sp_uint64)a[ 7]) * b[11] + + ((sp_uint64)a[ 8]) * b[10] + + ((sp_uint64)a[ 9]) * b[ 9] + + ((sp_uint64)a[10]) * b[ 8] + + ((sp_uint64)a[11]) * b[ 7] + + ((sp_uint64)a[12]) * b[ 6] + + ((sp_uint64)a[13]) * b[ 5]; + r[17] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 6]) * b[13] + + ((sp_uint64)a[ 7]) * b[12] + + ((sp_uint64)a[ 8]) * b[11] + + ((sp_uint64)a[ 9]) * b[10] + + ((sp_uint64)a[10]) * b[ 
9] + + ((sp_uint64)a[11]) * b[ 8] + + ((sp_uint64)a[12]) * b[ 7] + + ((sp_uint64)a[13]) * b[ 6]; + r[18] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 7]) * b[13] + + ((sp_uint64)a[ 8]) * b[12] + + ((sp_uint64)a[ 9]) * b[11] + + ((sp_uint64)a[10]) * b[10] + + ((sp_uint64)a[11]) * b[ 9] + + ((sp_uint64)a[12]) * b[ 8] + + ((sp_uint64)a[13]) * b[ 7]; + r[19] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[ 8]) * b[13] + + ((sp_uint64)a[ 9]) * b[12] + + ((sp_uint64)a[10]) * b[11] + + ((sp_uint64)a[11]) * b[10] + + ((sp_uint64)a[12]) * b[ 9] + + ((sp_uint64)a[13]) * b[ 8]; + r[20] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[ 9]) * b[13] + + ((sp_uint64)a[10]) * b[12] + + ((sp_uint64)a[11]) * b[11] + + ((sp_uint64)a[12]) * b[10] + + ((sp_uint64)a[13]) * b[ 9]; + r[21] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[10]) * b[13] + + ((sp_uint64)a[11]) * b[12] + + ((sp_uint64)a[12]) * b[11] + + ((sp_uint64)a[13]) * b[10]; + r[22] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[11]) * b[13] + + ((sp_uint64)a[12]) * b[12] + + ((sp_uint64)a[13]) * b[11]; + r[23] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = ((sp_uint64)a[12]) * b[13] + + ((sp_uint64)a[13]) * b[12]; + r[24] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[13]) * b[13]; + r[25] = t1 & 0xfffffff; t0 += t1 >> 28; + r[26] = t0 & 0xfffffff; + r[27] = (sp_digit)(t0 >> 28); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. (r = a + b) @@ -9004,28 +8807,6 @@ SP_NOINLINE static void sp_3072_mul_28(sp_digit* r, const sp_digit* a, sp_3072_norm_56(r); } -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -SP_NOINLINE static void sp_3072_sqr_28(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z1[28]; - sp_digit* a1 = z1; - sp_digit* z2 = r + 28; - (void)sp_3072_add_14(a1, a, &a[14]); - sp_3072_norm_14(a1); - sp_3072_sqr_14(z2, &a[14]); - sp_3072_sqr_14(z0, a); - sp_3072_sqr_14(z1, a1); - (void)sp_3072_sub_28(z1, z1, z2); - (void)sp_3072_sub_28(z1, z1, z0); - (void)sp_3072_add_28(r + 14, r + 14, z1); - sp_3072_norm_56(r); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -9125,28 +8906,6 @@ SP_NOINLINE static void sp_3072_mul_56(sp_digit* r, const sp_digit* a, sp_3072_norm_112(r); } -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_56(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z1[56]; - sp_digit* a1 = z1; - sp_digit* z2 = r + 56; - (void)sp_3072_add_28(a1, a, &a[28]); - sp_3072_norm_28(a1); - sp_3072_sqr_28(z2, &a[28]); - sp_3072_sqr_28(z0, a); - sp_3072_sqr_28(z1, a1); - (void)sp_3072_sub_56(z1, z1, z2); - (void)sp_3072_sub_56(z1, z1, z0); - (void)sp_3072_add_56(r + 28, r + 28, z1); - sp_3072_norm_112(r); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -9250,6 +9009,197 @@ SP_NOINLINE static void sp_3072_mul_112(sp_digit* r, const sp_digit* a, sp_3072_norm_224(r); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_sqr_14(sp_digit* r, const sp_digit* a) +{ + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[14]; + + t0 = ((sp_uint64)a[ 0]) * a[ 0]; + t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 + + ((sp_uint64)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[ 3] + + ((sp_uint64)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[ 4] + + ((sp_uint64)a[ 1]) * a[ 3]) * 2 + + ((sp_uint64)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[ 5] + + ((sp_uint64)a[ 1]) * a[ 4] + + ((sp_uint64)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[ 6] + + ((sp_uint64)a[ 1]) * a[ 5] + + ((sp_uint64)a[ 2]) * a[ 4]) * 2 + + ((sp_uint64)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[ 7] + + ((sp_uint64)a[ 1]) * a[ 6] + + ((sp_uint64)a[ 2]) * a[ 5] + + ((sp_uint64)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[ 8] + + ((sp_uint64)a[ 1]) * a[ 7] + + ((sp_uint64)a[ 2]) * a[ 6] + + ((sp_uint64)a[ 3]) * a[ 5]) * 2 + + ((sp_uint64)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[ 9] + + ((sp_uint64)a[ 1]) * a[ 8] + + ((sp_uint64)a[ 2]) * a[ 7] + + ((sp_uint64)a[ 3]) * a[ 6] + + ((sp_uint64)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[10] + + ((sp_uint64)a[ 1]) * a[ 9] + + ((sp_uint64)a[ 2]) * a[ 8] + + ((sp_uint64)a[ 3]) * a[ 7] + + ((sp_uint64)a[ 4]) * a[ 6]) * 2 + + ((sp_uint64)a[ 5]) * a[ 5]; + t[ 9] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[11] + + ((sp_uint64)a[ 1]) * a[10] + + ((sp_uint64)a[ 2]) * a[ 9] + + ((sp_uint64)a[ 3]) * a[ 8] + + ((sp_uint64)a[ 4]) * a[ 7] + + ((sp_uint64)a[ 5]) * a[ 6]) * 2; + t[10] = t0 & 0xfffffff; t1 += 
t0 >> 28; + t0 = (((sp_uint64)a[ 0]) * a[12] + + ((sp_uint64)a[ 1]) * a[11] + + ((sp_uint64)a[ 2]) * a[10] + + ((sp_uint64)a[ 3]) * a[ 9] + + ((sp_uint64)a[ 4]) * a[ 8] + + ((sp_uint64)a[ 5]) * a[ 7]) * 2 + + ((sp_uint64)a[ 6]) * a[ 6]; + t[11] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 0]) * a[13] + + ((sp_uint64)a[ 1]) * a[12] + + ((sp_uint64)a[ 2]) * a[11] + + ((sp_uint64)a[ 3]) * a[10] + + ((sp_uint64)a[ 4]) * a[ 9] + + ((sp_uint64)a[ 5]) * a[ 8] + + ((sp_uint64)a[ 6]) * a[ 7]) * 2; + t[12] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 1]) * a[13] + + ((sp_uint64)a[ 2]) * a[12] + + ((sp_uint64)a[ 3]) * a[11] + + ((sp_uint64)a[ 4]) * a[10] + + ((sp_uint64)a[ 5]) * a[ 9] + + ((sp_uint64)a[ 6]) * a[ 8]) * 2 + + ((sp_uint64)a[ 7]) * a[ 7]; + t[13] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 2]) * a[13] + + ((sp_uint64)a[ 3]) * a[12] + + ((sp_uint64)a[ 4]) * a[11] + + ((sp_uint64)a[ 5]) * a[10] + + ((sp_uint64)a[ 6]) * a[ 9] + + ((sp_uint64)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 3]) * a[13] + + ((sp_uint64)a[ 4]) * a[12] + + ((sp_uint64)a[ 5]) * a[11] + + ((sp_uint64)a[ 6]) * a[10] + + ((sp_uint64)a[ 7]) * a[ 9]) * 2 + + ((sp_uint64)a[ 8]) * a[ 8]; + r[15] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 4]) * a[13] + + ((sp_uint64)a[ 5]) * a[12] + + ((sp_uint64)a[ 6]) * a[11] + + ((sp_uint64)a[ 7]) * a[10] + + ((sp_uint64)a[ 8]) * a[ 9]) * 2; + r[16] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 5]) * a[13] + + ((sp_uint64)a[ 6]) * a[12] + + ((sp_uint64)a[ 7]) * a[11] + + ((sp_uint64)a[ 8]) * a[10]) * 2 + + ((sp_uint64)a[ 9]) * a[ 9]; + r[17] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 6]) * a[13] + + ((sp_uint64)a[ 7]) * a[12] + + ((sp_uint64)a[ 8]) * a[11] + + ((sp_uint64)a[ 9]) * a[10]) * 2; + r[18] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 7]) * a[13] + + ((sp_uint64)a[ 8]) * a[12] + + ((sp_uint64)a[ 9]) * a[11]) * 2 + + 
((sp_uint64)a[10]) * a[10]; + r[19] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[ 8]) * a[13] + + ((sp_uint64)a[ 9]) * a[12] + + ((sp_uint64)a[10]) * a[11]) * 2; + r[20] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[ 9]) * a[13] + + ((sp_uint64)a[10]) * a[12]) * 2 + + ((sp_uint64)a[11]) * a[11]; + r[21] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[10]) * a[13] + + ((sp_uint64)a[11]) * a[12]) * 2; + r[22] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = (((sp_uint64)a[11]) * a[13]) * 2 + + ((sp_uint64)a[12]) * a[12]; + r[23] = t1 & 0xfffffff; t0 += t1 >> 28; + t1 = (((sp_uint64)a[12]) * a[13]) * 2; + r[24] = t0 & 0xfffffff; t1 += t0 >> 28; + t0 = ((sp_uint64)a[13]) * a[13]; + r[25] = t1 & 0xfffffff; t0 += t1 >> 28; + r[26] = t0 & 0xfffffff; + r[27] = (sp_digit)(t0 >> 28); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_28(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[28]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 28; + (void)sp_3072_add_14(a1, a, &a[14]); + sp_3072_norm_14(a1); + sp_3072_sqr_14(z2, &a[14]); + sp_3072_sqr_14(z0, a); + sp_3072_sqr_14(z1, a1); + (void)sp_3072_sub_28(z1, z1, z2); + (void)sp_3072_sub_28(z1, z1, z0); + (void)sp_3072_add_28(r + 14, r + 14, z1); + sp_3072_norm_56(r); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_sqr_56(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[56]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 56; + (void)sp_3072_add_28(a1, a, &a[28]); + sp_3072_norm_28(a1); + sp_3072_sqr_28(z2, &a[28]); + sp_3072_sqr_28(z0, a); + sp_3072_sqr_28(z1, a1); + (void)sp_3072_sub_56(z1, z1, z2); + (void)sp_3072_sub_56(z1, z1, z0); + (void)sp_3072_add_56(r + 28, r + 28, z1); + sp_3072_norm_112(r); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -9273,7 +9223,7 @@ SP_NOINLINE static void sp_3072_sqr_112(sp_digit* r, const sp_digit* a) } #endif /* !WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -9385,14 +9335,14 @@ static sp_digit sp_3072_cmp_56(const sp_digit* a, const sp_digit* b) int i; for (i = 48; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 27); } return r; @@ -9551,21 +9501,22 @@ static void sp_3072_mont_reduce_56(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_56(a + 55); for (i=0; i<54; i++) { - mu = (a[i] * mp) & 0xfffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffff; sp_3072_mul_add_56(a+i, m, mu); a[i+1] += a[i] >> 28; } - mu = (a[i] * mp) & 0xffffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xffffffL; sp_3072_mul_add_56(a+i, m, mu); a[i+1] += a[i] >> 28; a[i] &= 0xfffffff; sp_3072_mont_shift_56(a, a); - sp_3072_cond_sub_56(a, a, m, 0 - (((a[54] - m[54]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[54] - m[54]; + sp_3072_cond_sub_56(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_56(a); } @@ -9576,9 +9527,9 @@ static void sp_3072_mont_reduce_56(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_56(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_56(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_56(r, a, b); @@ -9590,9 +9541,9 @@ static void sp_3072_mont_mul_56(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_3072_mont_sqr_56(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_56(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_56(r, a); @@ -9639,6 +9590,7 @@ SP_NOINLINE static void sp_3072_mul_d_56(sp_digit* r, const sp_digit* a, r[56] = (sp_digit)(t & 0xfffffff); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -9652,7 +9604,7 @@ static void sp_3072_cond_add_56(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 48; i += 8) { + for (i = 0; i < 56; i += 8) { r[i + 0] = a[i + 0] + (b[i + 0] & m); r[i + 1] = a[i + 1] + (b[i + 1] & m); r[i + 2] = a[i + 2] + (b[i + 2] & m); @@ -9662,14 +9614,8 @@ static void sp_3072_cond_add_56(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } - r[48] = a[48] + (b[48] & m); - r[49] = a[49] + (b[49] & m); - r[50] = a[50] + (b[50] & m); - r[51] = a[51] + (b[51] & m); - r[52] = a[52] + (b[52] & m); - r[53] = a[53] + (b[53] & m); - r[54] = a[54] + (b[54] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_3072_rshift_56(sp_digit* r, const sp_digit* a, byte n) @@ -9696,95 +9642,96 @@ SP_NOINLINE static void sp_3072_rshift_56(sp_digit* r, const sp_digit* a, r[55] = a[55] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 28) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d 
>> 28); + sp_digit t0 = (sp_digit)(d & 0xfffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 26; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 27) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 28); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 56) - (sp_digit)(d >> 56); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 13) + 1; - /* All 28 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 25); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 22) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 26); + t = (t / dv) << 13; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 19) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 11); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 16) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 13) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 10) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 7) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 4) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 1) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 28 bits from d1 and top 3 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_56(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -9801,11 +9748,10 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 56 + 3]; @@ -9816,7 +9762,7 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 56 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -9835,28 +9781,14 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, t1[55 + 55] += t1[55 + 55 - 1] >> 28; t1[55 + 55 - 1] &= 0xfffffff; for (i=55; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[55 + i]; - d1 <<= 28; - d1 += t1[55 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_56(t1[55 + i], t1[55 + i - 1], dv); -#endif sp_3072_mul_d_56(t2, sd, r1); (void)sp_3072_sub_56(&t1[i], &t1[i], t2); sp_3072_norm_55(&t1[i]); t1[55 + i] += t1[55 + i - 1] >> 28; t1[55 + i - 1] &= 0xfffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[55 + i]; - d1 <<= 28; - d1 -= t1[55 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_56(-t1[55 + i], -t1[55 + i - 1], dv); -#endif r1 -= t1[55 + i]; sp_3072_mul_d_56(t2, sd, r1); (void)sp_3072_add_56(&t1[i], &t1[i], t2); @@ -9865,7 +9797,7 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, } t1[55 - 1] += t1[55 - 2] >> 28; t1[55 - 2] &= 0xfffffff; - r1 = t1[55 - 1] / dv; + r1 = sp_3072_word_div_word_56(t1[55 - 1], dv); sp_3072_mul_d_56(t2, sd, r1); sp_3072_sub_56(t1, t1, t2); @@ -9874,15 +9806,14 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 28; r[i] &= 0xfffffff; } - sp_3072_cond_add_56(r, r, sd, 0 - ((r[54] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_56(r, r, sd, r[54] >> 31); sp_3072_norm_55(r); sp_3072_rshift_56(r, r, 4); r[55] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -9909,13 +9840,15 @@ static int sp_3072_mod_56(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 112]; @@ -9929,11 +9862,17 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 56 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 56 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -9988,20 +9927,19 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(t[0], m, mp); n = sp_3072_cmp_56(t[0], m); - sp_3072_cond_sub_56(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 56 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 112]; @@ -10015,11 +9953,17 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 56 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 56 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10074,19 +10018,18 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(t[0], m, mp); n = sp_3072_cmp_56(t[0], m); - sp_3072_cond_sub_56(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 56 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 112) + 112]; @@ -10101,11 +10044,17 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 112) + 112), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 112) + 112), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10215,12 +10164,11 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(rt, m, mp); n = sp_3072_cmp_56(rt, m); - sp_3072_cond_sub_56(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 112); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -10281,14 +10229,14 @@ static sp_digit sp_3072_cmp_112(const sp_digit* a, const sp_digit* b) int i; for (i = 104; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 27); } return r; @@ -10446,17 +10394,18 @@ static void sp_3072_mont_reduce_112(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_112(a + 110); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<109; i++) { - mu = (a[i] * mp) & 0xfffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffff; sp_3072_mul_add_112(a+i, m, mu); a[i+1] += a[i] >> 28; } - mu = (a[i] * mp) & 0xfffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffL; sp_3072_mul_add_112(a+i, m, mu); a[i+1] += a[i] >> 28; a[i] &= 0xfffffff; @@ -10474,18 +10423,18 @@ static void sp_3072_mont_reduce_112(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<109; i++) { - mu = (a[i] * mp) & 0xfffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffff; sp_3072_mul_add_112(a+i, m, mu); a[i+1] += a[i] >> 28; } - mu = (a[i] * mp) & 0xfffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffL; sp_3072_mul_add_112(a+i, m, mu); a[i+1] += a[i] >> 28; a[i] &= 0xfffffff; #endif sp_3072_mont_shift_112(a, a); - sp_3072_cond_sub_112(a, a, m, 0 - (((a[109] - m[109]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[109] - m[109]; + sp_3072_cond_sub_112(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_112(a); } @@ -10496,9 +10445,9 @@ static void sp_3072_mont_reduce_112(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_112(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_112(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_112(r, a, b); @@ -10510,9 +10459,9 @@ static void sp_3072_mont_mul_112(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_112(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_112(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_112(r, a); @@ -10559,6 +10508,7 @@ SP_NOINLINE static void sp_3072_mul_d_224(sp_digit* r, const sp_digit* a, r[224] = (sp_digit)(t & 0xfffffff); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -10572,7 +10522,7 @@ static void sp_3072_cond_add_112(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 104; i += 8) { + for (i = 0; i < 112; i += 8) { r[i + 0] = a[i + 0] + (b[i + 0] & m); r[i + 1] = a[i + 1] + (b[i + 1] & m); r[i + 2] = a[i + 2] + (b[i + 2] & m); @@ -10582,13 +10532,8 @@ static void sp_3072_cond_add_112(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } - r[104] = a[104] + (b[104] & m); - r[105] = a[105] + (b[105] & m); - r[106] = a[106] + (b[106] & m); - r[107] = a[107] + (b[107] & m); - r[108] = a[108] + (b[108] & m); - r[109] = a[109] + (b[109] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_3072_rshift_112(sp_digit* r, const sp_digit* a, byte n) @@ -10615,95 +10560,96 @@ SP_NOINLINE static void sp_3072_rshift_112(sp_digit* r, const sp_digit* a, r[111] = a[111] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 28) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 28); + sp_digit t0 = (sp_digit)(d & 0xfffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 26; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 27) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += 
(sp_digit)(m >> 28); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 56) - (sp_digit)(d >> 56); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 13) + 1; - /* All 28 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 25); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 22) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 26); + t = (t / dv) << 13; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 19) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 11); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 16) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 13) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 10) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 7) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 4) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 1) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 28 bits from d1 and top 3 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_112(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -10720,11 +10666,10 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 112 + 3]; @@ -10735,7 +10680,7 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 112 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -10754,28 +10699,14 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, t1[110 + 110] += t1[110 + 110 - 1] >> 28; t1[110 + 110 - 1] &= 0xfffffff; for (i=110; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[110 + i]; - d1 <<= 28; - d1 += t1[110 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_112(t1[110 + i], t1[110 + i - 1], dv); -#endif sp_3072_mul_d_112(t2, sd, r1); (void)sp_3072_sub_112(&t1[i], &t1[i], t2); sp_3072_norm_110(&t1[i]); t1[110 + i] += t1[110 + i - 1] >> 28; t1[110 + i - 1] &= 0xfffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[110 + i]; - d1 <<= 28; - d1 -= 
t1[110 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_112(-t1[110 + i], -t1[110 + i - 1], dv); -#endif r1 -= t1[110 + i]; sp_3072_mul_d_112(t2, sd, r1); (void)sp_3072_add_112(&t1[i], &t1[i], t2); @@ -10784,7 +10715,7 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, } t1[110 - 1] += t1[110 - 2] >> 28; t1[110 - 2] &= 0xfffffff; - r1 = t1[110 - 1] / dv; + r1 = sp_3072_word_div_word_112(t1[110 - 1], dv); sp_3072_mul_d_112(t2, sd, r1); sp_3072_sub_112(t1, t1, t2); @@ -10793,8 +10724,7 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 28; r[i] &= 0xfffffff; } - sp_3072_cond_add_112(r, r, sd, 0 - ((r[109] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_112(r, r, sd, r[109] >> 31); sp_3072_norm_110(r); sp_3072_rshift_112(r, r, 8); @@ -10802,7 +10732,7 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, r[111] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -10832,13 +10762,15 @@ static int sp_3072_mod_112(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 224]; @@ -10852,11 +10784,17 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 112 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 112 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10911,20 +10849,19 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(t[0], m, mp); n = sp_3072_cmp_112(t[0], m); - sp_3072_cond_sub_112(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 112 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 224]; @@ -10938,11 +10875,17 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 112 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 112 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10997,19 +10940,18 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(t[0], m, mp); n = sp_3072_cmp_112(t[0], m); - sp_3072_cond_sub_112(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 112 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 224) + 224]; @@ -11024,11 +10966,17 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 224) + 224), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 224) + 224), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -11121,12 +11069,11 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(rt, m, mp); n = sp_3072_cmp_112(rt, m); - sp_3072_cond_sub_112(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 224); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -11155,7 +11102,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[112 * 5]; @@ -11163,8 +11110,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -11173,7 +11120,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 28) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -11187,7 +11134,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -11202,12 +11149,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_3072_from_bin(a, 112, in, inLen); -#if DIGIT_BIT >= 28 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -11226,7 +11173,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_112(a, a, m); } if (err == MP_OKAY) { - for (i=27; 
i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -11242,21 +11189,20 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_112(r, m, mp); mp = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_112(r, r, m, ~(mp >> 31)); sp_3072_to_bin_112(r, out); *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[112 * 5]; @@ -11264,14 +11210,14 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 384U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 28) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -11285,7 +11231,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -11300,12 +11246,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, m = r + 112 * 2; sp_3072_from_bin(a, 112, in, inLen); -#if DIGIT_BIT >= 28 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -11335,7 +11281,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_112(a, a, m); if (err 
== MP_OKAY) { - for (i=27; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -11351,8 +11297,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_112(r, m, mp); mp = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(mp >> 31)); } } } @@ -11362,7 +11307,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -11397,7 +11342,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[112 * 4]; @@ -11431,7 +11376,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -11456,21 +11401,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 112); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[112 * 4]; @@ -11504,7 +11449,7 @@ int 
sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -11529,14 +11474,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 112); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -11545,7 +11490,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[56 * 8]; @@ -11575,9 +11520,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -11608,6 +11559,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_55(tmpa); sp_3072_cond_add_56(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[54] >> 31)); sp_3072_cond_add_56(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[54] >> 31)); + sp_3072_norm_56(tmpa); sp_3072_from_mp(qi, 56, qim); sp_3072_mul_56(tmpa, tmpa, qi); @@ -11624,19 +11576,19 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 56 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[56 * 13]; @@ -11667,9 +11619,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -11706,6 +11664,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_55(tmpa); sp_3072_cond_add_56(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[54] >> 31)); sp_3072_cond_add_56(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[54] >> 31)); + sp_3072_norm_56(tmpa); sp_3072_mul_56(tmpa, tmpa, qi); err = sp_3072_mod_56(tmpa, tmpa, p); } @@ -11719,12 +11678,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 56 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -11750,8 +11709,8 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ #if DIGIT_BIT == 28 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 112); - r->used = 112; + XMEMCPY(r->dp, a, 
sizeof(sp_digit) * 110); + r->used = 110; mp_clamp(r); #elif DIGIT_BIT < 28 int i; @@ -11759,7 +11718,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 112; i++) { + for (i = 0; i < 110; i++) { r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; s = DIGIT_BIT - s; @@ -11784,7 +11743,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 112; i++) { + for (i = 0; i < 110; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; if (s + 28 >= DIGIT_BIT) { #if DIGIT_BIT != 32 && DIGIT_BIT != 64 @@ -11820,7 +11779,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[112 * 4]; @@ -11843,7 +11802,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 4, NULL, DYNAMIC_TYPE_DH); @@ -11868,20 +11827,20 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 112U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[112 * 4]; @@ -11905,7 +11864,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } 
-#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -11930,14 +11889,14 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 112U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -12188,11 +12147,13 @@ SP_NOINLINE static void sp_3072_lshift_112(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[337]; @@ -12207,11 +12168,17 @@ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 337, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 337, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12276,17 +12243,15 @@ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_112(r, r, tmp); sp_3072_norm_112(r); o = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(o >> 31)); } sp_3072_mont_reduce_112(r, m, mp); n = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(n >> 31)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -12311,7 +12276,7 @@ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[112 * 4]; @@ -12335,7 +12300,7 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 112 * 4, NULL, DYNAMIC_TYPE_DH); @@ -12376,14 +12341,14 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 112U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -12406,7 +12371,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[56 * 4]; @@ -12429,7 +12394,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 4, 
NULL, DYNAMIC_TYPE_DH); @@ -12455,20 +12420,20 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 112U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[56 * 4]; @@ -12492,7 +12457,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -12518,14 +12483,14 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 112U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -12585,20 +12550,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 29 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 28); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 28); } #elif 
DIGIT_BIT > 29 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffff; s = 29U - s; @@ -12628,12 +12596,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 29) { r[j] &= 0x1fffffff; @@ -12678,7 +12646,7 @@ static void sp_4096_to_bin_142(sp_digit* r, byte* a) r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - j = 4096 / 8 - 1; + j = 4103 / 8 - 1; a[j] = 0; for (i=0; i<142 && j>=0; i++) { b = 0; @@ -12854,7 +12822,7 @@ SP_NOINLINE static void sp_4096_sqr_142(sp_digit* r, const sp_digit* a) r[0] = (sp_digit)(c >> 29); } -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -12952,7 +12920,7 @@ static sp_digit sp_4096_cmp_71(const sp_digit* a, const sp_digit* b) int i; for (i=70; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -13059,21 +13027,22 @@ static void sp_4096_mont_reduce_71(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_71(a + 71); for (i=0; i<70; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_4096_mul_add_71(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x3ffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffL; sp_4096_mul_add_71(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_4096_mont_shift_71(a, a); - sp_4096_cond_sub_71(a, a, m, 0 - (((a[70] - m[70]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[70] - m[70]; + sp_4096_cond_sub_71(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_71(a); } @@ -13139,9 +13108,9 @@ SP_NOINLINE static void sp_4096_mul_71(sp_digit* r, const sp_digit* a, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_71(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_71(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_71(r, a, b); @@ -13217,9 +13186,9 @@ SP_NOINLINE static void sp_4096_sqr_71(sp_digit* r, const sp_digit* a) * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_71(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_71(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_71(r, a); @@ -13247,6 +13216,7 @@ SP_NOINLINE static void sp_4096_mul_d_71(sp_digit* r, const sp_digit* a, r[71] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -13264,6 +13234,7 @@ static void sp_4096_cond_add_71(sp_digit* r, const sp_digit* a, r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. 
(r = a + b) * @@ -13294,135 +13265,96 @@ SP_NOINLINE static void sp_4096_rshift_71(sp_digit* r, const sp_digit* a, r[70] = a[70] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_71(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -13439,11 +13371,10 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 71 + 3]; @@ -13454,7 +13385,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 71 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -13473,14 +13404,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, t1[71 + 71] += t1[71 + 71 - 1] >> 29; t1[71 + 71 - 1] &= 0x1fffffff; for (i=71; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[71 + i]; - d1 <<= 29; - d1 += t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_71(t1[71 + i], t1[71 + i - 1], dv); -#endif sp_4096_mul_d_71(t2, sd, r1); (void)sp_4096_sub_71(&t1[i], &t1[i], t2); @@ -13488,14 +13412,7 @@ 
static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, t1[71 + i] -= t2[71]; t1[71 + i] += t1[71 + i - 1] >> 29; t1[71 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[71 + i]; - d1 <<= 29; - d1 -= t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_71(-t1[71 + i], -t1[71 + i - 1], dv); -#endif r1 -= t1[71 + i]; sp_4096_mul_d_71(t2, sd, r1); (void)sp_4096_add_71(&t1[i], &t1[i], t2); @@ -13504,7 +13421,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, } t1[71 - 1] += t1[71 - 2] >> 29; t1[71 - 2] &= 0x1fffffff; - r1 = t1[71 - 1] / dv; + r1 = sp_4096_word_div_word_71(t1[71 - 1], dv); sp_4096_mul_d_71(t2, sd, r1); sp_4096_sub_71(t1, t1, t2); @@ -13513,14 +13430,13 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_4096_cond_add_71(r, r, sd, 0 - ((r[70] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_71(r, r, sd, r[70] >> 31); sp_4096_norm_71(r); sp_4096_rshift_71(r, r, 11); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -13547,13 +13463,15 @@ static int sp_4096_mod_71(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 142]; @@ -13567,11 +13485,17 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 71 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 71 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13626,20 +13550,19 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(t[0], m, mp); n = sp_4096_cmp_71(t[0], m); - sp_4096_cond_sub_71(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 71 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 142]; @@ -13653,11 +13576,17 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 71 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 71 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13712,19 +13641,18 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(t[0], m, mp); n = sp_4096_cmp_71(t[0], m); - sp_4096_cond_sub_71(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 71 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 142) + 142]; @@ -13739,11 +13667,17 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 142) + 142), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 142) + 142), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13853,12 +13787,11 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(rt, m, mp); n = sp_4096_cmp_71(rt, m); - sp_4096_cond_sub_71(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 142); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -13924,7 +13857,7 @@ static sp_digit sp_4096_cmp_142(const sp_digit* a, const sp_digit* b) int i; for (i=141; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -14028,17 +13961,18 @@ static void sp_4096_mont_reduce_142(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_142(a + 142); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<141; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_4096_mul_add_142(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x7fL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x7fL; sp_4096_mul_add_142(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; @@ -14056,18 +13990,18 @@ static void sp_4096_mont_reduce_142(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<141; i++) { - mu = (a[i] * mp) & 0x1fffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_4096_mul_add_142(a+i, m, mu); a[i+1] += a[i] >> 29; } - mu = (a[i] * mp) & 0x7fL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x7fL; sp_4096_mul_add_142(a+i, m, mu); a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; #endif sp_4096_mont_shift_142(a, a); - sp_4096_cond_sub_142(a, a, m, 0 - (((a[141] - m[141]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[141] - m[141]; + sp_4096_cond_sub_142(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_142(a); } @@ -14078,9 +14012,9 @@ static void sp_4096_mont_reduce_142(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_142(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_142(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_142(r, a, b); @@ -14092,9 +14026,9 @@ static void sp_4096_mont_mul_142(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. 
+ * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_142(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_142(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_142(r, a); @@ -14122,6 +14056,7 @@ SP_NOINLINE static void sp_4096_mul_d_284(sp_digit* r, const sp_digit* a, r[284] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -14135,10 +14070,11 @@ static void sp_4096_cond_add_142(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 71; i++) { + for (i = 0; i < 142; i++) { r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -14169,135 +14105,96 @@ SP_NOINLINE static void sp_4096_rshift_142(sp_digit* r, const sp_digit* a, r[141] = a[141] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 29); + sp_digit t0 = (sp_digit)(d & 0x1fffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 27; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 28) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 29); + m = d - ((sp_int64)r * div); + 
r += (sp_digit)(m >> 58) - (sp_digit)(d >> 58); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_142(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -14314,11 +14211,10 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 142 + 3]; @@ -14329,7 +14225,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 142 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -14348,14 +14244,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, t1[142 + 142] += t1[142 + 142 - 1] >> 29; t1[142 + 142 - 1] &= 0x1fffffff; for (i=142; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[142 + i]; - d1 <<= 29; - d1 += t1[142 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_142(t1[142 + i], t1[142 + i - 1], dv); -#endif sp_4096_mul_d_142(t2, sd, r1); (void)sp_4096_sub_142(&t1[i], &t1[i], t2); @@ -14363,14 +14252,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, t1[142 + i] -= t2[142]; t1[142 + i] += t1[142 + i - 1] >> 29; t1[142 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[142 + i]; - d1 <<= 29; - d1 -= t1[142 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_142(-t1[142 + i], -t1[142 + i - 1], dv); -#endif r1 -= t1[142 + i]; sp_4096_mul_d_142(t2, sd, r1); (void)sp_4096_add_142(&t1[i], &t1[i], t2); @@ -14379,7 +14261,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, } t1[142 - 1] += t1[142 - 2] >> 29; t1[142 - 2] &= 0x1fffffff; - r1 = t1[142 - 1] / dv; + r1 = sp_4096_word_div_word_142(t1[142 - 1], dv); sp_4096_mul_d_142(t2, sd, r1); sp_4096_sub_142(t1, t1, t2); @@ -14388,14 +14270,13 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - 
sp_4096_cond_add_142(r, r, sd, 0 - ((r[141] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_142(r, r, sd, r[141] >> 31); sp_4096_norm_142(r); sp_4096_rshift_142(r, r, 22); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -14423,13 +14304,15 @@ static int sp_4096_mod_142(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 284]; @@ -14443,11 +14326,17 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 142 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 142 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -14502,20 +14391,19 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(t[0], m, mp); n = sp_4096_cmp_142(t[0], m); - sp_4096_cond_sub_142(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 142 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 284]; @@ -14529,11 +14417,17 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 142 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 142 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -14588,19 +14482,18 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(t[0], m, mp); n = sp_4096_cmp_142(t[0], m); - sp_4096_cond_sub_142(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 142 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 284) + 284]; @@ -14615,11 +14508,17 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 284) + 284), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 284) + 284), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -14712,12 +14611,11 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(rt, m, mp); n = sp_4096_cmp_142(rt, m); - sp_4096_cond_sub_142(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 284); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -14744,7 +14642,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[142 * 5]; @@ -14752,8 +14650,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -14762,7 +14660,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -14776,7 +14674,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -14791,12 +14689,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_4096_from_bin(a, 142, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -14815,7 +14713,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_142(a, a, m); } if (err == MP_OKAY) { - for (i=28; 
i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -14831,21 +14729,20 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_142(r, m, mp); mp = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_142(r, r, m, ~(mp >> 31)); sp_4096_to_bin_142(r, out); *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[142 * 5]; @@ -14853,14 +14750,14 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 512U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 29) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -14874,7 +14771,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -14889,12 +14786,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, m = r + 142 * 2; sp_4096_from_bin(a, 142, in, inLen); -#if DIGIT_BIT >= 29 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -14924,7 +14821,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_142(a, a, m); if (err 
== MP_OKAY) { - for (i=28; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -14940,8 +14837,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_142(r, m, mp); mp = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(mp >> 31)); } } } @@ -14951,7 +14847,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -14986,7 +14882,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[142 * 4]; @@ -15020,7 +14916,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -15045,21 +14941,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 142); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[142 * 4]; @@ -15093,7 +14989,7 @@ int 
sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -15118,14 +15014,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 142); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -15134,7 +15030,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[71 * 8]; @@ -15164,9 +15060,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 71 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -15197,6 +15099,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_71(tmpa); sp_4096_cond_add_71(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[70] >> 31)); sp_4096_cond_add_71(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[70] >> 31)); + sp_4096_norm_71(tmpa); sp_4096_from_mp(qi, 71, qim); sp_4096_mul_71(tmpa, tmpa, qi); @@ -15213,19 +15116,19 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 71 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[71 * 13]; @@ -15256,9 +15159,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 71 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -15295,6 +15204,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_71(tmpa); sp_4096_cond_add_71(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[70] >> 31)); sp_4096_cond_add_71(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[70] >> 31)); + sp_4096_norm_71(tmpa); sp_4096_mul_71(tmpa, tmpa, qi); err = sp_4096_mod_71(tmpa, tmpa, p); } @@ -15308,12 +15218,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 71 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -15409,7 +15319,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[142 * 4]; @@ -15432,7 +15342,7 @@ int 
sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 4, NULL, DYNAMIC_TYPE_DH); @@ -15457,20 +15367,20 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_4096_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 142U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[142 * 4]; @@ -15494,7 +15404,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -15519,14 +15429,14 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 142U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -15556,11 +15466,13 @@ SP_NOINLINE static void sp_4096_lshift_142(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. 
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[427]; @@ -15575,11 +15487,17 @@ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 427, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 427, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15644,17 +15562,15 @@ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_142(r, r, tmp); sp_4096_norm_142(r); o = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(o >> 31)); } sp_4096_mont_reduce_142(r, m, mp); n = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(n >> 31)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -15679,7 +15595,7 @@ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[142 * 4]; @@ -15703,7 +15619,7 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 142 * 4, NULL, DYNAMIC_TYPE_DH); @@ -15744,14 +15660,14 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 142U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -15807,20 +15723,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 26 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 25); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 25); } #elif DIGIT_BIT > 26 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 
0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x3ffffff; s = 26U - s; @@ -15850,12 +15769,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 26) { r[j] &= 0x3ffffff; @@ -15900,9 +15819,9 @@ static void sp_4096_to_bin_162(sp_digit* r, byte* a) r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - j = 4096 / 8 - 1; + j = 4103 / 8 - 1; a[j] = 0; - for (i=0; i<162 && j>=0; i++) { + for (i=0; i<158 && j>=0; i++) { b = 0; /* lint allow cast of mismatch sp_digit and int */ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ @@ -16029,179 +15948,110 @@ static void sp_4096_norm_158(sp_digit* a) SP_NOINLINE static void sp_4096_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint64 t0 = ((sp_uint64)a[ 0]) * b[ 0]; - sp_uint64 t1 = ((sp_uint64)a[ 0]) * b[ 1] - + ((sp_uint64)a[ 1]) * b[ 0]; - sp_uint64 t2 = ((sp_uint64)a[ 0]) * b[ 2] - + ((sp_uint64)a[ 1]) * b[ 1] - + ((sp_uint64)a[ 2]) * b[ 0]; - sp_uint64 t3 = ((sp_uint64)a[ 0]) * b[ 3] - + ((sp_uint64)a[ 1]) * b[ 2] - + ((sp_uint64)a[ 2]) * b[ 1] - + ((sp_uint64)a[ 3]) * b[ 0]; - sp_uint64 t4 = ((sp_uint64)a[ 0]) * b[ 4] - + ((sp_uint64)a[ 1]) * b[ 3] - + ((sp_uint64)a[ 2]) * b[ 2] - + ((sp_uint64)a[ 3]) * b[ 1] - + ((sp_uint64)a[ 4]) * b[ 0]; - sp_uint64 t5 = ((sp_uint64)a[ 0]) * b[ 5] - + ((sp_uint64)a[ 1]) * b[ 4] - + ((sp_uint64)a[ 2]) * b[ 3] - + ((sp_uint64)a[ 3]) * b[ 2] - + ((sp_uint64)a[ 4]) * b[ 1] - + ((sp_uint64)a[ 5]) * b[ 0]; - sp_uint64 t6 = ((sp_uint64)a[ 0]) * b[ 6] - + ((sp_uint64)a[ 1]) * b[ 5] - + ((sp_uint64)a[ 2]) * b[ 4] - + ((sp_uint64)a[ 3]) * b[ 3] - + ((sp_uint64)a[ 4]) * b[ 2] - + ((sp_uint64)a[ 5]) * b[ 1] - + ((sp_uint64)a[ 6]) * b[ 
0]; - sp_uint64 t7 = ((sp_uint64)a[ 0]) * b[ 7] - + ((sp_uint64)a[ 1]) * b[ 6] - + ((sp_uint64)a[ 2]) * b[ 5] - + ((sp_uint64)a[ 3]) * b[ 4] - + ((sp_uint64)a[ 4]) * b[ 3] - + ((sp_uint64)a[ 5]) * b[ 2] - + ((sp_uint64)a[ 6]) * b[ 1] - + ((sp_uint64)a[ 7]) * b[ 0]; - sp_uint64 t8 = ((sp_uint64)a[ 0]) * b[ 8] - + ((sp_uint64)a[ 1]) * b[ 7] - + ((sp_uint64)a[ 2]) * b[ 6] - + ((sp_uint64)a[ 3]) * b[ 5] - + ((sp_uint64)a[ 4]) * b[ 4] - + ((sp_uint64)a[ 5]) * b[ 3] - + ((sp_uint64)a[ 6]) * b[ 2] - + ((sp_uint64)a[ 7]) * b[ 1] - + ((sp_uint64)a[ 8]) * b[ 0]; - sp_uint64 t9 = ((sp_uint64)a[ 1]) * b[ 8] - + ((sp_uint64)a[ 2]) * b[ 7] - + ((sp_uint64)a[ 3]) * b[ 6] - + ((sp_uint64)a[ 4]) * b[ 5] - + ((sp_uint64)a[ 5]) * b[ 4] - + ((sp_uint64)a[ 6]) * b[ 3] - + ((sp_uint64)a[ 7]) * b[ 2] - + ((sp_uint64)a[ 8]) * b[ 1]; - sp_uint64 t10 = ((sp_uint64)a[ 2]) * b[ 8] - + ((sp_uint64)a[ 3]) * b[ 7] - + ((sp_uint64)a[ 4]) * b[ 6] - + ((sp_uint64)a[ 5]) * b[ 5] - + ((sp_uint64)a[ 6]) * b[ 4] - + ((sp_uint64)a[ 7]) * b[ 3] - + ((sp_uint64)a[ 8]) * b[ 2]; - sp_uint64 t11 = ((sp_uint64)a[ 3]) * b[ 8] - + ((sp_uint64)a[ 4]) * b[ 7] - + ((sp_uint64)a[ 5]) * b[ 6] - + ((sp_uint64)a[ 6]) * b[ 5] - + ((sp_uint64)a[ 7]) * b[ 4] - + ((sp_uint64)a[ 8]) * b[ 3]; - sp_uint64 t12 = ((sp_uint64)a[ 4]) * b[ 8] - + ((sp_uint64)a[ 5]) * b[ 7] - + ((sp_uint64)a[ 6]) * b[ 6] - + ((sp_uint64)a[ 7]) * b[ 5] - + ((sp_uint64)a[ 8]) * b[ 4]; - sp_uint64 t13 = ((sp_uint64)a[ 5]) * b[ 8] - + ((sp_uint64)a[ 6]) * b[ 7] - + ((sp_uint64)a[ 7]) * b[ 6] - + ((sp_uint64)a[ 8]) * b[ 5]; - sp_uint64 t14 = ((sp_uint64)a[ 6]) * b[ 8] - + ((sp_uint64)a[ 7]) * b[ 7] - + ((sp_uint64)a[ 8]) * b[ 6]; - sp_uint64 t15 = ((sp_uint64)a[ 7]) * b[ 8] - + ((sp_uint64)a[ 8]) * b[ 7]; - sp_uint64 t16 = ((sp_uint64)a[ 8]) * b[ 8]; + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[9]; - t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; - t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; - t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; - t4 += t3 >> 26; r[ 3] = t3 & 
0x3ffffff; - t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; - t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; - t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; - t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; - t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; - t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; - t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; - t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; - t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; - t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; - t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; - t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; - r[17] = (sp_digit)(t16 >> 26); - r[16] = t16 & 0x3ffffff; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_4096_sqr_9(sp_digit* r, const sp_digit* a) -{ - sp_uint64 t0 = ((sp_uint64)a[ 0]) * a[ 0]; - sp_uint64 t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; - sp_uint64 t2 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 - + ((sp_uint64)a[ 1]) * a[ 1]; - sp_uint64 t3 = (((sp_uint64)a[ 0]) * a[ 3] - + ((sp_uint64)a[ 1]) * a[ 2]) * 2; - sp_uint64 t4 = (((sp_uint64)a[ 0]) * a[ 4] - + ((sp_uint64)a[ 1]) * a[ 3]) * 2 - + ((sp_uint64)a[ 2]) * a[ 2]; - sp_uint64 t5 = (((sp_uint64)a[ 0]) * a[ 5] - + ((sp_uint64)a[ 1]) * a[ 4] - + ((sp_uint64)a[ 2]) * a[ 3]) * 2; - sp_uint64 t6 = (((sp_uint64)a[ 0]) * a[ 6] - + ((sp_uint64)a[ 1]) * a[ 5] - + ((sp_uint64)a[ 2]) * a[ 4]) * 2 - + ((sp_uint64)a[ 3]) * a[ 3]; - sp_uint64 t7 = (((sp_uint64)a[ 0]) * a[ 7] - + ((sp_uint64)a[ 1]) * a[ 6] - + ((sp_uint64)a[ 2]) * a[ 5] - + ((sp_uint64)a[ 3]) * a[ 4]) * 2; - sp_uint64 t8 = (((sp_uint64)a[ 0]) * a[ 8] - + ((sp_uint64)a[ 1]) * a[ 7] - + ((sp_uint64)a[ 2]) * a[ 6] - + ((sp_uint64)a[ 3]) * a[ 5]) * 2 - + ((sp_uint64)a[ 4]) * a[ 4]; - sp_uint64 t9 = (((sp_uint64)a[ 1]) * a[ 8] - + ((sp_uint64)a[ 2]) * a[ 7] - + ((sp_uint64)a[ 3]) * a[ 6] - + ((sp_uint64)a[ 4]) * a[ 5]) * 2; - sp_uint64 t10 = (((sp_uint64)a[ 2]) * a[ 8] - + ((sp_uint64)a[ 3]) * a[ 7] - + ((sp_uint64)a[ 4]) * a[ 6]) * 
2 - + ((sp_uint64)a[ 5]) * a[ 5]; - sp_uint64 t11 = (((sp_uint64)a[ 3]) * a[ 8] - + ((sp_uint64)a[ 4]) * a[ 7] - + ((sp_uint64)a[ 5]) * a[ 6]) * 2; - sp_uint64 t12 = (((sp_uint64)a[ 4]) * a[ 8] - + ((sp_uint64)a[ 5]) * a[ 7]) * 2 - + ((sp_uint64)a[ 6]) * a[ 6]; - sp_uint64 t13 = (((sp_uint64)a[ 5]) * a[ 8] - + ((sp_uint64)a[ 6]) * a[ 7]) * 2; - sp_uint64 t14 = (((sp_uint64)a[ 6]) * a[ 8]) * 2 - + ((sp_uint64)a[ 7]) * a[ 7]; - sp_uint64 t15 = (((sp_uint64)a[ 7]) * a[ 8]) * 2; - sp_uint64 t16 = ((sp_uint64)a[ 8]) * a[ 8]; - - t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; - t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; - t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; - t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; - t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; - t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; - t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; - t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; - t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; - t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; - t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; - t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; - t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; - t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; - t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; - t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; - r[17] = (sp_digit)(t16 >> 26); - r[16] = t16 & 0x3ffffff; + t0 = ((sp_uint64)a[ 0]) * b[ 0]; + t1 = ((sp_uint64)a[ 0]) * b[ 1] + + ((sp_uint64)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 0]) * b[ 2] + + ((sp_uint64)a[ 1]) * b[ 1] + + ((sp_uint64)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 0]) * b[ 3] + + ((sp_uint64)a[ 1]) * b[ 2] + + ((sp_uint64)a[ 2]) * b[ 1] + + ((sp_uint64)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 0]) * b[ 4] + + ((sp_uint64)a[ 1]) * b[ 3] + + ((sp_uint64)a[ 2]) * b[ 2] + + ((sp_uint64)a[ 3]) * b[ 1] + + ((sp_uint64)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 0]) * b[ 5] + + ((sp_uint64)a[ 1]) * b[ 4] + 
+ ((sp_uint64)a[ 2]) * b[ 3] + + ((sp_uint64)a[ 3]) * b[ 2] + + ((sp_uint64)a[ 4]) * b[ 1] + + ((sp_uint64)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 0]) * b[ 6] + + ((sp_uint64)a[ 1]) * b[ 5] + + ((sp_uint64)a[ 2]) * b[ 4] + + ((sp_uint64)a[ 3]) * b[ 3] + + ((sp_uint64)a[ 4]) * b[ 2] + + ((sp_uint64)a[ 5]) * b[ 1] + + ((sp_uint64)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 0]) * b[ 7] + + ((sp_uint64)a[ 1]) * b[ 6] + + ((sp_uint64)a[ 2]) * b[ 5] + + ((sp_uint64)a[ 3]) * b[ 4] + + ((sp_uint64)a[ 4]) * b[ 3] + + ((sp_uint64)a[ 5]) * b[ 2] + + ((sp_uint64)a[ 6]) * b[ 1] + + ((sp_uint64)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 0]) * b[ 8] + + ((sp_uint64)a[ 1]) * b[ 7] + + ((sp_uint64)a[ 2]) * b[ 6] + + ((sp_uint64)a[ 3]) * b[ 5] + + ((sp_uint64)a[ 4]) * b[ 4] + + ((sp_uint64)a[ 5]) * b[ 3] + + ((sp_uint64)a[ 6]) * b[ 2] + + ((sp_uint64)a[ 7]) * b[ 1] + + ((sp_uint64)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 1]) * b[ 8] + + ((sp_uint64)a[ 2]) * b[ 7] + + ((sp_uint64)a[ 3]) * b[ 6] + + ((sp_uint64)a[ 4]) * b[ 5] + + ((sp_uint64)a[ 5]) * b[ 4] + + ((sp_uint64)a[ 6]) * b[ 3] + + ((sp_uint64)a[ 7]) * b[ 2] + + ((sp_uint64)a[ 8]) * b[ 1]; + t[ 8] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 2]) * b[ 8] + + ((sp_uint64)a[ 3]) * b[ 7] + + ((sp_uint64)a[ 4]) * b[ 6] + + ((sp_uint64)a[ 5]) * b[ 5] + + ((sp_uint64)a[ 6]) * b[ 4] + + ((sp_uint64)a[ 7]) * b[ 3] + + ((sp_uint64)a[ 8]) * b[ 2]; + r[ 9] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 3]) * b[ 8] + + ((sp_uint64)a[ 4]) * b[ 7] + + ((sp_uint64)a[ 5]) * b[ 6] + + ((sp_uint64)a[ 6]) * b[ 5] + + ((sp_uint64)a[ 7]) * b[ 4] + + ((sp_uint64)a[ 8]) * b[ 3]; + r[10] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 4]) * b[ 8] + + ((sp_uint64)a[ 5]) * b[ 7] + + ((sp_uint64)a[ 6]) * b[ 6] + + ((sp_uint64)a[ 7]) * b[ 5] + + ((sp_uint64)a[ 8]) * b[ 4]; + 
r[11] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 5]) * b[ 8] + + ((sp_uint64)a[ 6]) * b[ 7] + + ((sp_uint64)a[ 7]) * b[ 6] + + ((sp_uint64)a[ 8]) * b[ 5]; + r[12] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 6]) * b[ 8] + + ((sp_uint64)a[ 7]) * b[ 7] + + ((sp_uint64)a[ 8]) * b[ 6]; + r[13] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_uint64)a[ 7]) * b[ 8] + + ((sp_uint64)a[ 8]) * b[ 7]; + r[14] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 8]) * b[ 8]; + r[15] = t1 & 0x3ffffff; t0 += t1 >> 26; + r[16] = t0 & 0x3ffffff; + r[17] = (sp_digit)(t0 >> 26); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. (r = a + b) @@ -16400,55 +16250,6 @@ SP_NOINLINE static void sp_4096_mul_27(sp_digit* r, const sp_digit* a, sp_4096_norm_54(r); } -/* Square a into r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_4096_sqr_27(sp_digit* r, const sp_digit* a) -{ - sp_digit p0[18]; - sp_digit p1[18]; - sp_digit p2[18]; - sp_digit p3[18]; - sp_digit p4[18]; - sp_digit p5[18]; - sp_digit t0[18]; - sp_digit t1[18]; - sp_digit t2[18]; - sp_digit a0[9]; - sp_digit a1[9]; - sp_digit a2[9]; - (void)sp_4096_add_9(a0, a, &a[9]); - sp_4096_norm_9(a0); - (void)sp_4096_add_9(a1, &a[9], &a[18]); - sp_4096_norm_9(a1); - (void)sp_4096_add_9(a2, a0, &a[18]); - sp_4096_norm_9(a2); - sp_4096_sqr_9(p0, a); - sp_4096_sqr_9(p2, &a[9]); - sp_4096_sqr_9(p4, &a[18]); - sp_4096_sqr_9(p1, a0); - sp_4096_sqr_9(p3, a1); - sp_4096_sqr_9(p5, a2); - XMEMSET(r, 0, sizeof(*r)*2U*27U); - (void)sp_4096_sub_18(t0, p3, p2); - (void)sp_4096_sub_18(t1, p1, p2); - (void)sp_4096_sub_18(t2, p5, t0); - (void)sp_4096_sub_18(t2, t2, t1); - sp_4096_norm_18(t2); - (void)sp_4096_sub_18(t0, t0, p4); - sp_4096_norm_18(t0); - (void)sp_4096_sub_18(t1, t1, p0); - sp_4096_norm_18(t1); - (void)sp_4096_add_18(r, r, p0); - (void)sp_4096_add_18(&r[9], &r[9], t1); - (void)sp_4096_add_18(&r[18], &r[18], t2); - (void)sp_4096_add_18(&r[27], 
&r[27], t0); - (void)sp_4096_add_18(&r[36], &r[36], p4); - sp_4096_norm_54(r); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -16620,55 +16421,6 @@ SP_NOINLINE static void sp_4096_mul_81(sp_digit* r, const sp_digit* a, sp_4096_norm_162(r); } -/* Square a into r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_4096_sqr_81(sp_digit* r, const sp_digit* a) -{ - sp_digit p0[54]; - sp_digit p1[54]; - sp_digit p2[54]; - sp_digit p3[54]; - sp_digit p4[54]; - sp_digit p5[54]; - sp_digit t0[54]; - sp_digit t1[54]; - sp_digit t2[54]; - sp_digit a0[27]; - sp_digit a1[27]; - sp_digit a2[27]; - (void)sp_4096_add_27(a0, a, &a[27]); - sp_4096_norm_27(a0); - (void)sp_4096_add_27(a1, &a[27], &a[54]); - sp_4096_norm_27(a1); - (void)sp_4096_add_27(a2, a0, &a[54]); - sp_4096_norm_27(a2); - sp_4096_sqr_27(p0, a); - sp_4096_sqr_27(p2, &a[27]); - sp_4096_sqr_27(p4, &a[54]); - sp_4096_sqr_27(p1, a0); - sp_4096_sqr_27(p3, a1); - sp_4096_sqr_27(p5, a2); - XMEMSET(r, 0, sizeof(*r)*2U*81U); - (void)sp_4096_sub_54(t0, p3, p2); - (void)sp_4096_sub_54(t1, p1, p2); - (void)sp_4096_sub_54(t2, p5, t0); - (void)sp_4096_sub_54(t2, t2, t1); - sp_4096_norm_54(t2); - (void)sp_4096_sub_54(t0, t0, p4); - sp_4096_norm_54(t0); - (void)sp_4096_sub_54(t1, t1, p0); - sp_4096_norm_54(t1); - (void)sp_4096_add_54(r, r, p0); - (void)sp_4096_add_54(&r[27], &r[27], t1); - (void)sp_4096_add_54(&r[54], &r[54], t2); - (void)sp_4096_add_54(&r[81], &r[81], t0); - (void)sp_4096_add_54(&r[108], &r[108], p4); - sp_4096_norm_162(r); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -16798,6 +16550,181 @@ SP_NOINLINE static void sp_4096_mul_162(sp_digit* r, const sp_digit* a, sp_4096_norm_324(r); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_9(sp_digit* r, const sp_digit* a) +{ + sp_uint64 t0; + sp_uint64 t1; + sp_digit t[9]; + + t0 = ((sp_uint64)a[ 0]) * a[ 0]; + t1 = (((sp_uint64)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 0]) * a[ 2]) * 2 + + ((sp_uint64)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 0]) * a[ 3] + + ((sp_uint64)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 0]) * a[ 4] + + ((sp_uint64)a[ 1]) * a[ 3]) * 2 + + ((sp_uint64)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 0]) * a[ 5] + + ((sp_uint64)a[ 1]) * a[ 4] + + ((sp_uint64)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 0]) * a[ 6] + + ((sp_uint64)a[ 1]) * a[ 5] + + ((sp_uint64)a[ 2]) * a[ 4]) * 2 + + ((sp_uint64)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 0]) * a[ 7] + + ((sp_uint64)a[ 1]) * a[ 6] + + ((sp_uint64)a[ 2]) * a[ 5] + + ((sp_uint64)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 0]) * a[ 8] + + ((sp_uint64)a[ 1]) * a[ 7] + + ((sp_uint64)a[ 2]) * a[ 6] + + ((sp_uint64)a[ 3]) * a[ 5]) * 2 + + ((sp_uint64)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 1]) * a[ 8] + + ((sp_uint64)a[ 2]) * a[ 7] + + ((sp_uint64)a[ 3]) * a[ 6] + + ((sp_uint64)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 2]) * a[ 8] + + ((sp_uint64)a[ 3]) * a[ 7] + + ((sp_uint64)a[ 4]) * a[ 6]) * 2 + + ((sp_uint64)a[ 5]) * a[ 5]; + r[ 9] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 3]) * a[ 8] + + ((sp_uint64)a[ 4]) * a[ 7] + + ((sp_uint64)a[ 5]) * a[ 6]) * 2; + r[10] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 4]) * a[ 8] + + ((sp_uint64)a[ 5]) * a[ 7]) * 2 + + ((sp_uint64)a[ 6]) * a[ 6]; + r[11] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 5]) * a[ 
8] + + ((sp_uint64)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_uint64)a[ 6]) * a[ 8]) * 2 + + ((sp_uint64)a[ 7]) * a[ 7]; + r[13] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_uint64)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_uint64)a[ 8]) * a[ 8]; + r[15] = t1 & 0x3ffffff; t0 += t1 >> 26; + r[16] = t0 & 0x3ffffff; + r[17] = (sp_digit)(t0 >> 26); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_27(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[18]; + sp_digit p1[18]; + sp_digit p2[18]; + sp_digit p3[18]; + sp_digit p4[18]; + sp_digit p5[18]; + sp_digit t0[18]; + sp_digit t1[18]; + sp_digit t2[18]; + sp_digit a0[9]; + sp_digit a1[9]; + sp_digit a2[9]; + (void)sp_4096_add_9(a0, a, &a[9]); + sp_4096_norm_9(a0); + (void)sp_4096_add_9(a1, &a[9], &a[18]); + sp_4096_norm_9(a1); + (void)sp_4096_add_9(a2, a0, &a[18]); + sp_4096_norm_9(a2); + sp_4096_sqr_9(p0, a); + sp_4096_sqr_9(p2, &a[9]); + sp_4096_sqr_9(p4, &a[18]); + sp_4096_sqr_9(p1, a0); + sp_4096_sqr_9(p3, a1); + sp_4096_sqr_9(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*27U); + (void)sp_4096_sub_18(t0, p3, p2); + (void)sp_4096_sub_18(t1, p1, p2); + (void)sp_4096_sub_18(t2, p5, t0); + (void)sp_4096_sub_18(t2, t2, t1); + sp_4096_norm_18(t2); + (void)sp_4096_sub_18(t0, t0, p4); + sp_4096_norm_18(t0); + (void)sp_4096_sub_18(t1, t1, p0); + sp_4096_norm_18(t1); + (void)sp_4096_add_18(r, r, p0); + (void)sp_4096_add_18(&r[9], &r[9], t1); + (void)sp_4096_add_18(&r[18], &r[18], t2); + (void)sp_4096_add_18(&r[27], &r[27], t0); + (void)sp_4096_add_18(&r[36], &r[36], p4); + sp_4096_norm_54(r); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_81(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[54]; + sp_digit p1[54]; + sp_digit p2[54]; + sp_digit p3[54]; + sp_digit p4[54]; + sp_digit p5[54]; + sp_digit t0[54]; + sp_digit t1[54]; + sp_digit t2[54]; + sp_digit a0[27]; + sp_digit a1[27]; + sp_digit a2[27]; + (void)sp_4096_add_27(a0, a, &a[27]); + sp_4096_norm_27(a0); + (void)sp_4096_add_27(a1, &a[27], &a[54]); + sp_4096_norm_27(a1); + (void)sp_4096_add_27(a2, a0, &a[54]); + sp_4096_norm_27(a2); + sp_4096_sqr_27(p0, a); + sp_4096_sqr_27(p2, &a[27]); + sp_4096_sqr_27(p4, &a[54]); + sp_4096_sqr_27(p1, a0); + sp_4096_sqr_27(p3, a1); + sp_4096_sqr_27(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*81U); + (void)sp_4096_sub_54(t0, p3, p2); + (void)sp_4096_sub_54(t1, p1, p2); + (void)sp_4096_sub_54(t2, p5, t0); + (void)sp_4096_sub_54(t2, t2, t1); + sp_4096_norm_54(t2); + (void)sp_4096_sub_54(t0, t0, p4); + sp_4096_norm_54(t0); + (void)sp_4096_sub_54(t1, t1, p0); + sp_4096_norm_54(t1); + (void)sp_4096_add_54(r, r, p0); + (void)sp_4096_add_54(&r[27], &r[27], t1); + (void)sp_4096_add_54(&r[54], &r[54], t2); + (void)sp_4096_add_54(&r[81], &r[81], t0); + (void)sp_4096_add_54(&r[108], &r[108], p4); + sp_4096_norm_162(r); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -16821,7 +16748,7 @@ SP_NOINLINE static void sp_4096_sqr_162(sp_digit* r, const sp_digit* a) } #endif /* !WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -16968,14 +16895,14 @@ static sp_digit sp_4096_cmp_81(const sp_digit* a, const sp_digit* b) r |= (a[80] - b[80]) & (0 - (sp_digit)1); for (i = 72; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 25); } return r; @@ -17114,21 +17041,22 @@ static void sp_4096_mont_reduce_81(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_81(a + 79); for (i=0; i<78; i++) { - mu = (a[i] * mp) & 0x3ffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffff; sp_4096_mul_add_81(a+i, m, mu); a[i+1] += a[i] >> 26; } - mu = (a[i] * mp) & 0xfffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffL; sp_4096_mul_add_81(a+i, m, mu); a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; sp_4096_mont_shift_81(a, a); - sp_4096_cond_sub_81(a, a, m, 0 - (((a[78] - m[78]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[78] - m[78]; + sp_4096_cond_sub_81(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_81(a); } @@ -17139,9 +17067,9 @@ static void sp_4096_mont_reduce_81(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_4096_mont_mul_81(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_81(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_81(r, a, b); @@ -17153,9 +17081,9 @@ static void sp_4096_mont_mul_81(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_81(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_81(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_81(r, a); @@ -17205,6 +17133,7 @@ SP_NOINLINE static void sp_4096_mul_d_81(sp_digit* r, const sp_digit* a, r[81] = (sp_digit)(t & 0x3ffffff); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -17218,7 +17147,7 @@ static void sp_4096_cond_add_81(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 72; i += 8) { + for (i = 0; i < 80; i += 8) { r[i + 0] = a[i + 0] + (b[i + 0] & m); r[i + 1] = a[i + 1] + (b[i + 1] & m); r[i + 2] = a[i + 2] + (b[i + 2] & m); @@ -17228,21 +17157,16 @@ static void sp_4096_cond_add_81(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } - r[72] = a[72] + (b[72] & m); - r[73] = a[73] + (b[73] & m); - r[74] = a[74] + (b[74] & m); - r[75] = a[75] + (b[75] & m); - r[76] = a[76] + (b[76] & m); - r[77] = a[77] + (b[77] & m); - r[78] = a[78] + (b[78] & m); + r[80] = a[80] + (b[80] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_4096_rshift_81(sp_digit* r, const sp_digit* a, byte n) { int i; - for (i=0; i<72; i += 8) { + for (i=0; i<80; i += 8) { r[i+0] = (a[i+0] >> n) | ((a[i+1] << (26 - n)) & 0x3ffffff); r[i+1] = (a[i+1] >> n) | ((a[i+2] << (26 - n)) & 0x3ffffff); r[i+2] = (a[i+2] >> n) | ((a[i+3] << (26 - n)) & 0x3ffffff); @@ -17252,74 +17176,99 @@ SP_NOINLINE static 
void sp_4096_rshift_81(sp_digit* r, const sp_digit* a, r[i+6] = (a[i+6] >> n) | ((a[i+7] << (26 - n)) & 0x3ffffff); r[i+7] = (a[i+7] >> n) | ((a[i+8] << (26 - n)) & 0x3ffffff); } - r[72] = (a[72] >> n) | ((a[73] << (26 - n)) & 0x3ffffff); - r[73] = (a[73] >> n) | ((a[74] << (26 - n)) & 0x3ffffff); - r[74] = (a[74] >> n) | ((a[75] << (26 - n)) & 0x3ffffff); - r[75] = (a[75] >> n) | ((a[76] << (26 - n)) & 0x3ffffff); - r[76] = (a[76] >> n) | ((a[77] << (26 - n)) & 0x3ffffff); - r[77] = (a[77] >> n) | ((a[78] << (26 - n)) & 0x3ffffff); - r[78] = (a[78] >> n) | ((a[79] << (26 - n)) & 0x3ffffff); - r[79] = (a[79] >> n) | ((a[80] << (26 - n)) & 0x3ffffff); r[80] = a[80] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 26) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 26); + sp_digit t0 = (sp_digit)(d & 0x3ffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 24; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 25) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 26); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 52) - (sp_digit)(d >> 52); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = 
(sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 11) + 1; - /* All 26 bits from d1 and top 5 bits from d0. */ - d = (d1 << 5) + (d0 >> 21); - r = d / dv; - d -= r * dv; - /* Up to 6 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 16) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 22); + t = (t / dv) << 11; r += t; - /* Up to 11 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 11) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 7); + t = t / (dv << 4); r += t; - /* Up to 16 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 6) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 21 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 1) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 26 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 26 bits from d1 and top 5 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_81(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -17336,11 +17285,10 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 81 + 3]; @@ -17351,7 +17299,7 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 81 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -17370,28 +17318,14 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, t1[79 + 79] += t1[79 + 79 - 1] >> 26; t1[79 + 79 - 1] &= 0x3ffffff; for (i=79; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[79 + i]; - d1 <<= 26; - d1 += t1[79 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_81(t1[79 + i], t1[79 + i - 1], dv); -#endif sp_4096_mul_d_81(t2, sd, r1); (void)sp_4096_sub_81(&t1[i], &t1[i], t2); sp_4096_norm_79(&t1[i]); t1[79 + i] += t1[79 + i - 1] >> 26; t1[79 + i - 1] &= 0x3ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[79 + i]; - d1 <<= 26; - d1 -= t1[79 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_81(-t1[79 + i], -t1[79 + i - 1], dv); -#endif r1 -= t1[79 + i]; sp_4096_mul_d_81(t2, sd, r1); (void)sp_4096_add_81(&t1[i], &t1[i], t2); @@ -17400,7 +17334,7 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, } t1[79 - 1] += t1[79 - 2] >> 26; t1[79 - 2] &= 0x3ffffff; - r1 = t1[79 - 1] / dv; + r1 = sp_4096_word_div_word_81(t1[79 - 1], dv); sp_4096_mul_d_81(t2, sd, r1); sp_4096_sub_81(t1, t1, t2); @@ -17409,8 +17343,7 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - sp_4096_cond_add_81(r, r, sd, 0 - ((r[78] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_81(r, r, sd, r[78] >> 31); sp_4096_norm_79(r); sp_4096_rshift_81(r, r, 6); @@ -17418,7 +17351,7 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, r[80] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -17445,13 +17378,15 @@ static int sp_4096_mod_81(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 162]; @@ -17465,11 +17400,17 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 81 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 81 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -17524,20 +17465,19 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(t[0], m, mp); n = sp_4096_cmp_81(t[0], m); - sp_4096_cond_sub_81(t[0], t[0], m, ((n < 
0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 81 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 162]; @@ -17551,11 +17491,17 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 81 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 81 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -17610,19 +17556,18 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(t[0], m, mp); n = sp_4096_cmp_81(t[0], m); - sp_4096_cond_sub_81(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 81 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 162) + 162]; @@ -17637,11 +17582,17 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 162) + 162), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 162) + 162), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -17751,12 +17702,11 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(rt, m, mp); n = sp_4096_cmp_81(rt, m); - sp_4096_cond_sub_81(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 162); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -17820,16 +17770,16 @@ static sp_digit sp_4096_cmp_162(const sp_digit* a, const sp_digit* b) int i; r |= (a[161] - b[161]) & (0 - (sp_digit)1); - r |= (a[160] - b[160]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[160] - b[160]) & ~(((sp_digit)0 - r) >> 25); for (i = 152; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 25); } return r; @@ -17971,17 +17921,18 @@ static void sp_4096_mont_reduce_162(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_162(a + 158); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<157; i++) { - mu = (a[i] * mp) & 0x3ffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffff; sp_4096_mul_add_162(a+i, m, mu); a[i+1] += a[i] >> 26; } - mu = (a[i] * mp) & 0x3fffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3fffL; sp_4096_mul_add_162(a+i, m, mu); a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; @@ -17999,18 +17950,18 @@ static void sp_4096_mont_reduce_162(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<157; i++) { - mu = (a[i] * mp) & 0x3ffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffff; sp_4096_mul_add_162(a+i, m, mu); a[i+1] += a[i] >> 26; } - mu = (a[i] * mp) & 0x3fffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3fffL; sp_4096_mul_add_162(a+i, m, mu); a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; #endif sp_4096_mont_shift_162(a, a); - sp_4096_cond_sub_162(a, a, m, 0 - (((a[157] - m[157]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[157] - m[157]; + sp_4096_cond_sub_162(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_162(a); } @@ -18021,9 +17972,9 @@ static void sp_4096_mont_reduce_162(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_162(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_162(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_162(r, a, b); @@ -18035,9 +17986,9 @@ static void sp_4096_mont_mul_162(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_162(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_162(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_162(r, a); @@ -18084,6 +18035,7 @@ SP_NOINLINE static void sp_4096_mul_d_324(sp_digit* r, const sp_digit* a, r[324] = (sp_digit)(t & 0x3ffffff); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -18097,7 +18049,7 @@ static void sp_4096_cond_add_162(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 152; i += 8) { + for (i = 0; i < 160; i += 8) { r[i + 0] = a[i + 0] + (b[i + 0] & m); r[i + 1] = a[i + 1] + (b[i + 1] & m); r[i + 2] = a[i + 2] + (b[i + 2] & m); @@ -18107,13 +18059,10 @@ static void sp_4096_cond_add_162(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } - r[152] = a[152] + (b[152] & m); - r[153] = a[153] + (b[153] & m); - r[154] = a[154] + (b[154] & m); - r[155] = a[155] + (b[155] & m); - r[156] = a[156] + (b[156] & m); - r[157] = a[157] + (b[157] & m); + r[160] = a[160] + (b[160] & m); + r[161] = a[161] + (b[161] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_4096_rshift_162(sp_digit* r, const sp_digit* a, byte n) @@ -18134,63 +18083,96 @@ SP_NOINLINE static void sp_4096_rshift_162(sp_digit* r, const sp_digit* a, r[161] = a[161] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 26) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 26); + sp_digit t0 = (sp_digit)(d & 0x3ffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 24; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 25) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 
+= t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 26); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 52) - (sp_digit)(d >> 52); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 11) + 1; - /* All 26 bits from d1 and top 5 bits from d0. */ - d = (d1 << 5) + (d0 >> 21); - r = d / dv; - d -= r * dv; - /* Up to 6 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 16) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 22); + t = (t / dv) << 11; r += t; - /* Up to 11 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 11) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 7); + t = t / (dv << 4); r += t; - /* Up to 16 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 6) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 21 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 1) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 26 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 26 bits from d1 and top 5 bits from d0. 
*/ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_162(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -18207,11 +18189,10 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 162 + 3]; @@ -18222,7 +18203,7 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 162 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -18241,28 +18222,14 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, t1[158 + 158] += t1[158 + 158 - 1] >> 26; t1[158 + 158 - 1] &= 0x3ffffff; for (i=158; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[158 + i]; - d1 <<= 26; - d1 += t1[158 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_162(t1[158 + i], t1[158 + i - 1], dv); -#endif sp_4096_mul_d_162(t2, sd, r1); (void)sp_4096_sub_162(&t1[i], &t1[i], t2); sp_4096_norm_158(&t1[i]); t1[158 + i] += t1[158 + i - 1] >> 26; t1[158 + i - 1] &= 0x3ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[158 + i]; - d1 <<= 26; - d1 -= t1[158 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_162(-t1[158 + i], -t1[158 + i - 1], dv); -#endif r1 -= t1[158 + i]; sp_4096_mul_d_162(t2, sd, r1); (void)sp_4096_add_162(&t1[i], &t1[i], t2); @@ -18271,7 +18238,7 @@ static int sp_4096_div_162(const sp_digit* 
a, const sp_digit* d, } t1[158 - 1] += t1[158 - 2] >> 26; t1[158 - 2] &= 0x3ffffff; - r1 = t1[158 - 1] / dv; + r1 = sp_4096_word_div_word_162(t1[158 - 1], dv); sp_4096_mul_d_162(t2, sd, r1); sp_4096_sub_162(t1, t1, t2); @@ -18280,8 +18247,7 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - sp_4096_cond_add_162(r, r, sd, 0 - ((r[157] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_162(r, r, sd, r[157] >> 31); sp_4096_norm_158(r); sp_4096_rshift_162(r, r, 12); @@ -18291,7 +18257,7 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, r[161] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -18321,13 +18287,15 @@ static int sp_4096_mod_162(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 324]; @@ -18341,11 +18309,17 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 162 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 162 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -18400,20 +18374,19 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(t[0], m, mp); n = sp_4096_cmp_162(t[0], m); - sp_4096_cond_sub_162(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 162 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 324]; @@ -18427,11 +18400,17 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 162 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 162 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -18486,19 +18465,18 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(t[0], m, mp); n = sp_4096_cmp_162(t[0], m); - sp_4096_cond_sub_162(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 162 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 324) + 324]; @@ -18513,11 +18491,17 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 324) + 324), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 324) + 324), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -18610,12 +18594,11 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(rt, m, mp); n = sp_4096_cmp_162(rt, m); - sp_4096_cond_sub_162(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 324); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -18644,7 +18627,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[162 * 5]; @@ -18652,8 +18635,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -18662,7 +18645,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 26) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -18676,7 +18659,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -18691,12 +18674,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_4096_from_bin(a, 162, in, inLen); -#if DIGIT_BIT >= 26 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -18715,7 +18698,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_162(a, a, m); } if (err == MP_OKAY) { - for (i=25; 
i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -18731,21 +18714,20 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_162(r, m, mp); mp = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_162(r, r, m, ~(mp >> 31)); sp_4096_to_bin_162(r, out); *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[162 * 5]; @@ -18753,14 +18735,14 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 512U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 26) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -18774,7 +18756,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -18789,12 +18771,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, m = r + 162 * 2; sp_4096_from_bin(a, 162, in, inLen); -#if DIGIT_BIT >= 26 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -18824,7 +18806,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_162(a, a, m); if (err 
== MP_OKAY) { - for (i=25; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -18840,8 +18822,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_162(r, m, mp); mp = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(mp >> 31)); } } } @@ -18851,7 +18832,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -18886,7 +18867,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[162 * 4]; @@ -18920,7 +18901,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -18945,21 +18926,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 162); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[162 * 4]; @@ -18993,7 +18974,7 @@ int 
sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -19018,14 +18999,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 162); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -19034,7 +19015,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[81 * 8]; @@ -19064,9 +19045,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 81 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -19097,6 +19084,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_79(tmpa); sp_4096_cond_add_81(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[78] >> 31)); sp_4096_cond_add_81(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[78] >> 31)); + sp_4096_norm_81(tmpa); sp_4096_from_mp(qi, 81, qim); sp_4096_mul_81(tmpa, tmpa, qi); @@ -19113,19 +19101,19 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 81 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[81 * 13]; @@ -19156,9 +19144,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 81 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -19195,6 +19189,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_79(tmpa); sp_4096_cond_add_81(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[78] >> 31)); sp_4096_cond_add_81(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[78] >> 31)); + sp_4096_norm_81(tmpa); sp_4096_mul_81(tmpa, tmpa, qi); err = sp_4096_mod_81(tmpa, tmpa, p); } @@ -19208,12 +19203,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 81 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -19239,8 +19234,8 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ #if DIGIT_BIT == 26 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 162); - r->used = 162; + XMEMCPY(r->dp, a, 
sizeof(sp_digit) * 158); + r->used = 158; mp_clamp(r); #elif DIGIT_BIT < 26 int i; @@ -19248,7 +19243,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 162; i++) { + for (i = 0; i < 158; i++) { r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; s = DIGIT_BIT - s; @@ -19273,7 +19268,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 162; i++) { + for (i = 0; i < 158; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; if (s + 26 >= DIGIT_BIT) { #if DIGIT_BIT != 32 && DIGIT_BIT != 64 @@ -19309,7 +19304,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[162 * 4]; @@ -19332,7 +19327,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 4, NULL, DYNAMIC_TYPE_DH); @@ -19357,20 +19352,20 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_4096_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 162U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[162 * 4]; @@ -19394,7 +19389,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } 
-#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -19419,14 +19414,14 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 162U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -19777,11 +19772,13 @@ SP_NOINLINE static void sp_4096_lshift_162(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. 
*/ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[487]; @@ -19796,11 +19793,17 @@ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 487, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 487, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19865,17 +19868,15 @@ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_162(r, r, tmp); sp_4096_norm_162(r); o = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(o >> 31)); } sp_4096_mont_reduce_162(r, m, mp); n = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(n >> 31)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19900,7 +19901,7 @@ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[162 * 4]; @@ -19924,7 +19925,7 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 162 * 4, NULL, DYNAMIC_TYPE_DH); @@ -19965,14 +19966,14 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 162U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -20121,106 +20122,110 @@ SP_NOINLINE static void sp_256_mul_9(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_256_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_int64 t0 = ((sp_int64)a[ 0]) * b[ 0]; - sp_int64 t1 = ((sp_int64)a[ 0]) * b[ 1] - + ((sp_int64)a[ 1]) * b[ 0]; - sp_int64 t2 = ((sp_int64)a[ 0]) * b[ 2] - + ((sp_int64)a[ 1]) * b[ 1] - + ((sp_int64)a[ 2]) * b[ 0]; - sp_int64 t3 = ((sp_int64)a[ 0]) * b[ 3] - + ((sp_int64)a[ 1]) * b[ 2] - + ((sp_int64)a[ 2]) * b[ 1] - + ((sp_int64)a[ 3]) * b[ 0]; - sp_int64 t4 = ((sp_int64)a[ 0]) * b[ 4] 
- + ((sp_int64)a[ 1]) * b[ 3] - + ((sp_int64)a[ 2]) * b[ 2] - + ((sp_int64)a[ 3]) * b[ 1] - + ((sp_int64)a[ 4]) * b[ 0]; - sp_int64 t5 = ((sp_int64)a[ 0]) * b[ 5] - + ((sp_int64)a[ 1]) * b[ 4] - + ((sp_int64)a[ 2]) * b[ 3] - + ((sp_int64)a[ 3]) * b[ 2] - + ((sp_int64)a[ 4]) * b[ 1] - + ((sp_int64)a[ 5]) * b[ 0]; - sp_int64 t6 = ((sp_int64)a[ 0]) * b[ 6] - + ((sp_int64)a[ 1]) * b[ 5] - + ((sp_int64)a[ 2]) * b[ 4] - + ((sp_int64)a[ 3]) * b[ 3] - + ((sp_int64)a[ 4]) * b[ 2] - + ((sp_int64)a[ 5]) * b[ 1] - + ((sp_int64)a[ 6]) * b[ 0]; - sp_int64 t7 = ((sp_int64)a[ 0]) * b[ 7] - + ((sp_int64)a[ 1]) * b[ 6] - + ((sp_int64)a[ 2]) * b[ 5] - + ((sp_int64)a[ 3]) * b[ 4] - + ((sp_int64)a[ 4]) * b[ 3] - + ((sp_int64)a[ 5]) * b[ 2] - + ((sp_int64)a[ 6]) * b[ 1] - + ((sp_int64)a[ 7]) * b[ 0]; - sp_int64 t8 = ((sp_int64)a[ 0]) * b[ 8] - + ((sp_int64)a[ 1]) * b[ 7] - + ((sp_int64)a[ 2]) * b[ 6] - + ((sp_int64)a[ 3]) * b[ 5] - + ((sp_int64)a[ 4]) * b[ 4] - + ((sp_int64)a[ 5]) * b[ 3] - + ((sp_int64)a[ 6]) * b[ 2] - + ((sp_int64)a[ 7]) * b[ 1] - + ((sp_int64)a[ 8]) * b[ 0]; - sp_int64 t9 = ((sp_int64)a[ 1]) * b[ 8] - + ((sp_int64)a[ 2]) * b[ 7] - + ((sp_int64)a[ 3]) * b[ 6] - + ((sp_int64)a[ 4]) * b[ 5] - + ((sp_int64)a[ 5]) * b[ 4] - + ((sp_int64)a[ 6]) * b[ 3] - + ((sp_int64)a[ 7]) * b[ 2] - + ((sp_int64)a[ 8]) * b[ 1]; - sp_int64 t10 = ((sp_int64)a[ 2]) * b[ 8] - + ((sp_int64)a[ 3]) * b[ 7] - + ((sp_int64)a[ 4]) * b[ 6] - + ((sp_int64)a[ 5]) * b[ 5] - + ((sp_int64)a[ 6]) * b[ 4] - + ((sp_int64)a[ 7]) * b[ 3] - + ((sp_int64)a[ 8]) * b[ 2]; - sp_int64 t11 = ((sp_int64)a[ 3]) * b[ 8] - + ((sp_int64)a[ 4]) * b[ 7] - + ((sp_int64)a[ 5]) * b[ 6] - + ((sp_int64)a[ 6]) * b[ 5] - + ((sp_int64)a[ 7]) * b[ 4] - + ((sp_int64)a[ 8]) * b[ 3]; - sp_int64 t12 = ((sp_int64)a[ 4]) * b[ 8] - + ((sp_int64)a[ 5]) * b[ 7] - + ((sp_int64)a[ 6]) * b[ 6] - + ((sp_int64)a[ 7]) * b[ 5] - + ((sp_int64)a[ 8]) * b[ 4]; - sp_int64 t13 = ((sp_int64)a[ 5]) * b[ 8] - + ((sp_int64)a[ 6]) * b[ 7] - + ((sp_int64)a[ 
7]) * b[ 6] - + ((sp_int64)a[ 8]) * b[ 5]; - sp_int64 t14 = ((sp_int64)a[ 6]) * b[ 8] - + ((sp_int64)a[ 7]) * b[ 7] - + ((sp_int64)a[ 8]) * b[ 6]; - sp_int64 t15 = ((sp_int64)a[ 7]) * b[ 8] - + ((sp_int64)a[ 8]) * b[ 7]; - sp_int64 t16 = ((sp_int64)a[ 8]) * b[ 8]; + sp_int64 t0; + sp_int64 t1; + sp_digit t[9]; - t1 += t0 >> 29; r[ 0] = t0 & 0x1fffffff; - t2 += t1 >> 29; r[ 1] = t1 & 0x1fffffff; - t3 += t2 >> 29; r[ 2] = t2 & 0x1fffffff; - t4 += t3 >> 29; r[ 3] = t3 & 0x1fffffff; - t5 += t4 >> 29; r[ 4] = t4 & 0x1fffffff; - t6 += t5 >> 29; r[ 5] = t5 & 0x1fffffff; - t7 += t6 >> 29; r[ 6] = t6 & 0x1fffffff; - t8 += t7 >> 29; r[ 7] = t7 & 0x1fffffff; - t9 += t8 >> 29; r[ 8] = t8 & 0x1fffffff; - t10 += t9 >> 29; r[ 9] = t9 & 0x1fffffff; - t11 += t10 >> 29; r[10] = t10 & 0x1fffffff; - t12 += t11 >> 29; r[11] = t11 & 0x1fffffff; - t13 += t12 >> 29; r[12] = t12 & 0x1fffffff; - t14 += t13 >> 29; r[13] = t13 & 0x1fffffff; - t15 += t14 >> 29; r[14] = t14 & 0x1fffffff; - t16 += t15 >> 29; r[15] = t15 & 0x1fffffff; - r[17] = (sp_digit)(t16 >> 29); - r[16] = t16 & 0x1fffffff; + t0 = ((sp_int64)a[ 0]) * b[ 0]; + t1 = ((sp_int64)a[ 0]) * b[ 1] + + ((sp_int64)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 0]) * b[ 2] + + ((sp_int64)a[ 1]) * b[ 1] + + ((sp_int64)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 0]) * b[ 3] + + ((sp_int64)a[ 1]) * b[ 2] + + ((sp_int64)a[ 2]) * b[ 1] + + ((sp_int64)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 0]) * b[ 4] + + ((sp_int64)a[ 1]) * b[ 3] + + ((sp_int64)a[ 2]) * b[ 2] + + ((sp_int64)a[ 3]) * b[ 1] + + ((sp_int64)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 0]) * b[ 5] + + ((sp_int64)a[ 1]) * b[ 4] + + ((sp_int64)a[ 2]) * b[ 3] + + ((sp_int64)a[ 3]) * b[ 2] + + ((sp_int64)a[ 4]) * b[ 1] + + ((sp_int64)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 0]) * b[ 6] + + ((sp_int64)a[ 
1]) * b[ 5] + + ((sp_int64)a[ 2]) * b[ 4] + + ((sp_int64)a[ 3]) * b[ 3] + + ((sp_int64)a[ 4]) * b[ 2] + + ((sp_int64)a[ 5]) * b[ 1] + + ((sp_int64)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 0]) * b[ 7] + + ((sp_int64)a[ 1]) * b[ 6] + + ((sp_int64)a[ 2]) * b[ 5] + + ((sp_int64)a[ 3]) * b[ 4] + + ((sp_int64)a[ 4]) * b[ 3] + + ((sp_int64)a[ 5]) * b[ 2] + + ((sp_int64)a[ 6]) * b[ 1] + + ((sp_int64)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 0]) * b[ 8] + + ((sp_int64)a[ 1]) * b[ 7] + + ((sp_int64)a[ 2]) * b[ 6] + + ((sp_int64)a[ 3]) * b[ 5] + + ((sp_int64)a[ 4]) * b[ 4] + + ((sp_int64)a[ 5]) * b[ 3] + + ((sp_int64)a[ 6]) * b[ 2] + + ((sp_int64)a[ 7]) * b[ 1] + + ((sp_int64)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 1]) * b[ 8] + + ((sp_int64)a[ 2]) * b[ 7] + + ((sp_int64)a[ 3]) * b[ 6] + + ((sp_int64)a[ 4]) * b[ 5] + + ((sp_int64)a[ 5]) * b[ 4] + + ((sp_int64)a[ 6]) * b[ 3] + + ((sp_int64)a[ 7]) * b[ 2] + + ((sp_int64)a[ 8]) * b[ 1]; + t[ 8] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 2]) * b[ 8] + + ((sp_int64)a[ 3]) * b[ 7] + + ((sp_int64)a[ 4]) * b[ 6] + + ((sp_int64)a[ 5]) * b[ 5] + + ((sp_int64)a[ 6]) * b[ 4] + + ((sp_int64)a[ 7]) * b[ 3] + + ((sp_int64)a[ 8]) * b[ 2]; + r[ 9] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 3]) * b[ 8] + + ((sp_int64)a[ 4]) * b[ 7] + + ((sp_int64)a[ 5]) * b[ 6] + + ((sp_int64)a[ 6]) * b[ 5] + + ((sp_int64)a[ 7]) * b[ 4] + + ((sp_int64)a[ 8]) * b[ 3]; + r[10] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 4]) * b[ 8] + + ((sp_int64)a[ 5]) * b[ 7] + + ((sp_int64)a[ 6]) * b[ 6] + + ((sp_int64)a[ 7]) * b[ 5] + + ((sp_int64)a[ 8]) * b[ 4]; + r[11] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 5]) * b[ 8] + + ((sp_int64)a[ 6]) * b[ 7] + + ((sp_int64)a[ 7]) * b[ 6] + + ((sp_int64)a[ 8]) * b[ 5]; + r[12] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 6]) * b[ 8] + + ((sp_int64)a[ 7]) 
* b[ 7] + + ((sp_int64)a[ 8]) * b[ 6]; + r[13] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = ((sp_int64)a[ 7]) * b[ 8] + + ((sp_int64)a[ 8]) * b[ 7]; + r[14] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 8]) * b[ 8]; + r[15] = t1 & 0x1fffffff; t0 += t1 >> 29; + r[16] = t0 & 0x1fffffff; + r[17] = (sp_digit)(t0 >> 29); + XMEMCPY(r, t, sizeof(t)); } #endif /* WOLFSSL_SP_SMALL */ @@ -20274,70 +20279,74 @@ SP_NOINLINE static void sp_256_sqr_9(sp_digit* r, const sp_digit* a) */ SP_NOINLINE static void sp_256_sqr_9(sp_digit* r, const sp_digit* a) { - sp_int64 t0 = ((sp_int64)a[ 0]) * a[ 0]; - sp_int64 t1 = (((sp_int64)a[ 0]) * a[ 1]) * 2; - sp_int64 t2 = (((sp_int64)a[ 0]) * a[ 2]) * 2 - + ((sp_int64)a[ 1]) * a[ 1]; - sp_int64 t3 = (((sp_int64)a[ 0]) * a[ 3] - + ((sp_int64)a[ 1]) * a[ 2]) * 2; - sp_int64 t4 = (((sp_int64)a[ 0]) * a[ 4] - + ((sp_int64)a[ 1]) * a[ 3]) * 2 - + ((sp_int64)a[ 2]) * a[ 2]; - sp_int64 t5 = (((sp_int64)a[ 0]) * a[ 5] - + ((sp_int64)a[ 1]) * a[ 4] - + ((sp_int64)a[ 2]) * a[ 3]) * 2; - sp_int64 t6 = (((sp_int64)a[ 0]) * a[ 6] - + ((sp_int64)a[ 1]) * a[ 5] - + ((sp_int64)a[ 2]) * a[ 4]) * 2 - + ((sp_int64)a[ 3]) * a[ 3]; - sp_int64 t7 = (((sp_int64)a[ 0]) * a[ 7] - + ((sp_int64)a[ 1]) * a[ 6] - + ((sp_int64)a[ 2]) * a[ 5] - + ((sp_int64)a[ 3]) * a[ 4]) * 2; - sp_int64 t8 = (((sp_int64)a[ 0]) * a[ 8] - + ((sp_int64)a[ 1]) * a[ 7] - + ((sp_int64)a[ 2]) * a[ 6] - + ((sp_int64)a[ 3]) * a[ 5]) * 2 - + ((sp_int64)a[ 4]) * a[ 4]; - sp_int64 t9 = (((sp_int64)a[ 1]) * a[ 8] - + ((sp_int64)a[ 2]) * a[ 7] - + ((sp_int64)a[ 3]) * a[ 6] - + ((sp_int64)a[ 4]) * a[ 5]) * 2; - sp_int64 t10 = (((sp_int64)a[ 2]) * a[ 8] - + ((sp_int64)a[ 3]) * a[ 7] - + ((sp_int64)a[ 4]) * a[ 6]) * 2 - + ((sp_int64)a[ 5]) * a[ 5]; - sp_int64 t11 = (((sp_int64)a[ 3]) * a[ 8] - + ((sp_int64)a[ 4]) * a[ 7] - + ((sp_int64)a[ 5]) * a[ 6]) * 2; - sp_int64 t12 = (((sp_int64)a[ 4]) * a[ 8] - + ((sp_int64)a[ 5]) * a[ 7]) * 2 - + ((sp_int64)a[ 6]) * a[ 6]; - sp_int64 t13 = (((sp_int64)a[ 
5]) * a[ 8] - + ((sp_int64)a[ 6]) * a[ 7]) * 2; - sp_int64 t14 = (((sp_int64)a[ 6]) * a[ 8]) * 2 - + ((sp_int64)a[ 7]) * a[ 7]; - sp_int64 t15 = (((sp_int64)a[ 7]) * a[ 8]) * 2; - sp_int64 t16 = ((sp_int64)a[ 8]) * a[ 8]; + sp_int64 t0; + sp_int64 t1; + sp_digit t[9]; - t1 += t0 >> 29; r[ 0] = t0 & 0x1fffffff; - t2 += t1 >> 29; r[ 1] = t1 & 0x1fffffff; - t3 += t2 >> 29; r[ 2] = t2 & 0x1fffffff; - t4 += t3 >> 29; r[ 3] = t3 & 0x1fffffff; - t5 += t4 >> 29; r[ 4] = t4 & 0x1fffffff; - t6 += t5 >> 29; r[ 5] = t5 & 0x1fffffff; - t7 += t6 >> 29; r[ 6] = t6 & 0x1fffffff; - t8 += t7 >> 29; r[ 7] = t7 & 0x1fffffff; - t9 += t8 >> 29; r[ 8] = t8 & 0x1fffffff; - t10 += t9 >> 29; r[ 9] = t9 & 0x1fffffff; - t11 += t10 >> 29; r[10] = t10 & 0x1fffffff; - t12 += t11 >> 29; r[11] = t11 & 0x1fffffff; - t13 += t12 >> 29; r[12] = t12 & 0x1fffffff; - t14 += t13 >> 29; r[13] = t13 & 0x1fffffff; - t15 += t14 >> 29; r[14] = t14 & 0x1fffffff; - t16 += t15 >> 29; r[15] = t15 & 0x1fffffff; - r[17] = (sp_digit)(t16 >> 29); - r[16] = t16 & 0x1fffffff; + t0 = ((sp_int64)a[ 0]) * a[ 0]; + t1 = (((sp_int64)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 0]) * a[ 2]) * 2 + + ((sp_int64)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 0]) * a[ 3] + + ((sp_int64)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 0]) * a[ 4] + + ((sp_int64)a[ 1]) * a[ 3]) * 2 + + ((sp_int64)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 0]) * a[ 5] + + ((sp_int64)a[ 1]) * a[ 4] + + ((sp_int64)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 0]) * a[ 6] + + ((sp_int64)a[ 1]) * a[ 5] + + ((sp_int64)a[ 2]) * a[ 4]) * 2 + + ((sp_int64)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 0]) * a[ 7] + + ((sp_int64)a[ 1]) * a[ 6] + + ((sp_int64)a[ 2]) * a[ 5] + + ((sp_int64)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1fffffff; t1 
+= t0 >> 29; + t0 = (((sp_int64)a[ 0]) * a[ 8] + + ((sp_int64)a[ 1]) * a[ 7] + + ((sp_int64)a[ 2]) * a[ 6] + + ((sp_int64)a[ 3]) * a[ 5]) * 2 + + ((sp_int64)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 1]) * a[ 8] + + ((sp_int64)a[ 2]) * a[ 7] + + ((sp_int64)a[ 3]) * a[ 6] + + ((sp_int64)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 2]) * a[ 8] + + ((sp_int64)a[ 3]) * a[ 7] + + ((sp_int64)a[ 4]) * a[ 6]) * 2 + + ((sp_int64)a[ 5]) * a[ 5]; + r[ 9] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 3]) * a[ 8] + + ((sp_int64)a[ 4]) * a[ 7] + + ((sp_int64)a[ 5]) * a[ 6]) * 2; + r[10] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 4]) * a[ 8] + + ((sp_int64)a[ 5]) * a[ 7]) * 2 + + ((sp_int64)a[ 6]) * a[ 6]; + r[11] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 5]) * a[ 8] + + ((sp_int64)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = (((sp_int64)a[ 6]) * a[ 8]) * 2 + + ((sp_int64)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1fffffff; t0 += t1 >> 29; + t1 = (((sp_int64)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1fffffff; t1 += t0 >> 29; + t0 = ((sp_int64)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1fffffff; t0 += t1 >> 29; + r[16] = t0 & 0x1fffffff; + r[17] = (sp_digit)(t0 >> 29); + XMEMCPY(r, t, sizeof(t)); } #endif /* WOLFSSL_SP_SMALL */ @@ -20435,20 +20444,23 @@ SP_NOINLINE static int sp_256_sub_9(sp_digit* r, const sp_digit* a, static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 29 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 28); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 28); } #elif DIGIT_BIT > 29 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < 
(unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffff; s = 29U - s; @@ -20478,12 +20490,12 @@ static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 29) { r[j] &= 0x1fffffff; @@ -20618,8 +20630,6 @@ static int sp_256_point_to_ecc_point_9(const sp_point_256* p, ecc_point* pm) return err; } -#define sp_256_mont_reduce_order_9 sp_256_mont_reduce_9 - /* Compare a with b in constant time. * * a A single precision integer. @@ -20634,18 +20644,18 @@ static sp_digit sp_256_cmp_9(const sp_digit* a, const sp_digit* b) int i; for (i=8; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)1); - r |= (a[ 7] - b[ 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 28); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -20829,40 +20839,106 @@ static void sp_256_mont_shift_9(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_256_mont_reduce_9(sp_digit* a, const sp_digit* m, sp_digit mp) +static void sp_256_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; - if (mp != 1) { - for (i=0; i<8; i++) { - mu = (a[i] * mp) & 0x1fffffff; - sp_256_mul_add_9(a+i, m, mu); - a[i+1] += a[i] >> 29; - } - mu = (a[i] * mp) & 0xffffffL; + sp_256_norm_9(a + 9); + + for (i=0; i<8; i++) { + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffff; sp_256_mul_add_9(a+i, m, mu); a[i+1] += a[i] >> 29; - a[i] &= 0x1fffffff; } - else { - for (i=0; i<8; i++) { - mu = a[i] & 0x1fffffff; - sp_256_mul_add_9(a+i, p256_mod, mu); - a[i+1] += a[i] >> 29; - } - mu = a[i] & 0xffffffL; - sp_256_mul_add_9(a+i, p256_mod, mu); - a[i+1] += a[i] >> 29; - a[i] &= 0x1fffffff; - } - + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xffffffL; + sp_256_mul_add_9(a+i, m, mu); + a[i+1] += a[i] >> 29; + a[i] &= 0x1fffffff; sp_256_mont_shift_9(a, a); - sp_256_cond_sub_9(a, a, m, 0 - (((a[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[8] >> 24; + sp_256_cond_sub_9(a, a, m, ~((over - 1) >> 31)); sp_256_norm_9(a); } +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. 
+ * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_256_mont_reduce_9(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit am; + + (void)m; + (void)mp; + + for (i = 0; i < 8; i++) { + am = a[i] & 0x1fffffff; + a[i + 3] += (am << 9) & 0x1fffffff; + a[i + 4] += am >> 20; + a[i + 6] += (am << 18) & 0x1fffffff; + a[i + 7] += (am >> 11) - ((am << 21) & 0x1fffffff); + a[i + 8] += -(am >> 8) + ((am << 24) & 0x1fffffff); + a[i + 9] += am >> 5; + + a[i + 1] += a[i] >> 29; + } + am = a[8] & 0xffffff; + a[8 + 3] += (am << 9) & 0x1fffffff; + a[8 + 4] += am >> 20; + a[8 + 6] += (am << 18) & 0x1fffffff; + a[8 + 7] += (am >> 11) - ((am << 21) & 0x1fffffff); + a[8 + 8] += -(am >> 8) + ((am << 24) & 0x1fffffff); + a[8 + 9] += am >> 5; + + a[0] = (a[ 8] >> 24) + ((a[ 9] << 5) & 0x1fffffff); + a[1] = (a[ 9] >> 24) + ((a[10] << 5) & 0x1fffffff); + a[2] = (a[10] >> 24) + ((a[11] << 5) & 0x1fffffff); + a[3] = (a[11] >> 24) + ((a[12] << 5) & 0x1fffffff); + a[4] = (a[12] >> 24) + ((a[13] << 5) & 0x1fffffff); + a[5] = (a[13] >> 24) + ((a[14] << 5) & 0x1fffffff); + a[6] = (a[14] >> 24) + ((a[15] << 5) & 0x1fffffff); + a[7] = (a[15] >> 24) + ((a[16] << 5) & 0x1fffffff); + a[8] = (a[16] >> 24) + (a[17] << 5); + + a[1] += a[0] >> 29; a[0] &= 0x1fffffff; + a[2] += a[1] >> 29; a[1] &= 0x1fffffff; + a[3] += a[2] >> 29; a[2] &= 0x1fffffff; + a[4] += a[3] >> 29; a[3] &= 0x1fffffff; + a[5] += a[4] >> 29; a[4] &= 0x1fffffff; + a[6] += a[5] >> 29; a[5] &= 0x1fffffff; + a[7] += a[6] >> 29; a[6] &= 0x1fffffff; + a[8] += a[7] >> 29; a[7] &= 0x1fffffff; + + /* Get the bit over, if any. */ + am = a[8] >> 24; + /* Create mask. 
*/ + am = 0 - am; + + a[0] -= 0x1fffffff & am; + a[1] -= 0x1fffffff & am; + a[2] -= 0x1fffffff & am; + a[3] -= 0x000001ff & am; + /* p256_mod[4] is zero */ + /* p256_mod[5] is zero */ + a[6] -= 0x00040000 & am; + a[7] -= 0x1fe00000 & am; + a[8] -= 0x00ffffff & am; + + a[1] += a[0] >> 29; a[0] &= 0x1fffffff; + a[2] += a[1] >> 29; a[1] &= 0x1fffffff; + a[3] += a[2] >> 29; a[2] &= 0x1fffffff; + a[4] += a[3] >> 29; a[3] &= 0x1fffffff; + a[5] += a[4] >> 29; a[4] &= 0x1fffffff; + a[6] += a[5] >> 29; a[5] &= 0x1fffffff; + a[7] += a[6] >> 29; a[6] &= 0x1fffffff; + a[8] += a[7] >> 29; a[7] &= 0x1fffffff; +} + /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -20870,9 +20946,9 @@ static void sp_256_mont_reduce_9(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_mul_9(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_256_mont_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_256_mul_9(r, a, b); @@ -20884,9 +20960,9 @@ static void sp_256_mont_mul_9(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_9(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_256_mont_sqr_9(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_256_sqr_9(r, a); @@ -20900,10 +20976,10 @@ static void sp_256_mont_sqr_9(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_256_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_9(r, a, m, mp); for (; n > 1; n--) { @@ -20911,7 +20987,7 @@ static void sp_256_mont_sqr_n_9(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P256 curve. */ static const uint32_t p256_mod_minus_2[8] = { @@ -21009,27 +21085,24 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_9(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_256_mont_reduce_9(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_9(r->x, p256_mod); - sp_256_cond_sub_9(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_9(r->x, r->x, p256_mod, ~(n >> 28)); sp_256_norm_9(r->x); /* y /= z^3 */ sp_256_mont_mul_9(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_256_mont_reduce_9(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_9(r->y, p256_mod); - sp_256_cond_sub_9(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_9(r->y, r->y, p256_mod, ~(n >> 28)); sp_256_norm_9(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). 
@@ -21042,10 +21115,11 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, static void sp_256_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, b); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } @@ -21057,10 +21131,11 @@ static void sp_256_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_256_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } @@ -21072,18 +21147,20 @@ static void sp_256_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_256_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); (void)sp_256_add_9(r, r, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -21095,13 +21172,26 @@ static void sp_256_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) static void sp_256_cond_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 9; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_256_cond_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ r[ 0] = a[ 0] + (b[ 0] & m); r[ 1] = a[ 1] + (b[ 1] & m); r[ 2] = a[ 2] + (b[ 2] & m); @@ -21111,8 +21201,8 @@ static void sp_256_cond_add_9(sp_digit* r, const sp_digit* a, r[ 6] = a[ 6] + (b[ 6] & m); r[ 7] = a[ 7] + (b[ 7] & m); r[ 8] = a[ 8] + (b[ 8] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * @@ -21163,7 +21253,8 @@ SP_NOINLINE static void sp_256_rshift1_9(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_256_cond_add_9(r, a, m, 0 - (a[0] & 1)); sp_256_norm_9(r); @@ -21176,6 +21267,61 @@ static void sp_256_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_9(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_9(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_9(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_9(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_9(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_9(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_9(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_9(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_9(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_9(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_mont_div2_9(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_9(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_9(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_9(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_9(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_9(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_9(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_9(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_9_ctx { int state; @@ -21186,7 +21332,14 @@ typedef struct sp_256_proj_point_dbl_9_ctx { sp_digit* z; } sp_256_proj_point_dbl_9_ctx; -static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_9_ctx* ctx = (sp_256_proj_point_dbl_9_ctx*)sp_ctx->data; @@ -21260,7 +21413,7 @@ static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_9(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_9(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -21310,61 +21463,6 @@ static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_9(sp_point_256* r, const sp_point_256* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_9(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_9(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_9(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_9(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_9(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_9(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_9(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_9(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_9(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_9(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_9(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_9(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_9(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_9(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_9(y, y, x, p256_mod); - /* Y = Y * T1 
*/ - sp_256_mont_mul_9(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_9(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -21379,6 +21477,19 @@ static int sp_256_cmp_equal_9(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_9(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -21386,6 +21497,84 @@ static int sp_256_cmp_equal_9(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_9(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*9; + sp_digit* t2 = t + 4*9; + sp_digit* t3 = t + 6*9; + sp_digit* t4 = t + 8*9; + sp_digit* t5 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_9(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(t2, t1) & + sp_256_cmp_equal_9(t4, t3)) { + sp_256_proj_point_dbl_9(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_256_mont_sub_9(t2, t2, t1, p256_mod); + /* R = S2 
- S1 */ + sp_256_mont_sub_9(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(x, x, t5, p256_mod); + sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_9(t3, y, p256_mod); + sp_256_mont_sub_9(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_9(y, y, x, p256_mod); + sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(y, y, t5, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_9_ctx { @@ -21398,11 +21587,19 @@ typedef struct sp_256_proj_point_add_9_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_256_proj_point_add_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_256_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -21421,261 +21618,168 @@ static int sp_256_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*9; - ctx->t3 = t + 4*9; - ctx->t4 = t + 6*9; - ctx->t5 = t + 8*9; + ctx->t6 = t; + ctx->t1 = t + 2*9; + ctx->t2 = t + 4*9; + ctx->t3 = t + 6*9; + ctx->t4 = t + 8*9; + ctx->t5 = t + 10*9; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_9(ctx->t1, p256_mod, q->y); - sp_256_norm_9(ctx->t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_256_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<9; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<9; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<9; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_256_mont_sqr_9(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; + break; + case 2: + sp_256_mont_mul_9(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; + break; + 
case 3: + sp_256_mont_mul_9(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_9(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_9(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_9(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_9(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_9(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(ctx->t2, ctx->t1) & + sp_256_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_9(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_9(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_9(ctx->t4, 
ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_9(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_9(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_9(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - sp_256_mont_sqr_9(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_9(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_dbl_9(ctx->t1, ctx->y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_9(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t1, p256_mod); + sp_256_mont_mul_9(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_9(ctx->y, ctx->y, ctx->x, p256_mod); + sp_256_mont_sub_9(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - sp_256_mont_mul_9(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); + { + { + int i; + 
sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_9(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_9(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_256* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_256_sub_9(t1, p256_mod, q->y); - sp_256_norm_9(t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { - sp_256_proj_point_dbl_9(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<9; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<9; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<9; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t1, t1, x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_9(t3, t3, y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_9(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_9(t4, t4, t3, p256_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(z, z, t2, p256_mod, p256_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(x, x, t5, p256_mod); - sp_256_mont_dbl_9(t1, y, 
p256_mod); - sp_256_mont_sub_9(x, x, t1, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_9(y, y, x, p256_mod); - sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(y, y, t5, p256_mod); - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -21685,7 +21789,7 @@ static void sp_256_proj_point_add_9(sp_point_256* r, */ static int sp_256_mod_mul_norm_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[2 * 8]; @@ -21696,7 +21800,7 @@ static int sp_256_mod_mul_norm_9(sp_digit* r, const sp_digit* a, const sp_digit* (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) return MEMORY_E; @@ -21792,7 +21896,7 @@ static int sp_256_mod_mul_norm_9(sp_digit* r, const sp_digit* a, const sp_digit* r[8] = (sp_digit)(t[7] >> 8U); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -21816,6 +21920,108 @@ static int sp_256_mod_mul_norm_9(sp_digit* r, const sp_digit* a, const sp_digit* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_256 t[3]; + sp_digit tmp[2 * 9 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_9(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_9(t[1].y, g->y, p256_mod); + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_9(t[1].z, g->z, p256_mod); + + if (err == MP_OKAY) { + i = 8; + c = 24; + n = k[i--] << (29 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 29; + } + + y = (n >> 28) & 1; + n <<= 1; + + sp_256_proj_point_add_9(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_256)); + sp_256_proj_point_dbl_9(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_256)); + } + + if (map != 0) { + sp_256_map_9(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_256)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 9 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_256) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_ecc_mulmod_9_ctx { @@ -21825,7 +22031,7 @@ typedef struct sp_256_ecc_mulmod_9_ctx { sp_256_proj_point_add_9_ctx add_ctx; }; sp_point_256 t[3]; - sp_digit tmp[2 * 9 * 5]; + sp_digit 
tmp[2 * 9 * 6]; sp_digit n; int i; int c; @@ -21931,109 +22137,6 @@ static int sp_256_ecc_mulmod_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_256 t[3]; - sp_digit tmp[2 * 9 * 5]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 5, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_256) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_256_mod_mul_norm_9(t[1].x, g->x, p256_mod); - } - if (err == MP_OKAY) - err = sp_256_mod_mul_norm_9(t[1].y, g->y, p256_mod); - if (err == MP_OKAY) - err = sp_256_mod_mul_norm_9(t[1].z, g->z, p256_mod); - - if (err == MP_OKAY) { - i = 8; - c = 24; - n = k[i--] << (29 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 29; - } - - y = (n >> 28) & 1; - n <<= 1; - - sp_256_proj_point_add_9(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_256)); - sp_256_proj_point_dbl_9(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_256)); - } - - if (map != 0) { - sp_256_map_9(r, &t[0], tmp); - } - else { - XMEMCPY(r, &t[0], sizeof(sp_point_256)); - } - } - -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 9 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_256) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_256 { @@ -22089,7 +22192,7 @@ static void sp_256_cond_copy_9(sp_digit* r, const sp_digit* a, const sp_digit m) * n Number of times to double * t Temporary ordinate data. */ -static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, +static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, sp_digit* t) { sp_digit* w = t; @@ -22100,6 +22203,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -22110,7 +22214,6 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, /* W = Z^4 */ sp_256_mont_sqr_9(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_9(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -22128,9 +22231,12 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_9(t2, b, p256_mod); sp_256_mont_sub_9(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_9(t2, b, x, p256_mod); + sp_256_mont_dbl_9(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_9(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_9(t1, t1, p256_mod, p256_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -22140,9 +22246,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, sp_256_mont_mul_9(w, w, t1, p256_mod, p256_mp_mod); } 
/* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_9(y, b, x, p256_mod); - sp_256_mont_mul_9(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(y, y, p256_mod); + sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_9(y, y, t1, p256_mod); } #ifndef WOLFSSL_SP_SMALL @@ -22157,18 +22261,19 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int n, sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_9(t2, b, p256_mod); sp_256_mont_sub_9(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_9(t2, b, x, p256_mod); + sp_256_mont_dbl_9(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_9(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_9(t1, t1, p256_mod, p256_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_9(y, b, x, p256_mod); - sp_256_mont_mul_9(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(y, y, p256_mod); + sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_9(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_9(y, y, p256_mod); + sp_256_mont_div2_9(y, y, p256_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -22216,30 +22321,30 @@ static void sp_256_proj_point_dbl_n_store_9(sp_point_256* r, sp_256_mont_sub_9(t1, t1, w, p256_mod); sp_256_mont_tpl_9(a, t1, p256_mod); /* B = X*Y^2 */ - sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(b, t2, x, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(b, t1, x, p256_mod, p256_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(t1, b, p256_mod); - sp_256_mont_sub_9(x, x, t1, p256_mod); + sp_256_mont_dbl_9(t2, b, p256_mod); + sp_256_mont_sub_9(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_9(t2, b, x, p256_mod); + sp_256_mont_dbl_9(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_9(r[j].z, z, y, p256_mod, p256_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_256_mont_sqr_9(t2, t2, p256_mod, p256_mp_mod); + /* t1 = Y^4 */ + sp_256_mont_sqr_9(t1, t1, p256_mod, p256_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_256_mont_mul_9(w, w, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(w, w, t1, p256_mod, p256_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_9(y, b, x, p256_mod); - sp_256_mont_mul_9(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(y, y, p256_mod); - sp_256_mont_sub_9(y, y, t2, p256_mod); - + sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_9(r[j].y, y, p256_mod); + sp_256_mont_div2_9(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -22262,30 +22367,30 @@ static void sp_256_proj_point_add_sub_9(sp_point_256* ra, sp_digit* t4 = t + 6*9; sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, 
sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t1, t1, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t1, t1, xa, p256_mod, p256_mp_mod); /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(t2, za, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t4, t2, za, p256_mod, p256_mp_mod); sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_9(t3, t3, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t3, t3, ya, p256_mod, p256_mp_mod); /* S2 = Y2*Z1^3 */ sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - U1 */ @@ -22296,30 +22401,30 @@ static void sp_256_proj_point_add_sub_9(sp_point_256* ra, sp_256_mont_sub_9(t4, t4, t3, p256_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(z, z, t2, p256_mod, p256_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_256_mont_mul_9(za, za, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(za, za, t2, p256_mod, p256_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(xa, t4, p256_mod, p256_mp_mod); sp_256_mont_sqr_9(xs, t6, p256_mod, p256_mp_mod); sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ya, t1, t5, p256_mod, p256_mp_mod); sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(x, x, t5, p256_mod); + sp_256_mont_sub_9(xa, xa, t5, p256_mod); sp_256_mont_sub_9(xs, xs, t5, p256_mod); - sp_256_mont_dbl_9(t1, y, p256_mod); - sp_256_mont_sub_9(x, x, t1, p256_mod); + sp_256_mont_dbl_9(t1, 
ya, p256_mod); + sp_256_mont_sub_9(xa, xa, t1, p256_mod); sp_256_mont_sub_9(xs, xs, t1, p256_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_256_mont_sub_9(ys, y, xs, p256_mod); - sp_256_mont_sub_9(y, y, x, p256_mod); - sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(ys, ya, xs, p256_mod); + sp_256_mont_sub_9(ya, ya, xa, p256_mod); + sp_256_mont_mul_9(ya, ya, t4, p256_mod, p256_mp_mod); sp_256_sub_9(t6, p256_mod, t6); sp_256_mont_mul_9(ys, ys, t6, p256_mod, p256_mp_mod); sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(y, y, t5, p256_mod); + sp_256_mont_sub_9(ya, ya, t5, p256_mod); sp_256_mont_sub_9(ys, ys, t5, p256_mod); } @@ -22398,7 +22503,7 @@ static void sp_256_ecc_recode_6_9(const sp_digit* k, ecc_recode_256* v) /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_256_get_point_33_9(sp_point_256* r, const sp_point_256* table, @@ -22487,7 +22592,7 @@ static void sp_256_get_point_33_9(sp_point_256* r, const sp_point_256* table, static int sp_256_ecc_mulmod_win_add_sub_9(sp_point_256* r, const sp_point_256* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; sp_digit* tmp = NULL; #else @@ -22505,8 +22610,8 @@ static int sp_256_ecc_mulmod_win_add_sub_9(sp_point_256* r, const sp_point_256* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -22601,7 +22706,7 @@ static int sp_256_ecc_mulmod_win_add_sub_9(sp_point_256* r, const sp_point_256* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -22622,76 +22727,75 @@ static int sp_256_ecc_mulmod_win_add_sub_9(sp_point_256* r, const sp_point_256* * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_256_proj_point_add_qz1_9(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_9(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*9; + sp_digit* t6 = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t4 = t + 8*9; + sp_digit* t5 = t + 10*9; - /* Check double */ - (void)sp_256_sub_9(t1, p256_mod, q->y); - sp_256_norm_9(t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(p->x, t2) & + sp_256_cmp_equal_9(p->y, t4)) { sp_256_proj_point_dbl_9(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<9; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<9; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<9; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(t2, z, p256_mod, 
p256_mp_mod); - sp_256_mont_mul_9(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ - sp_256_mont_sub_9(t2, t2, x, p256_mod); + sp_256_mont_sub_9(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ - sp_256_mont_sub_9(t4, t4, y, p256_mod); + sp_256_mont_sub_9(t4, t4, p->y, p256_mod); /* Z3 = H*Z1 */ - sp_256_mont_mul_9(z, z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_256_mont_sqr_9(t1, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t3, x, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(x, t1, t5, p256_mod); - sp_256_mont_dbl_9(t1, t3, p256_mod); - sp_256_mont_sub_9(x, x, t1, p256_mod); + sp_256_mont_sqr_9(t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t3, p->x, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t1, t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(t2, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(t2, t2, t1, p256_mod); + sp_256_mont_dbl_9(t5, t3, p256_mod); + sp_256_mont_sub_9(x, t2, t5, p256_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_256_mont_sub_9(t3, t3, x, p256_mod); sp_256_mont_mul_9(t3, t3, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, y, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(y, t3, t5, p256_mod); + sp_256_mont_mul_9(t1, t1, p->y, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(y, t3, t1, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); 
+ } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -22732,7 +22836,7 @@ static void sp_256_proj_to_affine_9(sp_point_256* a, sp_digit* t) static int sp_256_gen_stripe_table_9(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -22745,7 +22849,7 @@ static int sp_256_gen_stripe_table_9(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -22800,7 +22904,7 @@ static int sp_256_gen_stripe_table_9(const sp_point_256* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -22813,7 +22917,7 @@ static int sp_256_gen_stripe_table_9(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_256_get_entry_256_9(sp_point_256* r, @@ -22883,12 +22987,12 @@ static int sp_256_ecc_mulmod_stripe_9(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else sp_point_256 rt[2]; - sp_digit t[2 * 9 * 5]; + sp_digit t[2 * 9 * 6]; #endif sp_point_256* p = NULL; int i; @@ -22903,13 +23007,13 @@ static int sp_256_ecc_mulmod_stripe_9(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -22969,7 +23073,7 @@ static int sp_256_ecc_mulmod_stripe_9(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -23012,7 +23116,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -23083,23 +23187,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 9 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 9 * 6]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -23120,6 +23237,9 @@ static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -23138,7 +23258,7 @@ static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, const sp_ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -23147,7 +23267,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ 
-23170,7 +23290,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_256_point_to_ecc_point_9(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -23185,7 +23305,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -23195,25 +23315,25 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[9 + 9 * 2 * 5]; + sp_digit k[9 + 9 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (9 + 9 * 2 * 5), heap, + sizeof(sp_digit) * (9 + 9 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -23250,7 +23370,7 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, err = sp_256_point_to_ecc_point_9(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -23277,6 +23397,16 @@ static int 
sp_256_ecc_mulmod_base_9(sp_point_256* r, const sp_digit* k, return sp_256_ecc_mulmod_9(r, &p256_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_256_ecc_mulmod_base_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_9_nb(sp_ctx, r, &p256_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -24598,7 +24728,7 @@ static int sp_256_ecc_mulmod_base_9(sp_point_256* r, const sp_digit* k, */ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -24607,7 +24737,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -24629,7 +24759,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_256_point_to_ecc_point_9(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -24643,7 +24773,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -24653,25 +24783,25 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[9 + 9 * 2 * 5]; + sp_digit k[9 + 9 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (9 + 9 * 2 * 5), + sizeof(sp_digit) * (9 + 9 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -24707,7 +24837,7 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, err = sp_256_point_to_ecc_point_9(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -24719,18 +24849,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_9(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -24794,7 +24912,7 @@ static int sp_256_ecc_gen_k_9(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_256_from_bin(k, 9, buf, (int)sizeof(buf)); - if (sp_256_cmp_9(k, p256_order2) < 0) { + if (sp_256_cmp_9(k, p256_order2) <= 0) { sp_256_add_one_9(k); break; } @@ -24816,7 +24934,7 @@ static int sp_256_ecc_gen_k_9(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -24831,15 +24949,15 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_256* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -24880,7 +24998,7 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_256_point_to_ecc_point_9(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -24892,6 +25010,84 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_256_ctx { + int state; + sp_256_ecc_mulmod_9_ctx mulmod_ctx; + sp_digit k[9]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 point[2]; +#else + sp_point_256 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_256_ctx; + +int 
sp_ecc_make_key_256_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_256_ctx* ctx = (sp_ecc_key_gen_256_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_256_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_256_ecc_gen_k_9(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_256_ecc_mulmod_base_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_256_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p256_order, 1, 1); + if (err == MP_OKAY) { + if (sp_256_iszero_9(ctx->point->x) || + sp_256_iszero_9(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_256_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_9(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 32 @@ -24910,7 +25106,7 @@ static void sp_256_to_bin_9(sp_digit* r, byte* a) r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - j = 256 / 8 - 1; + j = 263 / 8 - 1; a[j] = 0; for (i=0; i<9 && j>=0; i++) { b = 0; @@ -24952,7 +25148,7 @@ static void sp_256_to_bin_9(sp_digit* r, byte* a) int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -24965,7 +25161,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); @@ -24990,7 +25186,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 32; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -24999,6 +25195,56 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_256_ctx { + int state; + union { + sp_256_ecc_mulmod_9_ctx mulmod_ctx; + }; + sp_digit k[9]; + sp_point_256 point; +} sp_ecc_sec_gen_256_ctx; + +int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_256_ctx* ctx = (sp_ecc_sec_gen_256_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_256_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_256_from_mp(ctx->k, 9, priv); + sp_256_point_from_ecc_point_9(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_256_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_256_to_bin_9(ctx->point.x, out); + *outLen = 32; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -25014,7 +25260,7 @@ SP_NOINLINE static void sp_256_rshift_9(sp_digit* r, const sp_digit* a, r[i] = ((a[i] >> n) | (a[i + 1] << (29 - n))) & 0x1fffffff; } #else - for (i=0; i<0; i += 8) { + for (i=0; i<8; i += 8) { r[i+0] = (a[i+0] >> n) | ((a[i+1] << (29 - n)) & 0x1fffffff); r[i+1] = (a[i+1] >> n) | ((a[i+2] << (29 - n)) & 0x1fffffff); r[i+2] = (a[i+2] >> n) | ((a[i+3] << (29 - n)) & 0x1fffffff); @@ -25024,14 +25270,6 @@ SP_NOINLINE static void sp_256_rshift_9(sp_digit* r, const sp_digit* a, r[i+6] = (a[i+6] >> n) | ((a[i+7] << (29 - n)) & 0x1fffffff); r[i+7] = (a[i+7] >> n) | ((a[i+8] << (29 - n)) & 0x1fffffff); } - r[0] = (a[0] >> n) | ((a[1] << (29 - n)) & 0x1fffffff); - r[1] = (a[1] >> n) | ((a[2] << (29 - n)) & 0x1fffffff); - r[2] = (a[2] >> n) | ((a[3] << (29 - n)) & 0x1fffffff); - r[3] = (a[3] >> n) | ((a[4] << (29 - n)) & 0x1fffffff); - r[4] = (a[4] >> n) | ((a[5] << (29 - n)) & 0x1fffffff); - r[5] = (a[5] >> n) | ((a[6] << (29 - n)) & 0x1fffffff); - r[6] = (a[6] >> n) | ((a[7] << (29 - n)) & 0x1fffffff); - r[7] = (a[7] >> n) | ((a[8] << (29 - n)) & 0x1fffffff); #endif /* WOLFSSL_SP_SMALL */ r[8] = a[8] >> n; } @@ -25153,7 +25391,7 @@ static int sp_256_div_9(const sp_digit* a, const sp_digit* d, int i; sp_digit r1; 
sp_digit mask; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 9 + 3]; @@ -25164,7 +25402,7 @@ static int sp_256_div_9(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 9 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -25188,8 +25426,7 @@ static int sp_256_div_9(const sp_digit* a, const sp_digit* d, t1[9 + i] -= t2[9]; sp_256_norm_9(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[9 + i] > 0) ? - (sp_digit)1 : (sp_digit)0); + mask = ~((t1[9 + i] - 1) >> 31); sp_256_cond_sub_9(t1 + i, t1 + i, sd, mask); sp_256_norm_9(&t1[i + 1]); } @@ -25197,7 +25434,7 @@ static int sp_256_div_9(const sp_digit* a, const sp_digit* d, sp_256_rshift_9(r, t1, 5); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -25219,6 +25456,19 @@ static int sp_256_mod_9(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_256_mont_mul_order_9(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_9(r, a, b); + sp_256_mont_reduce_order_9(r, p256_order, p256_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P256 curve. */ static const uint32_t p256_order_minus_2[8] = { @@ -25232,18 +25482,6 @@ static const sp_int_digit p256_order_low[4] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P256 curve. 
(r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_256_mont_mul_order_9(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_256_mul_9(r, a, b); - sp_256_mont_reduce_order_9(r, p256_order, p256_mp_order); -} - /* Square number mod the order of P256 curve. (r = a * a mod order) * * r Result of the squaring. @@ -25354,7 +25592,7 @@ static void sp_256_mont_inv_order_9(sp_digit* r, const sp_digit* a, sp_256_mont_sqr_n_order_9(t2, t3, 4); /* t = a^ff = t2 * t3 */ sp_256_mont_mul_order_9(t, t2, t3); - /* t3= a^ff00 = t ^ 2 ^ 8 */ + /* t2= a^ff00 = t ^ 2 ^ 8 */ sp_256_mont_sqr_n_order_9(t2, t, 8); /* t = a^ffff = t2 * t */ sp_256_mont_mul_order_9(t, t2, t); @@ -25371,7 +25609,11 @@ static void sp_256_mont_inv_order_9(sp_digit* r, const sp_digit* a, /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */ sp_256_mont_mul_order_9(t2, t2, t); /* t2= a^ffffffff00000000ffffffffffffffffbce6 */ - for (i=127; i>=112; i--) { + sp_256_mont_sqr_order_9(t2, t2); + sp_256_mont_mul_order_9(t2, t2, a); + sp_256_mont_sqr_n_order_9(t2, t2, 5); + sp_256_mont_mul_order_9(t2, t2, t3); + for (i=121; i>=112; i--) { sp_256_mont_sqr_order_9(t2, t2); if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { sp_256_mont_mul_order_9(t2, t2, a); @@ -25414,6 +25656,7 @@ static void sp_256_mont_inv_order_9(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -25488,6 +25731,128 @@ static int sp_256_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. 
*/ +int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_256* point = NULL; +#else + sp_digit e[7 * 2 * 9]; + sp_point_256 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 9, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 9; + k = e + 4 * 9; + r = e + 6 * 9; + tmp = e + 8 * 9; + s = e; + + if (hashLen > 32U) { + hashLen = 32U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_256_ecc_gen_k_9(rng, k); + } + else { + sp_256_from_mp(k, 9, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_9(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 9U); + sp_256_norm_9(r); + c = sp_256_cmp_9(r, p256_order); + sp_256_cond_sub_9(r, r, p256_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_256_norm_9(r); + + if (!sp_256_iszero_9(r)) { + /* x is modified in calculation of s. */ + sp_256_from_mp(x, 9, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_256_from_bin(e, 9, hash, (int)hashLen); + + err = sp_256_calc_s_9(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_256_iszero_9(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 9); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_256)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_256_ctx { int state; @@ -25515,15 +25880,10 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 32U) { - hashLen = 32U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -25558,6 +25918,9 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_9(ctx->r); + if (hashLen > 32U) { + hashLen = 32U; + } sp_256_from_mp(ctx->x, 9, priv); sp_256_from_bin(ctx->e, 9, hash, (int)hashLen); ctx->state = 4; @@ -25652,124 +26015,6 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_256* point = NULL; -#else - sp_digit e[7 * 2 * 9]; - sp_point_256 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = 
NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int32 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 9, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 9; - k = e + 4 * 9; - r = e + 6 * 9; - tmp = e + 8 * 9; - s = e; - - if (hashLen > 32U) { - hashLen = 32U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. */ - if (km == NULL || mp_iszero(km)) { - err = sp_256_ecc_gen_k_9(rng, k); - } - else { - sp_256_from_mp(k, 9, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_256_ecc_mulmod_base_9(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 9U); - sp_256_norm_9(r); - c = sp_256_cmp_9(r, p256_order); - sp_256_cond_sub_9(r, r, p256_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_256_norm_9(r); - - sp_256_from_mp(x, 9, priv); - sp_256_from_bin(e, 9, hash, (int)hashLen); - - err = sp_256_calc_s_9(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. 
*/ - if ((err == MP_OKAY) && (sp_256_iszero_9(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_256_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_256_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 9); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_256)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -25816,7 +26061,7 @@ static int sp_256_num_bits_9(const sp_digit* a) static int sp_256_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* u = NULL; #else sp_digit u[9 * 4]; @@ -25827,7 +26072,7 @@ static int sp_256_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut; int vt; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 4, NULL, DYNAMIC_TYPE_ECC); if (u == NULL) @@ -25867,8 +26112,8 @@ static int sp_256_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_256_cmp_9(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_256_cmp_9(u, v) >= 0))) { sp_256_sub_9(u, u, v); sp_256_norm_9(u); @@ -25915,7 +26160,7 @@ static int sp_256_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(sp_digit) * 9); } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (u != NULL) XFREE(u, NULL, DYNAMIC_TYPE_ECC); #endif @@ -25962,7 +26207,7 @@ static void sp_256_add_points_9(sp_point_256* p1, const sp_point_256* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -26034,6 +26279,106 @@ static int sp_256_calc_vfy_point_9(sp_point_256* p1, sp_point_256* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_256* p1 = NULL; +#else + sp_digit u1[18 * 9]; + sp_point_256 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 9, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 9; + s = u1 + 4 * 9; + tmp = u1 + 6 * 9; + p2 = p1 + 1; + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 9, hash, (int)hashLen); + sp_256_from_mp(u2, 9, rm); + sp_256_from_mp(s, 9, sm); + sp_256_from_mp(p2->x, 9, pX); + sp_256_from_mp(p2->y, 9, pY); + sp_256_from_mp(p2->z, 9, pZ); + + err = sp_256_calc_vfy_point_9(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and 
convert to Montgomery form. */ + sp_256_from_mp(u2, 9, rm); + err = sp_256_mod_mul_norm_9(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_9(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_9(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 9, rm); + carry = sp_256_add_9(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_9(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_256_cmp_9(u2, p256_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_256_mod_mul_norm_9(u2, u2, p256_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_256_mont_mul_9(u1, u2, p1->z, p256_mod, p256_mp_mod); + } + *res = (sp_256_cmp_9(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_256_ctx { int state; @@ -26046,7 +26391,7 @@ typedef struct sp_ecc_verify_256_ctx { sp_digit u1[2*9]; sp_digit u2[2*9]; sp_digit s[2*9]; - sp_digit tmp[2*9 * 5]; + sp_digit tmp[2*9 * 6]; sp_point_256 p1; sp_point_256 p2; } sp_ecc_verify_256_ctx; @@ -26183,109 +26528,10 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_256* p1 = NULL; -#else - sp_digit u1[16 * 9]; - sp_point_256 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - 
sp_digit* tmp = NULL; - sp_point_256* p2 = NULL; - sp_digit carry; - sp_int32 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 9, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 9; - s = u1 + 4 * 9; - tmp = u1 + 6 * 9; - p2 = p1 + 1; - - if (hashLen > 32U) { - hashLen = 32U; - } - - sp_256_from_bin(u1, 9, hash, (int)hashLen); - sp_256_from_mp(u2, 9, rm); - sp_256_from_mp(s, 9, sm); - sp_256_from_mp(p2->x, 9, pX); - sp_256_from_mp(p2->y, 9, pY); - sp_256_from_mp(p2->z, 9, pZ); - - err = sp_256_calc_vfy_point_9(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_256_from_mp(u2, 9, rm); - err = sp_256_mod_mul_norm_9(u2, u2, p256_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_256_mont_sqr_9(p1->z, p1->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(u1, u2, p1->z, p256_mod, p256_mp_mod); - *res = (int)(sp_256_cmp_9(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_256_from_mp(u2, 9, rm); - carry = sp_256_add_9(u2, u2, p256_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_256_norm_9(u2); - - /* Compare with mod and if greater or equal then not valid. 
*/ - c = sp_256_cmp_9(u2, p256_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_256_mod_mul_norm_9(u2, u2, p256_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_256_mont_mul_9(u1, u2, p1->z, p256_mod, - p256_mp_mod); - *res = (sp_256_cmp_9(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -26295,7 +26541,7 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_256_ecc_is_point_9(const sp_point_256* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[9 * 4]; @@ -26303,7 +26549,7 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -26313,25 +26559,27 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 9; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_9(t1, point->y); (void)sp_256_mod_9(t1, t1, p256_mod); sp_256_sqr_9(t2, point->x); (void)sp_256_mod_9(t2, t2, p256_mod); sp_256_mul_9(t2, t2, point->x); (void)sp_256_mod_9(t2, t2, p256_mod); - (void)sp_256_sub_9(t2, p256_mod, t2); - sp_256_mont_add_9(t1, t1, t2, p256_mod); + sp_256_mont_sub_9(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ 
sp_256_mont_add_9(t1, t1, point->x, p256_mod); sp_256_mont_add_9(t1, t1, point->x, p256_mod); sp_256_mont_add_9(t1, t1, point->x, p256_mod); + if (sp_256_cmp_9(t1, p256_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -26339,7 +26587,7 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -26348,7 +26596,7 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, */ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* pub = NULL; #else sp_point_256 pub[1]; @@ -26356,7 +26604,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -26371,7 +26619,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) err = sp_256_ecc_is_point_9(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -26393,7 +26641,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_256* pub = NULL; #else @@ -26414,7 +26662,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, err = 
ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); @@ -26480,7 +26728,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -26509,17 +26757,17 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else - sp_digit tmp[2 * 9 * 5]; + sp_digit tmp[2 * 9 * 6]; sp_point_256 p[2]; #endif sp_point_256* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -26527,7 +26775,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -26562,7 +26810,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -26586,7 +26834,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -26595,7 +26843,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -26630,7 +26878,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -26650,7 +26898,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -26660,7 +26908,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -26694,7 +26942,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_256_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -26712,7 +26960,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_256_mont_sqrt_9(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 9]; @@ -26720,7 +26968,7 @@ static int sp_256_mont_sqrt_9(sp_digit* y) sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 9, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) { err = MEMORY_E; @@ -26763,7 +27011,7 @@ static int sp_256_mont_sqrt_9(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -26781,7 +27029,7 @@ static int sp_256_mont_sqrt_9(sp_digit* y) */ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 9]; @@ -26789,7 +27037,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 9, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -26829,7 +27077,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) err = sp_256_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -26983,262 +27231,266 @@ SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_int64 t0 = ((sp_int64)a[ 0]) * b[ 0]; - sp_int64 t1 = ((sp_int64)a[ 0]) * b[ 1] - + ((sp_int64)a[ 1]) * b[ 0]; - sp_int64 t2 = ((sp_int64)a[ 0]) * b[ 2] - + ((sp_int64)a[ 1]) * b[ 1] - + ((sp_int64)a[ 2]) * b[ 0]; - sp_int64 t3 = ((sp_int64)a[ 0]) * b[ 3] - + ((sp_int64)a[ 1]) * b[ 2] - + ((sp_int64)a[ 2]) * b[ 1] - + ((sp_int64)a[ 3]) * b[ 0]; - sp_int64 t4 = ((sp_int64)a[ 0]) * b[ 4] - + ((sp_int64)a[ 1]) * b[ 3] - + ((sp_int64)a[ 2]) * b[ 2] - + ((sp_int64)a[ 3]) * b[ 1] - + ((sp_int64)a[ 4]) * b[ 0]; - sp_int64 t5 = ((sp_int64)a[ 0]) * b[ 5] - + ((sp_int64)a[ 
1]) * b[ 4] - + ((sp_int64)a[ 2]) * b[ 3] - + ((sp_int64)a[ 3]) * b[ 2] - + ((sp_int64)a[ 4]) * b[ 1] - + ((sp_int64)a[ 5]) * b[ 0]; - sp_int64 t6 = ((sp_int64)a[ 0]) * b[ 6] - + ((sp_int64)a[ 1]) * b[ 5] - + ((sp_int64)a[ 2]) * b[ 4] - + ((sp_int64)a[ 3]) * b[ 3] - + ((sp_int64)a[ 4]) * b[ 2] - + ((sp_int64)a[ 5]) * b[ 1] - + ((sp_int64)a[ 6]) * b[ 0]; - sp_int64 t7 = ((sp_int64)a[ 0]) * b[ 7] - + ((sp_int64)a[ 1]) * b[ 6] - + ((sp_int64)a[ 2]) * b[ 5] - + ((sp_int64)a[ 3]) * b[ 4] - + ((sp_int64)a[ 4]) * b[ 3] - + ((sp_int64)a[ 5]) * b[ 2] - + ((sp_int64)a[ 6]) * b[ 1] - + ((sp_int64)a[ 7]) * b[ 0]; - sp_int64 t8 = ((sp_int64)a[ 0]) * b[ 8] - + ((sp_int64)a[ 1]) * b[ 7] - + ((sp_int64)a[ 2]) * b[ 6] - + ((sp_int64)a[ 3]) * b[ 5] - + ((sp_int64)a[ 4]) * b[ 4] - + ((sp_int64)a[ 5]) * b[ 3] - + ((sp_int64)a[ 6]) * b[ 2] - + ((sp_int64)a[ 7]) * b[ 1] - + ((sp_int64)a[ 8]) * b[ 0]; - sp_int64 t9 = ((sp_int64)a[ 0]) * b[ 9] - + ((sp_int64)a[ 1]) * b[ 8] - + ((sp_int64)a[ 2]) * b[ 7] - + ((sp_int64)a[ 3]) * b[ 6] - + ((sp_int64)a[ 4]) * b[ 5] - + ((sp_int64)a[ 5]) * b[ 4] - + ((sp_int64)a[ 6]) * b[ 3] - + ((sp_int64)a[ 7]) * b[ 2] - + ((sp_int64)a[ 8]) * b[ 1] - + ((sp_int64)a[ 9]) * b[ 0]; - sp_int64 t10 = ((sp_int64)a[ 0]) * b[10] - + ((sp_int64)a[ 1]) * b[ 9] - + ((sp_int64)a[ 2]) * b[ 8] - + ((sp_int64)a[ 3]) * b[ 7] - + ((sp_int64)a[ 4]) * b[ 6] - + ((sp_int64)a[ 5]) * b[ 5] - + ((sp_int64)a[ 6]) * b[ 4] - + ((sp_int64)a[ 7]) * b[ 3] - + ((sp_int64)a[ 8]) * b[ 2] - + ((sp_int64)a[ 9]) * b[ 1] - + ((sp_int64)a[10]) * b[ 0]; - sp_int64 t11 = ((sp_int64)a[ 0]) * b[11] - + ((sp_int64)a[ 1]) * b[10] - + ((sp_int64)a[ 2]) * b[ 9] - + ((sp_int64)a[ 3]) * b[ 8] - + ((sp_int64)a[ 4]) * b[ 7] - + ((sp_int64)a[ 5]) * b[ 6] - + ((sp_int64)a[ 6]) * b[ 5] - + ((sp_int64)a[ 7]) * b[ 4] - + ((sp_int64)a[ 8]) * b[ 3] - + ((sp_int64)a[ 9]) * b[ 2] - + ((sp_int64)a[10]) * b[ 1] - + ((sp_int64)a[11]) * b[ 0]; - sp_int64 t12 = ((sp_int64)a[ 0]) * b[12] - + ((sp_int64)a[ 1]) * b[11] - + 
((sp_int64)a[ 2]) * b[10] - + ((sp_int64)a[ 3]) * b[ 9] - + ((sp_int64)a[ 4]) * b[ 8] - + ((sp_int64)a[ 5]) * b[ 7] - + ((sp_int64)a[ 6]) * b[ 6] - + ((sp_int64)a[ 7]) * b[ 5] - + ((sp_int64)a[ 8]) * b[ 4] - + ((sp_int64)a[ 9]) * b[ 3] - + ((sp_int64)a[10]) * b[ 2] - + ((sp_int64)a[11]) * b[ 1] - + ((sp_int64)a[12]) * b[ 0]; - sp_int64 t13 = ((sp_int64)a[ 0]) * b[13] - + ((sp_int64)a[ 1]) * b[12] - + ((sp_int64)a[ 2]) * b[11] - + ((sp_int64)a[ 3]) * b[10] - + ((sp_int64)a[ 4]) * b[ 9] - + ((sp_int64)a[ 5]) * b[ 8] - + ((sp_int64)a[ 6]) * b[ 7] - + ((sp_int64)a[ 7]) * b[ 6] - + ((sp_int64)a[ 8]) * b[ 5] - + ((sp_int64)a[ 9]) * b[ 4] - + ((sp_int64)a[10]) * b[ 3] - + ((sp_int64)a[11]) * b[ 2] - + ((sp_int64)a[12]) * b[ 1] - + ((sp_int64)a[13]) * b[ 0]; - sp_int64 t14 = ((sp_int64)a[ 0]) * b[14] - + ((sp_int64)a[ 1]) * b[13] - + ((sp_int64)a[ 2]) * b[12] - + ((sp_int64)a[ 3]) * b[11] - + ((sp_int64)a[ 4]) * b[10] - + ((sp_int64)a[ 5]) * b[ 9] - + ((sp_int64)a[ 6]) * b[ 8] - + ((sp_int64)a[ 7]) * b[ 7] - + ((sp_int64)a[ 8]) * b[ 6] - + ((sp_int64)a[ 9]) * b[ 5] - + ((sp_int64)a[10]) * b[ 4] - + ((sp_int64)a[11]) * b[ 3] - + ((sp_int64)a[12]) * b[ 2] - + ((sp_int64)a[13]) * b[ 1] - + ((sp_int64)a[14]) * b[ 0]; - sp_int64 t15 = ((sp_int64)a[ 1]) * b[14] - + ((sp_int64)a[ 2]) * b[13] - + ((sp_int64)a[ 3]) * b[12] - + ((sp_int64)a[ 4]) * b[11] - + ((sp_int64)a[ 5]) * b[10] - + ((sp_int64)a[ 6]) * b[ 9] - + ((sp_int64)a[ 7]) * b[ 8] - + ((sp_int64)a[ 8]) * b[ 7] - + ((sp_int64)a[ 9]) * b[ 6] - + ((sp_int64)a[10]) * b[ 5] - + ((sp_int64)a[11]) * b[ 4] - + ((sp_int64)a[12]) * b[ 3] - + ((sp_int64)a[13]) * b[ 2] - + ((sp_int64)a[14]) * b[ 1]; - sp_int64 t16 = ((sp_int64)a[ 2]) * b[14] - + ((sp_int64)a[ 3]) * b[13] - + ((sp_int64)a[ 4]) * b[12] - + ((sp_int64)a[ 5]) * b[11] - + ((sp_int64)a[ 6]) * b[10] - + ((sp_int64)a[ 7]) * b[ 9] - + ((sp_int64)a[ 8]) * b[ 8] - + ((sp_int64)a[ 9]) * b[ 7] - + ((sp_int64)a[10]) * b[ 6] - + ((sp_int64)a[11]) * b[ 5] - + ((sp_int64)a[12]) * b[ 
4] - + ((sp_int64)a[13]) * b[ 3] - + ((sp_int64)a[14]) * b[ 2]; - sp_int64 t17 = ((sp_int64)a[ 3]) * b[14] - + ((sp_int64)a[ 4]) * b[13] - + ((sp_int64)a[ 5]) * b[12] - + ((sp_int64)a[ 6]) * b[11] - + ((sp_int64)a[ 7]) * b[10] - + ((sp_int64)a[ 8]) * b[ 9] - + ((sp_int64)a[ 9]) * b[ 8] - + ((sp_int64)a[10]) * b[ 7] - + ((sp_int64)a[11]) * b[ 6] - + ((sp_int64)a[12]) * b[ 5] - + ((sp_int64)a[13]) * b[ 4] - + ((sp_int64)a[14]) * b[ 3]; - sp_int64 t18 = ((sp_int64)a[ 4]) * b[14] - + ((sp_int64)a[ 5]) * b[13] - + ((sp_int64)a[ 6]) * b[12] - + ((sp_int64)a[ 7]) * b[11] - + ((sp_int64)a[ 8]) * b[10] - + ((sp_int64)a[ 9]) * b[ 9] - + ((sp_int64)a[10]) * b[ 8] - + ((sp_int64)a[11]) * b[ 7] - + ((sp_int64)a[12]) * b[ 6] - + ((sp_int64)a[13]) * b[ 5] - + ((sp_int64)a[14]) * b[ 4]; - sp_int64 t19 = ((sp_int64)a[ 5]) * b[14] - + ((sp_int64)a[ 6]) * b[13] - + ((sp_int64)a[ 7]) * b[12] - + ((sp_int64)a[ 8]) * b[11] - + ((sp_int64)a[ 9]) * b[10] - + ((sp_int64)a[10]) * b[ 9] - + ((sp_int64)a[11]) * b[ 8] - + ((sp_int64)a[12]) * b[ 7] - + ((sp_int64)a[13]) * b[ 6] - + ((sp_int64)a[14]) * b[ 5]; - sp_int64 t20 = ((sp_int64)a[ 6]) * b[14] - + ((sp_int64)a[ 7]) * b[13] - + ((sp_int64)a[ 8]) * b[12] - + ((sp_int64)a[ 9]) * b[11] - + ((sp_int64)a[10]) * b[10] - + ((sp_int64)a[11]) * b[ 9] - + ((sp_int64)a[12]) * b[ 8] - + ((sp_int64)a[13]) * b[ 7] - + ((sp_int64)a[14]) * b[ 6]; - sp_int64 t21 = ((sp_int64)a[ 7]) * b[14] - + ((sp_int64)a[ 8]) * b[13] - + ((sp_int64)a[ 9]) * b[12] - + ((sp_int64)a[10]) * b[11] - + ((sp_int64)a[11]) * b[10] - + ((sp_int64)a[12]) * b[ 9] - + ((sp_int64)a[13]) * b[ 8] - + ((sp_int64)a[14]) * b[ 7]; - sp_int64 t22 = ((sp_int64)a[ 8]) * b[14] - + ((sp_int64)a[ 9]) * b[13] - + ((sp_int64)a[10]) * b[12] - + ((sp_int64)a[11]) * b[11] - + ((sp_int64)a[12]) * b[10] - + ((sp_int64)a[13]) * b[ 9] - + ((sp_int64)a[14]) * b[ 8]; - sp_int64 t23 = ((sp_int64)a[ 9]) * b[14] - + ((sp_int64)a[10]) * b[13] - + ((sp_int64)a[11]) * b[12] - + ((sp_int64)a[12]) * b[11] - + 
((sp_int64)a[13]) * b[10] - + ((sp_int64)a[14]) * b[ 9]; - sp_int64 t24 = ((sp_int64)a[10]) * b[14] - + ((sp_int64)a[11]) * b[13] - + ((sp_int64)a[12]) * b[12] - + ((sp_int64)a[13]) * b[11] - + ((sp_int64)a[14]) * b[10]; - sp_int64 t25 = ((sp_int64)a[11]) * b[14] - + ((sp_int64)a[12]) * b[13] - + ((sp_int64)a[13]) * b[12] - + ((sp_int64)a[14]) * b[11]; - sp_int64 t26 = ((sp_int64)a[12]) * b[14] - + ((sp_int64)a[13]) * b[13] - + ((sp_int64)a[14]) * b[12]; - sp_int64 t27 = ((sp_int64)a[13]) * b[14] - + ((sp_int64)a[14]) * b[13]; - sp_int64 t28 = ((sp_int64)a[14]) * b[14]; + sp_int64 t0; + sp_int64 t1; + sp_digit t[15]; - t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; - t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; - t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; - t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; - t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; - t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; - t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; - t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; - t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; - t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; - t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; - t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; - t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; - t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; - t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; - t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; - t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; - t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; - t19 += t18 >> 26; r[18] = t18 & 0x3ffffff; - t20 += t19 >> 26; r[19] = t19 & 0x3ffffff; - t21 += t20 >> 26; r[20] = t20 & 0x3ffffff; - t22 += t21 >> 26; r[21] = t21 & 0x3ffffff; - t23 += t22 >> 26; r[22] = t22 & 0x3ffffff; - t24 += t23 >> 26; r[23] = t23 & 0x3ffffff; - t25 += t24 >> 26; r[24] = t24 & 0x3ffffff; - t26 += t25 >> 26; r[25] = t25 & 0x3ffffff; - t27 += t26 >> 26; r[26] = t26 & 0x3ffffff; - t28 += t27 >> 26; r[27] = t27 & 0x3ffffff; - r[29] = (sp_digit)(t28 >> 26); - r[28] = t28 & 0x3ffffff; + t0 = ((sp_int64)a[ 0]) * b[ 0]; + t1 = ((sp_int64)a[ 0]) * b[ 1] + + 
((sp_int64)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[ 2] + + ((sp_int64)a[ 1]) * b[ 1] + + ((sp_int64)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[ 3] + + ((sp_int64)a[ 1]) * b[ 2] + + ((sp_int64)a[ 2]) * b[ 1] + + ((sp_int64)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[ 4] + + ((sp_int64)a[ 1]) * b[ 3] + + ((sp_int64)a[ 2]) * b[ 2] + + ((sp_int64)a[ 3]) * b[ 1] + + ((sp_int64)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[ 5] + + ((sp_int64)a[ 1]) * b[ 4] + + ((sp_int64)a[ 2]) * b[ 3] + + ((sp_int64)a[ 3]) * b[ 2] + + ((sp_int64)a[ 4]) * b[ 1] + + ((sp_int64)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[ 6] + + ((sp_int64)a[ 1]) * b[ 5] + + ((sp_int64)a[ 2]) * b[ 4] + + ((sp_int64)a[ 3]) * b[ 3] + + ((sp_int64)a[ 4]) * b[ 2] + + ((sp_int64)a[ 5]) * b[ 1] + + ((sp_int64)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[ 7] + + ((sp_int64)a[ 1]) * b[ 6] + + ((sp_int64)a[ 2]) * b[ 5] + + ((sp_int64)a[ 3]) * b[ 4] + + ((sp_int64)a[ 4]) * b[ 3] + + ((sp_int64)a[ 5]) * b[ 2] + + ((sp_int64)a[ 6]) * b[ 1] + + ((sp_int64)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[ 8] + + ((sp_int64)a[ 1]) * b[ 7] + + ((sp_int64)a[ 2]) * b[ 6] + + ((sp_int64)a[ 3]) * b[ 5] + + ((sp_int64)a[ 4]) * b[ 4] + + ((sp_int64)a[ 5]) * b[ 3] + + ((sp_int64)a[ 6]) * b[ 2] + + ((sp_int64)a[ 7]) * b[ 1] + + ((sp_int64)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[ 9] + + ((sp_int64)a[ 1]) * b[ 8] + + ((sp_int64)a[ 2]) * b[ 7] + + ((sp_int64)a[ 3]) * b[ 6] + + ((sp_int64)a[ 4]) * b[ 5] + + ((sp_int64)a[ 5]) * b[ 4] + + ((sp_int64)a[ 6]) * b[ 3] + + ((sp_int64)a[ 7]) * b[ 2] + + ((sp_int64)a[ 8]) * b[ 1] + + ((sp_int64)a[ 9]) * b[ 0]; + t[ 8] = t0 & 0x3ffffff; t1 += t0 >> 26; + 
t0 = ((sp_int64)a[ 0]) * b[10] + + ((sp_int64)a[ 1]) * b[ 9] + + ((sp_int64)a[ 2]) * b[ 8] + + ((sp_int64)a[ 3]) * b[ 7] + + ((sp_int64)a[ 4]) * b[ 6] + + ((sp_int64)a[ 5]) * b[ 5] + + ((sp_int64)a[ 6]) * b[ 4] + + ((sp_int64)a[ 7]) * b[ 3] + + ((sp_int64)a[ 8]) * b[ 2] + + ((sp_int64)a[ 9]) * b[ 1] + + ((sp_int64)a[10]) * b[ 0]; + t[ 9] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[11] + + ((sp_int64)a[ 1]) * b[10] + + ((sp_int64)a[ 2]) * b[ 9] + + ((sp_int64)a[ 3]) * b[ 8] + + ((sp_int64)a[ 4]) * b[ 7] + + ((sp_int64)a[ 5]) * b[ 6] + + ((sp_int64)a[ 6]) * b[ 5] + + ((sp_int64)a[ 7]) * b[ 4] + + ((sp_int64)a[ 8]) * b[ 3] + + ((sp_int64)a[ 9]) * b[ 2] + + ((sp_int64)a[10]) * b[ 1] + + ((sp_int64)a[11]) * b[ 0]; + t[10] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[12] + + ((sp_int64)a[ 1]) * b[11] + + ((sp_int64)a[ 2]) * b[10] + + ((sp_int64)a[ 3]) * b[ 9] + + ((sp_int64)a[ 4]) * b[ 8] + + ((sp_int64)a[ 5]) * b[ 7] + + ((sp_int64)a[ 6]) * b[ 6] + + ((sp_int64)a[ 7]) * b[ 5] + + ((sp_int64)a[ 8]) * b[ 4] + + ((sp_int64)a[ 9]) * b[ 3] + + ((sp_int64)a[10]) * b[ 2] + + ((sp_int64)a[11]) * b[ 1] + + ((sp_int64)a[12]) * b[ 0]; + t[11] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 0]) * b[13] + + ((sp_int64)a[ 1]) * b[12] + + ((sp_int64)a[ 2]) * b[11] + + ((sp_int64)a[ 3]) * b[10] + + ((sp_int64)a[ 4]) * b[ 9] + + ((sp_int64)a[ 5]) * b[ 8] + + ((sp_int64)a[ 6]) * b[ 7] + + ((sp_int64)a[ 7]) * b[ 6] + + ((sp_int64)a[ 8]) * b[ 5] + + ((sp_int64)a[ 9]) * b[ 4] + + ((sp_int64)a[10]) * b[ 3] + + ((sp_int64)a[11]) * b[ 2] + + ((sp_int64)a[12]) * b[ 1] + + ((sp_int64)a[13]) * b[ 0]; + t[12] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 0]) * b[14] + + ((sp_int64)a[ 1]) * b[13] + + ((sp_int64)a[ 2]) * b[12] + + ((sp_int64)a[ 3]) * b[11] + + ((sp_int64)a[ 4]) * b[10] + + ((sp_int64)a[ 5]) * b[ 9] + + ((sp_int64)a[ 6]) * b[ 8] + + ((sp_int64)a[ 7]) * b[ 7] + + ((sp_int64)a[ 8]) * b[ 6] + + ((sp_int64)a[ 9]) * b[ 5] + + 
((sp_int64)a[10]) * b[ 4] + + ((sp_int64)a[11]) * b[ 3] + + ((sp_int64)a[12]) * b[ 2] + + ((sp_int64)a[13]) * b[ 1] + + ((sp_int64)a[14]) * b[ 0]; + t[13] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 1]) * b[14] + + ((sp_int64)a[ 2]) * b[13] + + ((sp_int64)a[ 3]) * b[12] + + ((sp_int64)a[ 4]) * b[11] + + ((sp_int64)a[ 5]) * b[10] + + ((sp_int64)a[ 6]) * b[ 9] + + ((sp_int64)a[ 7]) * b[ 8] + + ((sp_int64)a[ 8]) * b[ 7] + + ((sp_int64)a[ 9]) * b[ 6] + + ((sp_int64)a[10]) * b[ 5] + + ((sp_int64)a[11]) * b[ 4] + + ((sp_int64)a[12]) * b[ 3] + + ((sp_int64)a[13]) * b[ 2] + + ((sp_int64)a[14]) * b[ 1]; + t[14] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 2]) * b[14] + + ((sp_int64)a[ 3]) * b[13] + + ((sp_int64)a[ 4]) * b[12] + + ((sp_int64)a[ 5]) * b[11] + + ((sp_int64)a[ 6]) * b[10] + + ((sp_int64)a[ 7]) * b[ 9] + + ((sp_int64)a[ 8]) * b[ 8] + + ((sp_int64)a[ 9]) * b[ 7] + + ((sp_int64)a[10]) * b[ 6] + + ((sp_int64)a[11]) * b[ 5] + + ((sp_int64)a[12]) * b[ 4] + + ((sp_int64)a[13]) * b[ 3] + + ((sp_int64)a[14]) * b[ 2]; + r[15] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 3]) * b[14] + + ((sp_int64)a[ 4]) * b[13] + + ((sp_int64)a[ 5]) * b[12] + + ((sp_int64)a[ 6]) * b[11] + + ((sp_int64)a[ 7]) * b[10] + + ((sp_int64)a[ 8]) * b[ 9] + + ((sp_int64)a[ 9]) * b[ 8] + + ((sp_int64)a[10]) * b[ 7] + + ((sp_int64)a[11]) * b[ 6] + + ((sp_int64)a[12]) * b[ 5] + + ((sp_int64)a[13]) * b[ 4] + + ((sp_int64)a[14]) * b[ 3]; + r[16] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 4]) * b[14] + + ((sp_int64)a[ 5]) * b[13] + + ((sp_int64)a[ 6]) * b[12] + + ((sp_int64)a[ 7]) * b[11] + + ((sp_int64)a[ 8]) * b[10] + + ((sp_int64)a[ 9]) * b[ 9] + + ((sp_int64)a[10]) * b[ 8] + + ((sp_int64)a[11]) * b[ 7] + + ((sp_int64)a[12]) * b[ 6] + + ((sp_int64)a[13]) * b[ 5] + + ((sp_int64)a[14]) * b[ 4]; + r[17] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 5]) * b[14] + + ((sp_int64)a[ 6]) * b[13] + + ((sp_int64)a[ 7]) * b[12] + + ((sp_int64)a[ 8]) * b[11] + + 
((sp_int64)a[ 9]) * b[10] + + ((sp_int64)a[10]) * b[ 9] + + ((sp_int64)a[11]) * b[ 8] + + ((sp_int64)a[12]) * b[ 7] + + ((sp_int64)a[13]) * b[ 6] + + ((sp_int64)a[14]) * b[ 5]; + r[18] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 6]) * b[14] + + ((sp_int64)a[ 7]) * b[13] + + ((sp_int64)a[ 8]) * b[12] + + ((sp_int64)a[ 9]) * b[11] + + ((sp_int64)a[10]) * b[10] + + ((sp_int64)a[11]) * b[ 9] + + ((sp_int64)a[12]) * b[ 8] + + ((sp_int64)a[13]) * b[ 7] + + ((sp_int64)a[14]) * b[ 6]; + r[19] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 7]) * b[14] + + ((sp_int64)a[ 8]) * b[13] + + ((sp_int64)a[ 9]) * b[12] + + ((sp_int64)a[10]) * b[11] + + ((sp_int64)a[11]) * b[10] + + ((sp_int64)a[12]) * b[ 9] + + ((sp_int64)a[13]) * b[ 8] + + ((sp_int64)a[14]) * b[ 7]; + r[20] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[ 8]) * b[14] + + ((sp_int64)a[ 9]) * b[13] + + ((sp_int64)a[10]) * b[12] + + ((sp_int64)a[11]) * b[11] + + ((sp_int64)a[12]) * b[10] + + ((sp_int64)a[13]) * b[ 9] + + ((sp_int64)a[14]) * b[ 8]; + r[21] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[ 9]) * b[14] + + ((sp_int64)a[10]) * b[13] + + ((sp_int64)a[11]) * b[12] + + ((sp_int64)a[12]) * b[11] + + ((sp_int64)a[13]) * b[10] + + ((sp_int64)a[14]) * b[ 9]; + r[22] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[10]) * b[14] + + ((sp_int64)a[11]) * b[13] + + ((sp_int64)a[12]) * b[12] + + ((sp_int64)a[13]) * b[11] + + ((sp_int64)a[14]) * b[10]; + r[23] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[11]) * b[14] + + ((sp_int64)a[12]) * b[13] + + ((sp_int64)a[13]) * b[12] + + ((sp_int64)a[14]) * b[11]; + r[24] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[12]) * b[14] + + ((sp_int64)a[13]) * b[13] + + ((sp_int64)a[14]) * b[12]; + r[25] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = ((sp_int64)a[13]) * b[14] + + ((sp_int64)a[14]) * b[13]; + r[26] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[14]) * b[14]; + r[27] = t1 & 0x3ffffff; t0 += t1 >> 26; + r[28] = t0 & 
0x3ffffff; + r[29] = (sp_digit)(t0 >> 26); + XMEMCPY(r, t, sizeof(t)); } #endif /* WOLFSSL_SP_SMALL */ @@ -27292,157 +27544,161 @@ SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a) */ SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a) { - sp_int64 t0 = ((sp_int64)a[ 0]) * a[ 0]; - sp_int64 t1 = (((sp_int64)a[ 0]) * a[ 1]) * 2; - sp_int64 t2 = (((sp_int64)a[ 0]) * a[ 2]) * 2 - + ((sp_int64)a[ 1]) * a[ 1]; - sp_int64 t3 = (((sp_int64)a[ 0]) * a[ 3] - + ((sp_int64)a[ 1]) * a[ 2]) * 2; - sp_int64 t4 = (((sp_int64)a[ 0]) * a[ 4] - + ((sp_int64)a[ 1]) * a[ 3]) * 2 - + ((sp_int64)a[ 2]) * a[ 2]; - sp_int64 t5 = (((sp_int64)a[ 0]) * a[ 5] - + ((sp_int64)a[ 1]) * a[ 4] - + ((sp_int64)a[ 2]) * a[ 3]) * 2; - sp_int64 t6 = (((sp_int64)a[ 0]) * a[ 6] - + ((sp_int64)a[ 1]) * a[ 5] - + ((sp_int64)a[ 2]) * a[ 4]) * 2 - + ((sp_int64)a[ 3]) * a[ 3]; - sp_int64 t7 = (((sp_int64)a[ 0]) * a[ 7] - + ((sp_int64)a[ 1]) * a[ 6] - + ((sp_int64)a[ 2]) * a[ 5] - + ((sp_int64)a[ 3]) * a[ 4]) * 2; - sp_int64 t8 = (((sp_int64)a[ 0]) * a[ 8] - + ((sp_int64)a[ 1]) * a[ 7] - + ((sp_int64)a[ 2]) * a[ 6] - + ((sp_int64)a[ 3]) * a[ 5]) * 2 - + ((sp_int64)a[ 4]) * a[ 4]; - sp_int64 t9 = (((sp_int64)a[ 0]) * a[ 9] - + ((sp_int64)a[ 1]) * a[ 8] - + ((sp_int64)a[ 2]) * a[ 7] - + ((sp_int64)a[ 3]) * a[ 6] - + ((sp_int64)a[ 4]) * a[ 5]) * 2; - sp_int64 t10 = (((sp_int64)a[ 0]) * a[10] - + ((sp_int64)a[ 1]) * a[ 9] - + ((sp_int64)a[ 2]) * a[ 8] - + ((sp_int64)a[ 3]) * a[ 7] - + ((sp_int64)a[ 4]) * a[ 6]) * 2 - + ((sp_int64)a[ 5]) * a[ 5]; - sp_int64 t11 = (((sp_int64)a[ 0]) * a[11] - + ((sp_int64)a[ 1]) * a[10] - + ((sp_int64)a[ 2]) * a[ 9] - + ((sp_int64)a[ 3]) * a[ 8] - + ((sp_int64)a[ 4]) * a[ 7] - + ((sp_int64)a[ 5]) * a[ 6]) * 2; - sp_int64 t12 = (((sp_int64)a[ 0]) * a[12] - + ((sp_int64)a[ 1]) * a[11] - + ((sp_int64)a[ 2]) * a[10] - + ((sp_int64)a[ 3]) * a[ 9] - + ((sp_int64)a[ 4]) * a[ 8] - + ((sp_int64)a[ 5]) * a[ 7]) * 2 - + ((sp_int64)a[ 6]) * a[ 6]; - 
sp_int64 t13 = (((sp_int64)a[ 0]) * a[13] - + ((sp_int64)a[ 1]) * a[12] - + ((sp_int64)a[ 2]) * a[11] - + ((sp_int64)a[ 3]) * a[10] - + ((sp_int64)a[ 4]) * a[ 9] - + ((sp_int64)a[ 5]) * a[ 8] - + ((sp_int64)a[ 6]) * a[ 7]) * 2; - sp_int64 t14 = (((sp_int64)a[ 0]) * a[14] - + ((sp_int64)a[ 1]) * a[13] - + ((sp_int64)a[ 2]) * a[12] - + ((sp_int64)a[ 3]) * a[11] - + ((sp_int64)a[ 4]) * a[10] - + ((sp_int64)a[ 5]) * a[ 9] - + ((sp_int64)a[ 6]) * a[ 8]) * 2 - + ((sp_int64)a[ 7]) * a[ 7]; - sp_int64 t15 = (((sp_int64)a[ 1]) * a[14] - + ((sp_int64)a[ 2]) * a[13] - + ((sp_int64)a[ 3]) * a[12] - + ((sp_int64)a[ 4]) * a[11] - + ((sp_int64)a[ 5]) * a[10] - + ((sp_int64)a[ 6]) * a[ 9] - + ((sp_int64)a[ 7]) * a[ 8]) * 2; - sp_int64 t16 = (((sp_int64)a[ 2]) * a[14] - + ((sp_int64)a[ 3]) * a[13] - + ((sp_int64)a[ 4]) * a[12] - + ((sp_int64)a[ 5]) * a[11] - + ((sp_int64)a[ 6]) * a[10] - + ((sp_int64)a[ 7]) * a[ 9]) * 2 - + ((sp_int64)a[ 8]) * a[ 8]; - sp_int64 t17 = (((sp_int64)a[ 3]) * a[14] - + ((sp_int64)a[ 4]) * a[13] - + ((sp_int64)a[ 5]) * a[12] - + ((sp_int64)a[ 6]) * a[11] - + ((sp_int64)a[ 7]) * a[10] - + ((sp_int64)a[ 8]) * a[ 9]) * 2; - sp_int64 t18 = (((sp_int64)a[ 4]) * a[14] - + ((sp_int64)a[ 5]) * a[13] - + ((sp_int64)a[ 6]) * a[12] - + ((sp_int64)a[ 7]) * a[11] - + ((sp_int64)a[ 8]) * a[10]) * 2 - + ((sp_int64)a[ 9]) * a[ 9]; - sp_int64 t19 = (((sp_int64)a[ 5]) * a[14] - + ((sp_int64)a[ 6]) * a[13] - + ((sp_int64)a[ 7]) * a[12] - + ((sp_int64)a[ 8]) * a[11] - + ((sp_int64)a[ 9]) * a[10]) * 2; - sp_int64 t20 = (((sp_int64)a[ 6]) * a[14] - + ((sp_int64)a[ 7]) * a[13] - + ((sp_int64)a[ 8]) * a[12] - + ((sp_int64)a[ 9]) * a[11]) * 2 - + ((sp_int64)a[10]) * a[10]; - sp_int64 t21 = (((sp_int64)a[ 7]) * a[14] - + ((sp_int64)a[ 8]) * a[13] - + ((sp_int64)a[ 9]) * a[12] - + ((sp_int64)a[10]) * a[11]) * 2; - sp_int64 t22 = (((sp_int64)a[ 8]) * a[14] - + ((sp_int64)a[ 9]) * a[13] - + ((sp_int64)a[10]) * a[12]) * 2 - + ((sp_int64)a[11]) * a[11]; - sp_int64 t23 = (((sp_int64)a[ 
9]) * a[14] - + ((sp_int64)a[10]) * a[13] - + ((sp_int64)a[11]) * a[12]) * 2; - sp_int64 t24 = (((sp_int64)a[10]) * a[14] - + ((sp_int64)a[11]) * a[13]) * 2 - + ((sp_int64)a[12]) * a[12]; - sp_int64 t25 = (((sp_int64)a[11]) * a[14] - + ((sp_int64)a[12]) * a[13]) * 2; - sp_int64 t26 = (((sp_int64)a[12]) * a[14]) * 2 - + ((sp_int64)a[13]) * a[13]; - sp_int64 t27 = (((sp_int64)a[13]) * a[14]) * 2; - sp_int64 t28 = ((sp_int64)a[14]) * a[14]; + sp_int64 t0; + sp_int64 t1; + sp_digit t[15]; - t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff; - t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff; - t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff; - t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff; - t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff; - t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff; - t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff; - t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff; - t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff; - t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff; - t11 += t10 >> 26; r[10] = t10 & 0x3ffffff; - t12 += t11 >> 26; r[11] = t11 & 0x3ffffff; - t13 += t12 >> 26; r[12] = t12 & 0x3ffffff; - t14 += t13 >> 26; r[13] = t13 & 0x3ffffff; - t15 += t14 >> 26; r[14] = t14 & 0x3ffffff; - t16 += t15 >> 26; r[15] = t15 & 0x3ffffff; - t17 += t16 >> 26; r[16] = t16 & 0x3ffffff; - t18 += t17 >> 26; r[17] = t17 & 0x3ffffff; - t19 += t18 >> 26; r[18] = t18 & 0x3ffffff; - t20 += t19 >> 26; r[19] = t19 & 0x3ffffff; - t21 += t20 >> 26; r[20] = t20 & 0x3ffffff; - t22 += t21 >> 26; r[21] = t21 & 0x3ffffff; - t23 += t22 >> 26; r[22] = t22 & 0x3ffffff; - t24 += t23 >> 26; r[23] = t23 & 0x3ffffff; - t25 += t24 >> 26; r[24] = t24 & 0x3ffffff; - t26 += t25 >> 26; r[25] = t25 & 0x3ffffff; - t27 += t26 >> 26; r[26] = t26 & 0x3ffffff; - t28 += t27 >> 26; r[27] = t27 & 0x3ffffff; - r[29] = (sp_digit)(t28 >> 26); - r[28] = t28 & 0x3ffffff; + t0 = ((sp_int64)a[ 0]) * a[ 0]; + t1 = (((sp_int64)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[ 2]) * 2 + + ((sp_int64)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x3ffffff; t0 += 
t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[ 3] + + ((sp_int64)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[ 4] + + ((sp_int64)a[ 1]) * a[ 3]) * 2 + + ((sp_int64)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[ 5] + + ((sp_int64)a[ 1]) * a[ 4] + + ((sp_int64)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[ 6] + + ((sp_int64)a[ 1]) * a[ 5] + + ((sp_int64)a[ 2]) * a[ 4]) * 2 + + ((sp_int64)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[ 7] + + ((sp_int64)a[ 1]) * a[ 6] + + ((sp_int64)a[ 2]) * a[ 5] + + ((sp_int64)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[ 8] + + ((sp_int64)a[ 1]) * a[ 7] + + ((sp_int64)a[ 2]) * a[ 6] + + ((sp_int64)a[ 3]) * a[ 5]) * 2 + + ((sp_int64)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[ 9] + + ((sp_int64)a[ 1]) * a[ 8] + + ((sp_int64)a[ 2]) * a[ 7] + + ((sp_int64)a[ 3]) * a[ 6] + + ((sp_int64)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[10] + + ((sp_int64)a[ 1]) * a[ 9] + + ((sp_int64)a[ 2]) * a[ 8] + + ((sp_int64)a[ 3]) * a[ 7] + + ((sp_int64)a[ 4]) * a[ 6]) * 2 + + ((sp_int64)a[ 5]) * a[ 5]; + t[ 9] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[11] + + ((sp_int64)a[ 1]) * a[10] + + ((sp_int64)a[ 2]) * a[ 9] + + ((sp_int64)a[ 3]) * a[ 8] + + ((sp_int64)a[ 4]) * a[ 7] + + ((sp_int64)a[ 5]) * a[ 6]) * 2; + t[10] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[12] + + ((sp_int64)a[ 1]) * a[11] + + ((sp_int64)a[ 2]) * a[10] + + ((sp_int64)a[ 3]) * a[ 9] + + ((sp_int64)a[ 4]) * a[ 8] + + ((sp_int64)a[ 5]) * a[ 7]) * 2 + + ((sp_int64)a[ 6]) * a[ 6]; + t[11] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 0]) * a[13] + + ((sp_int64)a[ 1]) * a[12] + + ((sp_int64)a[ 2]) * a[11] + + ((sp_int64)a[ 3]) * 
a[10] + + ((sp_int64)a[ 4]) * a[ 9] + + ((sp_int64)a[ 5]) * a[ 8] + + ((sp_int64)a[ 6]) * a[ 7]) * 2; + t[12] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 0]) * a[14] + + ((sp_int64)a[ 1]) * a[13] + + ((sp_int64)a[ 2]) * a[12] + + ((sp_int64)a[ 3]) * a[11] + + ((sp_int64)a[ 4]) * a[10] + + ((sp_int64)a[ 5]) * a[ 9] + + ((sp_int64)a[ 6]) * a[ 8]) * 2 + + ((sp_int64)a[ 7]) * a[ 7]; + t[13] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 1]) * a[14] + + ((sp_int64)a[ 2]) * a[13] + + ((sp_int64)a[ 3]) * a[12] + + ((sp_int64)a[ 4]) * a[11] + + ((sp_int64)a[ 5]) * a[10] + + ((sp_int64)a[ 6]) * a[ 9] + + ((sp_int64)a[ 7]) * a[ 8]) * 2; + t[14] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 2]) * a[14] + + ((sp_int64)a[ 3]) * a[13] + + ((sp_int64)a[ 4]) * a[12] + + ((sp_int64)a[ 5]) * a[11] + + ((sp_int64)a[ 6]) * a[10] + + ((sp_int64)a[ 7]) * a[ 9]) * 2 + + ((sp_int64)a[ 8]) * a[ 8]; + r[15] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 3]) * a[14] + + ((sp_int64)a[ 4]) * a[13] + + ((sp_int64)a[ 5]) * a[12] + + ((sp_int64)a[ 6]) * a[11] + + ((sp_int64)a[ 7]) * a[10] + + ((sp_int64)a[ 8]) * a[ 9]) * 2; + r[16] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 4]) * a[14] + + ((sp_int64)a[ 5]) * a[13] + + ((sp_int64)a[ 6]) * a[12] + + ((sp_int64)a[ 7]) * a[11] + + ((sp_int64)a[ 8]) * a[10]) * 2 + + ((sp_int64)a[ 9]) * a[ 9]; + r[17] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 5]) * a[14] + + ((sp_int64)a[ 6]) * a[13] + + ((sp_int64)a[ 7]) * a[12] + + ((sp_int64)a[ 8]) * a[11] + + ((sp_int64)a[ 9]) * a[10]) * 2; + r[18] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[ 6]) * a[14] + + ((sp_int64)a[ 7]) * a[13] + + ((sp_int64)a[ 8]) * a[12] + + ((sp_int64)a[ 9]) * a[11]) * 2 + + ((sp_int64)a[10]) * a[10]; + r[19] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 7]) * a[14] + + ((sp_int64)a[ 8]) * a[13] + + ((sp_int64)a[ 9]) * a[12] + + ((sp_int64)a[10]) * a[11]) * 2; + r[20] = t0 & 0x3ffffff; t1 += t0 >> 26; 
+ t0 = (((sp_int64)a[ 8]) * a[14] + + ((sp_int64)a[ 9]) * a[13] + + ((sp_int64)a[10]) * a[12]) * 2 + + ((sp_int64)a[11]) * a[11]; + r[21] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[ 9]) * a[14] + + ((sp_int64)a[10]) * a[13] + + ((sp_int64)a[11]) * a[12]) * 2; + r[22] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[10]) * a[14] + + ((sp_int64)a[11]) * a[13]) * 2 + + ((sp_int64)a[12]) * a[12]; + r[23] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[11]) * a[14] + + ((sp_int64)a[12]) * a[13]) * 2; + r[24] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = (((sp_int64)a[12]) * a[14]) * 2 + + ((sp_int64)a[13]) * a[13]; + r[25] = t1 & 0x3ffffff; t0 += t1 >> 26; + t1 = (((sp_int64)a[13]) * a[14]) * 2; + r[26] = t0 & 0x3ffffff; t1 += t0 >> 26; + t0 = ((sp_int64)a[14]) * a[14]; + r[27] = t1 & 0x3ffffff; t0 += t1 >> 26; + r[28] = t0 & 0x3ffffff; + r[29] = (sp_digit)(t0 >> 26); + XMEMCPY(r, t, sizeof(t)); } #endif /* WOLFSSL_SP_SMALL */ @@ -27552,20 +27808,23 @@ SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a, static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 26 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 25); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 25); } #elif DIGIT_BIT > 26 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x3ffffff; s = 26U - s; @@ -27595,12 +27854,12 @@ static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= 
((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 26) { r[j] &= 0x3ffffff; @@ -27735,8 +27994,6 @@ static int sp_384_point_to_ecc_point_15(const sp_point_384* p, ecc_point* pm) return err; } -#define sp_384_mont_reduce_order_15 sp_384_mont_reduce_15 - /* Compare a with b in constant time. * * a A single precision integer. @@ -27751,24 +28008,24 @@ static sp_digit sp_384_cmp_15(const sp_digit* a, const sp_digit* b) int i; for (i=14; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 25); } #else r |= (a[14] - b[14]) & (0 - (sp_digit)1); - r |= (a[13] - b[13]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[12] - b[12]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[11] - b[11]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[10] - b[10]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 9] - b[ 9]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 7] - b[ 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[13] - b[13]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[12] - b[12]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[11] - b[11]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[10] - b[10]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 9] - b[ 9]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 8] - b[ 8]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 25); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -27962,28 +28219,134 @@ static void sp_384_mont_shift_15(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp) +static void sp_384_mont_reduce_order_15(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_384_norm_15(a + 15); for (i=0; i<14; i++) { - mu = (a[i] * mp) & 0x3ffffff; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x3ffffff; sp_384_mul_add_15(a+i, m, mu); a[i+1] += a[i] >> 26; } - mu = (a[i] * mp) & 0xfffffL; + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0xfffffL; sp_384_mul_add_15(a+i, m, mu); a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; sp_384_mont_shift_15(a, a); - sp_384_cond_sub_15(a, a, m, 0 - (((a[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[14] >> 20; + sp_384_cond_sub_15(a, a, m, ~((over - 1) >> 31)); sp_384_norm_15(a); } +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. 
+ * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit am; + + (void)m; + (void)mp; + + for (i = 0; i < 14; i++) { + am = (a[i] * 0x1) & 0x3ffffff; + a[i + 1] += (am << 6) & 0x3ffffff; + a[i + 2] += am >> 20; + a[i + 3] -= (am << 18) & 0x3ffffff; + a[i + 4] -= am >> 8; + a[i + 4] -= (am << 24) & 0x3ffffff; + a[i + 5] -= am >> 2; + a[i + 14] += (am << 20) & 0x3ffffff; + a[i + 15] += am >> 6; + + a[i + 1] += a[i] >> 26; + } + am = (a[14] * 0x1) & 0xfffff; + a[14 + 1] += (am << 6) & 0x3ffffff; + a[14 + 2] += am >> 20; + a[14 + 3] -= (am << 18) & 0x3ffffff; + a[14 + 4] -= am >> 8; + a[14 + 4] -= (am << 24) & 0x3ffffff; + a[14 + 5] -= am >> 2; + a[14 + 14] += (am << 20) & 0x3ffffff; + a[14 + 15] += am >> 6; + + a[0] = (a[14] >> 20) + ((a[15] << 6) & 0x3ffffff); + a[1] = (a[15] >> 20) + ((a[16] << 6) & 0x3ffffff); + a[2] = (a[16] >> 20) + ((a[17] << 6) & 0x3ffffff); + a[3] = (a[17] >> 20) + ((a[18] << 6) & 0x3ffffff); + a[4] = (a[18] >> 20) + ((a[19] << 6) & 0x3ffffff); + a[5] = (a[19] >> 20) + ((a[20] << 6) & 0x3ffffff); + a[6] = (a[20] >> 20) + ((a[21] << 6) & 0x3ffffff); + a[7] = (a[21] >> 20) + ((a[22] << 6) & 0x3ffffff); + a[8] = (a[22] >> 20) + ((a[23] << 6) & 0x3ffffff); + a[9] = (a[23] >> 20) + ((a[24] << 6) & 0x3ffffff); + a[10] = (a[24] >> 20) + ((a[25] << 6) & 0x3ffffff); + a[11] = (a[25] >> 20) + ((a[26] << 6) & 0x3ffffff); + a[12] = (a[26] >> 20) + ((a[27] << 6) & 0x3ffffff); + a[13] = (a[27] >> 20) + ((a[28] << 6) & 0x3ffffff); + a[14] = (a[14 + 14] >> 20) + (a[29] << 6); + + a[1] += a[0] >> 26; a[0] &= 0x3ffffff; + a[2] += a[1] >> 26; a[1] &= 0x3ffffff; + a[3] += a[2] >> 26; a[2] &= 0x3ffffff; + a[4] += a[3] >> 26; a[3] &= 0x3ffffff; + a[5] += a[4] >> 26; a[4] &= 0x3ffffff; + a[6] += a[5] >> 26; a[5] &= 0x3ffffff; + a[7] += a[6] >> 26; a[6] &= 0x3ffffff; + a[8] += a[7] >> 26; a[7] &= 0x3ffffff; + a[9] += a[8] >> 26; a[8] &= 0x3ffffff; + 
a[10] += a[9] >> 26; a[9] &= 0x3ffffff; + a[11] += a[10] >> 26; a[10] &= 0x3ffffff; + a[12] += a[11] >> 26; a[11] &= 0x3ffffff; + a[13] += a[12] >> 26; a[12] &= 0x3ffffff; + a[14] += a[13] >> 26; a[13] &= 0x3ffffff; + + /* Get the bit over, if any. */ + am = a[14] >> 20; + /* Create mask. */ + am = 0 - am; + + a[0] -= 0x03ffffff & am; + a[1] -= 0x0000003f & am; + /* p384_mod[2] is zero */ + a[3] -= 0x03fc0000 & am; + a[4] -= 0x02ffffff & am; + a[5] -= 0x03ffffff & am; + a[6] -= 0x03ffffff & am; + a[7] -= 0x03ffffff & am; + a[8] -= 0x03ffffff & am; + a[9] -= 0x03ffffff & am; + a[10] -= 0x03ffffff & am; + a[11] -= 0x03ffffff & am; + a[12] -= 0x03ffffff & am; + a[13] -= 0x03ffffff & am; + a[14] -= 0x000fffff & am; + + a[1] += a[0] >> 26; a[0] &= 0x3ffffff; + a[2] += a[1] >> 26; a[1] &= 0x3ffffff; + a[3] += a[2] >> 26; a[2] &= 0x3ffffff; + a[4] += a[3] >> 26; a[3] &= 0x3ffffff; + a[5] += a[4] >> 26; a[4] &= 0x3ffffff; + a[6] += a[5] >> 26; a[5] &= 0x3ffffff; + a[7] += a[6] >> 26; a[6] &= 0x3ffffff; + a[8] += a[7] >> 26; a[7] &= 0x3ffffff; + a[9] += a[8] >> 26; a[8] &= 0x3ffffff; + a[10] += a[9] >> 26; a[9] &= 0x3ffffff; + a[11] += a[10] >> 26; a[10] &= 0x3ffffff; + a[12] += a[11] >> 26; a[11] &= 0x3ffffff; + a[13] += a[12] >> 26; a[12] &= 0x3ffffff; + a[14] += a[13] >> 26; a[13] &= 0x3ffffff; +} + /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -27991,9 +28354,9 @@ static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_384_mul_15(r, a, b); @@ -28005,9 +28368,9 @@ static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_384_sqr_15(r, a); @@ -28021,10 +28384,10 @@ static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_15(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_15(r, a, m, mp); for (; n > 1; n--) { @@ -28032,7 +28395,7 @@ static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P384 curve. */ static const uint32_t p384_mod_minus_2[12] = { @@ -28146,27 +28509,24 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_15(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 15, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 15, 0, sizeof(sp_digit) * 15U); sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_15(r->x, p384_mod); - sp_384_cond_sub_15(r->x, r->x, p384_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_15(r->x, r->x, p384_mod, ~(n >> 25)); sp_384_norm_15(r->x); /* y /= z^3 */ sp_384_mont_mul_15(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 15, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 15, 0, sizeof(sp_digit) * 15U); sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_15(r->y, p384_mod); - sp_384_cond_sub_15(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_15(r->y, r->y, p384_mod, ~(n >> 25)); sp_384_norm_15(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -28179,10 +28539,11 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, b); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } @@ -28194,10 +28555,11 @@ static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b */ static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } @@ -28209,18 +28571,20 @@ static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m */ static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); (void)sp_384_add_15(r, r, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -28232,13 +28596,26 @@ static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 15; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ r[ 0] = a[ 0] + (b[ 0] & m); r[ 1] = a[ 1] + (b[ 1] & m); r[ 2] = a[ 2] + (b[ 2] & m); @@ -28254,8 +28631,8 @@ static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a, r[12] = a[12] + (b[12] & m); r[13] = a[13] + (b[13] & m); r[14] = a[14] + (b[14] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * @@ -28312,7 +28689,8 @@ SP_NOINLINE static void sp_384_rshift1_15(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_15(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_384_cond_add_15(r, a, m, 0 - (a[0] & 1)); sp_384_norm_15(r); @@ -28325,6 +28703,61 @@ static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_15(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_15(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_15(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_15(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_15(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_mont_div2_15(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_15(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_15(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_15_ctx { int state; @@ -28335,7 +28768,14 
@@ typedef struct sp_384_proj_point_dbl_15_ctx { sp_digit* z; } sp_384_proj_point_dbl_15_ctx; -static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_15_ctx* ctx = (sp_384_proj_point_dbl_15_ctx*)sp_ctx->data; @@ -28409,7 +28849,7 @@ static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_15(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_15(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -28459,61 +28899,6 @@ static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*15; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. 
*/ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_15(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_15(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_15(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_15(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_15(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_15(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_15(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_15(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_15(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_15(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -28530,6 +28915,19 @@ static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b) (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_15(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -28537,6 +28935,84 @@ static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_15(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*15; + sp_digit* t2 = t + 4*15; + sp_digit* t3 = t + 6*15; + sp_digit* t4 = t + 8*15; + sp_digit* t5 = t + 10*15; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_15(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(t2, t1) & + sp_384_cmp_equal_15(t4, t3)) { + sp_384_proj_point_dbl_15(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_384_mont_sub_15(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_15(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(x, x, t5, p384_mod); + sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(t3, y, p384_mod); + sp_384_mont_sub_15(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_15(y, y, x, p384_mod); + 
sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, y, t5, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 15; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_15_ctx { @@ -28549,11 +29025,19 @@ typedef struct sp_384_proj_point_add_15_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_384_proj_point_add_15_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_384_proj_point_add_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -28572,261 +29056,168 @@ static int sp_384_proj_point_add_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*15; - ctx->t3 = t + 4*15; - ctx->t4 = t + 6*15; - ctx->t5 = t + 8*15; + ctx->t6 = t; + ctx->t1 = t + 2*15; + ctx->t2 = t + 4*15; + ctx->t3 = t + 6*15; + ctx->t4 = t + 8*15; + ctx->t5 = t + 10*15; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_15(ctx->t1, p384_mod, q->y); - sp_384_norm_15(ctx->t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_384_proj_point_dbl_15_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_384)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<15; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<15; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<15; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_384_mont_sqr_15(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; + break; + case 2: + sp_384_mont_mul_15(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + 
ctx->state = 3; + break; + case 3: + sp_384_mont_mul_15(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_15(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_15(ctx->t1, ctx->t1, ctx->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_15(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_15(ctx->t4, ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_15(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(ctx->t2, ctx->t1) & + sp_384_cmp_equal_15(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_15(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_15(ctx->t3, ctx->t3, ctx->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_15(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_15(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_15(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - 
/* R = S2 - S1 */ - sp_384_mont_sub_15(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_15(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_15(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_15(ctx->z, ctx->z, ctx->t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - sp_384_mont_sqr_15(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_15(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_15(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_dbl_15(ctx->t1, ctx->y, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_15(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t1, p384_mod); + sp_384_mont_mul_15(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_15(ctx->y, ctx->y, ctx->x, p384_mod); + sp_384_mont_sub_15(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - 
sp_384_mont_mul_15(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 15; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_15(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_15(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*15; - sp_digit* t3 = t + 4*15; - sp_digit* t4 = t + 6*15; - sp_digit* t5 = t + 8*15; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_384* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_384_sub_15(t1, p384_mod, q->y); - sp_384_norm_15(t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { - sp_384_proj_point_dbl_15(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<15; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<15; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<15; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t1, t1, x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_15(t3, t3, y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_15(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_15(t4, t4, t3, p384_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(x, x, t5, p384_mod); - 
sp_384_mont_dbl_15(t1, y, p384_mod); - sp_384_mont_sub_15(x, x, t1, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_15(y, y, x, p384_mod); - sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(y, y, t5, p384_mod); - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -28836,7 +29227,7 @@ static void sp_384_proj_point_add_15(sp_point_384* r, */ static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[2 * 12]; @@ -28847,7 +29238,7 @@ static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -28987,7 +29378,7 @@ static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit r[14] = (sp_digit)(t[11] >> 12U); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -29011,6 +29402,108 @@ static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_384 t[3]; + sp_digit tmp[2 * 15 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod); + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod); + + if (err == MP_OKAY) { + i = 14; + c = 20; + n = k[i--] << (26 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 26; + } + + y = (n >> 25) & 1; + n <<= 1; + + sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_384)); + sp_384_proj_point_dbl_15(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_384)); + } + + if (map != 0) { + sp_384_map_15(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_384)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 15 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_384) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_ecc_mulmod_15_ctx { @@ -29126,109 +29619,6 @@ static int sp_384_ecc_mulmod_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int 
sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_384 t[3]; - sp_digit tmp[2 * 15 * 6]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_384) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod); - } - if (err == MP_OKAY) - err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod); - if (err == MP_OKAY) - err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod); - - if (err == MP_OKAY) { - i = 14; - c = 20; - n = k[i--] << (26 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 26; - } - - y = (n >> 25) & 1; - n <<= 1; - - sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_384)); - sp_384_proj_point_dbl_15(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_384)); - } - - if (map != 0) { - sp_384_map_15(r, &t[0], tmp); - } - else { - XMEMCPY(r, &t[0], sizeof(sp_point_384)); - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 15 * 6); - #if defined(WOLFSSL_SMALL_STACK) 
&& !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_384) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_384 { @@ -29296,7 +29686,7 @@ static void sp_384_cond_copy_15(sp_digit* r, const sp_digit* a, const sp_digit m * n Number of times to double * t Temporary ordinate data. */ -static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, +static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, sp_digit* t) { sp_digit* w = t; @@ -29307,6 +29697,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -29317,7 +29708,6 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, /* W = Z^4 */ sp_384_mont_sqr_15(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_15(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -29335,9 +29725,12 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_15(t2, b, p384_mod); sp_384_mont_sub_15(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_15(t2, b, x, p384_mod); + sp_384_mont_dbl_15(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -29347,9 +29740,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_384_mont_mul_15(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_15(y, b, x, p384_mod); - sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(y, y, p384_mod); 
+ sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_15(y, y, t1, p384_mod); } #ifndef WOLFSSL_SP_SMALL @@ -29364,18 +29755,19 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_15(t2, b, p384_mod); sp_384_mont_sub_15(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_15(t2, b, x, p384_mod); + sp_384_mont_dbl_15(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_15(y, b, x, p384_mod); - sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(y, y, p384_mod); + sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_15(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_15(y, y, p384_mod); + sp_384_mont_div2_15(y, y, p384_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -29423,30 +29815,30 @@ static void sp_384_proj_point_dbl_n_store_15(sp_point_384* r, sp_384_mont_sub_15(t1, t1, w, p384_mod); sp_384_mont_tpl_15(a, t1, p384_mod); /* B = X*Y^2 */ - sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(b, t2, x, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(t1, b, p384_mod); - sp_384_mont_sub_15(x, x, t1, p384_mod); + sp_384_mont_dbl_15(t2, b, p384_mod); + sp_384_mont_sub_15(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_15(t2, b, x, p384_mod); + sp_384_mont_dbl_15(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_15(r[j].z, z, y, p384_mod, p384_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_384_mont_sqr_15(t2, t2, p384_mod, p384_mp_mod); + /* t1 = Y^4 */ + sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_384_mont_mul_15(w, w, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_15(y, b, x, p384_mod); - sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(y, y, p384_mod); - sp_384_mont_sub_15(y, y, t2, p384_mod); - + sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_15(r[j].y, y, p384_mod); + sp_384_mont_div2_15(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -29469,30 +29861,30 @@ static void sp_384_proj_point_add_sub_15(sp_point_384* ra, sp_digit* t4 = t + 6*15; sp_digit* t5 = t + 8*15; sp_digit* t6 = t + 10*15; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, 
sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t1, t1, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t1, xa, p384_mod, p384_mp_mod); /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(t2, za, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, za, p384_mod, p384_mp_mod); sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_15(t3, t3, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, t3, ya, p384_mod, p384_mp_mod); /* S2 = Y2*Z1^3 */ sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - U1 */ @@ -29503,30 +29895,30 @@ static void sp_384_proj_point_add_sub_15(sp_point_384* ra, sp_384_mont_sub_15(t4, t4, t3, p384_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_384_mont_mul_15(za, za, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(za, za, t2, p384_mod, p384_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(xa, t4, p384_mod, p384_mp_mod); sp_384_mont_sqr_15(xs, t6, p384_mod, p384_mp_mod); sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ya, t1, t5, p384_mod, p384_mp_mod); sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(x, x, t5, p384_mod); + sp_384_mont_sub_15(xa, xa, t5, p384_mod); sp_384_mont_sub_15(xs, xs, t5, p384_mod); - sp_384_mont_dbl_15(t1, y, 
p384_mod); - sp_384_mont_sub_15(x, x, t1, p384_mod); + sp_384_mont_dbl_15(t1, ya, p384_mod); + sp_384_mont_sub_15(xa, xa, t1, p384_mod); sp_384_mont_sub_15(xs, xs, t1, p384_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_384_mont_sub_15(ys, y, xs, p384_mod); - sp_384_mont_sub_15(y, y, x, p384_mod); - sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(ys, ya, xs, p384_mod); + sp_384_mont_sub_15(ya, ya, xa, p384_mod); + sp_384_mont_mul_15(ya, ya, t4, p384_mod, p384_mp_mod); sp_384_sub_15(t6, p384_mod, t6); sp_384_mont_mul_15(ys, ys, t6, p384_mod, p384_mp_mod); sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(y, y, t5, p384_mod); + sp_384_mont_sub_15(ya, ya, t5, p384_mod); sp_384_mont_sub_15(ys, ys, t5, p384_mod); } @@ -29605,7 +29997,7 @@ static void sp_384_ecc_recode_6_15(const sp_digit* k, ecc_recode_384* v) /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_point_33_15(sp_point_384* r, const sp_point_384* table, @@ -29730,7 +30122,7 @@ static void sp_384_get_point_33_15(sp_point_384* r, const sp_point_384* table, static int sp_384_ecc_mulmod_win_add_sub_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; sp_digit* tmp = NULL; #else @@ -29748,8 +30140,8 @@ static int sp_384_ecc_mulmod_win_add_sub_15(sp_point_384* r, const sp_point_384* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -29844,7 +30236,7 @@ static int sp_384_ecc_mulmod_win_add_sub_15(sp_point_384* r, const sp_point_384* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -29865,76 +30257,75 @@ static int sp_384_ecc_mulmod_win_add_sub_15(sp_point_384* r, const sp_point_384* * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_15(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*15; - sp_digit* t3 = t + 4*15; - sp_digit* t4 = t + 6*15; - sp_digit* t5 = t + 8*15; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*15; + sp_digit* t6 = t + 4*15; + sp_digit* t1 = t + 6*15; + sp_digit* t4 = t + 8*15; + sp_digit* t5 = t + 10*15; - /* Check double */ - (void)sp_384_sub_15(t1, p384_mod, q->y); - sp_384_norm_15(t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(p->x, t2) & + sp_384_cmp_equal_15(p->y, t4)) { sp_384_proj_point_dbl_15(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<15; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<15; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<15; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - 
sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ - sp_384_mont_sub_15(t2, t2, x, p384_mod); + sp_384_mont_sub_15(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ - sp_384_mont_sub_15(t4, t4, y, p384_mod); + sp_384_mont_sub_15(t4, t4, p->y, p384_mod); /* Z3 = H*Z1 */ - sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_384_mont_sqr_15(t1, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t3, x, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(x, t1, t5, p384_mod); - sp_384_mont_dbl_15(t1, t3, p384_mod); - sp_384_mont_sub_15(x, x, t1, p384_mod); + sp_384_mont_sqr_15(t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, p->x, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(t2, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(t2, t2, t1, p384_mod); + sp_384_mont_dbl_15(t5, t3, p384_mod); + sp_384_mont_sub_15(x, t2, t5, p384_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_384_mont_sub_15(t3, t3, x, p384_mod); sp_384_mont_mul_15(t3, t3, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, y, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(y, t3, t5, p384_mod); + sp_384_mont_mul_15(t1, t1, p->y, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, t3, t1, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 15; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 15; i++) { + 
r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -29975,7 +30366,7 @@ static void sp_384_proj_to_affine_15(sp_point_384* a, sp_digit* t) static int sp_384_gen_stripe_table_15(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -29988,7 +30379,7 @@ static int sp_384_gen_stripe_table_15(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -30043,7 +30434,7 @@ static int sp_384_gen_stripe_table_15(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -30056,7 +30447,7 @@ static int sp_384_gen_stripe_table_15(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_entry_256_15(sp_point_384* r, @@ -30150,7 +30541,7 @@ static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -30170,7 +30561,7 @@ static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -30236,7 +30627,7 @@ static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -30279,7 +30670,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -30350,23 +30741,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_15(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 15 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -30387,6 +30791,9 @@ static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -30405,7 +30812,7 @@ static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -30414,7 +30821,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -30437,7 +30844,7 @@ 
int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_384_point_to_ecc_point_15(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -30452,7 +30859,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -30462,8 +30869,8 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* point = NULL; sp_digit* k = NULL; #else sp_point_384 point[2]; @@ -30473,7 +30880,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -30517,7 +30924,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, err = sp_384_point_to_ecc_point_15(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -30544,6 +30951,16 @@ static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k, return sp_384_ecc_mulmod_15(r, &p384_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_384_ecc_mulmod_base_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* 
r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_15_nb(sp_ctx, r, &p384_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -32377,7 +32794,7 @@ static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k, */ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -32386,7 +32803,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -32408,7 +32825,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_384_point_to_ecc_point_15(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -32422,7 +32839,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -32432,7 +32849,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -32443,8 +32860,8 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; @@ -32486,7 +32903,7 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, err = sp_384_point_to_ecc_point_15(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -32498,18 +32915,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_15(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -32573,7 +32978,7 @@ static int sp_384_ecc_gen_k_15(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_384_from_bin(k, 15, buf, (int)sizeof(buf)); - if (sp_384_cmp_15(k, p384_order2) < 0) { + if (sp_384_cmp_15(k, p384_order2) <= 0) { sp_384_add_one_15(k); break; } @@ -32595,7 +33000,7 @@ static int sp_384_ecc_gen_k_15(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -32610,15 +33015,15 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_384* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -32659,7 +33064,7 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_384_point_to_ecc_point_15(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -32671,6 +33076,84 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_384_ctx { + int state; + sp_384_ecc_mulmod_15_ctx mulmod_ctx; + sp_digit k[15]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384 point[2]; +#else + sp_point_384 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_384_ctx; + +int 
sp_ecc_make_key_384_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_384_ctx* ctx = (sp_ecc_key_gen_384_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_384_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_384_ecc_gen_k_15(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_384_ecc_mulmod_base_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_384_ecc_mulmod_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p384_order, 1, 1); + if (err == MP_OKAY) { + if (sp_384_iszero_15(ctx->point->x) || + sp_384_iszero_15(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_384_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_15(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 48 @@ -32689,7 +33172,7 @@ static void sp_384_to_bin_15(sp_digit* r, byte* a) r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - j = 384 / 8 - 1; + j = 391 / 8 - 1; a[j] = 0; for (i=0; i<15 && j>=0; i++) { b = 0; @@ -32731,7 +33214,7 @@ static void sp_384_to_bin_15(sp_digit* r, byte* a) int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -32744,7 +33227,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); @@ -32769,7 +33252,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 48; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -32778,6 +33261,56 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_384_ctx { + int state; + union { + sp_384_ecc_mulmod_15_ctx mulmod_ctx; + }; + sp_digit k[15]; + sp_point_384 point; +} sp_ecc_sec_gen_384_ctx; + +int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_384_ctx* ctx = (sp_ecc_sec_gen_384_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_384_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_384_from_mp(ctx->k, 15, priv); + sp_384_point_from_ecc_point_15(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_384_ecc_mulmod_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_384_to_bin_15(ctx->point.x, out); + *outLen = 48; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -32966,7 +33499,7 @@ static int sp_384_div_15(const sp_digit* a, const sp_digit* d, int i; sp_digit r1; sp_digit mask; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 15 + 3]; @@ -32977,7 +33510,7 @@ static int sp_384_div_15(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 15 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -33001,8 +33534,7 @@ static int sp_384_div_15(const sp_digit* a, const sp_digit* d, t1[15 + i] -= t2[15]; sp_384_norm_15(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[15 + i] > 0) ? 
- (sp_digit)1 : (sp_digit)0); + mask = ~((t1[15 + i] - 1) >> 31); sp_384_cond_sub_15(t1 + i, t1 + i, sd, mask); sp_384_norm_15(&t1[i + 1]); } @@ -33010,7 +33542,7 @@ static int sp_384_div_15(const sp_digit* a, const sp_digit* d, sp_384_rshift_15(r, t1, 6); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -33032,6 +33564,19 @@ static int sp_384_mod_15(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P384 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_384_mont_mul_order_15(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_384_mul_15(r, a, b); + sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P384 curve. */ static const uint32_t p384_order_minus_2[12] = { @@ -33045,18 +33590,6 @@ static const uint32_t p384_order_low[6] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P384 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_384_mont_mul_order_15(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_384_mul_15(r, a, b); - sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order); -} - /* Square number mod the order of P384 curve. (r = a * a mod order) * * r Result of the squaring. 
@@ -33198,6 +33731,7 @@ static void sp_384_mont_inv_order_15(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -33272,6 +33806,128 @@ static int sp_384_calc_s_15(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_384* point = NULL; +#else + sp_digit e[7 * 2 * 15]; + sp_point_384 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 15, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 15; + k = e + 4 * 15; + r = e + 6 * 15; + tmp = e + 8 * 15; + s = e; + + if (hashLen > 48U) { + hashLen = 48U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_15(rng, k); + } + else { + sp_384_from_mp(k, 15, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_15(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 15U); + sp_384_norm_15(r); + c = sp_384_cmp_15(r, p384_order); + sp_384_cond_sub_15(r, r, p384_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_384_norm_15(r); + + if (!sp_384_iszero_15(r)) { + /* x is modified in calculation of s. */ + sp_384_from_mp(x, 15, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_384_from_bin(e, 15, hash, (int)hashLen); + + err = sp_384_calc_s_15(s, r, k, x, e, tmp); + + /* Check that signature is usable. */ + if ((err == MP_OKAY) && (!sp_384_iszero_15(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 15); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_384)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_384_ctx { int state; @@ -33299,15 +33955,10 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 48U) { - hashLen = 48U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -33342,6 +33993,9 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_15(ctx->r); + if (hashLen > 48U) { + hashLen = 48U; + } sp_384_from_mp(ctx->x, 15, priv); sp_384_from_bin(ctx->e, 15, hash, (int)hashLen); ctx->state = 4; @@ -33436,124 +34090,6 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_384* point = NULL; -#else - sp_digit e[7 * 2 * 15]; - sp_point_384 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int32 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 15, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 15; - k = e + 4 * 15; - r = e + 6 * 15; - tmp = e + 8 * 15; - s = e; - - if (hashLen > 48U) { - hashLen = 48U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. 
*/ - if (km == NULL || mp_iszero(km)) { - err = sp_384_ecc_gen_k_15(rng, k); - } - else { - sp_384_from_mp(k, 15, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_384_ecc_mulmod_base_15(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 15U); - sp_384_norm_15(r); - c = sp_384_cmp_15(r, p384_order); - sp_384_cond_sub_15(r, r, p384_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_384_norm_15(r); - - sp_384_from_mp(x, 15, priv); - sp_384_from_bin(e, 15, hash, (int)hashLen); - - err = sp_384_calc_s_15(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_15(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_384_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_384_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 15); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_384)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -33600,7 +34136,7 @@ static int sp_384_num_bits_15(const sp_digit* a) static int sp_384_mod_inv_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* u = NULL; #else sp_digit u[15 * 4]; @@ -33611,7 +34147,7 @@ static int sp_384_mod_inv_15(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut; int vt; -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15 * 4, NULL, DYNAMIC_TYPE_ECC); if (u == NULL) @@ -33651,8 +34187,8 @@ static int sp_384_mod_inv_15(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_384_cmp_15(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_384_cmp_15(u, v) >= 0))) { sp_384_sub_15(u, u, v); sp_384_norm_15(u); @@ -33699,7 +34235,7 @@ static int sp_384_mod_inv_15(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(sp_digit) * 15); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (u != NULL) XFREE(u, NULL, DYNAMIC_TYPE_ECC); #endif @@ -33752,7 +34288,7 @@ static void sp_384_add_points_15(sp_point_384* p1, const sp_point_384* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -33824,6 +34360,106 @@ static int sp_384_calc_vfy_point_15(sp_point_384* p1, sp_point_384* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ +int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_384* p1 = NULL; +#else + sp_digit u1[18 * 15]; + sp_point_384 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_384* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 15, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 15; + s = u1 + 4 * 15; + tmp = u1 + 6 * 15; + p2 = p1 + 1; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(u1, 15, hash, (int)hashLen); + sp_384_from_mp(u2, 15, rm); + sp_384_from_mp(s, 15, sm); + sp_384_from_mp(p2->x, 15, pX); + sp_384_from_mp(p2->y, 15, pY); + sp_384_from_mp(p2->z, 15, pZ); + + err = sp_384_calc_vfy_point_15(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_384_from_mp(u2, 15, rm); + err = sp_384_mod_mul_norm_15(u2, u2, p384_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_384_mont_sqr_15(p1->z, p1->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod); + *res = (int)(sp_384_cmp_15(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_384_from_mp(u2, 15, rm); + carry = sp_384_add_15(u2, u2, p384_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_384_norm_15(u2); + + /* Compare with mod and if greater or equal then not valid. 
*/ + c = sp_384_cmp_15(u2, p384_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_384_mod_mul_norm_15(u2, u2, p384_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod); + } + *res = (sp_384_cmp_15(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_384_ctx { int state; @@ -33836,7 +34472,7 @@ typedef struct sp_ecc_verify_384_ctx { sp_digit u1[2*15]; sp_digit u2[2*15]; sp_digit s[2*15]; - sp_digit tmp[2*15 * 5]; + sp_digit tmp[2*15 * 6]; sp_point_384 p1; sp_point_384 p2; } sp_ecc_verify_384_ctx; @@ -33973,109 +34609,10 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_384* p1 = NULL; -#else - sp_digit u1[16 * 15]; - sp_point_384 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = NULL; - sp_point_384* p2 = NULL; - sp_digit carry; - sp_int32 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 15, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 15; - s = u1 + 4 * 15; - tmp = u1 + 6 * 15; - p2 = p1 + 1; - - if (hashLen > 48U) { - hashLen = 48U; - } - 
- sp_384_from_bin(u1, 15, hash, (int)hashLen); - sp_384_from_mp(u2, 15, rm); - sp_384_from_mp(s, 15, sm); - sp_384_from_mp(p2->x, 15, pX); - sp_384_from_mp(p2->y, 15, pY); - sp_384_from_mp(p2->z, 15, pZ); - - err = sp_384_calc_vfy_point_15(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_384_from_mp(u2, 15, rm); - err = sp_384_mod_mul_norm_15(u2, u2, p384_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_384_mont_sqr_15(p1->z, p1->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod); - *res = (int)(sp_384_cmp_15(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_384_from_mp(u2, 15, rm); - carry = sp_384_add_15(u2, u2, p384_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_384_norm_15(u2); - - /* Compare with mod and if greater or equal then not valid. */ - c = sp_384_cmp_15(u2, p384_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_384_mod_mul_norm_15(u2, u2, p384_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, - p384_mp_mod); - *res = (sp_384_cmp_15(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -34085,7 +34622,7 @@ int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_384_ecc_is_point_15(const sp_point_384* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[15 * 4]; @@ -34093,7 +34630,7 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -34103,25 +34640,27 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 15; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_15(t1, point->y); (void)sp_384_mod_15(t1, t1, p384_mod); sp_384_sqr_15(t2, point->x); (void)sp_384_mod_15(t2, t2, p384_mod); sp_384_mul_15(t2, t2, point->x); (void)sp_384_mod_15(t2, t2, p384_mod); - (void)sp_384_sub_15(t2, p384_mod, t2); - sp_384_mont_add_15(t1, t1, t2, p384_mod); + sp_384_mont_sub_15(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_15(t1, t1, point->x, p384_mod); sp_384_mont_add_15(t1, t1, point->x, p384_mod); sp_384_mont_add_15(t1, t1, point->x, p384_mod); + if (sp_384_cmp_15(t1, p384_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -34129,7 +34668,7 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -34138,7 +34677,7 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, */ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* pub = NULL; #else sp_point_384 pub[1]; @@ -34146,7 +34685,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -34161,7 +34700,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) err = sp_384_ecc_is_point_15(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -34183,7 +34722,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_384* pub = NULL; #else @@ -34204,7 +34743,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); @@ -34270,7 +34809,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -34299,17 +34838,17 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else - sp_digit tmp[2 * 15 * 5]; + sp_digit tmp[2 * 15 * 6]; sp_point_384 p[2]; #endif sp_point_384* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -34317,7 +34856,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -34352,7 +34891,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -34376,7 +34915,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -34385,7 +34924,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -34420,7 +34959,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, 
DYNAMIC_TYPE_ECC); if (p != NULL) @@ -34440,7 +34979,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -34450,7 +34989,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -34484,7 +35023,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_384_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -34502,7 +35041,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_384_mont_sqrt_15(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[5 * 2 * 15]; @@ -34513,7 +35052,7 @@ static int sp_384_mont_sqrt_15(sp_digit* y) sp_digit* t5 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 15, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -34583,7 +35122,7 @@ static int sp_384_mont_sqrt_15(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -34601,7 +35140,7 @@ static int sp_384_mont_sqrt_15(sp_digit* y) */ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit 
x[4 * 15]; @@ -34609,7 +35148,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 15, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -34649,7 +35188,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) err = sp_384_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -34658,6 +35197,8135 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) } #endif #endif /* WOLFSSL_SP_384 */ +#ifdef WOLFSSL_SP_521 + +/* Point structure to use. */ +typedef struct sp_point_521 { + /* X ordinate of point. */ + sp_digit x[2 * 21]; + /* Y ordinate of point. */ + sp_digit y[2 * 21]; + /* Z ordinate of point. */ + sp_digit z[2 * 21]; + /* Indicates point is at infinity. */ + int infinity; +} sp_point_521; + +/* The modulus (prime) of the curve P521. */ +static const sp_digit p521_mod[21] = { + 0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff, + 0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff, + 0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x01fffff +}; +/* The Montgomery normalizer for modulus of the curve P521. */ +static const sp_digit p521_norm_mod[21] = { + 0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000, + 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000, + 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000 +}; +/* The Montgomery multiplier for modulus of the curve P521. */ +static sp_digit p521_mp_mod = 0x000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P521. 
*/ +static const sp_digit p521_order[21] = { + 0x1386409,0x1db8f48,0x1ebaedb,0x1113388,0x1bb5c9b,0x04d2e81,0x00523dc, + 0x0d6ff98,0x1bf2f96,0x0c343c1,0x1fffe94,0x1ffffff,0x1ffffff,0x1ffffff, + 0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x01fffff +}; +#endif +/* The order of the curve P521 minus 2. */ +static const sp_digit p521_order2[21] = { + 0x1386407,0x1db8f48,0x1ebaedb,0x1113388,0x1bb5c9b,0x04d2e81,0x00523dc, + 0x0d6ff98,0x1bf2f96,0x0c343c1,0x1fffe94,0x1ffffff,0x1ffffff,0x1ffffff, + 0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x1ffffff,0x01fffff +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery normalizer for order of the curve P521. */ +static const sp_digit p521_norm_order[21] = { + 0x0c79bf7,0x02470b7,0x0145124,0x0eecc77,0x044a364,0x1b2d17e,0x1fadc23, + 0x1290067,0x040d069,0x13cbc3e,0x000016b,0x0000000,0x0000000,0x0000000, + 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000 +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery multiplier for order of the curve P521. */ +static sp_digit p521_mp_order = 0x1a995c7; +#endif +/* The base point of curve P521. 
*/ +static const sp_point_521 p521_base = { + /* X ordinate */ + { + 0x0e5bd66,0x13f18e1,0x0a6fe5f,0x030ad48,0x1348b3c,0x1fd46f1,0x1049e8b, + 0x051fc3b,0x1efe759,0x0a5af3b,0x14f6ea8,0x1ec0d69,0x01f828a,0x029fda9, + 0x19204e4,0x1688538,0x1662395,0x0cf1f65,0x1013a73,0x1c0d6e0,0x00c6858, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0 + }, + /* Y ordinate */ + { + 0x1d16650,0x14a3b4f,0x090222f,0x0d44e58,0x153c708,0x1683b09,0x0e404fe, + 0x0818aa1,0x15ef426,0x1f7394c,0x1998b25,0x1a2e4e7,0x0817afb,0x0bcda23, + 0x1d51125,0x037b331,0x1b42c7d,0x02e452f,0x08ef001,0x12d4f13,0x0118392, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0 + }, + /* Z ordinate */ + { + 0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000, + 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000, + 0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0 + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p521_b[21] = { + 0x1503f00,0x08fea35,0x13c7bd1,0x107a586,0x1573df8,0x18df839,0x102f4ee, + 0x0f62ca5,0x1ec7e93,0x10c9ca8,0x0427855,0x13231de,0x13b8b48,0x0cd98af, + 0x169c96e,0x081dd45,0x1a0b685,0x1c94d10,0x1872687,0x1d72c31,0x0051953 +}; +#endif + +#ifdef 
WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_521_mul_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + int imax; + int k; + sp_uint64 c; + sp_uint64 lo; + + c = ((sp_uint64)a[20]) * b[20]; + r[41] = (sp_digit)(c >> 25); + c &= 0x1ffffff; + for (k = 39; k >= 0; k--) { + if (k >= 21) { + i = k - 20; + imax = 20; + } + else { + i = 0; + imax = k; + } + lo = 0; + for (; i <= imax; i++) { + lo += ((sp_uint64)a[i]) * b[k - i]; + } + c += lo >> 25; + r[k + 2] += (sp_digit)(c >> 25); + r[k + 1] = (sp_digit)(c & 0x1ffffff); + c = lo & 0x1ffffff; + } + r[0] = (sp_digit)c; +} + +#else +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_521_mul_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + int j; + sp_int64 t[42]; + + XMEMSET(t, 0, sizeof(t)); + for (i=0; i<21; i++) { + for (j=0; j<21; j++) { + t[i+j] += ((sp_int64)a[i]) * b[j]; + } + } + for (i=0; i<41; i++) { + r[i] = t[i] & 0x1ffffff; + t[i+1] += t[i] >> 25; + } + r[41] = (sp_digit)t[41]; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_521_sqr_21(sp_digit* r, const sp_digit* a) +{ + int i; + int imax; + int k; + sp_uint64 c; + sp_uint64 t; + + c = ((sp_uint64)a[20]) * a[20]; + r[41] = (sp_digit)(c >> 25); + c = (c & 0x1ffffff) << 25; + for (k = 39; k >= 0; k--) { + i = (k + 1) / 2; + if ((k & 1) == 0) { + c += ((sp_uint64)a[i]) * a[i]; + i++; + } + if (k < 20) { + imax = k; + } + else { + imax = 20; + } + t = 0; + for (; i <= imax; i++) { + t += ((sp_uint64)a[i]) * a[k - i]; + } + c += t * 2; + + r[k + 2] += (sp_digit) (c >> 50); + r[k + 1] = (sp_digit)((c >> 25) & 0x1ffffff); + c = (c & 0x1ffffff) << 25; + } + r[0] = (sp_digit)(c >> 25); +} + +#else +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_521_sqr_21(sp_digit* r, const sp_digit* a) +{ + int i; + int j; + sp_int64 t[42]; + + XMEMSET(t, 0, sizeof(t)); + for (i=0; i<21; i++) { + for (j=0; j> 25; + } + r[41] = (sp_digit)t[41]; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_521_add_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 21; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#else +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_521_add_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[16] = a[16] + b[16]; + r[17] = a[17] + b[17]; + r[18] = a[18] + b[18]; + r[19] = a[19] + b[19]; + r[20] = a[20] + b[20]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_521_sub_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 21; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_521_sub_21(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[16] = a[16] - b[16]; + r[17] = a[17] - b[17]; + r[18] = a[18] - b[18]; + r[19] = a[19] - b[19]; + r[20] = a[20] - b[20]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. 
+ */ +static void sp_521_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 25 + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; + + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 24); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 24); + } +#elif DIGIT_BIT > 25 + unsigned int i; + int j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < (unsigned int)a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x1ffffff; + s = 25U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 25U) <= (word32)DIGIT_BIT) { + s += 25U; + r[j] &= 0x1ffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = (sp_digit)0; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + unsigned int i; + int j = 0; + int s = 0; + + r[0] = 0; + for (i = 0; i < (unsigned int)a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 25) { + r[j] &= 0x1ffffff; + if (j + 1 >= size) { + break; + } + s = 25 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Convert a point of type ecc_point to type sp_point_521. + * + * p Point of type sp_point_521 (result). + * pm Point of type ecc_point. 
+ */ +static void sp_521_point_from_ecc_point_21(sp_point_521* p, + const ecc_point* pm) +{ + XMEMSET(p->x, 0, sizeof(p->x)); + XMEMSET(p->y, 0, sizeof(p->y)); + XMEMSET(p->z, 0, sizeof(p->z)); + sp_521_from_mp(p->x, 21, pm->x); + sp_521_from_mp(p->y, 21, pm->y); + sp_521_from_mp(p->z, 21, pm->z); + p->infinity = 0; +} + +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_521_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (521 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 25 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 21); + r->used = 21; + mp_clamp(r); +#elif DIGIT_BIT < 25 + int i; + int j = 0; + int s = 0; + + r->dp[0] = 0; + for (i = 0; i < 21; i++) { + r->dp[j] |= (mp_digit)(a[i] << s); + r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = (mp_digit)(a[i] >> s); + while (s + DIGIT_BIT <= 25) { + s += DIGIT_BIT; + r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = (mp_digit)(a[i] >> s); + } + } + s = 25 - s; + } + r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i; + int j = 0; + int s = 0; + + r->dp[0] = 0; + for (i = 0; i < 21; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 25 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 25 - s; + } + else { + s += 25; + } + } + r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Convert a point of type sp_point_521 to type ecc_point. + * + * p Point of type sp_point_521. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. 
+ */
+static int sp_521_point_to_ecc_point_21(const sp_point_521* p, ecc_point* pm)
+{
+    int err;
+
+    /* Copy each ordinate; stop at the first conversion failure. */
+    err = sp_521_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_521_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_521_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Normalize the values in each word to 25 bits.
+ *
+ * Propagates the carry of each digit into the next digit and masks the
+ * digit back to 25 bits. Only digits 0..19 are carried; any excess stays
+ * in the top digit a[20].
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_521_norm_21(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 20; i++) {
+        a[i+1] += a[i] >> 25;
+        a[i] &= 0x1ffffff;
+    }
+#else
+    int i;
+    for (i = 0; i < 16; i += 8) {
+        a[i+1] += a[i+0] >> 25; a[i+0] &= 0x1ffffff;
+        a[i+2] += a[i+1] >> 25; a[i+1] &= 0x1ffffff;
+        a[i+3] += a[i+2] >> 25; a[i+2] &= 0x1ffffff;
+        a[i+4] += a[i+3] >> 25; a[i+3] &= 0x1ffffff;
+        a[i+5] += a[i+4] >> 25; a[i+4] &= 0x1ffffff;
+        a[i+6] += a[i+5] >> 25; a[i+5] &= 0x1ffffff;
+        a[i+7] += a[i+6] >> 25; a[i+6] &= 0x1ffffff;
+        a[i+8] += a[i+7] >> 25; a[i+7] &= 0x1ffffff;
+    }
+    a[17] += a[16] >> 25; a[16] &= 0x1ffffff;
+    a[18] += a[17] >> 25; a[17] &= 0x1ffffff;
+    a[19] += a[18] >> 25; a[18] &= 0x1ffffff;
+    a[20] += a[19] >> 25; a[19] &= 0x1ffffff;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 521 bits using Montgomery reduction.
+ *
+ * The number is stored as 21 digits of 25 bits; 521 = 20*25 + 21, so the
+ * top digit a[20] holds only 21 bits (mask 0x1fffff). Bits at and above
+ * position 521 are folded back down into the low digits.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_521_mont_reduce_21(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+
+    /* m and mp are unused: the P-521 Mersenne form allows reduction by
+     * shifting high bits down instead of a generic Montgomery step. */
+    (void)m;
+    (void)mp;
+
+    for (i = 0; i < 20; i++) {
+        a[i] += ((a[20 + i] >> 21) + (a[20 + i + 1] << 4)) & 0x1ffffff;
+    }
+    a[20] &= 0x1fffff;
+    a[20] += ((a[40] >> 21) + (a[41] << 4)) & 0x1ffffff;
+
+    sp_521_norm_21(a);
+
+    /* Fold any remaining overflow of the 21-bit top digit into digit 0. */
+    a[0] += a[20] >> 21;
+    a[20] &= 0x1fffff;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_521_cmp_21(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Scan from most to least significant digit. Once a non-zero
+     * difference is captured in r, the mask ~((0 - r) >> 24) becomes 0
+     * and less significant digits can no longer change the result, while
+     * every digit is still touched (constant time). */
+    for (i=20; i>=0; i--) {
+        r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 24);
+    }
+#else
+    int i;
+
+    r |= (a[20] - b[20]) & (0 - (sp_digit)1);
+    r |= (a[19] - b[19]) & ~(((sp_digit)0 - r) >> 24);
+    r |= (a[18] - b[18]) & ~(((sp_digit)0 - r) >> 24);
+    r |= (a[17] - b[17]) & ~(((sp_digit)0 - r) >> 24);
+    r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 24);
+    for (i = 8; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 24);
+        r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 24);
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply.
+ */ +static void sp_521_cond_sub_21(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 21; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 16; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[16] = a[16] - (b[16] & m); + r[17] = a[17] - (b[17] & m); + r[18] = a[18] - (b[18] & m); + r[19] = a[19] - (b[19] & m); + r[20] = a[20] - (b[20] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_521_mul_add_21(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + sp_int64 tb = b; + sp_int64 t[4]; + int i; + + t[0] = 0; + for (i = 0; i < 20; i += 4) { + t[0] += (tb * a[i+0]) + r[i+0]; + t[1] = (tb * a[i+1]) + r[i+1]; + t[2] = (tb * a[i+2]) + r[i+2]; + t[3] = (tb * a[i+3]) + r[i+3]; + r[i+0] = t[0] & 0x1ffffff; + t[1] += t[0] >> 25; + r[i+1] = t[1] & 0x1ffffff; + t[2] += t[1] >> 25; + r[i+2] = t[2] & 0x1ffffff; + t[3] += t[2] >> 25; + r[i+3] = t[3] & 0x1ffffff; + t[0] = t[3] >> 25; + } + t[0] += (tb * a[20]) + r[20]; + r[20] = t[0] & 0x1ffffff; + r[21] += (sp_digit)(t[0] >> 25); +#else + sp_int64 tb = b; + sp_int64 t[8]; + int i; + + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffff); + for (i = 0; i < 16; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (sp_digit)((t[0] >> 25) + (t[1] & 0x1ffffff)); + t[2] = tb * a[i+2]; + r[i+2] += (sp_digit)((t[1] >> 25) + (t[2] & 0x1ffffff)); + t[3] = tb * a[i+3]; + r[i+3] += (sp_digit)((t[2] >> 25) + (t[3] & 0x1ffffff)); + t[4] = tb * a[i+4]; + r[i+4] += 
(sp_digit)((t[3] >> 25) + (t[4] & 0x1ffffff)); + t[5] = tb * a[i+5]; + r[i+5] += (sp_digit)((t[4] >> 25) + (t[5] & 0x1ffffff)); + t[6] = tb * a[i+6]; + r[i+6] += (sp_digit)((t[5] >> 25) + (t[6] & 0x1ffffff)); + t[7] = tb * a[i+7]; + r[i+7] += (sp_digit)((t[6] >> 25) + (t[7] & 0x1ffffff)); + t[0] = tb * a[i+8]; + r[i+8] += (sp_digit)((t[7] >> 25) + (t[0] & 0x1ffffff)); + } + t[1] = tb * a[17]; + r[17] += (sp_digit)((t[0] >> 25) + (t[1] & 0x1ffffff)); + t[2] = tb * a[18]; + r[18] += (sp_digit)((t[1] >> 25) + (t[2] & 0x1ffffff)); + t[3] = tb * a[19]; + r[19] += (sp_digit)((t[2] >> 25) + (t[3] & 0x1ffffff)); + t[4] = tb * a[20]; + r[20] += (sp_digit)((t[3] >> 25) + (t[4] & 0x1ffffff)); + r[21] += (sp_digit)(t[4] >> 25); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Shift the result in the high 521 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_521_mont_shift_21(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + sp_digit n; + sp_digit s; + + s = a[21]; + n = a[20] >> 21; + for (i = 0; i < 20; i++) { + n += (s & 0x1ffffff) << 4; + r[i] = n & 0x1ffffff; + n >>= 25; + s = a[22 + i] + (s >> 25); + } + n += s << 4; + r[20] = n; +#else + sp_digit n; + sp_digit s; + int i; + + s = a[21]; n = a[20] >> 21; + for (i = 0; i < 16; i += 8) { + n += (s & 0x1ffffff) << 4; r[i+0] = n & 0x1ffffff; + n >>= 25; s = a[i+22] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+1] = n & 0x1ffffff; + n >>= 25; s = a[i+23] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+2] = n & 0x1ffffff; + n >>= 25; s = a[i+24] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+3] = n & 0x1ffffff; + n >>= 25; s = a[i+25] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+4] = n & 0x1ffffff; + n >>= 25; s = a[i+26] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+5] = n & 0x1ffffff; + n >>= 25; s = a[i+27] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+6] = n & 0x1ffffff; + n >>= 25; s = a[i+28] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[i+7] = n & 
0x1ffffff; + n >>= 25; s = a[i+29] + (s >> 25); + } + n += (s & 0x1ffffff) << 4; r[16] = n & 0x1ffffff; + n >>= 25; s = a[38] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[17] = n & 0x1ffffff; + n >>= 25; s = a[39] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[18] = n & 0x1ffffff; + n >>= 25; s = a[40] + (s >> 25); + n += (s & 0x1ffffff) << 4; r[19] = n & 0x1ffffff; + n >>= 25; s = a[41] + (s >> 25); + n += s << 4; r[20] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[21], 0, sizeof(*r) * 21U); +} + +/* Reduce the number back to 521 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_521_mont_reduce_order_21(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + sp_digit over; + + sp_521_norm_21(a + 21); + + for (i=0; i<20; i++) { + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1ffffff; + sp_521_mul_add_21(a+i, m, mu); + a[i+1] += a[i] >> 25; + } + mu = ((sp_uint32)a[i] * (sp_uint32)mp) & 0x1fffffL; + sp_521_mul_add_21(a+i, m, mu); + a[i+1] += a[i] >> 25; + a[i] &= 0x1ffffff; + sp_521_mont_shift_21(a, a); + over = a[20] >> 21; + sp_521_cond_sub_21(a, a, m, ~((over - 1) >> 31)); + sp_521_norm_21(a); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_mul_21(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m, sp_digit mp) +{ + sp_521_mul_21(r, a, b); + sp_521_mont_reduce_21(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. 
+ */
+SP_NOINLINE static void sp_521_mont_sqr_21(sp_digit* r, const sp_digit* a,
+        const sp_digit* m, sp_digit mp)
+{
+    /* Square then reduce the double-width product back to 521 bits. */
+    sp_521_sqr_21(r, a);
+    sp_521_mont_reduce_21(r, m, mp);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+SP_NOINLINE static void sp_521_mont_sqr_n_21(sp_digit* r,
+        const sp_digit* a, int n, const sp_digit* m, sp_digit mp)
+{
+    /* First squaring writes a^2 into r; remaining n-1 square r in place. */
+    sp_521_mont_sqr_21(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_521_mont_sqr_21(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P521 curve.
+ * Exponent p - 2 used by the Fermat inversion in sp_521_mont_inv_21,
+ * stored as 17 32-bit words (521 bits). */
+static const uint32_t p521_mod_minus_2[17] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0x000001ffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P521 curve. (r = 1 / a mod m)
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */ +static void sp_521_mont_inv_21(sp_digit* r, const sp_digit* a, sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 21); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_21(t, t, p521_mod, p521_mp_mod); + if (p521_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32))) + sp_521_mont_mul_21(t, t, a, p521_mod, p521_mp_mod); + } + XMEMCPY(r, t, sizeof(sp_digit) * 21); +#else + sp_digit* t1 = td; + sp_digit* t2 = td + 2 * 21; + sp_digit* t3 = td + 4 * 21; + + /* 0x2 */ + sp_521_mont_sqr_21(t1, a, p521_mod, p521_mp_mod); + /* 0x3 */ + sp_521_mont_mul_21(t2, t1, a, p521_mod, p521_mp_mod); + /* 0x6 */ + sp_521_mont_sqr_21(t1, t2, p521_mod, p521_mp_mod); + /* 0x7 */ + sp_521_mont_mul_21(t3, t1, a, p521_mod, p521_mp_mod); + /* 0xc */ + sp_521_mont_sqr_n_21(t1, t2, 2, p521_mod, p521_mp_mod); + /* 0xf */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0x78 */ + sp_521_mont_sqr_n_21(t1, t2, 3, p521_mod, p521_mp_mod); + /* 0x7f */ + sp_521_mont_mul_21(t3, t3, t1, p521_mod, p521_mp_mod); + /* 0xf0 */ + sp_521_mont_sqr_n_21(t1, t2, 4, p521_mod, p521_mp_mod); + /* 0xff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xff00 */ + sp_521_mont_sqr_n_21(t1, t2, 8, p521_mod, p521_mp_mod); + /* 0xffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffff0000 */ + sp_521_mont_sqr_n_21(t1, t2, 16, p521_mod, p521_mp_mod); + /* 0xffffffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffff00000000 */ + sp_521_mont_sqr_n_21(t1, t2, 32, p521_mod, p521_mp_mod); + /* 0xffffffffffffffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffff0000000000000000 */ + sp_521_mont_sqr_n_21(t1, t2, 64, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffff00000000000000000000000000000000 */ + sp_521_mont_sqr_n_21(t1, t2, 128, p521_mod, p521_mp_mod); 
+ /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000 */ + sp_521_mont_sqr_n_21(t1, t2, 256, p521_mod, p521_mp_mod); + /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_21(t2, t2, t1, p521_mod, p521_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80 */ + sp_521_mont_sqr_n_21(t1, t2, 7, p521_mod, p521_mp_mod); + /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */ + sp_521_mont_mul_21(t2, t3, t1, p521_mod, p521_mp_mod); + /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc */ + sp_521_mont_sqr_n_21(t1, t2, 2, p521_mod, p521_mp_mod); + /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd */ + sp_521_mont_mul_21(r, t1, a, p521_mod, p521_mp_mod); + +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Map the Montgomery form projective coordinate point to an affine point. + * + * r Resulting affine coordinate point. + * p Montgomery form projective coordinate point. + * t Temporary ordinate data. 
+ */ +static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*21; + sp_int32 n; + + sp_521_mont_inv_21(t1, p->z, t + 2*21); + + sp_521_mont_sqr_21(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t2, t1, p521_mod, p521_mp_mod); + + /* x /= z^2 */ + sp_521_mont_mul_21(r->x, p->x, t2, p521_mod, p521_mp_mod); + XMEMSET(r->x + 21, 0, sizeof(sp_digit) * 21U); + sp_521_mont_reduce_21(r->x, p521_mod, p521_mp_mod); + /* Reduce x to less than modulus */ + n = sp_521_cmp_21(r->x, p521_mod); + sp_521_cond_sub_21(r->x, r->x, p521_mod, ~(n >> 24)); + sp_521_norm_21(r->x); + + /* y /= z^3 */ + sp_521_mont_mul_21(r->y, p->y, t1, p521_mod, p521_mp_mod); + XMEMSET(r->y + 21, 0, sizeof(sp_digit) * 21U); + sp_521_mont_reduce_21(r->y, p521_mod, p521_mp_mod); + /* Reduce y to less than modulus */ + n = sp_521_cmp_21(r->y, p521_mod); + sp_521_cond_sub_21(r->y, r->y, p521_mod, ~(n >> 24)); + sp_521_norm_21(r->y); + + XMEMSET(r->z, 0, sizeof(r->z) / 2); + r->z[0] = 1; +} + +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montgomery form. + * b Second number to add in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_add_21(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_21(r, a, b); + sp_521_norm_21(r); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); + sp_521_norm_21(r); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_dbl_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_21(r, a, a); + sp_521_norm_21(r); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); + sp_521_norm_21(r); +} + +/* Triple a Montgomery form number (r = a + a + a % m). 
+ * + * r Result of Tripling. + * a Number to triple in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_tpl_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_21(r, a, a); + sp_521_norm_21(r); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); + sp_521_norm_21(r); + (void)sp_521_add_21(r, r, a); + sp_521_norm_21(r); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); + sp_521_norm_21(r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_521_cond_add_21(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ + int i; + + for (i = 0; i < 21; i++) { + r[i] = a[i] + (b[i] & m); + } +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */
+static void sp_521_cond_add_21(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+    int i;
+
+    /* Unrolled: first 16 digits in batches of 8, then the last 5. The mask
+     * m selects b (m = -1) or zero (m = 0) without branching. */
+    for (i = 0; i < 16; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[16] = a[16] + (b[16] & m);
+    r[17] = a[17] + (b[17] & m);
+    r[18] = a[18] + (b[18] & m);
+    r[19] = a[19] + (b[19] & m);
+    r[20] = a[20] + (b[20] & m);
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_521_mont_sub_21(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_521_sub_21(r, a, b);
+    sp_521_norm_21(r);
+    /* After normalize, a negative result shows as a sign in r[20] >> 21;
+     * use it as the mask to conditionally add the modulus back. */
+    sp_521_cond_add_21(r, r, m, r[20] >> 21);
+    sp_521_norm_21(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_521_rshift1_21(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Each digit keeps its top 24 bits (shifted down) and receives its new
+     * bit 24 from the low bit of the next digit up. */
+    for (i=0; i<20; i++) {
+        r[i] = (a[i] >> 1) + ((a[i + 1] << 24) & 0x1ffffff);
+    }
+#else
+    r[0] = (a[0] >> 1) + ((a[1] << 24) & 0x1ffffff);
+    r[1] = (a[1] >> 1) + ((a[2] << 24) & 0x1ffffff);
+    r[2] = (a[2] >> 1) + ((a[3] << 24) & 0x1ffffff);
+    r[3] = (a[3] >> 1) + ((a[4] << 24) & 0x1ffffff);
+    r[4] = (a[4] >> 1) + ((a[5] << 24) & 0x1ffffff);
+    r[5] = (a[5] >> 1) + ((a[6] << 24) & 0x1ffffff);
+    r[6] = (a[6] >> 1) + ((a[7] << 24) & 0x1ffffff);
+    r[7] = (a[7] >> 1) + ((a[8] << 24) & 0x1ffffff);
+    r[8] = (a[8] >> 1) + ((a[9] << 24) & 0x1ffffff);
+    r[9] = (a[9] >> 1) + ((a[10] << 24) & 0x1ffffff);
+    r[10] = (a[10] >> 1) + ((a[11] << 24) & 0x1ffffff);
+    r[11] = (a[11] >> 1) + ((a[12] << 24) & 0x1ffffff);
+    r[12] = (a[12] >> 1) + ((a[13] << 24) & 0x1ffffff);
+    r[13] = (a[13] >> 1) + ((a[14] << 24) & 0x1ffffff);
+    r[14] = (a[14] >> 1) + ((a[15] << 24) & 0x1ffffff);
+    r[15] = (a[15] >> 1) + ((a[16] << 24) & 0x1ffffff);
+    r[16] = (a[16] >> 1) + ((a[17] << 24) & 0x1ffffff);
+    r[17] = (a[17] >> 1) + ((a[18] << 24) & 0x1ffffff);
+    r[18] = (a[18] >> 1) + ((a[19] << 24) & 0x1ffffff);
+    r[19] = (a[19] >> 1) + ((a[20] << 24) & 0x1ffffff);
+#endif
+    r[20] = a[20] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_521_mont_div2_21(sp_digit* r, const sp_digit* a,
+        const sp_digit* m)
+{
+    /* If a is odd, add the (odd) modulus first so the halving is exact
+     * modulo m. The mask 0 - (a[0] & 1) is -1 for odd, 0 for even. */
+    sp_521_cond_add_21(r, a, m, 0 - (a[0] & 1));
+    sp_521_norm_21(r);
+    sp_521_rshift1_21(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */ +static void sp_521_proj_point_dbl_21(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*21; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_21(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_21(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_21(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_21(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_21(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_21(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_21(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_21(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_21(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_21(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_mont_div2_21(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_21(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_21(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_21(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_21(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_21(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_21(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_21(y, y, t2, p521_mod); +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_proj_point_dbl_21_ctx { + int state; + sp_digit* t1; + sp_digit* t2; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_521_proj_point_dbl_21_ctx; + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_521_proj_point_dbl_21_ctx* ctx = (sp_521_proj_point_dbl_21_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_521_proj_point_dbl_21_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + ctx->t1 = t; + ctx->t2 = t + 2*21; + ctx->x = r->x; + ctx->y = r->y; + ctx->z = r->z; + + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + ctx->state = 1; + break; + case 1: + /* T1 = Z * Z */ + sp_521_mont_sqr_21(ctx->t1, p->z, p521_mod, p521_mp_mod); + ctx->state = 2; + break; + case 2: + /* Z = Y * Z */ + sp_521_mont_mul_21(ctx->z, p->y, p->z, p521_mod, p521_mp_mod); + ctx->state = 3; + break; + case 3: + /* Z = 2Z */ + sp_521_mont_dbl_21(ctx->z, ctx->z, p521_mod); + ctx->state = 4; + break; + case 4: + /* T2 = X - T1 */ + sp_521_mont_sub_21(ctx->t2, p->x, ctx->t1, p521_mod); + ctx->state = 5; + break; + case 5: + /* T1 = X + T1 */ + sp_521_mont_add_21(ctx->t1, p->x, ctx->t1, p521_mod); + ctx->state = 6; + break; + case 6: + /* T2 = T1 * T2 */ + sp_521_mont_mul_21(ctx->t2, ctx->t1, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 7; + break; + case 7: + /* T1 = 3T2 */ + sp_521_mont_tpl_21(ctx->t1, ctx->t2, p521_mod); + ctx->state = 8; + break; + case 8: + /* Y = 2Y */ + sp_521_mont_dbl_21(ctx->y, p->y, p521_mod); + ctx->state = 9; + break; + case 9: + /* Y = Y * Y */ + sp_521_mont_sqr_21(ctx->y, ctx->y, p521_mod, p521_mp_mod); + ctx->state = 10; + break; + case 10: + /* T2 = Y * Y */ + sp_521_mont_sqr_21(ctx->t2, ctx->y, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: + /* T2 = T2/2 */ + sp_521_mont_div2_21(ctx->t2, ctx->t2, p521_mod); + ctx->state = 12; + break; + case 12: + /* Y = Y * X */ + sp_521_mont_mul_21(ctx->y, ctx->y, p->x, p521_mod, p521_mp_mod); + ctx->state = 13; + break; + case 13: + /* X = T1 * T1 */ + 
sp_521_mont_sqr_21(ctx->x, ctx->t1, p521_mod, p521_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14:
+        /* X = X - Y */
+        sp_521_mont_sub_21(ctx->x, ctx->x, ctx->y, p521_mod);
+        ctx->state = 15;
+        break;
+    case 15:
+        /* X = X - Y */
+        sp_521_mont_sub_21(ctx->x, ctx->x, ctx->y, p521_mod);
+        ctx->state = 16;
+        break;
+    case 16:
+        /* Y = Y - X */
+        sp_521_mont_sub_21(ctx->y, ctx->y, ctx->x, p521_mod);
+        ctx->state = 17;
+        break;
+    case 17:
+        /* Y = Y * T1 */
+        sp_521_mont_mul_21(ctx->y, ctx->y, ctx->t1, p521_mod, p521_mp_mod);
+        ctx->state = 18;
+        break;
+    case 18:
+        /* Y = Y - T2 */
+        sp_521_mont_sub_21(ctx->y, ctx->y, ctx->t2, p521_mod);
+        ctx->state = 19;
+        /* fall-through */
+    case 19:
+        err = MP_OKAY;
+        break;
+    }
+
+    /* Not yet at the terminal state: report that the caller should call
+     * again (non-blocking operation still in progress). */
+    if (err == MP_OKAY && ctx->state != 19) {
+        err = FP_WOULDBLOCK;
+    }
+
+    return err;
+}
+#endif /* WOLFSSL_SP_NONBLOCK */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_521_cmp_equal_21(const sp_digit* a, const sp_digit* b)
+{
+    /* OR together the XOR of every digit pair; zero iff all digits match. */
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
+            (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
+            (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8]) |
+            (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) |
+            (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14]) |
+            (a[15] ^ b[15]) | (a[16] ^ b[16]) | (a[17] ^ b[17]) |
+            (a[18] ^ b[18]) | (a[19] ^ b[19]) | (a[20] ^ b[20])) == 0;
+}
+
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_521_iszero_21(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+            a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] |
+            a[16] | a[17] | a[18] | a[19] | a[20]) == 0;
+}
+
+
+/* Add two Montgomery form projective points.
+ *
+ * r  Result of addition.
+ * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_add_21(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*21; + sp_digit* t2 = t + 4*21; + sp_digit* t3 = t + 6*21; + sp_digit* t4 = t + 8*21; + sp_digit* t5 = t + 10*21; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_21(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_21(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(t2, t1) & + sp_521_cmp_equal_21(t4, t3)) { + sp_521_proj_point_dbl_21(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_521_mont_sub_21(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_21(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(x, x, t5, p521_mod); + sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t3, y, p521_mod); + sp_521_mont_sub_21(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_21(y, y, x, p521_mod); + sp_521_mont_mul_21(y, y, t4, p521_mod, p521_mp_mod); + 
sp_521_mont_sub_21(y, y, t5, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 21; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_proj_point_add_21_ctx { + int state; + sp_521_proj_point_dbl_21_ctx dbl_ctx; + const sp_point_521* ap[2]; + sp_point_521* rp[2]; + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + sp_digit* t6; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_521_proj_point_add_21_ctx; + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static int sp_521_proj_point_add_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_521_proj_point_add_21_ctx* ctx = (sp_521_proj_point_add_21_ctx*)sp_ctx->data; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_521* a = p; + p = q; + q = a; + } + + typedef char ctx_size_test[sizeof(sp_521_proj_point_add_21_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->t6 = t; + ctx->t1 = t + 2*21; + ctx->t2 = t + 4*21; + ctx->t3 = t + 6*21; + ctx->t4 = t + 8*21; + ctx->t5 = t + 10*21; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; + + ctx->state = 1; + break; + case 1: + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_21(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; + break; + case 2: + sp_521_mont_mul_21(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; + break; + case 3: + sp_521_mont_mul_21(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 5; + break; + case 5: + sp_521_mont_mul_21(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 6; + break; + case 6: + sp_521_mont_mul_21(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); + ctx->state = 7; + break; + case 7: + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_21(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + ctx->state = 8; + break; + case 8: + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + ctx->state = 9; + break; + case 9: + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(ctx->t2, ctx->t1) & + sp_521_cmp_equal_21(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_21(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } + break; + case 10: + /* H = U2 - U1 */ + sp_521_mont_sub_21(ctx->t2, ctx->t2, ctx->t1, p521_mod); + ctx->state = 11; + break; + case 11: + /* R = S2 - S1 */ + sp_521_mont_sub_21(ctx->t4, ctx->t4, ctx->t3, p521_mod); + ctx->state = 12; + break; + case 12: + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 13; + break; + case 13: + sp_521_mont_mul_21(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + ctx->state = 14; + 
break; + case 14: + sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 15; + break; + case 15: + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_21(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 16; + break; + case 16: + sp_521_mont_mul_21(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + ctx->state = 17; + break; + case 17: + sp_521_mont_sqr_21(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 18; + break; + case 18: + sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t5, p521_mod); + ctx->state = 19; + break; + case 19: + sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + ctx->state = 20; + break; + case 20: + sp_521_mont_dbl_21(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t3, p521_mod); + ctx->state = 21; + break; + case 21: + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_21(ctx->y, ctx->y, ctx->x, p521_mod); + ctx->state = 22; + break; + case 22: + sp_521_mont_mul_21(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 23; + break; + case 23: + sp_521_mont_sub_21(ctx->y, ctx->y, ctx->t5, p521_mod); + ctx->state = 24; + break; + case 24: + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 21; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + ctx->state = 25; + break; + } + case 25: + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 25) { + err = FP_WOULDBLOCK; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +/* Multiply a number by Montgomery normalizer 
mod modulus (prime). + * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. + */ +static int sp_521_mod_mul_norm_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)m; + + if (r != a) { + XMEMCPY(r, a, 21 * sizeof(sp_digit)); + } + + return MP_OKAY; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Small implementation using add and double that is cache attack resistant but + * allocates memory rather than use large stacks. + * 521 adds and doubles. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_21(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 t[3]; + sp_digit tmp[2 * 21 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_521) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_21(t[1].x, g->x, p521_mod); + } + if (err == MP_OKAY) + err = sp_521_mod_mul_norm_21(t[1].y, g->y, p521_mod); + if (err == MP_OKAY) + err = sp_521_mod_mul_norm_21(t[1].z, g->z, p521_mod); + + if (err == MP_OKAY) { + i = 20; + c = 21; + n = k[i--] << (25 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 25; + } + + y = (n >> 24) & 1; + n <<= 1; + + sp_521_proj_point_add_21(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_521)); + sp_521_proj_point_dbl_21(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_521)); + } + + if (map != 0) { + sp_521_map_21(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 21 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_521) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_ecc_mulmod_21_ctx { + int state; + union { + sp_521_proj_point_dbl_21_ctx dbl_ctx; + sp_521_proj_point_add_21_ctx add_ctx; + }; + sp_point_521 t[3]; + sp_digit tmp[2 * 
21 * 6]; + sp_digit n; + int i; + int c; + int y; +} sp_521_ecc_mulmod_21_ctx; + +static int sp_521_ecc_mulmod_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* g, const sp_digit* k, int map, int ct, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_521_ecc_mulmod_21_ctx* ctx = (sp_521_ecc_mulmod_21_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_521_ecc_mulmod_21_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + /* Implementation is constant time. */ + (void)ct; + + switch (ctx->state) { + case 0: /* INIT */ + XMEMSET(ctx->t, 0, sizeof(sp_point_521) * 3); + ctx->i = 20; + ctx->c = 21; + ctx->n = k[ctx->i--] << (25 - ctx->c); + + /* t[0] = {0, 0, 1} * norm */ + ctx->t[0].infinity = 1; + ctx->state = 1; + break; + case 1: /* T1X */ + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_21(ctx->t[1].x, g->x, p521_mod); + ctx->state = 2; + break; + case 2: /* T1Y */ + err = sp_521_mod_mul_norm_21(ctx->t[1].y, g->y, p521_mod); + ctx->state = 3; + break; + case 3: /* T1Z */ + err = sp_521_mod_mul_norm_21(ctx->t[1].z, g->z, p521_mod); + ctx->state = 4; + break; + case 4: /* ADDPREP */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 7; + break; + } + + ctx->n = k[ctx->i--]; + ctx->c = 25; + } + ctx->y = (ctx->n >> 24) & 1; + ctx->n <<= 1; + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 5; + break; + case 5: /* ADD */ + err = sp_521_proj_point_add_21_nb((sp_ecc_ctx_t*)&ctx->add_ctx, + &ctx->t[ctx->y^1], &ctx->t[0], &ctx->t[1], ctx->tmp); + if (err == MP_OKAY) { + XMEMCPY(&ctx->t[2], (void*)(((size_t)&ctx->t[0] & addr_mask[ctx->y^1]) + + ((size_t)&ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_point_521)); + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + ctx->state = 6; + } + break; + case 6: /* DBL */ + err = sp_521_proj_point_dbl_21_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->t[2], + &ctx->t[2], ctx->tmp); + if (err == MP_OKAY) { + XMEMCPY((void*)(((size_t)&ctx->t[0] & 
addr_mask[ctx->y^1]) + + ((size_t)&ctx->t[1] & addr_mask[ctx->y])), &ctx->t[2], + sizeof(sp_point_521)); + ctx->state = 4; + ctx->c--; + } + break; + case 7: /* MAP */ + if (map != 0) { + sp_521_map_21(r, &ctx->t[0], ctx->tmp); + } + else { + XMEMCPY(r, &ctx->t[0], sizeof(sp_point_521)); + } + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 7) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + ForceZero(ctx->tmp, sizeof(ctx->tmp)); + ForceZero(ctx->t, sizeof(ctx->t)); + } + + (void)heap; + + return err; +} + +#endif /* WOLFSSL_SP_NONBLOCK */ + +#else +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_521 { + sp_digit x[21]; + sp_digit y[21]; +} sp_table_entry_521; + +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. + */ +static void sp_521_cond_copy_21(sp_digit* r, const sp_digit* a, const sp_digit m) +{ + sp_digit t[21]; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 21; i++) { + t[i] = r[i] ^ a[i]; + } + for (i = 0; i < 21; i++) { + r[i] ^= t[i] & m; + } +#else + t[ 0] = r[ 0] ^ a[ 0]; + t[ 1] = r[ 1] ^ a[ 1]; + t[ 2] = r[ 2] ^ a[ 2]; + t[ 3] = r[ 3] ^ a[ 3]; + t[ 4] = r[ 4] ^ a[ 4]; + t[ 5] = r[ 5] ^ a[ 5]; + t[ 6] = r[ 6] ^ a[ 6]; + t[ 7] = r[ 7] ^ a[ 7]; + t[ 8] = r[ 8] ^ a[ 8]; + t[ 9] = r[ 9] ^ a[ 9]; + t[10] = r[10] ^ a[10]; + t[11] = r[11] ^ a[11]; + t[12] = r[12] ^ a[12]; + t[13] = r[13] ^ a[13]; + t[14] = r[14] ^ a[14]; + t[15] = r[15] ^ a[15]; + t[16] = r[16] ^ a[16]; + t[17] = r[17] ^ a[17]; + t[18] = r[18] ^ a[18]; + t[19] = r[19] ^ a[19]; + t[20] = r[20] ^ a[20]; + r[ 0] ^= t[ 0] & m; + r[ 1] ^= t[ 1] & m; + r[ 2] ^= t[ 2] & m; + r[ 3] ^= t[ 3] & m; + r[ 4] ^= t[ 4] & m; + r[ 5] ^= t[ 5] & m; + r[ 6] ^= t[ 6] & m; + r[ 7] ^= t[ 7] & m; + r[ 8] ^= t[ 8] & m; + r[ 9] ^= t[ 9] & m; + r[10] ^= t[10] & m; + r[11] ^= t[11] & m; + r[12] ^= 
t[12] & m; + r[13] ^= t[13] & m; + r[14] ^= t[14] & m; + r[15] ^= t[15] & m; + r[16] ^= t[16] & m; + r[17] ^= t[17] & m; + r[18] ^= t[18] & m; + r[19] ^= t[19] & m; + r[20] ^= t[20] & m; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, + sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*21; + sp_digit* b = t + 4*21; + sp_digit* t1 = t + 6*21; + sp_digit* t2 = t + 8*21; + sp_digit* x; + sp_digit* y; + sp_digit* z; + volatile int n = i; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_521_mont_dbl_21(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_21(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(w, w, p521_mod, p521_mp_mod); +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_21(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(t1, t1, w, p521_mod); + sp_521_mont_tpl_21(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_21(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t2, b, p521_mod); + sp_521_mont_sub_21(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_21(t2, b, x, p521_mod); + sp_521_mont_dbl_21(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_21(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_21(t1, t1, p521_mod, p521_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_521_mont_mul_21(w, w, t1, p521_mod, p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, y, t1, p521_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + 
sp_521_mont_sqr_21(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(t1, t1, w, p521_mod); + sp_521_mont_tpl_21(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_21(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t2, b, p521_mod); + sp_521_mont_sub_21(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_21(t2, b, x, p521_mod); + sp_521_mont_dbl_21(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_21(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_21(t1, t1, p521_mod, p521_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, y, t1, p521_mod); +#endif /* WOLFSSL_SP_SMALL */ + /* Y = Y/2 */ + sp_521_mont_div2_21(y, y, p521_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
+ */ +static void sp_521_proj_point_dbl_n_store_21(sp_point_521* r, + const sp_point_521* p, int n, int m, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*21; + sp_digit* b = t + 4*21; + sp_digit* t1 = t + 6*21; + sp_digit* t2 = t + 8*21; + sp_digit* x = r[2*m].x; + sp_digit* y = r[(1<<n)*m].y; + sp_digit* z = r[2*m].z; + int i; + int j; + + for (i=0; i<21; i++) { + x[i] = p->x[i]; + } + for (i=0; i<21; i++) { + y[i] = p->y[i]; + } + for (i=0; i<21; i++) { + z[i] = p->z[i]; + } + + /* Y = 2*Y */ + sp_521_mont_dbl_21(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_21(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(w, w, p521_mod, p521_mp_mod); + j = m; + for (i=1; i<=n; i++) { + j *= 2; + + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_21(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(t1, t1, w, p521_mod); + sp_521_mont_tpl_21(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_21(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(b, t1, x, p521_mod, p521_mp_mod); + x = r[j].x; + /* X = A^2 - 2B */ + sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t2, b, p521_mod); + sp_521_mont_sub_21(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_21(t2, b, x, p521_mod); + sp_521_mont_dbl_21(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_21(r[j].z, z, y, p521_mod, p521_mp_mod); + z = r[j].z; + /* t1 = Y^4 */ + sp_521_mont_sqr_21(t1, t1, p521_mod, p521_mp_mod); + if (i != n) { + /* W = W*Y^4 */ + sp_521_mont_mul_21(w, w, t1, p521_mod, p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, y, t1, p521_mod); + /* Y = Y/2 */ + sp_521_mont_div2_21(r[j].y, y, p521_mod); + r[j].infinity = 0; + } +} + +/* Add two Montgomery form projective points. + * + * ra Result of addition. + * rs Result of subtraction. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ +static void sp_521_proj_point_add_sub_21(sp_point_521* ra, + sp_point_521* rs, const sp_point_521* p, const sp_point_521* q, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*21; + sp_digit* t3 = t + 4*21; + sp_digit* t4 = t + 6*21; + sp_digit* t5 = t + 8*21; + sp_digit* t6 = t + 10*21; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; + sp_digit* xs = rs->x; + sp_digit* ys = rs->y; + sp_digit* zs = rs->z; + + + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); + ra->infinity = 0; + rs->infinity = 0; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_21(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t1, xa, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(t2, za, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t4, t2, za, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_21(t3, t3, ya, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_21(t2, t2, t1, p521_mod); + /* RS = S2 + S1 */ + sp_521_mont_add_21(t6, t4, t3, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_21(t4, t4, t3, p521_mod); + /* Z3 = H*Z1*Z2 */ + /* ZS = H*Z1*Z2 */ + sp_521_mont_mul_21(za, za, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(za, za, t2, p521_mod, p521_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + /* XS = RS^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(xa, t4, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(xs, t6, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ya, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(xa, xa, t5, p521_mod); + sp_521_mont_sub_21(xs, xs, t5, p521_mod); + sp_521_mont_dbl_21(t1, ya, 
p521_mod); + sp_521_mont_sub_21(xa, xa, t1, p521_mod); + sp_521_mont_sub_21(xs, xs, t1, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ + sp_521_mont_sub_21(ys, ya, xs, p521_mod); + sp_521_mont_sub_21(ya, ya, xa, p521_mod); + sp_521_mont_mul_21(ya, ya, t4, p521_mod, p521_mp_mod); + sp_521_sub_21(t6, p521_mod, t6); + sp_521_mont_mul_21(ys, ys, t6, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(ya, ya, t5, p521_mod); + sp_521_mont_sub_21(ys, ys, t5, p521_mod); +} + +/* Structure used to describe recoding of scalar multiplication. */ +typedef struct ecc_recode_521 { + /* Index into pre-computation table. */ + uint8_t i; + /* Use the negative of the point. */ + uint8_t neg; +} ecc_recode_521; + +/* The index into pre-computation table to use. */ +static const uint8_t recode_index_21_6[66] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, +}; + +/* Whether to negate y-ordinate. */ +static const uint8_t recode_neg_21_6[66] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, +}; + +/* Recode the scalar for multiplication using pre-computed values and + * subtraction. + * + * k Scalar to multiply by. + * v Vector of operations to perform. 
+ */ +static void sp_521_ecc_recode_6_21(const sp_digit* k, ecc_recode_521* v) +{ + int i; + int j; + uint8_t y; + int carry = 0; + int o; + sp_digit n; + + j = 0; + n = k[j]; + o = 0; + for (i=0; i<87; i++) { + y = (int8_t)n; + if (o + 6 < 25) { + y &= 0x3f; + n >>= 6; + o += 6; + } + else if (o + 6 == 25) { + n >>= 6; + if (++j < 21) + n = k[j]; + o = 0; + } + else if (++j < 21) { + n = k[j]; + y |= (uint8_t)((n << (25 - o)) & 0x3f); + o -= 19; + n >>= o; + } + + y += (uint8_t)carry; + v[i].i = recode_index_21_6[y]; + v[i].neg = recode_neg_21_6[y]; + carry = (y >> 6) + v[i].neg; + } +} + +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible point that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_point_33_21(sp_point_521* r, const sp_point_521* table, + int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->x[9] = 0; + r->x[10] = 0; + r->x[11] = 0; + r->x[12] = 0; + r->x[13] = 0; + r->x[14] = 0; + r->x[15] = 0; + r->x[16] = 0; + r->x[17] = 0; + r->x[18] = 0; + r->x[19] = 0; + r->x[20] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->y[9] = 0; + r->y[10] = 0; + r->y[11] = 0; + r->y[12] = 0; + r->y[13] = 0; + r->y[14] = 0; + r->y[15] = 0; + r->y[16] = 0; + r->y[17] = 0; + r->y[18] = 0; + r->y[19] = 0; + r->y[20] = 0; + r->z[0] = 0; + r->z[1] = 0; + r->z[2] = 0; + r->z[3] = 0; + r->z[4] = 0; + r->z[5] = 0; + r->z[6] = 0; + r->z[7] = 0; + r->z[8] = 0; + r->z[9] = 0; + r->z[10] = 0; + r->z[11] = 0; + r->z[12] = 0; + r->z[13] = 0; + r->z[14] = 0; + r->z[15] = 0; + r->z[16] = 0; + r->z[17] = 0; + r->z[18] = 0; + r->z[19] = 0; + r->z[20] = 0; + for (i = 1; i < 33; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + 
r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->x[9] |= mask & table[i].x[9]; + r->x[10] |= mask & table[i].x[10]; + r->x[11] |= mask & table[i].x[11]; + r->x[12] |= mask & table[i].x[12]; + r->x[13] |= mask & table[i].x[13]; + r->x[14] |= mask & table[i].x[14]; + r->x[15] |= mask & table[i].x[15]; + r->x[16] |= mask & table[i].x[16]; + r->x[17] |= mask & table[i].x[17]; + r->x[18] |= mask & table[i].x[18]; + r->x[19] |= mask & table[i].x[19]; + r->x[20] |= mask & table[i].x[20]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + r->y[9] |= mask & table[i].y[9]; + r->y[10] |= mask & table[i].y[10]; + r->y[11] |= mask & table[i].y[11]; + r->y[12] |= mask & table[i].y[12]; + r->y[13] |= mask & table[i].y[13]; + r->y[14] |= mask & table[i].y[14]; + r->y[15] |= mask & table[i].y[15]; + r->y[16] |= mask & table[i].y[16]; + r->y[17] |= mask & table[i].y[17]; + r->y[18] |= mask & table[i].y[18]; + r->y[19] |= mask & table[i].y[19]; + r->y[20] |= mask & table[i].y[20]; + r->z[0] |= mask & table[i].z[0]; + r->z[1] |= mask & table[i].z[1]; + r->z[2] |= mask & table[i].z[2]; + r->z[3] |= mask & table[i].z[3]; + r->z[4] |= mask & table[i].z[4]; + r->z[5] |= mask & table[i].z[5]; + r->z[6] |= mask & table[i].z[6]; + r->z[7] |= mask & table[i].z[7]; + r->z[8] |= mask & table[i].z[8]; + r->z[9] |= mask & table[i].z[9]; + r->z[10] |= mask & table[i].z[10]; + r->z[11] |= mask & table[i].z[11]; + r->z[12] |= mask & table[i].z[12]; + r->z[13] |= mask & table[i].z[13]; + r->z[14] |= mask & 
table[i].z[14]; + r->z[15] |= mask & table[i].z[15]; + r->z[16] |= mask & table[i].z[16]; + r->z[17] |= mask & table[i].z[17]; + r->z[18] |= mask & table[i].z[18]; + r->z[19] |= mask & table[i].z[19]; + r->z[20] |= mask & table[i].z[20]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Window technique of 6 bits. (Add-Sub variation.) + * Calculate 0..32 times the point. Use function that adds and + * subtracts the same two points. + * Recode to add or subtract one of the computed points. + * Double to push up. + * NOT a sliding window. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_win_add_sub_21(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 t[33+2]; + sp_digit tmp[2 * 21 * 6]; +#endif + sp_point_521* rt = NULL; + sp_point_521* p = NULL; + sp_digit* negy; + int i; + ecc_recode_521 v[87]; + int err = MP_OKAY; + + /* Constant time used for cache attack resistance implementation. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * + (33+2), heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 6, + heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + rt = t + 33; + p = t + 33+1; + + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_21(t[1].x, g->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_21(t[1].y, g->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_21(t[1].z, g->z, p521_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... t[32] */ + sp_521_proj_point_dbl_n_store_21(t, &t[ 1], 5, 1, tmp); + sp_521_proj_point_add_21(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[ 6], &t[ 3], tmp); + sp_521_proj_point_add_sub_21(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[10], &t[ 5], tmp); + sp_521_proj_point_add_sub_21(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[12], &t[ 6], tmp); + sp_521_proj_point_dbl_21(&t[14], &t[ 7], tmp); + sp_521_proj_point_add_sub_21(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[18], &t[ 9], tmp); + sp_521_proj_point_add_sub_21(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[20], &t[10], tmp); + sp_521_proj_point_dbl_21(&t[22], &t[11], tmp); + sp_521_proj_point_add_sub_21(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[24], &t[12], tmp); + sp_521_proj_point_dbl_21(&t[26], &t[13], tmp); + sp_521_proj_point_add_sub_21(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_521_proj_point_dbl_21(&t[28], &t[14], tmp); + sp_521_proj_point_dbl_21(&t[30], &t[15], tmp); + sp_521_proj_point_add_sub_21(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + 
sp_521_ecc_recode_6_21(k, v); + + i = 86; + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_21(rt, t, v[i].i); + rt->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_521)); + } + for (--i; i>=0; i--) { + sp_521_proj_point_dbl_n_21(rt, 6, tmp); + + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_21(p, t, v[i].i); + p->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_521)); + } + sp_521_sub_21(negy, p521_mod, p->y); + sp_521_norm_21(negy); + sp_521_cond_copy_21(p->y, negy, (sp_digit)0 - v[i].neg); + sp_521_proj_point_add_21(rt, rt, p, tmp); + } + + if (map != 0) { + sp_521_map_21(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_add_qz1_21(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t2 = t; + sp_digit* t3 = t + 2*21; + sp_digit* t6 = t + 4*21; + sp_digit* t1 = t + 6*21; + sp_digit* t4 = t + 8*21; + sp_digit* t5 = t + 10*21; + + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(p->x, t2) & + sp_521_cmp_equal_21(p->y, t4)) { + sp_521_proj_point_dbl_21(r, p, t); + } + else { + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; + + /* H = U2 - X1 */ + sp_521_mont_sub_21(t2, t2, p->x, p521_mod); + /* R = S2 - Y1 */ + sp_521_mont_sub_21(t4, t4, p->y, p521_mod); + /* Z3 = H*Z1 */ + sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_521_mont_sqr_21(t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t3, p->x, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(t2, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(t2, t2, t1, p521_mod); + sp_521_mont_dbl_21(t5, t3, p521_mod); + sp_521_mont_sub_21(x, t2, t5, p521_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_521_mont_sub_21(t3, t3, x, p521_mod); + sp_521_mont_mul_21(t3, t3, t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t1, p->y, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, t3, t1, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 21; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. 
+ * Ordinates are in Montgomery form. + * + * a Point to convert. + * t Temporary data. + */ +static void sp_521_proj_to_affine_21(sp_point_521* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 21; + sp_digit* tmp = t + 4 * 21; + + sp_521_mont_inv_21(t1, a->z, tmp); + + sp_521_mont_sqr_21(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t2, t1, p521_mod, p521_mp_mod); + + sp_521_mont_mul_21(a->x, a->x, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(a->y, a->y, t1, p521_mod, p521_mp_mod); + XMEMCPY(a->z, p521_norm_mod, sizeof(p521_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * width = 8 + * 256 entries + * 65 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. + */ +static int sp_521_gen_stripe_table_21(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_21(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_21(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_21(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_21(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] = Affine version of 'a' in Montgomery form */ + 
XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, t->y, sizeof(table->y)); + + for (i=1; i<8; i++) { + sp_521_proj_point_dbl_n_21(t, 66, tmp); + sp_521_proj_to_affine_21(t, tmp); + XMEMCPY(table[1<<i].x, t->x, sizeof(table->x)); + XMEMCPY(table[1<<i].y, t->y, sizeof(table->y)); + } + + for (i=1; i<8; i++) { + XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x)); + XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y)); + for (j=(1<<i)+1; j<(2<<i); j++) { + XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x)); + XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y)); + sp_521_proj_point_add_qz1_21(t, s1, s2, tmp); + sp_521_proj_to_affine_21(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_entry_256_21(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->x[9] = 0; + r->x[10] = 0; + r->x[11] = 0; + r->x[12] = 0; + r->x[13] = 0; + r->x[14] = 0; + r->x[15] = 0; + r->x[16] = 0; + r->x[17] = 0; + r->x[18] = 0; + r->x[19] = 0; + r->x[20] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->y[9] = 0; + r->y[10] = 0; + r->y[11] = 0; + r->y[12] = 0; + r->y[13] = 0; + r->y[14] = 0; + r->y[15] = 0; + r->y[16] = 0; + r->y[17] = 0; + r->y[18] = 0; + r->y[19] = 0; + r->y[20] = 0; + for (i = 1; i < 256; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & 
table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->x[9] |= mask & table[i].x[9]; + r->x[10] |= mask & table[i].x[10]; + r->x[11] |= mask & table[i].x[11]; + r->x[12] |= mask & table[i].x[12]; + r->x[13] |= mask & table[i].x[13]; + r->x[14] |= mask & table[i].x[14]; + r->x[15] |= mask & table[i].x[15]; + r->x[16] |= mask & table[i].x[16]; + r->x[17] |= mask & table[i].x[17]; + r->x[18] |= mask & table[i].x[18]; + r->x[19] |= mask & table[i].x[19]; + r->x[20] |= mask & table[i].x[20]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + r->y[9] |= mask & table[i].y[9]; + r->y[10] |= mask & table[i].y[10]; + r->y[11] |= mask & table[i].y[11]; + r->y[12] |= mask & table[i].y[12]; + r->y[13] |= mask & table[i].y[13]; + r->y[14] |= mask & table[i].y[14]; + r->y[15] |= mask & table[i].y[15]; + r->y[16] |= mask & table[i].y[16]; + r->y[17] |= mask & table[i].y[17]; + r->y[18] |= mask & table[i].y[18]; + r->y[19] |= mask & table[i].y[19]; + r->y[20] |= mask & table[i].y[20]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_stripe_21(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 21 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. */ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 65; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 25] >> (x % 25)) & 1) << j); + x += 66; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_21(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=64; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 25] >> (x % 25)) & 1) << j); + x += 66; + } + + sp_521_proj_point_dbl_21(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_21(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_21(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_21(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + 
XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[21]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[21]; + /* Precomputation table for point. */ + sp_table_entry_521 table[256]; + /* Count of entries in table. */ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_21(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. 
+ * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_21(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_win_add_sub_21(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 21 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_21(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_win_add_sub_21(r, g, k, map, ct, heap); + } + else { + err = sp_521_ecc_mulmod_stripe_21(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. 
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_521(const mp_int* km, const ecc_point* gm, ecc_point* r, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[21]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 21, km); + sp_521_point_from_ecc_point_21(point, gm); + + err = sp_521_ecc_mulmod_21(point, point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_21(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the point by the scalar, add point a and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, + const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[21 + 21 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (21 + 21 * 2 * 6), heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 21; + + sp_521_from_mp(k, 21, km); + sp_521_point_from_ecc_point_21(point, gm); + sp_521_point_from_ecc_point_21(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_21(point, point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_21(point, point, addP, tmp); + + if (map) { + sp_521_map_21(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_21(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. 
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_21(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_521_ecc_mulmod_21(r, &p521_base, k, map, ct, heap); +} + +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_521_ecc_mulmod_base_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_521_ecc_mulmod_21_nb(sp_ctx, r, &p521_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + +#else +/* Striping precomputation table. + * 8 points combined into a table of 256 points. + * Distance of 66 between points. + */ +static const sp_table_entry_521 p521_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x0e5bd66,0x13f18e1,0x0a6fe5f,0x030ad48,0x1348b3c,0x1fd46f1, + 0x1049e8b,0x051fc3b,0x1efe759,0x0a5af3b,0x14f6ea8,0x1ec0d69, + 0x01f828a,0x029fda9,0x19204e4,0x1688538,0x1662395,0x0cf1f65, + 0x1013a73,0x1c0d6e0,0x00c6858 }, + { 0x1d16650,0x14a3b4f,0x090222f,0x0d44e58,0x153c708,0x1683b09, + 0x0e404fe,0x0818aa1,0x15ef426,0x1f7394c,0x1998b25,0x1a2e4e7, + 0x0817afb,0x0bcda23,0x1d51125,0x037b331,0x1b42c7d,0x02e452f, + 0x08ef001,0x12d4f13,0x0118392 } }, + /* 2 */ + { { 0x10ccb51,0x0c33387,0x1d2a00e,0x026ca92,0x187e1d0,0x194f6cd, + 0x13c86ca,0x06efeb1,0x0a3add3,0x16074d5,0x023fec1,0x0ba1d3a, + 0x07f13b3,0x0b3b0b1,0x02fd132,0x07de9bb,0x014758a,0x1d250c6, + 0x0010eb6,0x0aedbb4,0x013e96a }, + { 0x12d95a3,0x1127c31,0x00a4af7,0x0298a49,0x19f15ef,0x0d5d0cb, + 0x018fa6f,0x00f55bb,0x0a962b7,0x0f029fa,0x1636637,0x05bc284, + 0x1cc598a,0x030e11a,0x0968674,0x1a6593f,0x110e8ff,0x0076a32, + 0x1de33ab,0x153ba3d,0x01852ae } }, + 
/* 3 */ + { { 0x0af1fe3,0x08eec75,0x14af42a,0x0488016,0x0db3866,0x15f8690, + 0x01aa486,0x081fed4,0x0a768c9,0x00943cd,0x1bb0de5,0x1579343, + 0x1cf3791,0x139c1a1,0x04fef98,0x1578392,0x0abe222,0x1b945a1, + 0x0e7bcc4,0x18150c5,0x0157874 }, + { 0x0f03d49,0x078c273,0x180c2b2,0x083c917,0x09c357e,0x0e5ef7d, + 0x17bce05,0x078059c,0x15fd8dc,0x120e3d5,0x0c4275e,0x0f93f5d, + 0x184bef6,0x1427a69,0x0633286,0x0155c5f,0x07d672f,0x1bf01ea, + 0x15625a2,0x0356b03,0x000724b } }, + /* 4 */ + { { 0x19314e0,0x196a5ef,0x0ab2413,0x1bcf401,0x1aae850,0x177d81e, + 0x0420d60,0x1a4f246,0x1ec7fe6,0x078e141,0x15d2a20,0x132c333, + 0x072d5b3,0x1ca803f,0x0482e6c,0x1e07cbe,0x1734773,0x118691b, + 0x0de2da1,0x0324e67,0x0121f4c }, + { 0x08b51f0,0x1ffb6fd,0x17c3c40,0x0281c57,0x0e7afb5,0x12a0b8d, + 0x0e03a0c,0x12a2260,0x0cda10d,0x01a80dc,0x0a3007a,0x0e3c9e7, + 0x0910577,0x1640383,0x14865aa,0x070431e,0x0aaa562,0x09b04d8, + 0x12829fc,0x0af20d2,0x01db8c2 } }, + /* 5 */ + { { 0x0c0958d,0x1b86659,0x0e1cc00,0x0cd34f6,0x09aef16,0x064d9c4, + 0x1cf3d20,0x0924f25,0x0fab3e1,0x194c279,0x12259c2,0x086ca0e, + 0x0a9751e,0x1699ed9,0x0ae6756,0x09b5539,0x132b44a,0x0a6ca2e, + 0x1b1dcc9,0x1994a42,0x000aa26 }, + { 0x1e66d18,0x10ea0fc,0x19eb36f,0x0d5422f,0x00aeef9,0x186925c, + 0x0528b76,0x17e0a64,0x15c98b7,0x0e7d913,0x0f2121b,0x086dbfa, + 0x0c613e7,0x1e526a9,0x1c0fe84,0x03cc8dc,0x1771855,0x0864714, + 0x1ea149f,0x121d597,0x01c6f5e } }, + /* 6 */ + { { 0x0b2d58f,0x178f3a5,0x000a8b0,0x185412f,0x01bbf82,0x05dbb56, + 0x1ac91dc,0x17acb07,0x15667f7,0x1276cf6,0x1a25fa3,0x1b0dfb2, + 0x15d8c01,0x1fdf078,0x0e5684c,0x1b962cc,0x19dd99c,0x0a8f279, + 0x0837ac9,0x108494e,0x0082de0 }, + { 0x0ea91af,0x129d930,0x1f765ea,0x0ef463b,0x04384af,0x084ddf5, + 0x1c8e573,0x1c39b05,0x0f30058,0x0be0ced,0x1e3a5e6,0x018dcb8, + 0x05443b6,0x0bad8c2,0x0ba6d7d,0x19c2df5,0x13308c2,0x12e7437, + 0x1d8fea1,0x19cb1e9,0x0073983 } }, + /* 7 */ + { { 0x017609d,0x09898c1,0x1002bba,0x084825f,0x1f8a9dd,0x163194b, + 
0x19930a1,0x0bdc22f,0x07bf1c6,0x01bc16b,0x0fbb973,0x09b71a0, + 0x19e8c14,0x0d5c9bc,0x0b2b2ca,0x1098e03,0x1b5b077,0x190af58, + 0x0bff361,0x013f503,0x00f82c4 }, + { 0x18139a4,0x09bb31b,0x0a4c01f,0x176ab7d,0x06d969e,0x045e4ee, + 0x035bda3,0x0858f8c,0x15f93f2,0x0274230,0x1c5f661,0x1454e82, + 0x0e8461c,0x185f890,0x04c39e7,0x133af1d,0x0026b56,0x170aaa5, + 0x093edb7,0x18ee04d,0x007de69 } }, + /* 8 */ + { { 0x1ee80d7,0x08dd825,0x19a586d,0x1eed25b,0x0e1f6a1,0x15e1de8, + 0x191b283,0x1e106f3,0x1930644,0x005ffd3,0x16c1dc7,0x170e721, + 0x0997c67,0x1d6d0e7,0x170cf87,0x16a2412,0x0ddac54,0x11e2805, + 0x0c46195,0x03a6c1a,0x00b0c23 }, + { 0x1bcab2f,0x0494c1c,0x082818a,0x00c9ba4,0x00c0678,0x1ee1506, + 0x18211d8,0x1c60c5e,0x11938c3,0x074ed39,0x11bae62,0x1e5aa5c, + 0x1d69be8,0x152ef07,0x17234b5,0x01c4dca,0x163db2c,0x1f9d1fe, + 0x192ffd5,0x18db3e3,0x014a899 } }, + /* 9 */ + { { 0x005ce88,0x171d0f6,0x080a7fd,0x0d6d5fa,0x18fc249,0x1f5803f, + 0x081ddbe,0x080173a,0x1eebded,0x087605e,0x1c03ded,0x0e84d26, + 0x0eaef97,0x1fbd818,0x1b8de84,0x03eef00,0x1171b90,0x1ae78be, + 0x0a56b83,0x0dcbbf9,0x0159903 }, + { 0x00e8e0c,0x1b25a80,0x17e402b,0x080df69,0x13f2ae0,0x0f91dd6, + 0x1699d12,0x152bec3,0x0255b25,0x0548c21,0x0f19403,0x07cd1c6, + 0x01fa6af,0x016013e,0x0dcf003,0x0814a28,0x1a19728,0x04cf9e6, + 0x03a1090,0x0c56f3a,0x00e798c } }, + /* 10 */ + { { 0x04d0f28,0x1e25457,0x01bba31,0x1eacda0,0x1a8a55e,0x1720119, + 0x17d9419,0x0ec6f30,0x15d321b,0x0f6655a,0x146c1e3,0x0dad706, + 0x0b38b96,0x0beaa45,0x022794d,0x156165d,0x02fe631,0x1bd4f47, + 0x1d714de,0x0c1f2bc,0x005945c }, + { 0x067d79c,0x13e9a3c,0x0602f28,0x0b03903,0x1f460b1,0x15c628b, + 0x166ae5d,0x1b2fd85,0x061b91e,0x0682243,0x07457ff,0x144bb38, + 0x19730a7,0x1ca64ed,0x0b3c967,0x0b47714,0x1875dec,0x1473c25, + 0x1944c7b,0x0a4c0e7,0x0004062 } }, + /* 11 */ + { { 0x1631bba,0x0272e78,0x14937b8,0x1e2ade8,0x00e6c1d,0x0184c82, + 0x0fcc393,0x18e0cc0,0x16b6abe,0x1b24d21,0x053dbb6,0x0139ed7, + 0x15354f5,0x1b5bf05,0x1b3d1a4,0x0dba4ff,0x07eba1e,0x153d388, + 
0x0251432,0x1db58ad,0x0022889 }, + { 0x05596f2,0x148b768,0x0e2e404,0x1960479,0x03901da,0x0a55f0f, + 0x14fb39f,0x0264a03,0x0a9c903,0x140a820,0x051b42c,0x07e38da, + 0x169dbcd,0x1a770c4,0x08756c5,0x04df6df,0x161a912,0x024d750, + 0x02a0261,0x19ddbf7,0x0154754 } }, + /* 12 */ + { { 0x070b2f0,0x113d821,0x135ed93,0x117e9ae,0x04b34e4,0x13915d4, + 0x0fa2c30,0x039630d,0x19ff9b7,0x0a52c4e,0x15af13d,0x09be69f, + 0x1d9887e,0x1a097a4,0x119a7f5,0x13a2d6f,0x1bb77f8,0x020046c, + 0x040b81d,0x1284d79,0x01cfafb }, + { 0x02935ca,0x07968b3,0x111b329,0x0732fb9,0x0847c70,0x1e3cfc1, + 0x1a794d4,0x1e98113,0x15215f0,0x16c6cc4,0x046e767,0x1179012, + 0x0359cf0,0x16f13d5,0x00d5039,0x0641a96,0x03ef69e,0x1a97a6b, + 0x13bc64e,0x02ffad2,0x00e6a02 } }, + /* 13 */ + { { 0x0214780,0x0f313ba,0x07aaddf,0x0e40e8b,0x0a06681,0x03fd80e, + 0x1e6dfa7,0x18fef0a,0x1d6d4b7,0x0aaa460,0x12a8e79,0x03214cd, + 0x0f45756,0x0c282d2,0x0506c0e,0x0c9d7f0,0x17c4c88,0x1d2e506, + 0x184a74f,0x15f2a13,0x0053bf8 }, + { 0x1285092,0x194ec42,0x197ef26,0x151ddab,0x02f31da,0x0c555cc, + 0x1a43bd8,0x1a33866,0x0d2626e,0x1770a7a,0x1638243,0x0e160fd, + 0x0042295,0x039b682,0x1de483a,0x1a03a32,0x1ffede7,0x1a3f712, + 0x11eadce,0x0438757,0x01b93c9 } }, + /* 14 */ + { { 0x08b2b14,0x103e650,0x11fc2da,0x177e2e9,0x0a978de,0x0659525, + 0x0e0a310,0x0705239,0x090adc8,0x0e3c139,0x1b779a5,0x1655183, + 0x0008da8,0x087de91,0x073acbe,0x1729ce8,0x1e5322d,0x12fc4e4, + 0x1cf1523,0x0cc10b6,0x007d182 }, + { 0x1efd012,0x1fc1516,0x1fbda7a,0x08b42a6,0x01ecb09,0x18408e8, + 0x1d4d4fb,0x1d478aa,0x1b2bd4d,0x0e44153,0x05a7216,0x12e4f7f, + 0x1b00a1f,0x0592d68,0x0eb7d78,0x0c00a0c,0x106f253,0x0260ff9, + 0x044bf86,0x02b7d88,0x01178e5 } }, + /* 15 */ + { { 0x1e3d3d5,0x03c3ff7,0x089e4c5,0x0b3b12e,0x09e76f6,0x1b567a9, + 0x1fb4782,0x1b22b8e,0x01c5e8d,0x015bd90,0x199ebe7,0x11e2bea, + 0x1478803,0x19abb77,0x031d9bf,0x02a95e7,0x1c80040,0x1cf8311, + 0x1a20ed4,0x078897b,0x009647d }, + { 0x01b21a4,0x1ab1c6f,0x0704c81,0x02ae210,0x1b6399c,0x001accd, + 
0x1819dd7,0x1ea645c,0x1ade60c,0x03fef3f,0x0641657,0x0881df8, + 0x001b195,0x0ebd9cb,0x1c2b233,0x14e7cfc,0x03d6a6f,0x02552d4, + 0x0c201d9,0x119f58c,0x004234f } }, + /* 16 */ + { { 0x06492ad,0x0f38d14,0x0b13b8c,0x08cbf0d,0x08f3de4,0x189e5a0, + 0x0035369,0x009d12e,0x1a86b71,0x1687af4,0x0b0585e,0x1c9e4ae, + 0x19d9a62,0x12e60e4,0x1488fbc,0x05c18ef,0x1613b96,0x0f6ffb4, + 0x0762c81,0x1a51e70,0x008e818 }, + { 0x0df1f60,0x118e7c6,0x183dc84,0x16ce2ee,0x0b640f2,0x02d201c, + 0x1be3381,0x13f7ce4,0x0037068,0x11142ee,0x08372d0,0x1f1ee5d, + 0x037196b,0x0404331,0x1bde157,0x1fc9142,0x1c7c326,0x06a70cf, + 0x1da2fd1,0x190add1,0x013efdb } }, + /* 17 */ + { { 0x0a3ace5,0x06827f3,0x070778d,0x1d12c32,0x0dbb603,0x0f687a0, + 0x0001fdd,0x16b69b8,0x095b259,0x0f0735e,0x17c0805,0x14cc4c2, + 0x18dfbcb,0x098f51f,0x1b150cf,0x1f04965,0x0e4103f,0x1215858, + 0x1200ccb,0x02a0c18,0x0111193 }, + { 0x05452f1,0x1f51402,0x1cee665,0x1ee3e7e,0x00b678c,0x1499474, + 0x0f77107,0x04694a5,0x0e6af1c,0x1f932b7,0x08579ed,0x0b73688, + 0x0bc4380,0x1852014,0x09cd3cb,0x0edc475,0x0794224,0x1f1e392, + 0x031833d,0x05d160d,0x01f16dc } }, + /* 18 */ + { { 0x1fc0de5,0x1d737ff,0x1c92f37,0x1f5694b,0x0801814,0x15546ed, + 0x0d963a8,0x0823202,0x1da4f04,0x1d8e57a,0x001847c,0x19b6682, + 0x08f24b9,0x0b7067c,0x10c93b6,0x0b90491,0x1342305,0x0a5bf51, + 0x0424b8a,0x06b6c91,0x01d36e8 }, + { 0x1372f27,0x1bd7383,0x0669fad,0x150775c,0x0779b4f,0x014f5da, + 0x16b8595,0x07f42eb,0x0fc03ef,0x0176133,0x071f125,0x0d52d32, + 0x1c0e5fc,0x0b129e9,0x1d8793d,0x1ce7141,0x158de74,0x0bd08ff, + 0x0937a46,0x0499a8c,0x0002605 } }, + /* 19 */ + { { 0x1342e08,0x0e86500,0x02bd16d,0x016e93e,0x109ed4f,0x14ec022, + 0x00b6594,0x139d6aa,0x16d8035,0x15843ed,0x0120017,0x150e987, + 0x04eaa66,0x03ad43c,0x1cb1e83,0x062fdd2,0x0216874,0x0460b4f, + 0x1727efd,0x0aadc1c,0x014f81c }, + { 0x120674d,0x05895f0,0x02b09ac,0x12433e0,0x06bf09b,0x0c65536, + 0x1ccb759,0x13c3c3c,0x18292d9,0x1b8e2d7,0x16fe031,0x0a524bf, + 0x1d5d813,0x1b3361b,0x06f5e60,0x1ed01cc,0x06a1d0d,0x1c6d64a, + 
0x0e7c260,0x19ed098,0x009f58d } }, + /* 20 */ + { { 0x17dc837,0x148813d,0x0710505,0x096a1d6,0x0d71975,0x133a0d9, + 0x024ab5f,0x07009e8,0x1bc824a,0x0853f8e,0x082f3c7,0x00ad91c, + 0x10570b2,0x0d0c0ed,0x0cb8ee7,0x0a114ce,0x16e0a7b,0x13c4031, + 0x07dc124,0x1ea0599,0x004511a }, + { 0x16f4ffa,0x106ca62,0x03e82e0,0x0589e18,0x1c6205a,0x1030350, + 0x0f53a86,0x1f733e6,0x079b316,0x1d5b233,0x0903f06,0x10a5c9e, + 0x0305aa0,0x096bee2,0x14e6de2,0x180e644,0x11206e3,0x181b2bf, + 0x1b6d98c,0x00a5019,0x0059284 } }, + /* 21 */ + { { 0x197760c,0x04388a1,0x141a434,0x0c393f9,0x19020b7,0x1f127bd, + 0x11fea61,0x1418ffd,0x0522335,0x119dc50,0x0728403,0x15fb5c4, + 0x0073dbe,0x1d81911,0x0301828,0x0bb4c8b,0x1b8ee14,0x1cdce39, + 0x1ffd8bb,0x0cc3ca4,0x00aa31c }, + { 0x1430b5e,0x0c75840,0x15a6bd4,0x14a1dc1,0x132f9ce,0x175f45d, + 0x0c2d6a9,0x1121d9b,0x09fe1d6,0x18afbf9,0x0732687,0x11e634b, + 0x03ce5d6,0x0455953,0x159e650,0x19ca9e9,0x0ef4347,0x1742d8e, + 0x01b41dd,0x0847805,0x01768ff } }, + /* 22 */ + { { 0x1dcec23,0x0082619,0x1466159,0x179ba0e,0x1af0d61,0x07984d5, + 0x0bd4531,0x02a90db,0x1de4887,0x00de47a,0x0e6e8fc,0x15e3a6a, + 0x0cddd6b,0x1d1df47,0x1f99974,0x10cbf76,0x0c3cb5d,0x07c8ced, + 0x0485268,0x007b47e,0x0173fe2 }, + { 0x0d4a3d1,0x174d0bc,0x1b6010e,0x110ca62,0x04d5cf5,0x0bb231d, + 0x09b0104,0x089d5e0,0x1f84afa,0x0b631c7,0x0908b4c,0x072fffd, + 0x13512f2,0x13115b0,0x07aa811,0x00d1ad2,0x0a397e7,0x02442b7, + 0x1286ccf,0x0365c7e,0x01b542d } }, + /* 23 */ + { { 0x1487402,0x196af0f,0x1757d46,0x0cf55e3,0x036016e,0x14e1057, + 0x1c7d5b6,0x1fa3d67,0x1ece45b,0x0dbe9b0,0x0a78609,0x0c6604f, + 0x0942db0,0x14208b2,0x08a1ddf,0x0e7a17e,0x0c44587,0x07afe70, + 0x175e97c,0x062a3a5,0x001fb2b }, + { 0x1aa096a,0x1b9f47d,0x01e0409,0x17c1275,0x152726e,0x1f8bc08, + 0x1341cb1,0x0ecb8a7,0x0ab5dca,0x069efe8,0x1cb528e,0x1b0b0fd, + 0x02bb4a7,0x1bf588e,0x070804e,0x1445eb9,0x0340b6d,0x0af1a9e, + 0x0c97b2b,0x1aa14b4,0x0039846 } }, + /* 24 */ + { { 0x077df58,0x13b9b0b,0x15b1db6,0x0e396a1,0x164bd56,0x0407f91, + 
0x11f5c28,0x0600887,0x1865324,0x0542a14,0x04079e8,0x1ba586a, + 0x1682002,0x0462e6b,0x0f1850d,0x1e27f7d,0x1aeca6c,0x07f8ac8, + 0x02fe370,0x0f85cd3,0x00fb91c }, + { 0x0de14d5,0x02e5689,0x0089a9f,0x1ecac39,0x1c448c5,0x0dd9ed5, + 0x190c1f3,0x1af3f1b,0x1c76811,0x02c7808,0x1881267,0x00dcea8, + 0x091e898,0x04d3a72,0x0ab428b,0x06f87ca,0x05cb2be,0x0901a34, + 0x082f1cb,0x0c648a1,0x00ec7a8 } }, + /* 25 */ + { { 0x086786e,0x0c610c5,0x0b20ce0,0x08426fc,0x0d537f7,0x1375907, + 0x043469f,0x006bb2d,0x05cdc48,0x1c87638,0x1ef5d65,0x059049e, + 0x1446916,0x070f878,0x19fbe75,0x02b9413,0x08bce99,0x1e98609, + 0x11c489b,0x028becd,0x002d810 }, + { 0x11d87e5,0x1a4fadb,0x1b68c49,0x02f6059,0x05f3b14,0x1d7f8b1, + 0x1b4bb82,0x04e048a,0x1fcae66,0x1fbd9d4,0x16617e5,0x1f1e6f7, + 0x010d6eb,0x1fd3686,0x0aa06e5,0x1e26e41,0x00121f2,0x0d94f8d, + 0x130376c,0x0d45f0b,0x003de32 } }, + /* 26 */ + { { 0x0c2ee78,0x19cc59c,0x0fb89bc,0x034eb41,0x00c3d10,0x0d3fc72, + 0x05c1959,0x0ba6b46,0x104019e,0x094c2f1,0x1d2dbb4,0x0c85702, + 0x0a21e2a,0x17c0529,0x0857ba2,0x1b01c4b,0x1e68518,0x12e8f07, + 0x13dbaa6,0x1782700,0x00848cb }, + { 0x1d45169,0x143486f,0x0341da0,0x10b3a7d,0x18d7e09,0x1c5fe11, + 0x0204736,0x09046eb,0x0162cf6,0x04caa3d,0x056e321,0x167769a, + 0x06494ba,0x03024cd,0x0b2f15f,0x19fdb04,0x04ea8a1,0x1d62191, + 0x1f19662,0x0c68d2a,0x00d9435 } }, + /* 27 */ + { { 0x0271323,0x14929b4,0x135cac1,0x10939a0,0x04d9e0a,0x18e63e9, + 0x17efcac,0x0c355c6,0x157a3e3,0x07b25a7,0x13a1591,0x0d0c052, + 0x0e14904,0x01e76a5,0x120bb9d,0x1b48fbb,0x0a57e2c,0x065c953, + 0x1f07e5a,0x1885df7,0x013f989 }, + { 0x0651600,0x0c5efdc,0x0bbafb6,0x08f479f,0x0c36343,0x18d1134, + 0x0950cd6,0x00f2742,0x1d58255,0x0c6d3ee,0x1ac7a55,0x16470a5, + 0x05a5173,0x114afaa,0x16b9614,0x1a203be,0x0ef6646,0x172a371, + 0x1627e18,0x02d458b,0x01faf7e } }, + /* 28 */ + { { 0x1ec136d,0x0364763,0x146c35d,0x0f9a226,0x18e1d82,0x03d08b7, + 0x0eb4fc6,0x0caec94,0x1136e84,0x18dcb47,0x060f08b,0x05290a1, + 0x19d41aa,0x1f38b92,0x08fb312,0x0293842,0x152763c,0x0ee6e55, + 
0x008ae0b,0x0a16302,0x016da7f }, + { 0x0a5e258,0x1299686,0x09efe67,0x0f2f6c5,0x0148ad1,0x1feef7d, + 0x090bb1d,0x1891a14,0x174f9b6,0x028c5e6,0x048b516,0x0170ffa, + 0x17c53b3,0x1da8596,0x033464f,0x155d377,0x0eebc01,0x08d0b4d, + 0x1789b82,0x1362143,0x01c57e4 } }, + /* 29 */ + { { 0x1210716,0x1f33a90,0x1000b2a,0x060fc04,0x01a296a,0x01bcadc, + 0x1047632,0x0d5295f,0x0dd9efa,0x079019a,0x15a1bda,0x13d6cef, + 0x155be2f,0x1fae713,0x04fc9de,0x0f8b8d4,0x041b975,0x07bec91, + 0x1d3d2e3,0x07a5e98,0x013270c }, + { 0x1209aa4,0x0304e46,0x10dbe72,0x05b656a,0x06f413a,0x091a2ea, + 0x0b468a6,0x09f2d6e,0x19487c3,0x0379575,0x028dd46,0x02ed688, + 0x0e4fa72,0x1ed29ac,0x10824d9,0x1662074,0x1e3ff25,0x0788f56, + 0x017582e,0x0e02a6a,0x01a99a5 } }, + /* 30 */ + { { 0x07495bb,0x089c9b7,0x0746b85,0x109210f,0x0bd2fd2,0x1ebb7e7, + 0x0ac2ca7,0x0393846,0x1c60e72,0x0d06a4d,0x08278a8,0x1706a2f, + 0x189f582,0x0ec5d6f,0x0de027a,0x1176958,0x09e0ad4,0x1a5526f, + 0x0db3121,0x0826259,0x0027fd0 }, + { 0x0d4fb6d,0x0817775,0x12fb015,0x1a14c05,0x160c25e,0x1fa503b, + 0x1a106f5,0x028b174,0x054edce,0x145b019,0x1d85330,0x1c72072, + 0x13b9d41,0x0c0f76c,0x086dc74,0x0961684,0x1c2332d,0x0e80871, + 0x0ac3906,0x0b144fb,0x0096dfe } }, + /* 31 */ + { { 0x1ebd24e,0x17e6b3e,0x01d5335,0x0135c56,0x1e3fca6,0x0be1365, + 0x108bbc8,0x07f4fb1,0x0b9620e,0x01681f0,0x07e1f75,0x042d8ff, + 0x0e634bf,0x04b97ff,0x0c7b14e,0x07cee45,0x1c1d60d,0x141d4ab, + 0x1da94df,0x1cbf0c1,0x0162edf }, + { 0x0ea20b8,0x02a0078,0x0401028,0x1c3af2d,0x0872ac7,0x0d86561, + 0x097243b,0x14eeecb,0x0b62939,0x0fadc98,0x12dc227,0x0edd5e5, + 0x12f78a6,0x097f5e0,0x01ccafd,0x015a606,0x0deba19,0x09d3320, + 0x0f9f8d0,0x15c2bf2,0x00d536e } }, + /* 32 */ + { { 0x1c88f3c,0x08cfb50,0x1129b18,0x185d8d2,0x124e5fe,0x017f954, + 0x0b1815d,0x0f89915,0x0ddb22c,0x056ef0f,0x1496ed8,0x0719f4b, + 0x0097289,0x1608bef,0x16b13df,0x05383f4,0x0b74829,0x0a0f9ad, + 0x0bf657d,0x09d1f21,0x0180d1c }, + { 0x1cd8358,0x0739ed3,0x0480bf1,0x0fe5439,0x19361a5,0x0a69441, + 
0x1c4c2b6,0x1c5ede5,0x02b6a78,0x1bf1233,0x098b378,0x1f16f38, + 0x190babf,0x10dacbd,0x0b807bd,0x09cc8d9,0x1f0a60d,0x0ce0f19, + 0x1407e11,0x084501b,0x000e52a } }, + /* 33 */ + { { 0x1013755,0x1205207,0x03a5cb5,0x0ff7070,0x0b6dce7,0x1b25988, + 0x139e5fa,0x06c4f13,0x193ca5a,0x1382585,0x17ff263,0x01feb17, + 0x1218c36,0x191861b,0x0c7cc8e,0x10ba2a7,0x0885a73,0x1eb59c8, + 0x1ae4efd,0x0261eaa,0x004a071 }, + { 0x0ef3f88,0x104b5ff,0x0514a68,0x1370567,0x02eba86,0x1332539, + 0x0612a1c,0x084ffc4,0x1858ff9,0x06e05d0,0x03276a8,0x1d6ae92, + 0x0833799,0x00ac467,0x0d5bd8a,0x19dc43a,0x07fa7b2,0x0beecde, + 0x0f3ebba,0x0349d14,0x00d21e6 } }, + /* 34 */ + { { 0x1068656,0x0db14f4,0x137fb17,0x193fdbc,0x023bd70,0x0a2aa33, + 0x156f7f3,0x0838f15,0x06291a7,0x1cc0ee9,0x19a23bd,0x1b24ec3, + 0x0f3ac53,0x0adc939,0x05a24a9,0x0dfd8d5,0x1b80654,0x1210bf3, + 0x0e78bd5,0x1807975,0x015e793 }, + { 0x0ff39be,0x0caa1b7,0x1da023f,0x1db7fe9,0x1a1af07,0x120b0b2, + 0x1eaf6c0,0x05307a8,0x1d47980,0x1e2e97e,0x0b9becd,0x12f0c16, + 0x189d86d,0x0746dcc,0x18ca13b,0x17377c7,0x0b5d868,0x1cf824f, + 0x16b462c,0x1d14f13,0x018e3b3 } }, + /* 35 */ + { { 0x11e61f0,0x1362b72,0x1d5d5c0,0x0660fe4,0x1ddbcaa,0x1757a0e, + 0x09baec6,0x1752540,0x0e2d7f5,0x19f49be,0x1ab6468,0x003d78b, + 0x1d1f7cc,0x1723403,0x0ad9974,0x12a3321,0x1555341,0x0e15227, + 0x0599012,0x18394cf,0x00aa099 }, + { 0x197e387,0x0d484c7,0x15a6d58,0x108bc3b,0x1605177,0x18eb55f, + 0x144adff,0x1123ff4,0x0d09a9c,0x16d2ad2,0x00b8ad0,0x18e3a45, + 0x0d5e5a7,0x13a0c2d,0x096880f,0x15dffbf,0x09dea0b,0x10cd89b, + 0x1b30285,0x1df2283,0x01a3a5e } }, + /* 36 */ + { { 0x0573b81,0x106853d,0x13bcabc,0x10cc329,0x1eac1ca,0x188e1a3, + 0x0b6342d,0x085de1a,0x0ba099d,0x17500b6,0x1ea329a,0x1a50a0c, + 0x0fa6609,0x1d09a8f,0x14b1801,0x04c68d4,0x018b11c,0x06d5c2c, + 0x0c700cf,0x1f48bb7,0x0121f17 }, + { 0x03279d6,0x05c3d7e,0x07867ee,0x178403e,0x030e76a,0x1610eef, + 0x1aa0e01,0x09e055e,0x1c63f82,0x17ebf15,0x14694fa,0x1c4c8d7, + 0x047b074,0x1109c8b,0x1bd24c6,0x1b37f9a,0x139c172,0x0d5967e, + 
0x16d673c,0x07d6969,0x010a62f } }, + /* 37 */ + { { 0x0689a1b,0x16f1b70,0x19cb900,0x1afb95f,0x1dccc9f,0x0e85fdc, + 0x0b5f895,0x1b3c9bd,0x04ada04,0x1f743f7,0x0b9dd35,0x073d7fa, + 0x1b5a850,0x1b8595c,0x0b1995d,0x0777450,0x026ba10,0x0d3d654, + 0x1f3541c,0x0051758,0x011aac7 }, + { 0x00c8f04,0x0e9ce34,0x0d78b98,0x1969167,0x0f09c4c,0x1a279e1, + 0x026f655,0x126262c,0x0aaccb5,0x0b9725a,0x1ec825b,0x0194b5b, + 0x0fdb706,0x0fe9f66,0x1f6790c,0x054e78c,0x06fe175,0x00a43d1, + 0x134215f,0x0a6cc6c,0x01e33d9 } }, + /* 38 */ + { { 0x0ec9e7f,0x02835a6,0x063f999,0x0861557,0x044564b,0x1fd1425, + 0x1407c5c,0x0e4bc36,0x015c974,0x1dbdebf,0x1b00cf9,0x0f5105b, + 0x02d6cc6,0x0531dbb,0x18ba4d0,0x05f9a3f,0x01b3f8e,0x11d0427, + 0x0b9b9d4,0x1c9b513,0x00fdccc }, + { 0x12fd820,0x1fc7760,0x1ccc1e5,0x152db48,0x125f892,0x0cbdfa1, + 0x0907556,0x19eb2fa,0x002b753,0x1779ad6,0x1f3ae8e,0x12bbece, + 0x0c8a73f,0x08ddd63,0x0a24adf,0x0f160b6,0x183cc52,0x1483a8a, + 0x11fd17d,0x1daa7f4,0x001c2f5 } }, + /* 39 */ + { { 0x140b79c,0x00b2f55,0x06a0e45,0x104b691,0x1fb6eed,0x16083fd, + 0x1adf629,0x117b426,0x18e01f2,0x018edc5,0x1e641f5,0x01bb49a, + 0x0584e5d,0x1238f34,0x0a451ca,0x0dff0d3,0x1699837,0x0ac6834, + 0x118c47f,0x0d36e98,0x0006ce3 }, + { 0x0dd1452,0x1b9e88d,0x08a9b01,0x0bdb1d3,0x0e4e9c9,0x0ad2061, + 0x038cb28,0x11fd1ff,0x0af62f1,0x1e5be9b,0x05212cf,0x0ddddd9, + 0x1b2ca33,0x1d90202,0x15b9ea4,0x106a549,0x031956d,0x1b6c868, + 0x07280f9,0x0eac07b,0x00e5dd3 } }, + /* 40 */ + { { 0x1481bf7,0x194bec5,0x00f3317,0x0854267,0x06a2a3e,0x005cb60, + 0x14a3371,0x0793c28,0x11189da,0x115f9af,0x15fe9e6,0x1312d9a, + 0x0bb8adb,0x09abe99,0x0924d72,0x0df5b83,0x180c2d7,0x0a8fd92, + 0x13c8f78,0x043d684,0x01ba987 }, + { 0x0a4b397,0x16d57a9,0x1952300,0x181a169,0x03c5f4c,0x1f3ce6e, + 0x136cded,0x16c537c,0x0b33970,0x1a19b76,0x0231ffc,0x16f9250, + 0x11ed3dc,0x011446d,0x0a43bfc,0x1ab35d8,0x151e96e,0x19523ce, + 0x1b63e97,0x1db0e0e,0x00929d7 } }, + /* 41 */ + { { 0x060043c,0x0d785f3,0x1d3763b,0x1602dc0,0x04aa2cc,0x061d9ec, + 
0x1a39f8b,0x1893a46,0x05c269f,0x1da8098,0x0cf8d91,0x1dc27bc, + 0x04d0194,0x1c4e528,0x0cd86e5,0x1623bb6,0x033984d,0x0466a8c, + 0x03b24bc,0x1003d99,0x00c6d5b }, + { 0x1ab9887,0x08e0aa3,0x0044cfe,0x14d6b56,0x0f285e2,0x1fe40c1, + 0x139684c,0x05936e6,0x038d869,0x021ad3a,0x00ba057,0x08f8865, + 0x0a3c92b,0x0e3de6d,0x048c7d6,0x1190c32,0x1c34d15,0x11d7212, + 0x1688f32,0x0d1fd78,0x00117f5 } }, + /* 42 */ + { { 0x15caa87,0x1eceadf,0x1276332,0x1ed1bb1,0x17bfc60,0x0a6f6f0, + 0x136ef1f,0x17ec7d6,0x18270b5,0x1b72ca2,0x063f9ef,0x0f4b981, + 0x1588713,0x02ebdc7,0x17ada1c,0x14a6794,0x0ee4b25,0x025bef7, + 0x09c029b,0x08b8649,0x00ef8e0 }, + { 0x0cf52bc,0x00e4938,0x0a60583,0x152198c,0x0bf3f63,0x18147da, + 0x10872fc,0x1e2bffe,0x1523bef,0x140816b,0x1384142,0x1347173, + 0x1eff330,0x03310d8,0x0769340,0x0f00f1d,0x09fcc0a,0x14bbafc, + 0x005e184,0x0890ca0,0x00eb590 } }, + /* 43 */ + { { 0x1bd33ec,0x1327ef5,0x15e6299,0x019cb5a,0x0cf9a66,0x1dab768, + 0x1b01543,0x0ddd9a0,0x11d5aaa,0x0652fd6,0x09fc1ed,0x1cb7291, + 0x1a36dae,0x17f0e08,0x18de21f,0x0a897a5,0x0c491d2,0x120fb0d, + 0x0fff63a,0x1ee0e25,0x00be49d }, + { 0x1acdb56,0x178fab2,0x0f79838,0x08bcbcb,0x12f13c8,0x1d02097, + 0x14d5385,0x1df72ff,0x1d9c93b,0x11433e7,0x055f922,0x02d64b5, + 0x1f9ca9d,0x050c31a,0x157066d,0x15ce23e,0x0f58d26,0x0cd9c34, + 0x1251507,0x0900829,0x0000ac4 } }, + /* 44 */ + { { 0x0ad38db,0x1e7c4ea,0x1445b06,0x027ae28,0x1180f38,0x18121d0, + 0x09d672d,0x0d8b698,0x1163a71,0x0eb26b1,0x122f6d7,0x1fd426c, + 0x09bbd2e,0x126f4cb,0x1c61fe7,0x1188b48,0x112e2de,0x1b2ef34, + 0x0f6b429,0x0be5389,0x0048e07 }, + { 0x04dd88d,0x1aa3a2f,0x0bf000c,0x1100aef,0x1828363,0x19447b8, + 0x1700489,0x1bdc966,0x1e68989,0x0047ec8,0x1dc6eb4,0x062b9a7, + 0x0242142,0x1f26d0f,0x0c08ffc,0x05762b9,0x035b566,0x0bf35ce, + 0x1ec13f9,0x1e82caf,0x0072143 } }, + /* 45 */ + { { 0x0f40f2c,0x1823613,0x0c76c1a,0x18d9af8,0x1d5d246,0x09d4dbd, + 0x189c065,0x0df554a,0x08f0043,0x16494dc,0x0198356,0x125843a, + 0x0619373,0x0deb6df,0x1e7b456,0x087f3a4,0x15ad17c,0x09bbe26, + 
0x03f3409,0x1db4a17,0x0179800 }, + { 0x0132f31,0x0ee059b,0x0e8ee23,0x0255bce,0x0f8f4f0,0x1ef15cb, + 0x07b0c80,0x066710b,0x0231b65,0x0d81c0a,0x024f2bb,0x1a41428, + 0x19ad08c,0x0e15f17,0x1e1b511,0x1813f73,0x132f6eb,0x0fe9eca, + 0x0bbd1e3,0x16b1323,0x013d757 } }, + /* 46 */ + { { 0x00f894b,0x168802c,0x11bdf66,0x15b24bc,0x1612488,0x0d3432d, + 0x1f850b9,0x0268a92,0x117f9a8,0x0370829,0x0cd5072,0x0415f14, + 0x18d8aa8,0x1d336ab,0x1e41981,0x11c474c,0x0ae5f75,0x023efb0, + 0x1fe2ad7,0x1a99214,0x0107cad }, + { 0x164ad0e,0x18227b3,0x06ccd5a,0x024a031,0x169fe0e,0x0a6db57, + 0x129897c,0x0a85bd5,0x11bd77d,0x0f93bcf,0x0a2573a,0x03e4b9f, + 0x0397991,0x1b78cd6,0x1a533b6,0x08963a9,0x01701af,0x0e1a99a, + 0x031c9fd,0x087ffea,0x003bcac } }, + /* 47 */ + { { 0x1c1d4cf,0x14a8e41,0x0d3c5d0,0x01648b8,0x003791d,0x16e638f, + 0x03bda70,0x0cfd51f,0x12a3107,0x152bd14,0x0522f4b,0x0d77625, + 0x03255b4,0x07f575c,0x1707824,0x17eb255,0x18c449a,0x0d06968, + 0x12a29a2,0x193feb8,0x00199e8 }, + { 0x128171a,0x1dce6f5,0x01ef27d,0x07aaed3,0x0fd7840,0x1fc1267, + 0x1cefc8b,0x18ab169,0x1bf333c,0x104d9c9,0x13adcbb,0x0745603, + 0x0debff8,0x11014ce,0x0cd3114,0x1eea2b7,0x0a066eb,0x1d1e1f4, + 0x074173c,0x1c0f769,0x01a65de } }, + /* 48 */ + { { 0x114257b,0x0ac6b58,0x18c026a,0x03a92eb,0x129afd4,0x173d88b, + 0x1e6d4ea,0x1060e50,0x1edd1ac,0x1c8d849,0x19e5d41,0x0fa23d6, + 0x0acfefc,0x1133ada,0x152f4df,0x0a2fe1c,0x17e8d69,0x1c4d316, + 0x0084268,0x100bb04,0x006b96f }, + { 0x1b5f9f4,0x0ea8bab,0x1345205,0x0c80b68,0x05c9e43,0x0380b07, + 0x1778392,0x1f06885,0x11ef6b3,0x09ff7ca,0x05febe5,0x19ebee9, + 0x17919e4,0x00b7785,0x18f3134,0x1ddda49,0x0872512,0x1fe2e55, + 0x0ef45c0,0x1480534,0x01b6f1b } }, + /* 49 */ + { { 0x09252ac,0x1421aa9,0x0360a99,0x00e9cf6,0x1da626c,0x1f43559, + 0x0330782,0x0a6aa10,0x14ed5dc,0x1a529fb,0x107f414,0x028019a, + 0x1ca9eff,0x0b3a448,0x1f25171,0x16b5a1c,0x095ec53,0x06f525c, + 0x1454262,0x0cf7de2,0x01ffefc }, + { 0x06033fd,0x0e08498,0x1766623,0x13e6d0e,0x1b28797,0x019ae28, + 
0x0bc9b8f,0x1ac9a73,0x1124e29,0x0392cfe,0x16f7f29,0x0ae1883, + 0x155d60c,0x06606c4,0x0892d84,0x1ff0c0c,0x0e5eea8,0x1d020ea, + 0x19361c1,0x01c2b95,0x01fd292 } }, + /* 50 */ + { { 0x167da85,0x0af8666,0x08559b4,0x08b58a9,0x0e98b6f,0x1638e1d, + 0x18087c6,0x0485e0b,0x0475592,0x1f59113,0x015b707,0x0ac2cdd, + 0x072a2f1,0x17da5d2,0x1ac5159,0x12416cb,0x1d2a29d,0x19a3445, + 0x07532e6,0x19d0ddf,0x0061943 }, + { 0x0c91174,0x0b10c55,0x08d2d1a,0x1883bb2,0x05b519e,0x03b1d24, + 0x0b7ca7c,0x0676fdf,0x1712c8b,0x028bf93,0x0e18c26,0x1d8760a, + 0x04a02e7,0x0ff9f1f,0x0f116ec,0x0c90c8d,0x16f2949,0x1a35744, + 0x0f4ae4f,0x162c93d,0x01462ae } }, + /* 51 */ + { { 0x0e4d3c3,0x07a0ff4,0x076c7cd,0x1eb76fd,0x080d87f,0x085abce, + 0x1b02b64,0x15de042,0x1b87349,0x1125bb0,0x09b300a,0x0a50561, + 0x17054bc,0x17968ca,0x131c0a6,0x0d9ba76,0x0e2adbe,0x00725c8, + 0x181828d,0x0e9f024,0x00cf8e7 }, + { 0x0229950,0x1cede17,0x0dc0f1f,0x0db3f05,0x0b11f84,0x0602f9d, + 0x1668fc4,0x19456f5,0x10f1820,0x01f56a7,0x1eccc88,0x1791997, + 0x1151dbc,0x0333837,0x1672bc0,0x13abc77,0x0250605,0x12d1cdf, + 0x12bf993,0x070f91b,0x014c984 } }, + /* 52 */ + { { 0x0011531,0x13abfc7,0x15f1c22,0x0587b9a,0x1f45b17,0x0ccf14b, + 0x127f70b,0x02b51d5,0x1b93b64,0x0a7740f,0x023a1a7,0x16a94a9, + 0x10a5833,0x05dbd5b,0x155870c,0x1e753bb,0x184b3bd,0x1daded1, + 0x177ccca,0x13f1c03,0x0124f90 }, + { 0x141e782,0x0554255,0x0e1f16e,0x0d0a3bb,0x1de2012,0x0415e90, + 0x0a9f665,0x077e937,0x1f4b641,0x0cb1ef5,0x0788901,0x1f76f9a, + 0x0eed369,0x0dd6b07,0x1d25774,0x061dbb9,0x093892e,0x0f5a3ab, + 0x1c2884b,0x0237b15,0x010baaf } }, + /* 53 */ + { { 0x0ec64e2,0x100ba0b,0x1af9c51,0x1efaf8d,0x1fd14ac,0x05b8bb5, + 0x0128d9a,0x0383c6a,0x1741b04,0x171f9f9,0x0d9ec1c,0x0a945a7, + 0x0d651fa,0x12bec94,0x0fb728f,0x1e832c4,0x08b72c8,0x194dba7, + 0x09eaebb,0x13968e6,0x00383d9 }, + { 0x0342a3f,0x0e859ed,0x0552023,0x05bde95,0x1200246,0x1ad4300, + 0x190bbaa,0x0da3638,0x106e54b,0x10f1502,0x1b3c697,0x021e218, + 0x109ba17,0x07c81e6,0x13f0d98,0x0cdea66,0x0011341,0x1cb4f00, + 
0x15710d4,0x04c0e82,0x00fafaa } }, + /* 54 */ + { { 0x12de285,0x0687338,0x1717217,0x010d3eb,0x0d2ff8b,0x0769c4e, + 0x0ae4b7d,0x1086e54,0x055b99c,0x1a92698,0x0800cd8,0x0b45c0f, + 0x1346fef,0x0b704a4,0x0b20b6b,0x12a5614,0x02172a8,0x159b133, + 0x1c85fad,0x1963115,0x002c9af }, + { 0x064c5b5,0x0ea3b4d,0x1f874ee,0x1c89899,0x00d8d5d,0x036dffd, + 0x163bc47,0x1daac10,0x141c14a,0x10ecbc7,0x1fa1533,0x1ce46bd, + 0x1d251f9,0x023a2ba,0x1430530,0x13807f3,0x18ebda8,0x0069641, + 0x1b32770,0x1e08166,0x016fa25 } }, + /* 55 */ + { { 0x0ad682d,0x0cef54e,0x0e46c8f,0x068c6d2,0x07acb1b,0x07926bc, + 0x0662170,0x19d3eb8,0x1d41883,0x1fb17e3,0x15791b7,0x13bea6a, + 0x05d1ab2,0x048e6d0,0x06c72ca,0x067daad,0x1c452c6,0x06d8a6d, + 0x08d150a,0x1770d85,0x01941ac }, + { 0x0db8127,0x1386412,0x1d6f61a,0x1e836f9,0x04a6563,0x046cda4, + 0x16afae4,0x0151b09,0x1899c26,0x1755731,0x0da55ea,0x1656888, + 0x0d13ed6,0x0854964,0x1253e67,0x1972e77,0x02bd04b,0x1cbc797, + 0x05a9597,0x0711dee,0x007456a } }, + /* 56 */ + { { 0x0fc1f77,0x16ff24b,0x15a9820,0x1e268f5,0x104c435,0x15f22bd, + 0x0537097,0x155e84d,0x1b6f764,0x050b834,0x00f6859,0x07aa09b, + 0x10e0387,0x1064119,0x0e76d4b,0x1367d61,0x14ed423,0x14c4359, + 0x0620536,0x10fe54b,0x016a765 }, + { 0x1c71a5d,0x07c7475,0x08cda46,0x050a80a,0x09141a4,0x0165e62, + 0x0273306,0x14fac7e,0x1e09057,0x17f2ce9,0x0763ad2,0x161bc47, + 0x12e633d,0x1eca4a5,0x12160b7,0x1fac375,0x0414704,0x0c5c8ad, + 0x13abbf6,0x0cd53bf,0x010ee08 } }, + /* 57 */ + { { 0x0e07a4d,0x0623829,0x1740ad4,0x11cbae8,0x1f6d38b,0x1789133, + 0x111f386,0x1ef6829,0x139c505,0x1f25a25,0x1ce6f80,0x0f2b0de, + 0x1c59f3d,0x13e178d,0x066f29f,0x1f5a994,0x01ec063,0x18e28e0, + 0x1d0a2be,0x126f4af,0x0080da3 }, + { 0x02369fa,0x0654e88,0x18d7a76,0x16e0d81,0x0009bff,0x1aaec07, + 0x0669e5a,0x0985c14,0x0ac0d09,0x107216f,0x1061eb6,0x058af0f, + 0x166c1be,0x0e7d025,0x12b8b32,0x0e680da,0x0607657,0x0ad8675, + 0x1f258a1,0x04a48b8,0x00d82d5 } }, + /* 58 */ + { { 0x093de69,0x191c657,0x1a6db72,0x0677fb5,0x0963c83,0x1bcc1b2, + 
0x07d37a2,0x15c6790,0x0ae8bf8,0x09d1122,0x1aeb338,0x0f0c987, + 0x160bc6e,0x0aad2d6,0x0de94f1,0x128b350,0x135bc7e,0x0c3aec6, + 0x07d1bf3,0x00aa69f,0x001fb37 }, + { 0x1b974a1,0x093863f,0x1205e3a,0x01d3da4,0x03448fa,0x1ffdea1, + 0x1b0f592,0x078282c,0x1d79f4b,0x02d5221,0x1cca828,0x09e2773, + 0x1ed855a,0x164811a,0x1af3e36,0x0569097,0x1878db5,0x0b2c24c, + 0x1234274,0x1ab3e3c,0x0183aa4 } }, + /* 59 */ + { { 0x1ffad9f,0x02ebaed,0x03f3b96,0x09e833b,0x04df617,0x0349a2b, + 0x0fd679b,0x018dee7,0x183d59b,0x003c9e8,0x122542e,0x1f87253, + 0x0b6baf4,0x14cb15d,0x1116a54,0x024e77a,0x145eaa9,0x1a95b0c, + 0x1471e16,0x19bffe7,0x01be4fc }, + { 0x0b2857f,0x1c26cbe,0x0fd0170,0x100d6f5,0x0cf8305,0x1673592, + 0x1745d0e,0x16dea51,0x0bc43d6,0x03dc7d1,0x1592e4b,0x117e29c, + 0x1a8f0e2,0x095cf80,0x1a6f1cf,0x107cc36,0x1403dd3,0x1d5c5f5, + 0x1e4651a,0x1d418b2,0x00aeacc } }, + /* 60 */ + { { 0x163c2de,0x05d7700,0x029269a,0x17d64ed,0x042d0b2,0x0d73b3e, + 0x1c493ff,0x086ad0d,0x10aaca9,0x136d2ea,0x02473e4,0x099dc02, + 0x0d699c3,0x09925f6,0x0951501,0x141527a,0x0f14193,0x08db5ac, + 0x1847327,0x0924bda,0x014ff14 }, + { 0x1ed9259,0x0d30660,0x09fdfd8,0x065e3ab,0x1be37aa,0x177a188, + 0x1c4f41e,0x1740708,0x14e6fa7,0x0f99ea4,0x0dcc326,0x182d17a, + 0x1c43928,0x0dcaabe,0x13e333d,0x17dcae7,0x060d1a2,0x005e36a, + 0x0ec5584,0x1a32870,0x014527c } }, + /* 61 */ + { { 0x027af4e,0x1289a9a,0x0df52f9,0x02621b2,0x0e6c0bb,0x1338e19, + 0x09dab2a,0x0ed7b1d,0x0d3a9c3,0x0bd9fea,0x1c26aa7,0x10c68e2, + 0x00124ce,0x00c028e,0x1739074,0x1dc3844,0x04ff9e8,0x02a4494, + 0x0d713b2,0x105392a,0x013d22d }, + { 0x1b15e02,0x0f6ced0,0x01a1ac0,0x18603cb,0x1d092ba,0x1209ad8, + 0x0860d5d,0x1497f4f,0x16f7159,0x0772cdb,0x0434370,0x00a2301, + 0x169171a,0x1c0290c,0x054c6ee,0x0f208b8,0x0fc2092,0x0ba0498, + 0x18cdda1,0x169198e,0x0008963 } }, + /* 62 */ + { { 0x0aaaed5,0x05b107a,0x1ba03fa,0x1bfd0e3,0x1068de7,0x1fe5a58, + 0x00c3ffa,0x0b65644,0x1c3a215,0x06fdf73,0x06e0175,0x15184ed, + 0x10a7a26,0x169cf57,0x1f79dc1,0x1e0646e,0x047f615,0x0f8d492, + 
0x0b66dcc,0x1035088,0x012aa1b }, + { 0x1152e8f,0x133e858,0x0530a67,0x0f256bc,0x0e773d9,0x05abd11, + 0x041cfc7,0x145c1b0,0x0bf1da4,0x1d7854e,0x0d12680,0x0c1d845, + 0x1d169b1,0x0e96be8,0x0b06b23,0x11dc970,0x0a6bfc9,0x0ba8456, + 0x0f2fa85,0x124881c,0x0150549 } }, + /* 63 */ + { { 0x13a4602,0x0250550,0x1839c00,0x07a1a58,0x105c71a,0x0bcde2a, + 0x0918e9b,0x1e949fc,0x0d54d9d,0x03c759d,0x0f1ee3a,0x120ee7a, + 0x057ecca,0x122767d,0x0eec9e0,0x1a2f2b6,0x01fb124,0x045187b, + 0x1d8cabc,0x1ca0029,0x01155b7 }, + { 0x0f0021a,0x017664f,0x07518b1,0x0ff0ad9,0x18017fd,0x123c5e2, + 0x10ee0b9,0x1b621c4,0x11505a4,0x183a334,0x1fba96b,0x143899a, + 0x0ad9bb0,0x0a95768,0x0e8e68b,0x1e13bd1,0x09ab549,0x003a3a2, + 0x195fe99,0x11ef7b3,0x013fd5c } }, + /* 64 */ + { { 0x053c22b,0x0673dad,0x11a86f6,0x1af9568,0x18733fc,0x1659ca3, + 0x0938922,0x01f8899,0x0a38c79,0x0c4458f,0x0d08dea,0x0dd62b8, + 0x0336afb,0x1db8103,0x04ee2a3,0x011f572,0x0c59175,0x19a5bbe, + 0x0791cca,0x03af4ff,0x0050a93 }, + { 0x0d21d18,0x121482b,0x0286a42,0x0eab682,0x0266630,0x053582c, + 0x12a2e25,0x0b968d0,0x1828cf7,0x10d6f31,0x1c0a8e2,0x10b424e, + 0x094fb2f,0x16fbdb8,0x1fdf416,0x03b6d07,0x092a68d,0x00e9fad, + 0x024f357,0x19c3b78,0x00f5243 } }, + /* 65 */ + { { 0x17d7891,0x0c1e1e9,0x1b2a3f0,0x13fb0cb,0x17b5014,0x10c2208, + 0x10f5a3c,0x0b01edc,0x15a07f6,0x1a8f612,0x00c80ab,0x0d975a6, + 0x158fe5a,0x0833b77,0x179a3cc,0x000192b,0x11fca4e,0x03a8471, + 0x1dcd495,0x1cb52ae,0x0159783 }, + { 0x0537ad9,0x0dab897,0x13def07,0x1a6b7d3,0x1e87112,0x1fcde5a, + 0x0ad2355,0x18f76a4,0x0a8b3cb,0x17fbc48,0x136d707,0x1c23cbd, + 0x0d4f306,0x19c3f3f,0x16a0e48,0x03c7a61,0x0f47232,0x026c8fe, + 0x104a99f,0x0f76c5c,0x009f848 } }, + /* 66 */ + { { 0x0b8e08e,0x0fc07c6,0x1b5a1bd,0x02492df,0x1cfd2c4,0x1bee6fb, + 0x0dd0d82,0x0be00c3,0x157f4d0,0x0dd7fef,0x0187c93,0x18548b0, + 0x04b1993,0x0ef4ca6,0x1b2a342,0x1c0c4d8,0x04d2747,0x077b869, + 0x066572f,0x0ba9c77,0x00ffd4e }, + { 0x0f40077,0x0f122e3,0x1418c5c,0x0a0e47c,0x1592e04,0x15fec40, + 
0x1bdf9a9,0x1c06b90,0x16d9d9c,0x104ace8,0x15dc32e,0x1fd07d6, + 0x1d2e7f8,0x0206b1e,0x1ac2207,0x08832b1,0x1daeb9e,0x0ab199d, + 0x0bf47d3,0x072fbe7,0x0034fb0 } }, + /* 67 */ + { { 0x158815c,0x0702f59,0x1f65ee1,0x09c8210,0x1abcb2d,0x182ebd1, + 0x162241f,0x0390f4e,0x17a9d48,0x083bc6a,0x1932f4d,0x1ff085a, + 0x1e9d34c,0x067944f,0x167356b,0x058dc10,0x191dd2b,0x141b96a, + 0x02d02d8,0x1a905c3,0x006bc06 }, + { 0x04ed375,0x14ad06d,0x0bab441,0x10531b5,0x11baf58,0x1b84962, + 0x086d3d2,0x06b6051,0x07a335b,0x15c3ed7,0x1fbf622,0x06c40ac, + 0x14a7359,0x199061b,0x127f040,0x11660f4,0x0c4a355,0x1b9bd65, + 0x103f3a6,0x0d2d469,0x001ed30 } }, + /* 68 */ + { { 0x13902fe,0x085585e,0x0ecf655,0x170d53a,0x1bba4b4,0x0e561bc, + 0x182a65d,0x1b874b3,0x1333605,0x02f4398,0x10b1601,0x118435f, + 0x11f2c59,0x177ce5f,0x1fe35bf,0x0788503,0x1d09bf8,0x0c15f6a, + 0x0a04c75,0x1b3ab6a,0x01579d1 }, + { 0x119258e,0x0d182aa,0x0aa1a1f,0x1204fbc,0x13f539f,0x11186b3, + 0x05d1f5a,0x108d3f5,0x15f5d16,0x18d7591,0x1907d6a,0x128ebef, + 0x135bbfe,0x0b53ff5,0x151aaec,0x0a30f7a,0x0e8e16d,0x0957dea, + 0x13254f7,0x0f7c277,0x0160743 } }, + /* 69 */ + { { 0x09755a3,0x0b2d4f7,0x0ac557c,0x1570593,0x0c8d5a1,0x15cbf30, + 0x1916aad,0x0e2cb43,0x0ab05e2,0x00266d8,0x020c3cc,0x16a4db6, + 0x0b9e0c3,0x1ad65ef,0x187b069,0x1093155,0x084761e,0x1209ea2, + 0x06e718b,0x1c13776,0x01e9589 }, + { 0x072258d,0x09040ce,0x0f519d4,0x08b82b2,0x01dcd73,0x008fedb, + 0x1e9ee47,0x11cd8c4,0x1885790,0x0e9f4df,0x0f1a3b4,0x0dfca61, + 0x1f9aac0,0x15ada27,0x1705aed,0x1dbaa24,0x1b6db90,0x01c4305, + 0x0efb6d7,0x1d1611f,0x01aa96f } }, + /* 70 */ + { { 0x057c0f8,0x12eec79,0x0364c8e,0x05ba742,0x0884dc9,0x1c6701a, + 0x1e73aee,0x15207e6,0x1a47262,0x10bd6a9,0x01b1b58,0x002ea5c, + 0x0da1df2,0x0192146,0x0dc8f83,0x18c59eb,0x0892c30,0x00f2e9c, + 0x1dfe0b3,0x121e3e8,0x01fdd9a }, + { 0x163ab59,0x093dd0b,0x0fa60c3,0x1ce46f0,0x0f27d93,0x0cb4556, + 0x0099251,0x1ab02ab,0x01700d5,0x1928d19,0x11b67d8,0x1ce6062, + 0x12cf6bb,0x132df87,0x173d157,0x047f6d9,0x0ce6323,0x0405500, + 
0x05a91d1,0x13cc59b,0x01496e4 } }, + /* 71 */ + { { 0x0574c09,0x185bf20,0x1a5afbf,0x067fd01,0x176f264,0x11bec8d, + 0x14d4bac,0x0041677,0x17edc31,0x006315b,0x08db70f,0x1296849, + 0x1ef9893,0x1e3621a,0x1a99309,0x1a0edd3,0x1c4e388,0x196fe10, + 0x139a792,0x10a5ed4,0x0139cc3 }, + { 0x1096b91,0x051ffdd,0x10f948e,0x0ae7b1a,0x0e72c9e,0x0bbaac7, + 0x16c4631,0x169822d,0x0dc47d4,0x07644e9,0x06557d5,0x1a6a85c, + 0x1c2006d,0x1a1ba3a,0x12bb5a9,0x1208200,0x12a2bee,0x0e0eee1, + 0x164ccb2,0x082f45d,0x01fb597 } }, + /* 72 */ + { { 0x19bae66,0x18cc0c2,0x106cf03,0x0308baf,0x0b48e9b,0x151e0f5, + 0x0700d14,0x0738d9d,0x0ff8103,0x1c25006,0x035bf88,0x1c22bf3, + 0x1bcd7ed,0x1c506ea,0x08038f4,0x0380def,0x08a3c7e,0x1ab6eca, + 0x194e987,0x034fa31,0x00d09d2 }, + { 0x00eb3fb,0x1edd7c4,0x1f27e73,0x0ebd07e,0x04cfd29,0x053a5a3, + 0x1f5be8a,0x006c374,0x1dfb13e,0x01006af,0x0984a2e,0x1e96465, + 0x0e03bc8,0x00d46c3,0x1ee4b0a,0x0dd4fa3,0x1ae706d,0x13433af, + 0x1eac630,0x10c115d,0x011d9b0 } }, + /* 73 */ + { { 0x1d2f539,0x1b0a35d,0x0e885f3,0x00edc4d,0x16052fc,0x1f2533c, + 0x0746352,0x1506d04,0x09f3f39,0x1c11a11,0x1e1cea3,0x0d72867, + 0x0868b84,0x18b7a2b,0x074fcd9,0x0eea0f4,0x0282fd4,0x16fb01f, + 0x05d7889,0x16058ad,0x000377c }, + { 0x001dd59,0x0d6e9c6,0x0debc9d,0x1d73834,0x1c213a9,0x1e2a01c, + 0x1441137,0x10cd215,0x007ee0d,0x0177103,0x1f10388,0x1d2acc3, + 0x16896ed,0x085817a,0x135ce63,0x03448d6,0x191e5af,0x0e65cb4, + 0x04fdc49,0x05035f8,0x009fd5c } }, + /* 74 */ + { { 0x1073a5a,0x062a5eb,0x11f7216,0x190c3d5,0x07c81a5,0x10100d4, + 0x128e79c,0x19ca3f0,0x040e003,0x0954fc7,0x06677a5,0x0956b1e, + 0x0b76bdc,0x0ab6601,0x1c48c8b,0x0c5e639,0x06383f1,0x0db31a7, + 0x1e5a784,0x002fdd1,0x016984c }, + { 0x089f1fa,0x019b12e,0x01e3c7d,0x016d2f6,0x0a02a63,0x02dbfa2, + 0x079712c,0x1986662,0x14fede4,0x1e65728,0x096a929,0x10e8960, + 0x0d0d26e,0x1c26dbd,0x16ddeef,0x183fcfa,0x0a8f571,0x01cf78d, + 0x0633348,0x1752508,0x018d65e } }, + /* 75 */ + { { 0x0bb2537,0x03355c5,0x05be8de,0x16cb661,0x14ac4cb,0x0145698, + 
0x09fb4a9,0x12d04ff,0x010e9e1,0x0e8cfb1,0x006d3a5,0x0f41130, + 0x0331eb9,0x15745c1,0x19de98a,0x12c8555,0x02a5f5c,0x04b49eb, + 0x18da2e1,0x17fd2e7,0x00adff5 }, + { 0x12b0dee,0x1d710a4,0x0b3a8fb,0x1d2c058,0x0143e9e,0x1dccf29, + 0x1f265bc,0x0b2426c,0x0e93b8f,0x0bc5958,0x1304fb7,0x187020c, + 0x1a8d541,0x1ab9c73,0x0e5c36b,0x16349cd,0x0168373,0x1d7b766, + 0x12b8823,0x147e9ee,0x0180dbf } }, + /* 76 */ + { { 0x07a6aa0,0x0310d48,0x07dac09,0x1080f0f,0x0f56cb6,0x14549a7, + 0x02da205,0x0908987,0x19b9a90,0x06b1c69,0x107c81c,0x154104a, + 0x106968c,0x0fe445a,0x165c14c,0x0af0818,0x0d5af63,0x1aab26f, + 0x1352533,0x11318f8,0x0097e7e }, + { 0x16ebb2f,0x04c6cb5,0x049b877,0x18f553c,0x092a17f,0x1516341, + 0x03f6fe8,0x0376c1e,0x0b2e185,0x0319386,0x0933fa7,0x04cb039, + 0x15898db,0x188cace,0x02098e2,0x11a3328,0x08ea54b,0x0722798, + 0x1398c25,0x133d708,0x00d6963 } }, + /* 77 */ + { { 0x03769ee,0x079b15c,0x12cfe80,0x187df89,0x12d040a,0x15eb43b, + 0x0e2255e,0x0518726,0x1940a71,0x1132212,0x10a8c58,0x191fd84, + 0x11909c4,0x12d0d2a,0x1923c79,0x042e5a3,0x0f1049c,0x0345eb8, + 0x026dff5,0x125a56e,0x0041c86 }, + { 0x1816784,0x04550ef,0x173938e,0x0a037ce,0x0a58c8a,0x133c092, + 0x17fec0a,0x1c13693,0x0eda721,0x1994cf0,0x0997b29,0x03ebccf, + 0x168a0bd,0x02b638d,0x07a47a2,0x15461b0,0x0f4c005,0x11bd771, + 0x1656efc,0x000ea00,0x0073d94 } }, + /* 78 */ + { { 0x10c0ef3,0x1562500,0x0682a44,0x109d036,0x0e654bd,0x1a9a848, + 0x18f713c,0x1351e0a,0x1b47d18,0x06e20f9,0x0302704,0x1a0de47, + 0x07122ed,0x020d67b,0x1305abf,0x10a4044,0x1348375,0x18e65c9, + 0x09d6b9b,0x16be524,0x01271a4 }, + { 0x0e688b5,0x1ea399e,0x1a2de4b,0x0fb9538,0x14566d3,0x0b88e80, + 0x0c9b950,0x151f9d2,0x03cc341,0x1dd0a77,0x0b047f8,0x0998424, + 0x156b8ab,0x1ae9bcd,0x1e9d8ef,0x05f2381,0x0aef152,0x0caf169, + 0x073e569,0x04367a6,0x00acd4e } }, + /* 79 */ + { { 0x18e061a,0x1d3bc8e,0x08c1004,0x0159909,0x02707e7,0x17b1b53, + 0x0099bac,0x13ad581,0x177b25c,0x08bf510,0x1cd73fa,0x177ae1f, + 0x1eddb78,0x020c4c5,0x0236cac,0x1c88aa0,0x0fcce0a,0x187ac52, + 
0x095f439,0x12472e4,0x0043ed0 }, + { 0x0e129e6,0x0bbd9f1,0x135cb2b,0x0e1e37c,0x1b8c4a8,0x02b199f, + 0x037fc80,0x0875dca,0x12a6915,0x0132c60,0x189902f,0x199571f, + 0x0f95dc0,0x0cb2d05,0x13ad610,0x1b33cd2,0x053edd1,0x1be9dd5, + 0x087b721,0x0276411,0x00832df } }, + /* 80 */ + { { 0x181c3f2,0x09123e8,0x08fffab,0x1de66f6,0x115d35b,0x0483394, + 0x1f2e9d2,0x143b699,0x1fda7a3,0x07b86c7,0x1d5a1b9,0x0832f24, + 0x1e226b6,0x17f8fbc,0x010218d,0x149d1d0,0x139cf5f,0x04c7425, + 0x02827d8,0x1417d3b,0x00da57a }, + { 0x0fcea66,0x0767aa7,0x1ebb503,0x195f8ed,0x18df2ae,0x0ac2d44, + 0x0692324,0x14ac7e3,0x113f00a,0x088ded3,0x172e7ec,0x1f56896, + 0x116687a,0x1293106,0x157ec49,0x06b578d,0x11bbacb,0x157ca9f, + 0x1e53134,0x0126e1f,0x00ed997 } }, + /* 81 */ + { { 0x0b54c89,0x1ab7034,0x108ab27,0x1b9ce6f,0x08ecc17,0x044da98, + 0x1a0feac,0x036411d,0x1543fbd,0x079d094,0x175c1ac,0x19f1089, + 0x0d1b204,0x0f61720,0x05d7227,0x1229501,0x1ae9399,0x1845808, + 0x119d37d,0x1742e0e,0x00176b4 }, + { 0x1dfc175,0x0b754c7,0x0c31c48,0x06fc1eb,0x17b7fc6,0x199d1a3, + 0x0a17f3a,0x16f11a0,0x10223ea,0x13cc0a7,0x1b648ad,0x0416a38, + 0x1d90787,0x0e09fa8,0x1675692,0x0c16ab0,0x10bfaed,0x1734fc2, + 0x14332ac,0x135088d,0x005c249 } }, + /* 82 */ + { { 0x1e7bcf1,0x0c0fdb9,0x1ef9075,0x19ba782,0x16dde61,0x0ccfec8, + 0x05fb3e8,0x12f8c53,0x1c159db,0x13ac439,0x0ca0c06,0x112cc82, + 0x184ed77,0x14a1548,0x1cb3a24,0x149772c,0x187816b,0x1f9f722, + 0x195375f,0x0f42919,0x01234fb }, + { 0x009be8c,0x0c057f8,0x0e87c17,0x0ef1be3,0x02e938d,0x16f3103, + 0x0ba10c4,0x1734fc4,0x16070c4,0x0694f3f,0x1768dd2,0x07d7436, + 0x135cd9c,0x1238ba2,0x146f4be,0x13cce3c,0x0b056ab,0x0ca04c5, + 0x07df1a8,0x1095789,0x0049bb5 } }, + /* 83 */ + { { 0x0a470f7,0x12a980f,0x18c2a7c,0x11d24a9,0x001bf80,0x1001c6d, + 0x1a7a9c6,0x10e130a,0x15913ca,0x0959770,0x007f6c3,0x0097705, + 0x0aae170,0x08c72e1,0x171bac0,0x08757b6,0x04c1fa9,0x0d2b563, + 0x0a4b540,0x1ec8ee3,0x00531aa }, + { 0x0345730,0x0f7a483,0x1f0a59e,0x1d08de6,0x146aaa4,0x1e1d55c, + 
0x09ac069,0x09df02e,0x08166df,0x1c046d1,0x1370fb2,0x1f849c0, + 0x14e9fb3,0x1b760cd,0x02d876d,0x1a27d3c,0x05eeed6,0x0373fb3, + 0x1a9d4e1,0x1b180f0,0x00e570e } }, + /* 84 */ + { { 0x08ce13f,0x0b72c08,0x004d991,0x1a1c72f,0x15bfc58,0x1ca4f4d, + 0x0a12fa8,0x0fa096d,0x075af66,0x14db35e,0x0559afa,0x0db9512, + 0x1a7cb4d,0x1fb0aca,0x0f3b3c2,0x04a4036,0x13d002e,0x1218963, + 0x04d697e,0x0ed130c,0x014b81d }, + { 0x01078ec,0x1de12c2,0x1535011,0x0c2f388,0x15aa9c9,0x08fc7e3, + 0x0182521,0x03ed42c,0x0ce3409,0x0c6a71f,0x15040a6,0x0e0911c, + 0x1e9a9f6,0x0ed4562,0x0a03e21,0x046197e,0x0a08fec,0x0e32656, + 0x0252ddd,0x10c960a,0x002b0ac } }, + /* 85 */ + { { 0x15daf7f,0x0371cc7,0x1419ad8,0x122124e,0x0838548,0x02c5392, + 0x1717023,0x1c7444a,0x0c90f3e,0x19b17e8,0x057c08b,0x15e810f, + 0x0ac9633,0x0212fad,0x1c42f44,0x1b7f6e2,0x005ec06,0x0e100bf, + 0x06e2ef3,0x0fb9058,0x01c8d9c }, + { 0x0b8bed9,0x00fef8c,0x0495f6d,0x11c7446,0x0948330,0x08e25df, + 0x0779dca,0x15f79f2,0x141448a,0x185cb95,0x16918a6,0x0c67889, + 0x0295dfc,0x00dfa85,0x0e7118c,0x0626321,0x177869e,0x08c5b37, + 0x086eab6,0x09c5f42,0x00f5a8a } }, + /* 86 */ + { { 0x00251ea,0x0a884e5,0x06c2329,0x164f4d9,0x12aeed8,0x107a947, + 0x02fad58,0x0ad2035,0x0ae13fc,0x14210f4,0x04f01e6,0x03890b3, + 0x171349f,0x068d586,0x1820d64,0x1b21253,0x09baeb5,0x1cb7149, + 0x166699b,0x05e3f1e,0x00ce96c }, + { 0x0be8bd7,0x025a889,0x066f92f,0x1e78cfd,0x14846a0,0x1d1c327, + 0x11f4d34,0x103b139,0x073f439,0x1b23889,0x13959c7,0x06484db, + 0x0bc32bc,0x181584b,0x04d3aff,0x1056fee,0x00b0d06,0x0ab0278, + 0x0f3a2d6,0x07afd5c,0x011cfd2 } }, + /* 87 */ + { { 0x07689a6,0x1236651,0x1cafe25,0x06aac82,0x16a7dc4,0x1e5fe66, + 0x0923ad5,0x1ca617b,0x15b1adf,0x188fffd,0x162fd26,0x01b6e23, + 0x1b9f2d8,0x1b872d2,0x1e7f7c2,0x1143bd0,0x1836bd1,0x04ba9a0, + 0x12ff541,0x0a4d7b1,0x0114c8c }, + { 0x17388bd,0x1392df7,0x1a9f57f,0x1fcfff5,0x11c3dbd,0x16f1567, + 0x16e25f9,0x1f6f072,0x09ebf1b,0x0d3964d,0x01451a0,0x0e0ed2f, + 0x0f65265,0x1a93385,0x097b367,0x0fa9072,0x1d283d5,0x121bde6, + 
0x003b2c0,0x0e654f9,0x01ceb5d } }, + /* 88 */ + { { 0x1d376d7,0x0fe6767,0x01369fe,0x1d4cd61,0x0b4eab3,0x1c8dec3, + 0x0342356,0x1b0d592,0x08aa304,0x11eadbf,0x19a93ea,0x0856ff0, + 0x0127f3d,0x1dc09d7,0x1467ea2,0x1240d2b,0x0d7e34a,0x0e9c3cc, + 0x0cb0737,0x1814d34,0x0073df7 }, + { 0x0315b16,0x000dd9c,0x03e6f8b,0x133c319,0x1daa7c8,0x1b5c298, + 0x0fed022,0x10347a8,0x068092a,0x0acf246,0x1eab52c,0x1b3d06d, + 0x1077e93,0x1234cb9,0x1b58d86,0x1c8eda9,0x1f66297,0x12b4e59, + 0x1e047e9,0x1b0307c,0x0185b69 } }, + /* 89 */ + { { 0x19cb764,0x13f59d5,0x15b463c,0x031d783,0x1bbefc2,0x1cd53cd, + 0x0376c11,0x1ea8eec,0x009e542,0x068b692,0x066e5ad,0x11a378d, + 0x0ae35c3,0x0646c64,0x0cab896,0x148ba27,0x15267a3,0x042bce0, + 0x1155301,0x16e6aed,0x00d9773 }, + { 0x018c299,0x0523981,0x08ce588,0x0733ef1,0x09be29b,0x07a0a7b, + 0x0802521,0x1a88d09,0x19a2ca4,0x163a49b,0x0deacec,0x0e7cd1b, + 0x1f09c07,0x09ae1ab,0x007c166,0x1c7e4c3,0x03d8b7d,0x0049898, + 0x03edb82,0x1ff9a1c,0x0060f3e } }, + /* 90 */ + { { 0x05d6530,0x00a5f59,0x103dc8f,0x13352fa,0x1e015b3,0x1bfb112, + 0x0f12fef,0x1e24138,0x014b4f0,0x1ec62ce,0x1a3b3e0,0x1fbc7ef, + 0x0fcf002,0x0f58f78,0x14d4f24,0x018c06b,0x0a5201f,0x01ca621, + 0x0fa3b8d,0x025156f,0x01b5787 }, + { 0x10110cd,0x1be9d5b,0x06d6824,0x188ef22,0x00fa4ef,0x1d260cf, + 0x0bd6f14,0x1e58d59,0x138d509,0x0980879,0x0b071af,0x1057ca9, + 0x1f3ee2a,0x127951d,0x1a99f0f,0x18f7263,0x06ef089,0x1bd2653, + 0x1288d8b,0x14589e6,0x00b05bd } }, + /* 91 */ + { { 0x1f575cd,0x05038e8,0x060ad09,0x034a46e,0x15693b0,0x164ea00, + 0x0d80a68,0x0c02826,0x19c914a,0x0621a45,0x0cc7054,0x0e7a12b, + 0x0290245,0x117ea4b,0x05d7f48,0x164eedf,0x086e210,0x1d0b824, + 0x16ea4de,0x137026d,0x01f6ac2 }, + { 0x15da491,0x0f7aabb,0x160827b,0x1c56d55,0x05953f9,0x1a06ad9, + 0x084186e,0x1b0cd2d,0x14d5127,0x1e22988,0x0b418b3,0x195303d, + 0x032f21d,0x179db89,0x0f93c1e,0x1e41a7e,0x0b89646,0x1896683, + 0x0443d6e,0x06c6d2d,0x015e241 } }, + /* 92 */ + { { 0x0cfc44e,0x027e81f,0x0f54321,0x10a0876,0x0095f2c,0x1e82cd2, + 
0x19f6f26,0x1bf34bf,0x0f65bec,0x1c9947d,0x0587348,0x08e34cf, + 0x1de3102,0x1ddaefe,0x078e6fe,0x18b75d5,0x0d0133d,0x0c0115b, + 0x1c4b0de,0x0f5536b,0x0141bed }, + { 0x194d941,0x1802cfe,0x006025b,0x00fa9fe,0x1c6e9f0,0x0f82f1f, + 0x1d661de,0x133cc75,0x100483c,0x0207859,0x0661c13,0x1ddee54, + 0x1104d2f,0x0325253,0x1dced6d,0x0fe3db6,0x10f4936,0x1005b3b, + 0x0a7ef4a,0x1c06025,0x01694f7 } }, + /* 93 */ + { { 0x09095fd,0x0eeb9c5,0x15e837d,0x03a79d0,0x04b7a02,0x16e3b3e, + 0x1e5af97,0x0112154,0x1180a08,0x124bf7f,0x042aad5,0x1c3ecde, + 0x06b9856,0x1cc3cbb,0x0a62090,0x00c0262,0x0f73ba8,0x0b0ba46, + 0x1576a4a,0x120ed8a,0x001207d }, + { 0x044394d,0x04d008e,0x19142c1,0x0e19c93,0x15f25ef,0x14a132f, + 0x027c2c5,0x1f03c74,0x0109b33,0x02decff,0x04cb90b,0x087f461, + 0x1207f2a,0x0367c57,0x1aaff2b,0x0ce44e6,0x004f336,0x056fbfd, + 0x0a749ac,0x1d25f7f,0x00e02f1 } }, + /* 94 */ + { { 0x1be4d4a,0x0725331,0x1246549,0x1acde79,0x1fa57be,0x1d3e668, + 0x04fe9f9,0x1a7baf9,0x088c5d1,0x07467b5,0x147c79c,0x12f47e4, + 0x15b2579,0x11aaa67,0x17b163b,0x0e21214,0x0d7065a,0x1346934, + 0x014227a,0x07a9a41,0x004c7c2 }, + { 0x152d132,0x12badde,0x13158eb,0x0e71903,0x0fb8daa,0x131dcc8, + 0x1b94793,0x10e12d4,0x0b239d3,0x0eb59b3,0x127fb54,0x10e94ba, + 0x1aed5f8,0x01d4603,0x1424765,0x0d5c404,0x05ae468,0x10807c2, + 0x1ad3bd6,0x0b3ae8f,0x01c21af } }, + /* 95 */ + { { 0x1441308,0x1e00f6e,0x02417de,0x090c611,0x0dc3494,0x0b08e68, + 0x029d1d6,0x0cc55e7,0x14c23ce,0x0d38930,0x0bfb484,0x0f6bf17, + 0x1937f31,0x0649f03,0x1eee7fd,0x0a59e9d,0x0dd8ecc,0x1440787, + 0x172760a,0x19ba59b,0x0028480 }, + { 0x1f807ac,0x0e506e1,0x1527a3c,0x057a0e0,0x0a3e4fc,0x1c5db63, + 0x0285247,0x19b5a7a,0x13d6dfa,0x1f70e7e,0x11bfef8,0x0372bf6, + 0x1cee46b,0x1eeae7d,0x01eceb1,0x1d16ea4,0x0d9b1b8,0x16ac060, + 0x1ef7446,0x0cd3e98,0x008452c } }, + /* 96 */ + { { 0x0ace6d5,0x1a3a3e0,0x1eb690a,0x177ce50,0x15acb64,0x1e130a6, + 0x1226626,0x03de660,0x0ff05c7,0x0bff41b,0x0b11420,0x048da6b, + 0x1c772eb,0x1bad4e1,0x17f0858,0x1adfafe,0x01acbc0,0x1fdb7cf, + 
0x083a5cc,0x07862ae,0x009a764 }, + { 0x1845ccf,0x10b5a79,0x16f52c8,0x0121780,0x1c174e8,0x02481bc, + 0x031d358,0x00cf4aa,0x16358c8,0x0b91050,0x1dedb6f,0x188354c, + 0x0e838f9,0x1371704,0x0ccb065,0x0db4a6e,0x15e496f,0x0d81943, + 0x10c18c3,0x04e99f3,0x000c52b } }, + /* 97 */ + { { 0x0a58beb,0x173c147,0x0921bb0,0x1a6ccbf,0x0b404c1,0x1a07f81, + 0x17eb482,0x14aa8da,0x029d3e6,0x1aefbdb,0x006647e,0x08dacd9, + 0x1ef1868,0x17167f1,0x1a42f79,0x1a2d77c,0x1a01410,0x14bd75c, + 0x0b323a4,0x102a917,0x00cb59d }, + { 0x0f66a23,0x0e9d6dd,0x0207641,0x0e81bf6,0x0333738,0x007a196, + 0x0d7792c,0x07cdaaa,0x007d3a0,0x0bff474,0x0f2a038,0x1fee0cd, + 0x1529544,0x1d6ffd2,0x10ae5b2,0x0dd48c1,0x19445a2,0x04f80c6, + 0x128d3ff,0x0702ce4,0x011ed54 } }, + /* 98 */ + { { 0x17f8a61,0x039fdde,0x02ed8aa,0x0377cb0,0x1e18cd7,0x1fb4c02, + 0x07acd99,0x181fab9,0x1571d3d,0x1c6a7b0,0x1e6f22a,0x042af07, + 0x14e2e45,0x121cc58,0x10ddd2c,0x0236a6d,0x16374d8,0x196da51, + 0x17af8f0,0x1e252e5,0x01389f7 }, + { 0x18fefb2,0x1f90e3c,0x09caee5,0x0a20f75,0x1c76fcb,0x0ddab44, + 0x1dd83eb,0x18a25f7,0x1d33ea6,0x13245f3,0x04d2946,0x132646c, + 0x1b412a2,0x04c2c49,0x0f605a6,0x15b4894,0x18f3e66,0x1b0a24a, + 0x1a1ed15,0x1f8f36e,0x0140b4d } }, + /* 99 */ + { { 0x0be5bb9,0x0a2b83d,0x06fa0ec,0x11ca3b0,0x0e0cbfd,0x013d7fd, + 0x17d2726,0x0a841b5,0x0a687b5,0x1d392a4,0x105ccf0,0x07f7dd6, + 0x0308026,0x09c13e3,0x053f70f,0x16e1ce0,0x184b5e3,0x03e80c7, + 0x0f3dc5a,0x107c01f,0x00151d4 }, + { 0x1578aa3,0x11e3e35,0x16b8553,0x0ba6087,0x111ce9b,0x004080a, + 0x07a6ed8,0x0deabf1,0x0f405ac,0x1618889,0x02b1ed3,0x09b0401, + 0x067e66a,0x12e297d,0x10034e4,0x185d6e7,0x1988aca,0x1f70dcc, + 0x02d5d14,0x063b2ac,0x008fdfa } }, + /* 100 */ + { { 0x11cf8d8,0x0507012,0x0f4b31d,0x1a083e5,0x14d8949,0x15e7296, + 0x12924cf,0x15c16e6,0x15c5bcd,0x0d62fa8,0x002e4f8,0x1f982c4, + 0x0ed3ecd,0x13c9b9b,0x01a899a,0x0d2804a,0x08bea6e,0x0ac2d0e, + 0x0643e4d,0x19baa72,0x000e081 }, + { 0x1e28412,0x1ccab29,0x192c157,0x05b64e2,0x0d1526f,0x19d6e38, + 
0x097ac77,0x1bb9aac,0x0dd35de,0x16229e5,0x03ff8b4,0x1093507, + 0x09ed442,0x0e0672c,0x08304dd,0x16c135a,0x081bd99,0x196afdd, + 0x08bbec1,0x083b98c,0x01ad5be } }, + /* 101 */ + { { 0x1850756,0x17b33c7,0x165d58e,0x1ca5e76,0x06d37aa,0x14217ac, + 0x0294de5,0x12e21a7,0x1f743f9,0x0d57ccf,0x06a2eb3,0x0bcb27e, + 0x192fa75,0x004fbe6,0x1c13855,0x0ca1635,0x00ad6d0,0x131dfcd, + 0x16aff66,0x039d5aa,0x000e67b }, + { 0x1f43178,0x054705a,0x0cccd98,0x1b3986b,0x16bd412,0x07b4042, + 0x1e98e20,0x0e27af7,0x02e622c,0x19b96b3,0x009115f,0x17cedff, + 0x11ad7b7,0x06d8272,0x0af7a02,0x0b91a1e,0x1fe4bd1,0x170f3c0, + 0x03940bc,0x0eb7f77,0x01941f4 } }, + /* 102 */ + { { 0x03543ec,0x015fceb,0x1cf9e52,0x19422fd,0x185cb67,0x066631c, + 0x018e058,0x03d158a,0x1729bdc,0x0b65f6a,0x1a1b7d5,0x12fb444, + 0x1cd62ed,0x040f5bb,0x0932d7f,0x05db362,0x16672fa,0x126bda7, + 0x00cd6e5,0x05354ef,0x017260b }, + { 0x03df7c6,0x1e3db52,0x01b086f,0x077840e,0x05acac2,0x0ecac04, + 0x0def0d1,0x179d6de,0x0a32a08,0x0c79069,0x14f17a7,0x09eda32, + 0x10f0892,0x027b406,0x0975f1b,0x12258fa,0x0372de9,0x0327351, + 0x0b39913,0x180d88a,0x00ebda1 } }, + /* 103 */ + { { 0x11dd110,0x1be2e20,0x1128999,0x1459323,0x0d6787a,0x0b336b0, + 0x1a90691,0x02aa77c,0x0c15f9f,0x1f38b55,0x131ec9c,0x0c7e1c1, + 0x10a93b8,0x1531255,0x015c45c,0x184c148,0x16e1a39,0x072f3b2, + 0x1bdbc4c,0x1af16a5,0x0046af8 }, + { 0x0f38dff,0x10a58b8,0x0415e58,0x1024742,0x1e35d82,0x1f6c091, + 0x1135255,0x0c208d4,0x00da601,0x0c7d4dd,0x01104d8,0x054aa9f, + 0x0be7cdd,0x0cf54ad,0x10958f8,0x06169e3,0x014cb2a,0x0e222cf, + 0x07fe6aa,0x115bacc,0x0183c74 } }, + /* 104 */ + { { 0x1e58caf,0x00f9cce,0x0990ca6,0x1b0ea7d,0x05bb80f,0x08ca430, + 0x07c90b4,0x015907f,0x003eeb0,0x0486783,0x0f5e73d,0x04a2f8e, + 0x1b4037f,0x1926a30,0x10827f5,0x0419f08,0x0d22724,0x13581fb, + 0x0d0e3e8,0x17a53d6,0x01526f4 }, + { 0x189e51c,0x081a561,0x063a593,0x12db6fb,0x0cda55e,0x09e2c1d, + 0x05f7ba4,0x081655d,0x1feb034,0x1c983bd,0x1878a41,0x06f13a8, + 0x1eaa16e,0x021dfc5,0x099d4cc,0x1187f61,0x042ba7d,0x04eba4d, + 
0x0ee4977,0x03cdacd,0x00ec7c4 } }, + /* 105 */ + { { 0x1da8398,0x19a2ee2,0x10c0ba6,0x1f76718,0x1c66841,0x1dda3d5, + 0x11589f0,0x1bb9c75,0x1738d2c,0x1df5895,0x0c46163,0x15aed0e, + 0x14d4bc2,0x1dea7a7,0x0876c72,0x0361d2a,0x0aefe4e,0x1153486, + 0x0ffaf8f,0x042bd6f,0x0194375 }, + { 0x0dfd661,0x11a7897,0x07d132c,0x1ddaa58,0x0149984,0x1c7cc60, + 0x1c98363,0x12065a4,0x07be385,0x13b7272,0x02d9cbf,0x0e7b2bd, + 0x0254358,0x1958074,0x1b0e5ff,0x03d7122,0x105bad6,0x11dcdfb, + 0x184c6ef,0x1203055,0x00007ee } }, + /* 106 */ + { { 0x1fbcb5c,0x1f54f49,0x0a6f4db,0x073f50a,0x182be58,0x108dd01, + 0x0c497f5,0x06e1648,0x1cd8a26,0x0cd71bf,0x151c129,0x0c1c7b1, + 0x19ab78c,0x02620db,0x0b090f5,0x1398a37,0x1eaeda4,0x1e2000f, + 0x0f71fa7,0x1d48950,0x00f6988 }, + { 0x077f79e,0x0655278,0x0435364,0x03b3c4b,0x14d1760,0x0da5bbf, + 0x0eecf48,0x16c23bd,0x09037e1,0x18d9fb0,0x0fb3c00,0x1b0426b, + 0x1af113e,0x19481ee,0x1004de7,0x1252ded,0x1caa6f1,0x09b5ef3, + 0x16eeb61,0x076d093,0x006c57d } }, + /* 107 */ + { { 0x0bfccb0,0x1f71c4d,0x198e58f,0x0972ced,0x0c6e2a2,0x1d3693b, + 0x03c0a12,0x1a3f0ed,0x0465853,0x1c5d1dd,0x0ae6db0,0x06da371, + 0x116e3ab,0x03d0399,0x1f25d09,0x07e6403,0x1182523,0x17eea0b, + 0x118779e,0x19f5035,0x00214da }, + { 0x0a3198c,0x14f9bf5,0x0754d96,0x0bf9173,0x0be8a34,0x1af65e6, + 0x1c4ab53,0x029484f,0x00c2375,0x020ffb0,0x09ec17a,0x18b4514, + 0x135d9e8,0x1142cff,0x0ddd111,0x1bc6e5a,0x0ffea8b,0x00e0230, + 0x073d6fe,0x1c93425,0x01810a0 } }, + /* 108 */ + { { 0x1843c3e,0x101d7a2,0x0b9da20,0x07557d7,0x0601e30,0x06fb15a, + 0x023cd89,0x15072f6,0x0d21e5a,0x1439a45,0x10ac395,0x18e7344, + 0x0d2cf12,0x1953b63,0x123b404,0x0a34590,0x1c2f527,0x0db9550, + 0x0b00b41,0x052d872,0x00f3b63 }, + { 0x0f3d1f0,0x1a156e3,0x0e53392,0x065ea65,0x0f0dcc5,0x021ece1, + 0x0ccd60d,0x196af02,0x0dc8dd9,0x0808c77,0x1c64bed,0x034bdd0, + 0x023039e,0x0aba0ce,0x1dc99f5,0x0d61932,0x04c30f9,0x123177d, + 0x134f0d6,0x1f6f2c7,0x01f7454 } }, + /* 109 */ + { { 0x1153926,0x140ca4e,0x152043c,0x03056ae,0x02e28c9,0x0f4a64a, + 
0x0ecc142,0x0ae9684,0x0de9d6b,0x0d66295,0x128c531,0x1873167, + 0x05aa746,0x031eade,0x13a8c1f,0x193121e,0x1a2e1cc,0x0212aa9, + 0x1db6465,0x03317fe,0x008e271 }, + { 0x08e672b,0x007231e,0x109f1e4,0x1a7e5bf,0x103675c,0x10b1e4b, + 0x147debc,0x160e092,0x07aceaa,0x06b4c84,0x148da5d,0x0352fd1, + 0x15482f2,0x009ee08,0x1ef0772,0x19a27b9,0x08004f6,0x106715e, + 0x0afebfc,0x08cc590,0x003f2a5 } }, + /* 110 */ + { { 0x188a8bc,0x1a0f30a,0x0b2c373,0x1c4218a,0x0f48cd0,0x073d22b, + 0x18af5d6,0x0ae670a,0x148b9b9,0x1006aa5,0x026e785,0x10174d7, + 0x0f461df,0x04c6641,0x1f53c5c,0x0e28fef,0x1cd1497,0x08b3f80, + 0x045b17e,0x070a22c,0x0048b13 }, + { 0x12617f0,0x1b199ae,0x181b7ad,0x04dd970,0x1f9a577,0x08fe749, + 0x00cb46e,0x12f5278,0x16c84b9,0x1d21c45,0x1296fbd,0x044b047, + 0x0bbfe80,0x1ad197b,0x06700a0,0x0b8b0de,0x1ade3cb,0x0f9366a, + 0x1430776,0x1bb8eed,0x01e77f5 } }, + /* 111 */ + { { 0x0e764c9,0x1f76437,0x0b30f27,0x0d60f90,0x11bec83,0x02d8a16, + 0x0cb9a80,0x1d4d7e3,0x129e8a5,0x077a8d1,0x189071c,0x131c7ff, + 0x08517d2,0x194b361,0x0e278a1,0x198ed76,0x0a92c7a,0x09d16d4, + 0x0ca886d,0x19224ce,0x004a902 }, + { 0x17ce110,0x08dce47,0x1bc65b1,0x0f5d606,0x1cc33a8,0x152cf16, + 0x1426029,0x00104d2,0x1e78db5,0x1579353,0x0ec0c33,0x070992b, + 0x0282f3c,0x126217a,0x15ba7dc,0x09414db,0x02970ac,0x03b46ef, + 0x0f48bbf,0x1b9c960,0x016f4ae } }, + /* 112 */ + { { 0x1ed03c0,0x1819576,0x15341df,0x04b11bb,0x0684a05,0x02df079, + 0x0f13e6a,0x176da13,0x1e0b9b6,0x0ed063f,0x0d621ef,0x18fde5f, + 0x1e19689,0x161e673,0x0a5a583,0x055cbf1,0x1d5768d,0x15821ec, + 0x0c84866,0x101037b,0x006829c }, + { 0x059f006,0x0397d6f,0x1d69afe,0x0d972fd,0x02b9ffc,0x173f7c6, + 0x0576d62,0x03e6e32,0x1f4ccaa,0x1711e50,0x09f3130,0x0c1d138, + 0x061af8c,0x0435ee6,0x1975f9f,0x1bc87dd,0x07f9bd8,0x1c912da, + 0x0c93c22,0x0fe8c69,0x00b453e } }, + /* 113 */ + { { 0x1048bda,0x04b6871,0x1939531,0x128787b,0x02b6749,0x16a84f7, + 0x127dd30,0x1135840,0x0543c50,0x00fb48f,0x08d96ec,0x014620b, + 0x09cd996,0x1c58b82,0x164fff9,0x128ce69,0x1b3f82c,0x0814fcc, + 
0x05869d5,0x18bd440,0x0091785 }, + { 0x13dbdb6,0x0fcbc4a,0x067ed15,0x132fd94,0x0a9e84d,0x0a6bad7, + 0x140a4db,0x1f48e77,0x0c15276,0x0e0be54,0x1d8d5aa,0x02668f8, + 0x129cf66,0x01cb9c6,0x1a0d82c,0x06c1294,0x0a86973,0x0e9f218, + 0x0ac9fc8,0x0a65bdc,0x01b40ae } }, + /* 114 */ + { { 0x164cb8b,0x0874128,0x19f5a04,0x1e4aa54,0x0979af4,0x0c2a93b, + 0x1b43a34,0x189c21a,0x1fb64ea,0x1b62bc3,0x09164b3,0x0c77588, + 0x1084081,0x1e706c0,0x03ffcdf,0x182b8bb,0x049da84,0x0c59427, + 0x0998fb2,0x00aace6,0x0010ed8 }, + { 0x1f3ee9e,0x1a01828,0x1c7841b,0x136715b,0x0e8e3ee,0x1eb2249, + 0x1e9ba84,0x163a790,0x180e1ab,0x1da4fa2,0x15ca609,0x02f217f, + 0x1fc283d,0x17e3d1a,0x1943e96,0x15a9f1f,0x145ade3,0x13b9ed2, + 0x068877c,0x1f55c9b,0x01f878b } }, + /* 115 */ + { { 0x1ad5678,0x06c7455,0x096eb98,0x1dcc018,0x0afa72c,0x1447108, + 0x182d130,0x13f73a9,0x0d254cf,0x0223fbb,0x18ae959,0x17892b3, + 0x0c1fb36,0x14b0899,0x0f1135c,0x01e3272,0x01ffc14,0x06bd444, + 0x1425992,0x10c2511,0x009127a }, + { 0x09e690c,0x16010c5,0x0856d4d,0x03d569f,0x05dcc52,0x0772a64, + 0x1108ec0,0x090135e,0x1af3a8e,0x1bc9a92,0x0c7616c,0x06116ee, + 0x15e1f36,0x0a0e7da,0x0d875e0,0x08a536a,0x09eeffc,0x07520f9, + 0x1df498d,0x0eab633,0x00e8cf5 } }, + /* 116 */ + { { 0x012b398,0x0dc06e9,0x0dcc07b,0x03aa7ba,0x1039618,0x097d4ae, + 0x1811e29,0x0da1c10,0x0a7825e,0x08f3219,0x1b393eb,0x178a661, + 0x0fe0185,0x183c49b,0x03dcc4e,0x0dd46a1,0x0fd9e7f,0x00ee4c1, + 0x1555ad8,0x074c05a,0x00e8dbf }, + { 0x19e05bc,0x1191a13,0x0f4f0dd,0x19e888a,0x1f5f40e,0x1183c9b, + 0x17d35fe,0x0446218,0x0108d7e,0x07fd69b,0x062ef17,0x1de7855, + 0x00f2f01,0x0bea3fc,0x0ac5c67,0x05c3861,0x118a9b2,0x03de4fc, + 0x00d37e5,0x1b8a55d,0x01f9f53 } }, + /* 117 */ + { { 0x183f89b,0x15a4f60,0x1b53c99,0x04beb00,0x13fb5f0,0x1618406, + 0x10ad653,0x02fa614,0x0371cd9,0x1b58ca0,0x1f89b52,0x15576fe, + 0x04f7541,0x16adbdb,0x149a7ac,0x06d8bca,0x1c17f80,0x0870d42, + 0x097c99d,0x1e1e45b,0x01cea0f }, + { 0x08e11f8,0x1eab51d,0x0d5180a,0x03ebf35,0x0986402,0x06496b9, + 
0x0b16833,0x0178ce8,0x0523f65,0x122b4f3,0x0afed35,0x1037eff, + 0x0bc8e46,0x01e4f36,0x09d651f,0x1fe4168,0x0d538f5,0x1159ca9, + 0x1c12ba8,0x1f1c703,0x01b0818 } }, + /* 118 */ + { { 0x10d90f0,0x0dffd72,0x1370ef9,0x17ea023,0x0cb3b11,0x08efd62, + 0x09c469a,0x0e7c219,0x14ea1a7,0x176108e,0x1bbad98,0x1d77cb0, + 0x1d5a979,0x106178f,0x1c5aac6,0x17fd49b,0x17ec57b,0x17f4f1f, + 0x0b949bd,0x0b2c1cb,0x015e1b0 }, + { 0x030e62e,0x10252c3,0x06dc723,0x1cc88fc,0x1d00310,0x1a223d1, + 0x1ad850e,0x1479e3c,0x17462e7,0x155dc28,0x09c9364,0x1410000, + 0x1f8309e,0x12294b6,0x00175c3,0x1b0243b,0x1b33d4e,0x1079c24, + 0x00d3513,0x17ff78d,0x00962d6 } }, + /* 119 */ + { { 0x0e07711,0x1f2c6a4,0x0ecb44f,0x11a4e14,0x10f8364,0x0ff8263, + 0x024b633,0x0282a2f,0x051411f,0x0ddb2bc,0x1e29545,0x1b207c9, + 0x0f6c31c,0x02099b1,0x1e1c548,0x0da9ae7,0x1eeeca0,0x197f012, + 0x1538c5f,0x0dc82f2,0x00ad32a }, + { 0x1d147df,0x0631fb4,0x0dedf8e,0x1ce217e,0x169bb06,0x0a8a6f5, + 0x1afbca3,0x1b3729b,0x18d11c3,0x19183fd,0x1718112,0x1bf2070, + 0x033b369,0x13c0074,0x1a8bd27,0x03838d1,0x0587d50,0x0781459, + 0x13bde06,0x0f0442b,0x0055970 } }, + /* 120 */ + { { 0x0c1d751,0x1a8edaa,0x1448430,0x03741f2,0x0144530,0x0e45f6c, + 0x0cd3eff,0x0154efd,0x0cf2368,0x0c6c09c,0x1ca1812,0x0949c09, + 0x1a928c1,0x0b52db6,0x064b6e8,0x122072c,0x15b5f9a,0x124ef54, + 0x05c9040,0x1a8af00,0x008580d }, + { 0x009221c,0x1928007,0x015ba41,0x03e43bc,0x02e05b2,0x1304a83, + 0x0be8783,0x0528919,0x16f7751,0x0bfdcbd,0x0d2b299,0x037be3e, + 0x165d299,0x04ff8ae,0x1b356b1,0x1d8f34c,0x097d049,0x06e0eb4, + 0x1caebaa,0x1f9509c,0x0067388 } }, + /* 121 */ + { { 0x0ef1dd3,0x05a4ed3,0x15d9948,0x1c774d9,0x191a045,0x1eafa41, + 0x0602bcc,0x0953909,0x0ef0747,0x09e7ad9,0x1ec7ab9,0x1d34f17, + 0x1aa35b2,0x16d4837,0x0a5ff5b,0x059e9d9,0x1891b9f,0x0f8d49b, + 0x0aca162,0x0a66d27,0x010d667 }, + { 0x1691faf,0x0824b39,0x18616d4,0x13aafd8,0x1c73d3a,0x054292e, + 0x086ee4c,0x0d2fc52,0x040b05b,0x0a7ab8f,0x0fb7282,0x002e827, + 0x185e96a,0x068d35c,0x1f53dca,0x1d16f3c,0x1da3ead,0x0aa8a1f, + 
0x05b9153,0x170889a,0x00fb859 } }, + /* 122 */ + { { 0x0667aaf,0x1041f3e,0x12e9f08,0x1295239,0x13545cb,0x1074a51, + 0x064c632,0x18f943d,0x1e4eaa0,0x1d7ff91,0x15a1130,0x086c85e, + 0x0ba21ac,0x106a968,0x11a2a2d,0x003a9f9,0x05b6a93,0x0a00d2c, + 0x01eaf38,0x1eec592,0x00a3547 }, + { 0x1e260ce,0x09f69fd,0x07e98f7,0x1b01b80,0x0717752,0x0ed1f21, + 0x0dd75bc,0x01dabf5,0x05261f1,0x18b4325,0x135aed7,0x1ec7a41, + 0x16be7b1,0x110d632,0x18e3040,0x1231d3a,0x0f6673b,0x0189bdc, + 0x0b68bee,0x1688709,0x017423e } }, + /* 123 */ + { { 0x01fbcf4,0x113e215,0x17b8653,0x16bf59a,0x0c0d285,0x0f3303a, + 0x1af7645,0x134eb85,0x0ef0a6a,0x134b288,0x13d1607,0x1f420cf, + 0x1a13c5a,0x1df70fd,0x1804f05,0x0f3ce57,0x0d6dad2,0x0c2d203, + 0x050b3d6,0x052a3aa,0x0031004 }, + { 0x02bbc45,0x1af60d1,0x1361a9c,0x14feade,0x0ee5391,0x1000ef2, + 0x1e7408d,0x04a60b5,0x1aa2f8d,0x0590c28,0x16de2aa,0x0db030f, + 0x030e2c3,0x10d4446,0x13020fe,0x0fab79f,0x17fbd3e,0x1dc8ed5, + 0x13f7408,0x10a8c1e,0x00f462d } }, + /* 124 */ + { { 0x172d703,0x05d0124,0x080fd5a,0x1a72131,0x1c44ca1,0x14642af, + 0x1950ab8,0x06dd371,0x05b1b45,0x1ea79b0,0x1df9213,0x00f698f, + 0x1d2e08b,0x1118411,0x0bcee60,0x1fa2608,0x1131889,0x0e4ffe9, + 0x1b1a0d6,0x1e0ca58,0x01bb56a }, + { 0x0e0f16a,0x182f103,0x1297b6f,0x15ae8c8,0x1c1ac2f,0x09638d7, + 0x02a603e,0x143cb34,0x136c800,0x1d71beb,0x05e3704,0x1f8c46c, + 0x105f20e,0x15a3778,0x0e962e0,0x013c888,0x1cf4425,0x064a8be, + 0x103b66c,0x17682ac,0x01667d0 } }, + /* 125 */ + { { 0x122842d,0x185309e,0x1380ea8,0x0b6789d,0x0c6e00f,0x1c15bcc, + 0x13e1db7,0x18b0ec9,0x178d208,0x1496c36,0x02152b6,0x0723cf1, + 0x140a52d,0x12cd84c,0x06c9bee,0x1f93493,0x1ad04c5,0x02ee099, + 0x138fc4d,0x0124d26,0x01dda5c }, + { 0x0d6d673,0x0e5617d,0x0ff9bc3,0x0a01e76,0x0d8fdf0,0x0bab74b, + 0x065058c,0x1c7d9ce,0x10a4d80,0x0c87a49,0x04c004e,0x126c63a, + 0x18f2aca,0x1aac0b1,0x04659b1,0x0acf3dd,0x174e6dd,0x136f87a, + 0x135c736,0x0490d19,0x0111be1 } }, + /* 126 */ + { { 0x15cc1b4,0x0639323,0x1e33d91,0x1256e72,0x115fc2f,0x1ebf5bc, + 
0x19b4438,0x1c0cb4f,0x0f40c38,0x1a2710d,0x1493f2e,0x0573c35, + 0x0598866,0x01ab037,0x02e9377,0x127ee4e,0x02c1a4f,0x1e1c1a5, + 0x0d8a935,0x0193446,0x002193d }, + { 0x169fd7f,0x1bdc67b,0x0ee78b2,0x0f13442,0x1815da9,0x0887f78, + 0x03159ae,0x070f69f,0x1269314,0x0445984,0x0cdf008,0x037b24b, + 0x05477b7,0x1353207,0x126a484,0x18ddf40,0x1bdfd21,0x169eef8, + 0x0ca95ac,0x1f3afa4,0x00649b5 } }, + /* 127 */ + { { 0x19a9c35,0x056fc33,0x1e5b590,0x0796e9a,0x0dad98e,0x074ed7e, + 0x03aed7e,0x0788c97,0x0ad4a07,0x19c30a7,0x17955d1,0x01dc5db, + 0x19bd86c,0x0bb6705,0x0cc5ce1,0x1f72cee,0x1274095,0x0cdae99, + 0x1826bab,0x015d67d,0x013672f }, + { 0x0e54ba5,0x063b6b2,0x14868e2,0x03b88e9,0x03fe7af,0x13b840b, + 0x1a746ca,0x15aff47,0x0de1240,0x023da4f,0x00c0e81,0x16cd8e4, + 0x13d9f64,0x135e810,0x11e00a7,0x07d4b63,0x0700aa0,0x18e578e, + 0x0ee174a,0x0301d67,0x0103179 } }, + /* 128 */ + { { 0x12ed12f,0x1a7cfd7,0x162ab6f,0x09e701f,0x0e1d19e,0x0f40d76, + 0x0f6d68e,0x17812af,0x1626ef6,0x0c19990,0x16ca37e,0x0bd419e, + 0x14110ae,0x101c966,0x0565140,0x0f0ab56,0x0876bc6,0x133e24c, + 0x0ff5871,0x1cb2714,0x004ace7 }, + { 0x0c7dea9,0x0dcf794,0x0611671,0x1414d4e,0x102f95b,0x013b4e6, + 0x1095e08,0x12c069b,0x094dd68,0x09d8584,0x1aa5688,0x16ff6bb, + 0x0903730,0x10be544,0x090fb41,0x140a5fc,0x117fb1b,0x10b67a6, + 0x09be5b6,0x123ad64,0x01c0d86 } }, + /* 129 */ + { { 0x18015c2,0x16f9fdf,0x0b62a8b,0x1b892a0,0x07f8236,0x1218abf, + 0x1db829a,0x019d121,0x1a2d04b,0x0c77992,0x076eacc,0x0d1b501, + 0x019cc06,0x0d33e51,0x09a4deb,0x17893ba,0x12c83fe,0x04793e0, + 0x126e611,0x07b65e7,0x002987b }, + { 0x12e3dc7,0x1d7687e,0x1554df9,0x16e82bf,0x098e8bd,0x122f92a, + 0x1b26962,0x1a1f81a,0x0209c85,0x1eadd5d,0x0787ba0,0x1b8daaf, + 0x0d31ec8,0x12815ff,0x132b42e,0x17de23e,0x0ce1f41,0x0e21973, + 0x0fff299,0x015f557,0x01913b1 } }, + /* 130 */ + { { 0x1053af7,0x1bef829,0x13d2f67,0x0b65143,0x0030476,0x14821c3, + 0x1e3f1f3,0x1ba882e,0x0ac8c5d,0x1df69b7,0x07b1863,0x0277f6b, + 0x0f27b13,0x10d8df6,0x0995bfe,0x0e7533a,0x1459459,0x099a709, + 
0x0d8ad65,0x0311198,0x018c326 }, + { 0x07f6ff8,0x1d20a55,0x11ebd04,0x107f56f,0x092aeb8,0x0183dd0, + 0x021adf3,0x01df43b,0x1234610,0x040e092,0x10324f7,0x04e6042, + 0x1593d4d,0x1308241,0x1b5f8f3,0x12be743,0x0cfdf17,0x1715c8f, + 0x1a7b505,0x1b82346,0x0191160 } }, + /* 131 */ + { { 0x157d7cc,0x17a3745,0x0e1a69c,0x0a97e04,0x1140b0e,0x19d48e9, + 0x0e5b816,0x1c110d8,0x1a4ec26,0x1cd59d4,0x1d63a46,0x15d78a1, + 0x10742fe,0x0af1357,0x04b1821,0x1b3ee2b,0x076bb1c,0x0ca1e6a, + 0x1fc0b22,0x12ffa98,0x017c3ed }, + { 0x0d54964,0x01281f3,0x03014ec,0x058d463,0x19bd116,0x0146116, + 0x1b3d273,0x08031fe,0x0035346,0x02e3c20,0x1019a29,0x06bd699, + 0x038ea33,0x1a16df0,0x15c9fe3,0x1879af5,0x111fdf6,0x158abf4, + 0x1264b5d,0x112993d,0x01b3a7f } }, + /* 132 */ + { { 0x109ea77,0x171cbd7,0x1716479,0x12ebb84,0x06a760b,0x050cbd9, + 0x03022e5,0x0331808,0x0b68ce6,0x00dd654,0x08d5901,0x1a2ab7a, + 0x1fa19a0,0x0cbbd99,0x1296e53,0x1a0530d,0x1f8e5fb,0x0f98fc3, + 0x06407e6,0x18ab4d6,0x00b8f76 }, + { 0x046ec9f,0x1fc619c,0x09185d6,0x193bd59,0x1462205,0x0846f87, + 0x17b028c,0x0512596,0x1cfaed9,0x1ced941,0x127eca1,0x0008ca0, + 0x11477dc,0x0b77281,0x1492eb2,0x19c8a91,0x11656ad,0x1d3edb5, + 0x0c71a13,0x019b575,0x00fc011 } }, + /* 133 */ + { { 0x1308bf2,0x1b36c26,0x0010546,0x1facc70,0x19013c9,0x1c1dfcc, + 0x17e4bf4,0x1f8d125,0x03ffc8e,0x0877ec2,0x1a8a1e8,0x02d8627, + 0x00527e3,0x1d06fba,0x1db8f34,0x1a5431d,0x030f6eb,0x165cb72, + 0x1c3b933,0x17d9e54,0x018cc1e }, + { 0x070404c,0x0a56b8d,0x08c2034,0x01f39c5,0x0ad21dd,0x11f0393, + 0x0f378ea,0x1217299,0x16363a6,0x15acb08,0x078ad02,0x1e8b8d6, + 0x1be70bf,0x1367762,0x05b742d,0x0af8025,0x0747477,0x06a6595, + 0x15f647a,0x11194c7,0x00aa089 } }, + /* 134 */ + { { 0x0db0396,0x0e7e57c,0x09daa8b,0x0f6845b,0x08ae8f3,0x042b927, + 0x00d2659,0x07eca5f,0x07bf149,0x123e1e2,0x11e93bd,0x168d604, + 0x0e8b600,0x1d75ed4,0x1cf90e5,0x11be157,0x11fa795,0x1170e91, + 0x0206eac,0x0d2563f,0x00ef38e }, + { 0x0cf3047,0x00b4493,0x01607cf,0x08b2a73,0x1ad14f9,0x1f905b6, + 
0x17470a4,0x02ffbd0,0x0f57abb,0x152a1b7,0x1378e0b,0x1ff82f2, + 0x0f0d1a8,0x15ff669,0x0942388,0x0c08537,0x07fdb78,0x0088785, + 0x1378c7e,0x1cdec8f,0x01962ad } }, + /* 135 */ + { { 0x0c78898,0x1529bff,0x1dff265,0x05bc1f4,0x0b39de7,0x0658478, + 0x1dab34d,0x0a7eda0,0x0da78d3,0x06c5dc1,0x04b306b,0x09a7407, + 0x1d5fe80,0x12c0aa4,0x1eb7b7b,0x18db356,0x1a0c067,0x1c41c80, + 0x1b64fcd,0x0bff449,0x0191585 }, + { 0x19ebef3,0x1871b5f,0x05dca55,0x0bbe966,0x021046a,0x00b5ae7, + 0x06a569a,0x023f371,0x1288d0e,0x0f9c940,0x04566ab,0x17ca72f, + 0x12d6baa,0x0e47d5d,0x06bfb81,0x15e2082,0x1afe5c7,0x1f8c961, + 0x1f738de,0x05d039a,0x00f7aa7 } }, + /* 136 */ + { { 0x0c386ee,0x11e078b,0x00e483e,0x13a9813,0x133b046,0x15189b5, + 0x15c8a1d,0x00cf3c1,0x03c406c,0x01e0549,0x0f89f4d,0x1c7c9bd, + 0x0aef220,0x0cb7807,0x15ec784,0x1b9fe13,0x1d824a9,0x0a507ae, + 0x0707421,0x105d8b3,0x01e2535 }, + { 0x138c7ed,0x1793128,0x0237323,0x08ca8ff,0x1ec4319,0x054a446, + 0x14eb774,0x1b856dc,0x08257eb,0x1cf8f7d,0x032627a,0x0dd63e1, + 0x08c583c,0x000b1bb,0x1cda445,0x01c7be2,0x18bdbc2,0x131417f, + 0x12f5453,0x10200b3,0x00d526b } }, + /* 137 */ + { { 0x0025949,0x0a917d0,0x0514912,0x1e177b1,0x126d888,0x1b90b7d, + 0x0bd7f98,0x1ec6688,0x0472827,0x0761db2,0x109a076,0x034733f, + 0x0d91d8a,0x1463b88,0x08cbab5,0x04ec4da,0x02fe51b,0x1c72dff, + 0x14427e9,0x1e9fdbf,0x00040f9 }, + { 0x14a05e0,0x17528b5,0x03ac654,0x1de438f,0x0b0d48e,0x0befede, + 0x1986466,0x1fac9a6,0x08b4c21,0x088d902,0x08c0e83,0x136d7d2, + 0x09a6f56,0x1c62f40,0x03d8259,0x0bb1c57,0x1ab3680,0x139135a, + 0x0cd2728,0x1fe301b,0x01bdd6c } }, + /* 138 */ + { { 0x03cc612,0x1c2bb4a,0x071e927,0x1d06566,0x0914319,0x056f5ee, + 0x18a5f33,0x043244b,0x0b06198,0x08c7da1,0x0731f12,0x01084b6, + 0x10accb3,0x132372f,0x074cd1e,0x07c44ea,0x0ae590e,0x0757da5, + 0x1128002,0x08c0705,0x0151821 }, + { 0x196a461,0x040eddf,0x0e90f09,0x136a547,0x11c122e,0x06d845a, + 0x0163919,0x03a4385,0x06d6a08,0x080a5bc,0x0f3bdec,0x1da9ea6, + 0x1c167d3,0x00aa2fb,0x1ecca52,0x0f73ed9,0x11c449b,0x0f52369, + 
0x18870a6,0x1aec272,0x0081cfa } }, + /* 139 */ + { { 0x18a7f0e,0x0b193a3,0x0177bde,0x05bc2ee,0x114183e,0x108bf44, + 0x09b7d5c,0x19fa494,0x1b7cd52,0x06d8d84,0x0f0580f,0x13f75b0, + 0x099e42b,0x184f7c6,0x1c74ba9,0x0999ad2,0x05b8ee5,0x00c4a7e, + 0x129483f,0x0f69ca6,0x00fcf75 }, + { 0x0b62347,0x08c6643,0x04a1695,0x04f7855,0x0c51c9d,0x13393ff, + 0x0ac14a5,0x0de5dd4,0x00ae43e,0x045471d,0x0819aef,0x16bc0b9, + 0x0d80535,0x0419cc3,0x1ff36c6,0x099bb23,0x1ba3237,0x197a52d, + 0x1480890,0x0c74921,0x0124087 } }, + /* 140 */ + { { 0x0fac14d,0x05cb927,0x14f3926,0x1b4f353,0x16f4bf8,0x103e14d, + 0x036f75b,0x0701e3d,0x1717715,0x161867e,0x00c98fe,0x1a44e36, + 0x154c91e,0x0cda2af,0x04e0cd4,0x1257f7f,0x1891270,0x0bb52f3, + 0x1204ef6,0x0ce9c36,0x0128a97 }, + { 0x03e5924,0x11e20ac,0x1418a6d,0x031e2e3,0x01f9aff,0x113d143, + 0x0cf36ac,0x0e0568b,0x08a11ab,0x1ceaeed,0x0da5c64,0x0f61d1b, + 0x052bfb4,0x0760840,0x08de77c,0x03002ac,0x08124ce,0x157ad32, + 0x13e52ae,0x1188686,0x01508d9 } }, + /* 141 */ + { { 0x1ffc80f,0x0ff39e7,0x0fdb7aa,0x17a868e,0x023e2e9,0x09bdd3f, + 0x0fb4f27,0x0ae4ff6,0x07a3fc3,0x19bb369,0x1280f5c,0x19e71c0, + 0x03d0db4,0x15df07a,0x1805d48,0x0de9f19,0x119da98,0x1ec3f5b, + 0x1f9ac0d,0x16a15c5,0x01536d1 }, + { 0x040bab1,0x1aef7ed,0x098cdc7,0x1f3657b,0x07d6a8a,0x0565438, + 0x1722435,0x156bd14,0x1643ff8,0x0b9787f,0x03b0bd3,0x01b297f, + 0x029c4c1,0x075c9f1,0x0c3aae8,0x1fa026d,0x08f1d2d,0x15e2587, + 0x14d2820,0x0a5cb53,0x01429f2 } }, + /* 142 */ + { { 0x10e7020,0x1ea60be,0x05a12bf,0x156a904,0x1b169aa,0x079a47c, + 0x05c2162,0x177b7c0,0x1885986,0x175fb7f,0x070e076,0x0fea2bf, + 0x1bb3398,0x0254a53,0x1157cb0,0x0d092fc,0x042a0ed,0x01cd20a, + 0x1bdde63,0x15a94c3,0x01541c1 }, + { 0x12709c4,0x1db1403,0x17f9d91,0x171021c,0x1330d68,0x1707b1d, + 0x021d3a4,0x175a37b,0x1f8bea9,0x02727dc,0x0260685,0x1831063, + 0x07c15af,0x1b46350,0x071720a,0x016cdc3,0x1a236e0,0x042c62b, + 0x1f2debb,0x0aa2200,0x00119b2 } }, + /* 143 */ + { { 0x087027d,0x07693e4,0x0a18487,0x0a57f56,0x0050f33,0x0a88f13, + 
0x0f07067,0x1eadc6e,0x17f4c69,0x16a61d4,0x09aed00,0x0d5e4a4, + 0x10e6f35,0x01f3d61,0x040470e,0x1fbf677,0x03d33d8,0x1a1d861, + 0x1cba8d8,0x0721ef5,0x000ba8c }, + { 0x0851bac,0x061eb3f,0x13f310c,0x134bea8,0x0991c38,0x1dd030c, + 0x0f1919f,0x1e800d7,0x097cbdb,0x04e8127,0x12b6b75,0x0fbaee6, + 0x0a4539b,0x1465b69,0x0ea3e7c,0x1675b21,0x0304de4,0x03d490c, + 0x1ee5a4a,0x0e65df4,0x006ab28 } }, + /* 144 */ + { { 0x0ed5986,0x15a9691,0x1819c76,0x14b0a67,0x1eee627,0x0aaff1e, + 0x18deb3c,0x065d1fd,0x17ae8b1,0x0b0a486,0x022e533,0x030a694, + 0x102706e,0x1ce0ae1,0x17ff54b,0x15a8d50,0x0f351a5,0x1ead112, + 0x135c02e,0x036daaa,0x01e644d }, + { 0x02e4e9c,0x1834343,0x1f925a0,0x1890ec7,0x1e5cd76,0x01ce557, + 0x059e702,0x05ac061,0x18d83d6,0x07265f5,0x112b8b0,0x0a9c237, + 0x02911e2,0x127e503,0x0835f21,0x0e08b2d,0x1d5e9a2,0x07abc2e, + 0x0f8104b,0x0cefa1e,0x01be2f4 } }, + /* 145 */ + { { 0x101a6dc,0x0096ed5,0x0da5300,0x035c35b,0x191bd6c,0x18283c9, + 0x16bb2e6,0x03e75cf,0x062a106,0x138a7cf,0x14dadf0,0x1dcf52c, + 0x0b71978,0x0f0bb2a,0x1046f41,0x07ba9dd,0x0e0efab,0x0e388b3, + 0x1fb6fd8,0x154ae50,0x01d70f7 }, + { 0x1eb5932,0x137bea8,0x12909ba,0x14bf105,0x154ea0a,0x1cfbee1, + 0x1825ddc,0x0682eb6,0x09be579,0x19a8c95,0x117b334,0x0846f0a, + 0x1d9801f,0x1db21e4,0x0e38959,0x157d865,0x1d723e3,0x0dca08e, + 0x1c71942,0x1bd4d19,0x00ee656 } }, + /* 146 */ + { { 0x0890deb,0x070a050,0x12f534e,0x1b79d70,0x1f7bd87,0x020ef65, + 0x1fdcae8,0x1d2a3e1,0x0a6820b,0x1f76385,0x018a62b,0x0147189, + 0x0475519,0x1380876,0x16e9563,0x0f363d9,0x1b88c78,0x0676c8e, + 0x1d78857,0x1c7c99d,0x014c08d }, + { 0x0266da2,0x09a768b,0x0026705,0x16f6992,0x1ce322e,0x093b444, + 0x12bbda6,0x09a6fbd,0x105c284,0x09284bf,0x1466ad9,0x1c26358, + 0x06d23b7,0x12d1e64,0x0baedc9,0x08aead0,0x1b9628c,0x186298e, + 0x0e014dc,0x01d170e,0x00be2e0 } }, + /* 147 */ + { { 0x1ed32e9,0x1e4002b,0x065ce01,0x1ef8049,0x027e40c,0x1aa4182, + 0x1aaeeae,0x1e8b0a0,0x1ce820b,0x124bbb7,0x10fa055,0x0527658, + 0x08b5353,0x07f7b32,0x07a0d4f,0x1b94ace,0x13f903b,0x09390be, + 
0x004ff5e,0x1382135,0x01dc40a }, + { 0x1b21a38,0x153619e,0x1f91afa,0x03ae7de,0x0ae222e,0x0ea83fe, + 0x0139ef4,0x1563fed,0x0587a77,0x0dd6332,0x12935bd,0x1ec418c, + 0x0a58c74,0x153e1bc,0x0a0df65,0x1c81299,0x1313e42,0x1fa1efa, + 0x0d27853,0x14868ff,0x013f8a9 } }, + /* 148 */ + { { 0x12f8923,0x1a76fcc,0x07ce16a,0x00dfa41,0x024aa5e,0x09a0777, + 0x06e1c6c,0x0804f7d,0x191e0bb,0x0abe88f,0x1318b0a,0x15a5e7a, + 0x0f425af,0x03ffbd5,0x08c4a1b,0x197d25a,0x12b0114,0x0cb2095, + 0x0f88d4a,0x0d44638,0x019f670 }, + { 0x05c02af,0x1dde911,0x06341ac,0x0c7f47d,0x13ebc16,0x07a4172, + 0x0add6e1,0x1bf4dbe,0x12bfc55,0x095a290,0x09cf6a4,0x1a80a25, + 0x0430bdb,0x1ea9f55,0x03d0f64,0x1faa758,0x1e40c27,0x07e1ac7, + 0x065092d,0x03077d2,0x00a32cb } }, + /* 149 */ + { { 0x1a6a746,0x186169f,0x12a38e6,0x043ab44,0x084a792,0x06f95af, + 0x02451e3,0x166e14b,0x130666c,0x144033e,0x1c741a2,0x013deda, + 0x04b09a7,0x0032e8c,0x001e8f8,0x12890a0,0x14bb8dc,0x0382357, + 0x19524eb,0x1462538,0x01fd2b6 }, + { 0x05f2771,0x0eadef2,0x16574f5,0x15e865d,0x0542b08,0x19535dc, + 0x103efc8,0x1645d9a,0x1e8becc,0x1e5b0a1,0x1891fc3,0x02757f1, + 0x1bcecc5,0x06d181c,0x1755bde,0x141bf2a,0x01956c2,0x148abe3, + 0x00c7f8a,0x06b97e6,0x018ca6d } }, + /* 150 */ + { { 0x00c4923,0x0058ddf,0x01ef760,0x00d2052,0x046ae74,0x1de8638, + 0x0cdfe55,0x1704731,0x19655f8,0x1470d4e,0x1d0542a,0x0ff4a01, + 0x0ecd292,0x10173d7,0x1aa71b4,0x0d25d04,0x0b39f29,0x05a67ac, + 0x1d055df,0x070d197,0x011f309 }, + { 0x13ed442,0x1af3d19,0x1deeb72,0x1f20dfd,0x0e5c8e2,0x0c79145, + 0x0048cf6,0x0b85b36,0x07ffe12,0x119796d,0x0c60d51,0x0e63744, + 0x1259487,0x0969628,0x12ab96c,0x1b38941,0x0589857,0x15f8073, + 0x13c803d,0x02010ca,0x0172c5d } }, + /* 151 */ + { { 0x1c283e0,0x0a02317,0x0039625,0x08fdc11,0x1763398,0x1e8b117, + 0x0d03adf,0x1dbf5e3,0x0f598c5,0x07a8a8f,0x0366efb,0x05eefc0, + 0x146b4d9,0x14621fe,0x10f8ece,0x1a3a4ea,0x12c6511,0x19cca70, + 0x1c16db4,0x08343b5,0x00c6dd8 }, + { 0x1b991ad,0x10bf011,0x14508f6,0x06e3f74,0x0ab2b21,0x0e0c3cd, + 
0x1b16837,0x1b9682f,0x15f63ac,0x19de456,0x09f5405,0x04203c5, + 0x082fcf5,0x1083680,0x0dcff41,0x0259ec6,0x1de7db0,0x18f4108, + 0x1d9517b,0x0ecdb2a,0x018ca07 } }, + /* 152 */ + { { 0x180dfaf,0x1a3dcd7,0x1fce390,0x1f388cc,0x080b631,0x0de11c5, + 0x16c99b7,0x140dfe3,0x1aa8718,0x0b0f1b2,0x070d7d8,0x19215e6, + 0x08e7f7a,0x1e34237,0x0e0c747,0x0eb6980,0x1106841,0x10f334e, + 0x0d2dcc6,0x13ac412,0x00c76da }, + { 0x1e4e78b,0x1acbdd1,0x1e6a607,0x18aa133,0x0c14ded,0x0446309, + 0x0e6564c,0x0b17e6e,0x19b2074,0x02b4183,0x1da401f,0x188f444, + 0x13c4440,0x1bf36d7,0x17c8f23,0x122076d,0x0254292,0x1a7b316, + 0x0cede58,0x14db631,0x00f9f4e } }, + /* 153 */ + { { 0x0d36049,0x0f5c467,0x07e319a,0x03e8373,0x07a4ffe,0x1970844, + 0x1d58da9,0x114d216,0x065a0bb,0x1eeb546,0x10a5559,0x18b12dc, + 0x0d42cf8,0x0d55ffd,0x01ad7cc,0x04d48a5,0x0f28f6f,0x18fbefd, + 0x186b940,0x13c1581,0x0120c5d }, + { 0x0c10da7,0x171ffd6,0x1b96bef,0x1328928,0x07e2d5f,0x01107fb, + 0x1fa18f1,0x05d1d82,0x0bd6f63,0x137ba0a,0x127bd3f,0x181f87f, + 0x104a9e3,0x01dfdc3,0x1fcf2e8,0x0685a4b,0x000bb03,0x10c7e9b, + 0x014334b,0x07cea60,0x01ac1e6 } }, + /* 154 */ + { { 0x13d6a02,0x1e83e47,0x0347760,0x18fde9a,0x11fc143,0x03d7b0b, + 0x12fc353,0x1e19532,0x0827c5c,0x0549f4c,0x05e20b2,0x18f656d, + 0x1a4a102,0x052af45,0x0f21f56,0x0c9e0c6,0x02fcc2d,0x00d7441, + 0x01b407f,0x136a7f3,0x01c12ce }, + { 0x1dc1b79,0x11cfeca,0x05aa165,0x087e9cc,0x0728f75,0x117dcf9, + 0x0f133b7,0x13cdce0,0x0d50fae,0x017bb40,0x14c3b41,0x187785a, + 0x0c0546b,0x06eacc5,0x09001af,0x0922001,0x0c9e129,0x09f9943, + 0x1afe58a,0x1044ab6,0x0146777 } }, + /* 155 */ + { { 0x10c98fe,0x0a10f71,0x1c16be0,0x01f859a,0x1eb0feb,0x0fb5696, + 0x1329853,0x1d13658,0x09ba314,0x1c09a6f,0x12c5b74,0x1d709e0, + 0x08a443d,0x183fc65,0x155bb83,0x0722ff8,0x1bb3a4f,0x09e0e41, + 0x06b7350,0x0fba496,0x0199839 }, + { 0x14781e6,0x0f0bf6f,0x0407280,0x128de3f,0x12d7c31,0x18486d1, + 0x0984ed4,0x00f444f,0x0a7c8c6,0x04ad8ee,0x1a5c249,0x17ddbb8, + 0x181cf2f,0x02b0404,0x0f60aed,0x069ae3a,0x1a30851,0x0e7e6ee, + 
0x19e6310,0x02e36b2,0x00d23dd } }, + /* 156 */ + { { 0x0dd7e96,0x007c26a,0x10325e9,0x150813f,0x1114c8e,0x0889c9b, + 0x0a79aa7,0x1ad8ade,0x18fd8c6,0x1b03310,0x1a79f0e,0x150c004, + 0x1fad3ba,0x02c94ea,0x04f1ac0,0x06cb628,0x040222e,0x060d6bf, + 0x1e62abb,0x04c4348,0x01d36a8 }, + { 0x1003c81,0x022e260,0x180abab,0x15e87b0,0x1ef9ef5,0x1bba34c, + 0x17d7983,0x0b06d4c,0x1bf5d28,0x18973d5,0x0b3bc7c,0x1903909, + 0x122f53e,0x0e9245a,0x18cb28a,0x0b8c0c7,0x1c581e6,0x1ff4d53, + 0x0a1065c,0x10d934a,0x0017e36 } }, + /* 157 */ + { { 0x090de99,0x17f32cf,0x0d8c2cb,0x195a0b5,0x1e4485b,0x0724495, + 0x1a94b85,0x10f8914,0x0226286,0x16c2a18,0x0f6d50a,0x1d2abd6, + 0x01261f0,0x0a2f2c2,0x1a0618f,0x0ae7291,0x00f8ed7,0x067f0e7, + 0x1612b79,0x1e3feaf,0x003fbd6 }, + { 0x1bf968c,0x188eee8,0x11cb50d,0x1a91bf4,0x1558d7c,0x12d2b36, + 0x0488f90,0x08293e1,0x05c26d0,0x07c199c,0x105d0c3,0x03e2f85, + 0x19be7b8,0x08a1ece,0x0f70cf9,0x07f5dc7,0x03594fd,0x179c2d6, + 0x1f46046,0x039e853,0x0113755 } }, + /* 158 */ + { { 0x0193bb2,0x07aad90,0x01c924a,0x00e6217,0x16e579d,0x02e93b4, + 0x18c274d,0x114bdc0,0x0a87186,0x121f219,0x0e1a0e6,0x07c2220, + 0x0828c11,0x1199788,0x01bb3ce,0x1976905,0x0370385,0x199a455, + 0x1c5636b,0x1ff955d,0x00c6698 }, + { 0x0908745,0x062a57b,0x0fee811,0x08d466a,0x06b336e,0x10f410d, + 0x0a14b55,0x0fed298,0x0363491,0x194bcb8,0x184c546,0x077303e, + 0x0f6e102,0x17a352f,0x05f70af,0x09efed0,0x0af8e11,0x1c9ef50, + 0x15cb16f,0x1e79abd,0x0136c3c } }, + /* 159 */ + { { 0x1080de4,0x1ccd5bd,0x0e5aee1,0x1bad3b0,0x1b8f781,0x17c7b19, + 0x0aaaa61,0x194ed68,0x0a54bc5,0x0ba601c,0x0beee57,0x0c0b538, + 0x1076fcb,0x000bc49,0x146d102,0x0de1b08,0x0389d28,0x1a07806, + 0x1150c98,0x11d2a41,0x014c303 }, + { 0x177aad9,0x1e1c0b4,0x0f8f252,0x05ae10f,0x0dbfd08,0x0ff6845, + 0x008321d,0x1f80da1,0x0345656,0x0e7426a,0x1b753b8,0x11c01fa, + 0x0071c4d,0x152fd5a,0x0ce2c89,0x1d6de46,0x0c10bae,0x06a3bf5, + 0x1e0309b,0x161176b,0x0078e4d } }, + /* 160 */ + { { 0x078342a,0x0e89508,0x0190044,0x1cab342,0x0534725,0x09ffee8, + 
0x075643f,0x03fd48b,0x106f0ac,0x1b4a54f,0x06f1a73,0x15b67c3, + 0x00f6d24,0x1ceee68,0x18e3d7a,0x1ba9c79,0x166b632,0x09c2007, + 0x0578715,0x11fbf7c,0x0085cab }, + { 0x109422f,0x01fb5c6,0x10ec2a5,0x0c1f311,0x17d2975,0x19726c8, + 0x107e8bb,0x07eab48,0x135f7c1,0x1a1a91d,0x0b4ffd9,0x080fdb5, + 0x0d274d3,0x09a3921,0x10450d6,0x0c2bab2,0x1013bb8,0x08e5939, + 0x15de533,0x06e0097,0x007da04 } }, + /* 161 */ + { { 0x1712c44,0x1ccd316,0x15de092,0x114d2c4,0x148368f,0x0f11438, + 0x010cb59,0x1f11dad,0x06f5bc5,0x0014183,0x0d1e745,0x02429d8, + 0x10e6cf3,0x09936db,0x16dbd12,0x126d72d,0x098ca32,0x1e52d60, + 0x1fa886b,0x04918e5,0x004d69e }, + { 0x11269fb,0x0484953,0x0d802aa,0x1030ca1,0x0f6bdba,0x1aaed91, + 0x10a8e7e,0x1a03b39,0x16311e9,0x1e7586f,0x10b0743,0x0f39215, + 0x0a6faeb,0x058f9b9,0x04ec88b,0x0832647,0x1dfbc8c,0x0315379, + 0x1fa399d,0x1461645,0x00019de } }, + /* 162 */ + { { 0x0b3118b,0x144d609,0x0959f7d,0x1ad96dd,0x106ee39,0x1e6cbc6, + 0x08b0861,0x10f9f98,0x18d537d,0x0c2db40,0x15b6cae,0x02a5d3e, + 0x1575845,0x0f04c60,0x00e61c5,0x059a41f,0x1c83b21,0x1df4b52, + 0x06b0711,0x140671b,0x01fb3dd }, + { 0x1a0a9b8,0x1bff067,0x1dd7c1a,0x0fc45b9,0x1478bac,0x1443e44, + 0x178104d,0x179e702,0x0914c54,0x0c08eef,0x07a993b,0x02c01ea, + 0x17c8c24,0x064382b,0x045360d,0x17968c7,0x152a8ab,0x1769272, + 0x1913d4b,0x1d73d04,0x00019e5 } }, + /* 163 */ + { { 0x0d52313,0x0d02733,0x0af47d9,0x0a9a7ee,0x1d69454,0x1bd708f, + 0x176be9a,0x08e5781,0x0571ab2,0x10fbcec,0x0a35a24,0x12cd5cb, + 0x13d4c5f,0x1762e70,0x185dc5a,0x17a73fb,0x1a4b764,0x1b87376, + 0x04359e0,0x12810b3,0x01efffe }, + { 0x08f92e8,0x10713ec,0x08f3cfe,0x1b38ee2,0x021ef0f,0x13a6dd5, + 0x05d3224,0x0c4c4b3,0x1b9ba27,0x067d252,0x0f2bdb5,0x13a48dd, + 0x1010c90,0x07c7143,0x05e8436,0x1dd4406,0x1e1453a,0x1d83b8e, + 0x031ac28,0x188f22d,0x00eadf0 } }, + /* 164 */ + { { 0x0854477,0x00f2426,0x11f046f,0x090c71c,0x0bec25b,0x0e2a6c9, + 0x180ae1a,0x1a487a9,0x0be1e7e,0x18c6f19,0x18312b8,0x1d60d68, + 0x1ef5471,0x1521357,0x0b9efce,0x05b8271,0x0ddd845,0x091d713, + 
0x1e0b7a7,0x1f83aaa,0x01649d3 }, + { 0x0de1979,0x0571885,0x1ca361f,0x1a76978,0x0847041,0x01e4df5, + 0x0f1015b,0x0ce7124,0x0d74ae4,0x17f0c15,0x1926b8d,0x0de9d97, + 0x1592bff,0x0e20fcf,0x0036e03,0x00e2acd,0x06fe463,0x19add60, + 0x1b41cc1,0x11698fa,0x00c06d6 } }, + /* 165 */ + { { 0x14dfcf2,0x115f3c2,0x0f436f8,0x1f4d5c7,0x0e21a7d,0x10f6237, + 0x0eb4694,0x099e8c6,0x041a948,0x14a293d,0x048fcfb,0x1736554, + 0x121145e,0x0571e54,0x0d2a0ab,0x1b24aac,0x0a0fc85,0x070bb56, + 0x0420b63,0x19eff83,0x0078504 }, + { 0x199793c,0x073e21b,0x1ed75d3,0x116aa33,0x14ddd61,0x1fcc043, + 0x17e4e57,0x1cc59ed,0x1b8bf61,0x07522e8,0x13d53c0,0x0c27b9f, + 0x1026863,0x01801ad,0x108edd8,0x15396ce,0x1344028,0x14fde3a, + 0x14681df,0x059c6e0,0x00f47b5 } }, + /* 166 */ + { { 0x0bec962,0x1ec56cb,0x01ebafd,0x0c2fc02,0x11cc81f,0x07082c6, + 0x1142485,0x13ec988,0x142394c,0x014c621,0x18144db,0x0a5a34c, + 0x03d9100,0x086fc12,0x190dd52,0x1bd4986,0x01efe5c,0x09189df, + 0x09fedec,0x14c1efa,0x0076249 }, + { 0x0f593a0,0x1ac1c0e,0x1679d25,0x1706c98,0x0c9ceef,0x0e4cc88, + 0x04ccf81,0x1c65eb4,0x1421808,0x0752f0f,0x1a3d3cc,0x149e9eb, + 0x0756fb3,0x1b6065a,0x0b9b8ba,0x198d459,0x1fd08bd,0x1b05983, + 0x1fe3045,0x0f20381,0x001aee1 } }, + /* 167 */ + { { 0x1aa9e14,0x019b5c4,0x003f012,0x03ecece,0x0663427,0x15b4c03, + 0x010ce41,0x0469b54,0x1ebb7ab,0x0123f70,0x06814cc,0x154fd6b, + 0x15969b4,0x00007a6,0x03be096,0x0d6b7af,0x0eb4602,0x072ed9c, + 0x15a15b1,0x087cbaf,0x003b06a }, + { 0x12a0ee7,0x1741c76,0x004ea82,0x11e2dd1,0x04bbe52,0x13209b8, + 0x17d713a,0x0cf156d,0x006e298,0x1f4065b,0x07b4ad6,0x16e5e8b, + 0x1af19b1,0x0bb0a90,0x0733934,0x0de76f5,0x194aa51,0x09cd7fc, + 0x0d05a49,0x125d0d6,0x000797d } }, + /* 168 */ + { { 0x0f3a8ca,0x176f0ad,0x07b096b,0x054b86a,0x1392478,0x1f60401, + 0x08fefe4,0x16883cf,0x0e6f425,0x027c9e2,0x1d8026c,0x05d903c, + 0x06e4ec1,0x08c07fe,0x1cd9b51,0x1de74f2,0x1b50e0a,0x0e949e5, + 0x035c764,0x12d288d,0x0061a14 }, + { 0x15a67a1,0x02a0e33,0x041bd4b,0x011ebfd,0x07d38d3,0x1f4c473, + 
0x0f333da,0x10c54e1,0x0185898,0x101f65f,0x1c116eb,0x0c2ce0c, + 0x16ecd02,0x086546c,0x0b37664,0x0e6ba3f,0x08230c0,0x03d5085, + 0x0ca3c87,0x0fcaa86,0x00152a2 } }, + /* 169 */ + { { 0x0057e27,0x104f073,0x1368f75,0x0f8f48a,0x07e8b6a,0x196eadc, + 0x045147c,0x1c5feb3,0x0d0ef51,0x11cbd44,0x19d51ba,0x0d424aa, + 0x00c4986,0x19145a4,0x11722c4,0x132f5d4,0x077dd01,0x11edf07, + 0x14619f4,0x1d451f8,0x01f80e2 }, + { 0x1d0820b,0x0a096b4,0x08618a5,0x0e3d4cb,0x0317312,0x031c068, + 0x00887ac,0x00d84f9,0x075fe97,0x1fea77e,0x074941f,0x14aeb4e, + 0x037b396,0x03e5baa,0x1200147,0x17dc6c3,0x0d7ad4d,0x0f03eda, + 0x0c64b51,0x0903e93,0x01431c7 } }, + /* 170 */ + { { 0x0e1cc4d,0x1968204,0x07b97aa,0x075a5b8,0x093758d,0x0e39c9f, + 0x1f7f972,0x10619d6,0x1d33796,0x186c354,0x1e1b5d4,0x0795c49, + 0x0bef528,0x1858dd8,0x1746993,0x09c7956,0x01f54db,0x0cb555e, + 0x0f00316,0x1b0f987,0x01443e3 }, + { 0x160e7b0,0x141098e,0x0063942,0x16ba67a,0x1c9b629,0x0299c6f, + 0x1b90bf4,0x1d58a95,0x0e821c6,0x13c7960,0x10272c1,0x0ebe0d5, + 0x16e5c9d,0x0980c6f,0x0d5d44d,0x18ccf06,0x1ac0bf8,0x0c0e537, + 0x142b8b7,0x10041d3,0x00e17fc } }, + /* 171 */ + { { 0x1aaa5eb,0x0a3a08d,0x00da2b7,0x12f37b0,0x02cbb75,0x1ff6910, + 0x0310337,0x083b0d0,0x04e0911,0x011d478,0x122e1c7,0x03da40e, + 0x0965d14,0x12cf494,0x1a855d5,0x1b7fcb0,0x1cd5006,0x03e346b, + 0x095a69d,0x15a1be4,0x0148da0 }, + { 0x19069d7,0x062edbf,0x069323f,0x0ab80a6,0x0487d24,0x116d9d1, + 0x12267a6,0x0418b56,0x0b4fe97,0x15fea9c,0x1cd7914,0x1949a4f, + 0x1373a04,0x1716d64,0x0ef1527,0x1cfc4f9,0x09dff3e,0x0014391, + 0x036a4d8,0x130f1a5,0x00d0317 } }, + /* 172 */ + { { 0x166c047,0x1f4dd9d,0x187626d,0x12c0547,0x02e6586,0x0dce001, + 0x08a5f23,0x14689f0,0x1d08a74,0x13b5651,0x0e63783,0x0e3bf9a, + 0x0afbf1a,0x0190733,0x0edbaaa,0x13f8a5f,0x0bc179c,0x0541687, + 0x19eacad,0x019ede9,0x000f4e0 }, + { 0x090c439,0x0074d24,0x1ac9093,0x17786b4,0x17564a2,0x1ba4be9, + 0x11e7766,0x0852b48,0x1612de9,0x0ff9f86,0x1400ce8,0x0ff9cc1, + 0x1a35862,0x09120be,0x176a301,0x1070b02,0x0d4ef6b,0x1283082, + 
0x05ba5aa,0x0e51a5e,0x0120800 } }, + /* 173 */ + { { 0x1039042,0x191b955,0x13b65db,0x193f410,0x10e6978,0x1f60a18, + 0x174bd62,0x187a07f,0x1fe2045,0x1006080,0x16a4a0c,0x1ef5614, + 0x18e6868,0x130fd7f,0x1257477,0x044ca4d,0x127b7b1,0x1d0f100, + 0x0a97b45,0x07baf18,0x00898e6 }, + { 0x0bba4ee,0x099ed11,0x15d2ed9,0x0fe92d4,0x1eff639,0x19535c9, + 0x0a7dc53,0x07e8126,0x11dfdd7,0x041245e,0x1286c68,0x1e5cd37, + 0x0762f33,0x1d17019,0x05df992,0x1ee8334,0x19375dd,0x05e2874, + 0x095af47,0x152f3e9,0x0095b87 } }, + /* 174 */ + { { 0x1c1f177,0x19b54b3,0x0f27a0d,0x10c0026,0x1b6d350,0x164d2d8, + 0x0ee49ba,0x0392849,0x0c27ef3,0x14e00d3,0x0d21c1e,0x174a245, + 0x05ad93b,0x0e8d64c,0x0e538aa,0x02eb73d,0x006d53f,0x0288e01, + 0x040b645,0x1d64a4a,0x00b1d13 }, + { 0x15a1171,0x1edf5b3,0x0ac73f9,0x182d81a,0x1228295,0x1e44655, + 0x16d6815,0x19f1b64,0x0d300e9,0x1f54f4b,0x154badc,0x06fe4d2, + 0x1fb0e00,0x0f07cc6,0x0740d72,0x0901fd5,0x1b8d290,0x0c30724, + 0x00dacc6,0x1d2a258,0x0037a35 } }, + /* 175 */ + { { 0x100df48,0x194f747,0x0c13159,0x0c23590,0x189ca7b,0x1d4091d, + 0x15fe62c,0x1d492f4,0x1c21ca3,0x0218d8c,0x0cf39f8,0x1bd7c57, + 0x1945a73,0x16e3bc0,0x01b30ae,0x07be25f,0x1e4e5eb,0x02ff802, + 0x149f73c,0x0bbaf5b,0x005ef95 }, + { 0x0ee402f,0x117fd00,0x0d33830,0x1476617,0x1b335e2,0x1e5880a, + 0x1474190,0x110a84a,0x13cd196,0x10c1fa2,0x1952d31,0x1e45e17, + 0x04c6664,0x061066f,0x1d33fb9,0x188eb4b,0x12f80a4,0x0ee554b, + 0x04447b6,0x15e400b,0x019cde4 } }, + /* 176 */ + { { 0x171f428,0x085e46b,0x0e0a7a7,0x13c8794,0x1ac1ecd,0x09d6781, + 0x19203ae,0x07f1abd,0x1065a2a,0x11197c0,0x0e29cc5,0x1f545e1, + 0x021fc04,0x012a3a5,0x037df9c,0x0bede95,0x1f23bb1,0x128d627, + 0x0254394,0x0436e7c,0x006b66e }, + { 0x1a41dee,0x0c24033,0x0cfd672,0x1cf67c5,0x0cfa95a,0x0a2a709, + 0x00e1a24,0x148a9b3,0x1eefca6,0x06eedef,0x072dd7c,0x164823d, + 0x035f691,0x1f79046,0x0e79d9b,0x079ed53,0x00018b3,0x0f46f88, + 0x0705d2a,0x0ab593a,0x01c4b8a } }, + /* 177 */ + { { 0x04cccb8,0x1ac312e,0x0fbea67,0x125de9a,0x10bf520,0x17e43c3, + 
0x195da27,0x0dc51e9,0x0da1420,0x11b37cb,0x0841f68,0x1400f8a, + 0x1090331,0x0a50787,0x03533ab,0x08f608f,0x0e2472a,0x0d944cf, + 0x1081d52,0x0ca69cc,0x0110ae9 }, + { 0x0ed05b0,0x0eb2ae6,0x150cb30,0x1202eb2,0x0bac3f0,0x0bbe6bd, + 0x1c29239,0x0db75d6,0x140e98d,0x0580449,0x1493c61,0x0ca6c07, + 0x1d26983,0x12b90b9,0x051620c,0x083bcdc,0x1266111,0x00e9a45, + 0x1e89fcd,0x04afb9d,0x006be52 } }, + /* 178 */ + { { 0x147e655,0x1c799e4,0x1e56499,0x1411246,0x1f0fb76,0x011ce8f, + 0x19d15e4,0x19d65bf,0x03cdbb7,0x1043a49,0x1b5073a,0x1b720be, + 0x0821326,0x1cee2ac,0x06ba6b9,0x02e04b6,0x00ce9c3,0x070a29a, + 0x0b0e2a7,0x0058534,0x00c3075 }, + { 0x156ace2,0x12788e0,0x14a4304,0x0ef3fe4,0x0c170fe,0x08b8d91, + 0x06a05b8,0x12ec1bf,0x155de27,0x0cde541,0x131e768,0x0fd4f8d, + 0x101ad92,0x0eb0fbb,0x1640448,0x00d7650,0x026261c,0x1ff4064, + 0x08990ae,0x01a6715,0x015e405 } }, + /* 179 */ + { { 0x0ad87bc,0x0bc14f5,0x12f724e,0x0f03d09,0x00ac936,0x0f27ef7, + 0x10935ab,0x0ad6af3,0x1690d7f,0x05cd5d2,0x1ec2e54,0x13a7a29, + 0x16f09b2,0x12d073d,0x1a13c8c,0x09fe7a0,0x1d3606f,0x1828a74, + 0x02b5cce,0x17ba4dd,0x0077e63 }, + { 0x0d25c6d,0x0837670,0x173c2bf,0x1401745,0x1d90021,0x0dd9cc6, + 0x15dc231,0x1f83604,0x0198ff8,0x1bf836c,0x0b35a01,0x1fe36fc, + 0x1287d50,0x131d1ab,0x1d7815c,0x0b535de,0x092fa92,0x0df92bc, + 0x0e743a5,0x1a7be0e,0x0111847 } }, + /* 180 */ + { { 0x0c82924,0x1ce63ff,0x15a54aa,0x134e441,0x1c76dd6,0x1778710, + 0x09f7a81,0x0094c6a,0x0271839,0x19f28e1,0x001f22a,0x0bd4e2d, + 0x06f4db3,0x1a47892,0x0fb7829,0x0c12b1e,0x0444115,0x178a49b, + 0x1d2ce37,0x0b07a30,0x00f75f6 }, + { 0x1927eb7,0x0c4f085,0x049e8e4,0x1385c5e,0x087c635,0x14b37a5, + 0x108cdff,0x10a16e5,0x0105e55,0x015c1c1,0x10e7e44,0x000dcb1, + 0x0963fee,0x0c8da99,0x014bb8e,0x1f2f67e,0x14ccbaf,0x03fadc2, + 0x1e01418,0x1cbed8b,0x016a935 } }, + /* 181 */ + { { 0x1d88d38,0x101aaef,0x1d03c66,0x078a93b,0x155cd8e,0x080370a, + 0x0a78c13,0x1cc644e,0x0fd0b0c,0x0b5b836,0x0ab4c7c,0x18126be, + 0x1ff156d,0x1bd1efc,0x031484f,0x0bf6b66,0x092a55e,0x14f94e6, + 
0x0e16368,0x19fba85,0x0144a0e }, + { 0x0658a92,0x08aefa9,0x185ad70,0x0f88502,0x1ce3ed1,0x0c9548d, + 0x17dc1ff,0x12d4ab2,0x19cd5d8,0x11e45fe,0x11cac59,0x087eb52, + 0x1d07763,0x1819f0d,0x19132a2,0x005f629,0x1861e5c,0x113d0e4, + 0x113fecc,0x01e5899,0x01b5ece } }, + /* 182 */ + { { 0x1211943,0x13dd598,0x09705c4,0x0cad086,0x04a8cac,0x0afe1f2, + 0x02e2361,0x14ba5fc,0x0ce91ee,0x1d5d586,0x11f4491,0x1b88f1d, + 0x1a5d23d,0x066cff7,0x061b79c,0x0aecd47,0x0678265,0x11963dc, + 0x1abb1fe,0x080317d,0x00873e5 }, + { 0x18d17c1,0x1437959,0x103725b,0x18e3f40,0x1cbfbd0,0x024ce5c, + 0x0ade7e2,0x017c223,0x0f71ec8,0x0a3e2e7,0x025a487,0x17828d9, + 0x11acaa3,0x1e98b19,0x0487038,0x0ecb6bf,0x01ee768,0x018fd04, + 0x07bfc9c,0x15fabe8,0x00fed5d } }, + /* 183 */ + { { 0x0da1348,0x085cea6,0x04ea2bc,0x044b860,0x10769fd,0x0be115d, + 0x096c625,0x1888a15,0x1f5acf1,0x057eb63,0x1e00a57,0x02813fd, + 0x1dcf71a,0x17044fa,0x080a7d7,0x05751c2,0x0fb0fbd,0x04ba954, + 0x1dc32d6,0x044ebed,0x009061e }, + { 0x1bda16a,0x125628f,0x0a8adc2,0x13e3bf4,0x19910e7,0x0a2fb7b, + 0x184cb66,0x1df7459,0x0eb4ba4,0x086acd7,0x0b54f51,0x136697e, + 0x086a8e0,0x131063d,0x0040813,0x18de8ec,0x03d0a53,0x131fc4a, + 0x1fabd5a,0x123a330,0x013214c } }, + /* 184 */ + { { 0x10d66c3,0x1d89024,0x0813953,0x1141b90,0x0aed732,0x1a14a6f, + 0x130e012,0x0cf7402,0x131ddc4,0x197d155,0x0bb444f,0x0bd5068, + 0x0e70ff5,0x1181a70,0x0369cbc,0x1c78363,0x1bebd8a,0x156e186, + 0x1a51680,0x17bede7,0x009c179 }, + { 0x084c26f,0x09477ba,0x0ec51b2,0x03de55b,0x006b7db,0x0c6ed39, + 0x1d520fd,0x16c110f,0x04bc7ed,0x0f27106,0x12bf73f,0x043b2eb, + 0x00484d1,0x035f761,0x0d659c2,0x1b6cf8b,0x088a6d6,0x05abcd5, + 0x0461d22,0x0db0fc8,0x001522c } }, + /* 185 */ + { { 0x071d4ae,0x083abe2,0x09d82a2,0x0a8743b,0x1ef4b1a,0x1380d0f, + 0x0c609aa,0x1277125,0x059c65f,0x1a6a729,0x077cd6f,0x1253af1, + 0x12923af,0x05bce1f,0x12d1b18,0x1e26079,0x0e7cf4c,0x04aac16, + 0x15fc3b1,0x0103684,0x011c7da }, + { 0x0eef274,0x03572cd,0x020fe4b,0x1e286f8,0x06c5bf4,0x1e4357f, + 
0x0c08f84,0x0c154e9,0x02a2253,0x10ed673,0x027e974,0x057044b, + 0x0fb3d57,0x0fd3a58,0x128e45b,0x123527a,0x0dcb128,0x0f3b66c, + 0x07d33ef,0x12347eb,0x019aa03 } }, + /* 186 */ + { { 0x03fc3f1,0x1d34f10,0x08a4152,0x16c420d,0x09168cc,0x0afd4f8, + 0x01502ab,0x0df6103,0x0bff7ed,0x05c7907,0x052bf7b,0x0c317df, + 0x1b2c80a,0x1855e8e,0x1763282,0x014f9c4,0x041028e,0x13af33d, + 0x1ba56e6,0x0cc5bba,0x01b2dd7 }, + { 0x089d7ee,0x1f93cf9,0x01721f7,0x13dd444,0x0d755d5,0x056d632, + 0x1f55306,0x0335d61,0x17ec010,0x1462367,0x15c290e,0x1cfd691, + 0x186fc90,0x0859cf7,0x1714f04,0x0b4412c,0x1cc3854,0x122abbb, + 0x1f7408f,0x0861eea,0x016ea33 } }, + /* 187 */ + { { 0x1f53d2c,0x19ca487,0x06e7ea7,0x0d60069,0x0dc9159,0x0cbcb3c, + 0x1405356,0x115e214,0x1a8a6b7,0x0eb96d5,0x05ec413,0x0a8116a, + 0x00ef5de,0x1369cdf,0x0ae42f2,0x0fee028,0x1e9eda1,0x0657551, + 0x1acc446,0x0d13ac0,0x016da01 }, + { 0x06afff7,0x052b1fa,0x17cfa9b,0x14694bc,0x1945c7b,0x0cc7ec1, + 0x19322aa,0x0bd83ff,0x0b63f53,0x15300a3,0x1427950,0x1111a3e, + 0x1b50816,0x0fc6686,0x04636aa,0x0cee5a3,0x0bb78a3,0x13282f3, + 0x131b719,0x0075033,0x01ef4ab } }, + /* 188 */ + { { 0x176d986,0x04e8a69,0x16c0182,0x0f45b86,0x10f4e07,0x1f96436, + 0x1c2694f,0x1903822,0x1123c3f,0x17a5d22,0x15bf0bf,0x0b4e36c, + 0x1b852cd,0x0ff7d45,0x1f1d224,0x016ef6a,0x03e4811,0x0c7829c, + 0x0b1684a,0x0ba75aa,0x004c4b5 }, + { 0x1827633,0x067f9f9,0x1a59444,0x0bc015f,0x086784d,0x16997d0, + 0x1e208fa,0x10d9670,0x02b91cd,0x0e7a68b,0x0d8e28f,0x14b1cde, + 0x02078b6,0x145bfea,0x1e4844b,0x107ce66,0x04dee56,0x1b4b202, + 0x038a10c,0x08421e5,0x01223b8 } }, + /* 189 */ + { { 0x1ebeb27,0x054d4e1,0x03e1b0a,0x0a7deb2,0x17bcdcb,0x173f9be, + 0x0b84536,0x193d114,0x0726ea7,0x19a9172,0x104e200,0x070d182, + 0x1599d50,0x10b10ab,0x0c6bb29,0x0c9b0b3,0x1ebfcc5,0x138cfe7, + 0x0bae38d,0x0ef5e23,0x00433a5 }, + { 0x1eba922,0x1367037,0x1a4f0fc,0x1c8eb4a,0x1f6c83e,0x1f9bc72, + 0x19d00a2,0x1e2fef2,0x0bdc3f6,0x152f1b4,0x1642bb4,0x14154dd, + 0x153d034,0x0523e5e,0x070e931,0x0579076,0x06e4dce,0x1d27855, + 
0x132803a,0x0f5e86e,0x01c097c } }, + /* 190 */ + { { 0x1c28de7,0x1b8bc3c,0x0c3000d,0x1557386,0x017aa2a,0x1e30f5b, + 0x060999a,0x0088610,0x14d78b5,0x05adae7,0x03f1cb8,0x0a5b30e, + 0x05d76a7,0x0a05bde,0x11a27d7,0x1a07476,0x06787f2,0x0d4bfec, + 0x158182a,0x0f6bddf,0x01c06ab }, + { 0x1b71704,0x156d8ff,0x0ec7a67,0x16721fc,0x036e58b,0x078cd52, + 0x0e0b2ad,0x1b9dd95,0x0e0f3d9,0x12496fd,0x02b44b6,0x097adc4, + 0x022a0f5,0x1edde93,0x027e83d,0x1d6a95f,0x01ae8d2,0x06e6285, + 0x1df41d6,0x13f02dd,0x00b7979 } }, + /* 191 */ + { { 0x04f98cc,0x0323108,0x1aba7b1,0x04e55db,0x0511592,0x110c37a, + 0x0f741f9,0x16cf5d2,0x08d6d69,0x0be7013,0x0ea3cf4,0x0c11fa8, + 0x17b5347,0x1e055bc,0x1fc704d,0x1323bd0,0x1a8139f,0x11dfacb, + 0x151f835,0x0750b7c,0x008de29 }, + { 0x0f668b1,0x156e9c7,0x1d90260,0x1ac2392,0x054e6b2,0x0ea131e, + 0x1ac4870,0x0e679ce,0x0eff64e,0x09a5947,0x0584a8c,0x135850e, + 0x14af71a,0x1d049ac,0x1222bca,0x011d063,0x112ba91,0x105b248, + 0x13d0df6,0x178b8ab,0x01138fe } }, + /* 192 */ + { { 0x0a2daa2,0x052c4e2,0x0231fa7,0x18801ec,0x18ea703,0x0ba8818, + 0x1416354,0x052df19,0x04abb6f,0x1249a39,0x05aad09,0x07c3285, + 0x1d0be55,0x1628b2b,0x1e4e63e,0x01d5135,0x0ec4f88,0x0f1196f, + 0x1ec786c,0x02ec3cc,0x01372f8 }, + { 0x020f662,0x0a5e39d,0x1409440,0x1893db2,0x1fb7e77,0x15cb290, + 0x025bed8,0x0fd13ea,0x1a2e8d3,0x132ce33,0x105c38e,0x144cb00, + 0x140f2b2,0x0f6a851,0x1d3f39a,0x1801e2c,0x17efdc3,0x1d55229, + 0x13a6764,0x077fb49,0x0198f3c } }, + /* 193 */ + { { 0x1614189,0x0fae6c0,0x07deeac,0x0a4964b,0x07d56c4,0x1da0af6, + 0x092c917,0x1f38f75,0x07af6be,0x015e46e,0x123a08c,0x01c0e96, + 0x1f91b77,0x0db68d8,0x04cdb82,0x0192e94,0x157e668,0x0942e09, + 0x1f32d89,0x1970278,0x012d59b }, + { 0x0019927,0x0c1da3e,0x156f76b,0x0ec61bf,0x010f266,0x102e91f, + 0x1b168c7,0x0c02bb7,0x0456ac4,0x15372fd,0x12b208a,0x0a52487, + 0x0946956,0x06e464f,0x07271fd,0x080cb8d,0x009e24a,0x1d6d93f, + 0x1904c06,0x0f469d5,0x01ccdfa } }, + /* 194 */ + { { 0x1cb1a7d,0x14326ac,0x03b85da,0x06d5df7,0x0d864ca,0x11586c2, + 
0x0eb2c70,0x03a1dd0,0x1d980df,0x1405375,0x133b65f,0x1988ff2, + 0x15f582a,0x1d39608,0x073448c,0x0f76f45,0x0a8c710,0x0670951, + 0x1b6028c,0x1394ac9,0x0150022 }, + { 0x11c180b,0x05d6a97,0x08425dd,0x11ae935,0x108be99,0x0de8dd6, + 0x122ad5b,0x1352f18,0x00afbea,0x169f1f2,0x1717f1b,0x12f62a7, + 0x108a8be,0x0df49f6,0x11fc256,0x0477b5b,0x1082cee,0x1469214, + 0x109ca77,0x0a478db,0x0016417 } }, + /* 195 */ + { { 0x014a31e,0x16678b6,0x10b5d3b,0x0965bc7,0x088e253,0x1621e1a, + 0x0d665f3,0x06df376,0x1916ac9,0x10822ce,0x1910010,0x18053ef, + 0x0371d15,0x022a9ac,0x071f049,0x148cf19,0x08dec94,0x0e64baa, + 0x059eeb6,0x0cf0306,0x014e4ca }, + { 0x10312bf,0x1782ac6,0x19980ce,0x0aa82c3,0x1d1bf4f,0x00bc0ed, + 0x1169fe9,0x1aa4b32,0x000eef1,0x1a4a6d4,0x0ee340c,0x1d80f38, + 0x096c505,0x0e4fb73,0x0b86b78,0x01554e1,0x0c17683,0x0014478, + 0x18a8183,0x19fc774,0x000c7f4 } }, + /* 196 */ + { { 0x17d6006,0x1a23e82,0x02c0362,0x0dfae39,0x18b976e,0x07a07a9, + 0x180a6af,0x106bcef,0x0f103a7,0x1df71c3,0x1cb12c4,0x1840bc8, + 0x1420a6a,0x18fe58c,0x0c117d8,0x17e9287,0x19fc00a,0x0f2ee0e, + 0x1555ade,0x0178e14,0x01b528c }, + { 0x08640b8,0x083f745,0x004aea7,0x07a1c68,0x0561102,0x1257449, + 0x1956ef8,0x19b8f9c,0x0fa579d,0x1ac7292,0x0eff978,0x0e2a6ef, + 0x0457ce2,0x1e04a3f,0x19471b0,0x0f04cc8,0x150f4a9,0x12fdec6, + 0x0b87056,0x1ba51fc,0x008d6fc } }, + /* 197 */ + { { 0x07202c8,0x0517b2e,0x0362d59,0x04b4a96,0x1d63405,0x1a7dfab, + 0x159c850,0x1470829,0x01d9830,0x08a10af,0x03ef860,0x11aabde, + 0x1fc7a75,0x137abfc,0x01773e3,0x0d3a6ae,0x056d922,0x1aeea4d, + 0x16d27e5,0x02baf57,0x00f18f0 }, + { 0x0799ce6,0x188885a,0x1f6c1c4,0x1259796,0x15bbfb9,0x1d10f11, + 0x0327fde,0x1fd83e0,0x1b18f49,0x04eb489,0x1e566c0,0x12a3579, + 0x0e8da61,0x06a10a3,0x1a1c84c,0x047e21c,0x017ae5f,0x1aac194, + 0x0b9ce1a,0x0b76d13,0x0143c9b } }, + /* 198 */ + { { 0x0c74424,0x1946da4,0x0bad08c,0x03a3396,0x12616e1,0x0b710b9, + 0x064a903,0x0a5ca68,0x00cbdc7,0x0c1d4a6,0x0eec077,0x00a1ae6, + 0x005c623,0x0dbd229,0x0358c69,0x023919a,0x0259a40,0x0e66e05, + 
0x11b9f35,0x022598c,0x01e622f }, + { 0x01e4c4b,0x1714d1f,0x12291f5,0x113f62a,0x15f8253,0x09f18ce, + 0x016d53f,0x0ccfc6e,0x00a08b9,0x02672cd,0x0fa36e3,0x13cfb19, + 0x15bca74,0x17761eb,0x1125baa,0x0627b98,0x03a8a1a,0x00bee39, + 0x13ae4d8,0x1feef51,0x01a5250 } }, + /* 199 */ + { { 0x029bd79,0x103937f,0x0cd2956,0x009f321,0x0574a81,0x0ab4c1b, + 0x051b6ab,0x1ded20d,0x150d41f,0x12c055c,0x1dfd143,0x0a28dcd, + 0x0abc75b,0x1879b8c,0x03325ef,0x0810ea1,0x0a4a563,0x028dd16, + 0x1936244,0x0720efc,0x017275c }, + { 0x17ca6bd,0x06657fb,0x17d7cdf,0x037b631,0x00a0df4,0x0f00fbf, + 0x13fe006,0x0573e8d,0x0aa65d7,0x1279ea2,0x198fa6f,0x1158dc6, + 0x0d7822d,0x1f7cedb,0x0dfe488,0x15354be,0x19dabe4,0x13f8569, + 0x1a7322e,0x0af8e1e,0x0098a0a } }, + /* 200 */ + { { 0x0fd5286,0x0867a00,0x00f3671,0x0ae5496,0x1ea5b9d,0x0d739f0, + 0x03e7814,0x049ebcc,0x0951b38,0x14da8a1,0x13599ff,0x05a13f6, + 0x16b034b,0x16e2842,0x14dea03,0x0045c96,0x0128cb0,0x134f708, + 0x09522bb,0x173cb8d,0x00ed7c8 }, + { 0x133619b,0x003de6c,0x1865d18,0x1c573bf,0x0ce7668,0x1715170, + 0x1574f31,0x05f53dd,0x17eebf3,0x0d0a7af,0x113d90d,0x131acf9, + 0x0c75cb8,0x1c2860b,0x08617f1,0x1392d96,0x07645f7,0x004c3a5, + 0x1f6d1d1,0x11f15c4,0x0139746 } }, + /* 201 */ + { { 0x08684f6,0x13456e4,0x16ff177,0x16c334f,0x1c1edaa,0x1d0c7ab, + 0x05cd6c9,0x1d64b1a,0x18ecd89,0x13f3db2,0x07dfaac,0x138db0f, + 0x1b3d888,0x13eadf7,0x1f725b5,0x1ae7951,0x0ae37ba,0x1e426c3, + 0x1a395b5,0x1232ed9,0x01a4c7e }, + { 0x119ffa6,0x0d2a031,0x0131400,0x18269d8,0x0cae64e,0x0092160, + 0x0a5b355,0x1dc3ed3,0x0bf2cae,0x0d12cf7,0x1ba0167,0x0f18517, + 0x0488e79,0x1c74487,0x1212fae,0x0ffb3d2,0x0d0fb22,0x0072923, + 0x09758c6,0x054a94c,0x01b78be } }, + /* 202 */ + { { 0x072f13a,0x1aaa57a,0x0472888,0x0eae67d,0x1ac993b,0x00b4517, + 0x1a7c25b,0x06a4d5f,0x14b1275,0x07f3b0e,0x01c329f,0x10e7cee, + 0x1684301,0x03f3e6f,0x0daaab7,0x05da8cd,0x1eaa156,0x06d16ea, + 0x07ebe36,0x145c007,0x0016a81 }, + { 0x03de3bf,0x03ace27,0x022aa20,0x02a5e61,0x0c1e2e1,0x1f5d2d8, + 
0x1b66aa9,0x195965b,0x19f9c11,0x032eaa9,0x1170653,0x1b0f61b, + 0x010ab9b,0x051fa5b,0x0be325b,0x0bf3fa6,0x1cc28cb,0x1a4c217, + 0x0438877,0x1c4f997,0x00f431a } }, + /* 203 */ + { { 0x00ccd0a,0x10506b5,0x1554eca,0x04b3276,0x03eeec8,0x1339535, + 0x01bf677,0x19f6269,0x00da05d,0x0ce28a4,0x061d363,0x089ace7, + 0x09c4aa4,0x114d1ae,0x13cd6cb,0x0fd5bb3,0x15f8917,0x0eb5ecd, + 0x0811c28,0x01eb3a5,0x01d69af }, + { 0x07535fd,0x02263dd,0x1ce6cbe,0x1b5085f,0x05bd4c3,0x08cba5a, + 0x127b7a5,0x1d8bfc2,0x1fd4453,0x0c174cb,0x0df039a,0x00bbcd8, + 0x0aa63f7,0x0961f7b,0x0c3daa7,0x151ac13,0x1861776,0x05f6e9a, + 0x17846de,0x1148d5d,0x0176404 } }, + /* 204 */ + { { 0x1a251d1,0x03772a8,0x17f691f,0x041a4f3,0x1ef4bf1,0x08c5145, + 0x14e33b1,0x0dc985a,0x13880be,0x195bc43,0x06c82c6,0x1f1c37d, + 0x1ec69cc,0x1bcb50c,0x077fab8,0x17bd5c8,0x1c9fb50,0x012b3b7, + 0x0f86030,0x02b40a0,0x016a8b8 }, + { 0x1f5ef65,0x042fb29,0x0414b28,0x12ef64a,0x01dfbbf,0x1a37f33, + 0x01f8e8c,0x1df11d5,0x01b95f7,0x0eefef7,0x17abb09,0x1cd2b6c, + 0x1b22074,0x0617011,0x01a6855,0x0776a23,0x17742e8,0x0c300da, + 0x0a1df9f,0x08ca59f,0x0015146 } }, + /* 205 */ + { { 0x1fa58f1,0x029e42b,0x19c0942,0x1099498,0x158a4e6,0x00fa06d, + 0x1b4286e,0x17a0f72,0x0558e8c,0x0328f08,0x0e233e9,0x08dc85c, + 0x081a640,0x0221b04,0x0c354e5,0x11fa0a3,0x1b3e26b,0x1615f9a, + 0x1c0b3f3,0x0f0e12a,0x00fd4ae }, + { 0x153d498,0x0de14ef,0x1890f1e,0x1c226fe,0x0cf31c4,0x11e76fa, + 0x015b05e,0x0bb276d,0x06cd911,0x030898e,0x03376c9,0x08a7245, + 0x11ab30a,0x069015f,0x1dd5eda,0x10c25d2,0x07ce610,0x053336f, + 0x1d809ad,0x01fcca9,0x0051c20 } }, + /* 206 */ + { { 0x1a2b4b5,0x1081e58,0x05a3aa5,0x1d08781,0x18dccbf,0x17fdadc, + 0x01cb661,0x184d46e,0x0169d3a,0x1d03d79,0x0dc7c4b,0x1734ee2, + 0x0f8bb85,0x13e14cf,0x18434d3,0x05df9d5,0x069e237,0x09ea5ee, + 0x17615bc,0x1beebb1,0x0039378 }, + { 0x07ff5d9,0x0817fef,0x0728c7a,0x0464b41,0x0e9a85d,0x0c97e68, + 0x04e9bd0,0x167ae37,0x115b076,0x0952b9b,0x047473d,0x150cdce, + 0x19d726a,0x1614940,0x186c77c,0x0bbcc16,0x15cc801,0x191272b, + 
0x02de791,0x1127c23,0x01dc68e } }, + /* 207 */ + { { 0x1feda73,0x127fcb7,0x0062de4,0x0d41b44,0x0709f40,0x0ac26ff, + 0x083abe2,0x0806d1c,0x08355a0,0x04a8897,0x1df5f00,0x0a51fae, + 0x08259d4,0x15fc796,0x1125594,0x0623761,0x12844c5,0x0bfb18c, + 0x119b675,0x1a1c9f0,0x00d5698 }, + { 0x15d204d,0x0b27d00,0x114f843,0x14dba21,0x1b626bf,0x14c64a3, + 0x0398e9d,0x0ac10ff,0x105337a,0x12d32a3,0x11e0bd4,0x0489beb, + 0x1f558e2,0x02afdd7,0x0a87906,0x0706091,0x18e47ee,0x1a47910, + 0x0e118f4,0x0472b22,0x004df25 } }, + /* 208 */ + { { 0x0695310,0x07eb4ec,0x03a9dbd,0x1efd0ed,0x028eb09,0x0a99547, + 0x0604b83,0x0f20738,0x0c572ac,0x0d33ba2,0x158a4f7,0x01c0f0b, + 0x121f980,0x1ed3b5d,0x1f8a968,0x0e42e57,0x190a2bc,0x13768ad, + 0x05e22a3,0x1cc37fa,0x004cd80 }, + { 0x0730056,0x001b80b,0x150ee7d,0x1fb9da7,0x06f45fe,0x1283a12, + 0x1d8f06a,0x0e615fa,0x0ff92ae,0x0f2e329,0x0818fc8,0x061a376, + 0x006ef08,0x096912a,0x0c1bb30,0x0003830,0x13a1f15,0x0276ecd, + 0x0331509,0x164b718,0x01f4e4e } }, + /* 209 */ + { { 0x1db5c18,0x0d38a50,0x1d33b58,0x1cecee0,0x1454e61,0x1b42ef4, + 0x1ef95ef,0x1cbd2e1,0x1d2145b,0x10d8629,0x0697c88,0x1037dc9, + 0x03b9318,0x0a588e8,0x0e46be8,0x0426e01,0x0493ec2,0x1e3577f, + 0x098802b,0x0a9d28a,0x013c505 }, + { 0x164c92e,0x022f3b9,0x03a350b,0x0ae6a43,0x0050026,0x09f9e2f, + 0x1680a13,0x0d7a503,0x0dbf764,0x097c212,0x1cc13cc,0x1e5490b, + 0x13e1a88,0x0893d28,0x0fd58c4,0x1c178b0,0x0c71a60,0x076bca8, + 0x0dedc29,0x0abc209,0x00c6928 } }, + /* 210 */ + { { 0x04614e7,0x10c2e32,0x1092341,0x1c8e934,0x0e906ca,0x03f2941, + 0x04ba896,0x19ab0a8,0x0d12857,0x1b1cc85,0x164ed4d,0x1ee174a, + 0x06770c7,0x0eae952,0x13db713,0x1437585,0x0563b69,0x12b26d2, + 0x01e2576,0x1efc283,0x01c8639 }, + { 0x0589620,0x0b5817c,0x0150172,0x0683c88,0x0fe468a,0x15684e1, + 0x1684425,0x1dd7e45,0x09c652a,0x039e14c,0x186e3ef,0x1f16a8f, + 0x13cdef9,0x0bbedfb,0x1cde16a,0x0aa5ae0,0x1aa7e13,0x1854950, + 0x08e4f4f,0x0c22807,0x015b227 } }, + /* 211 */ + { { 0x1bfaf32,0x0d3d80f,0x1486269,0x017ccc3,0x1c5a62d,0x11da26a, + 
0x03d7bd7,0x0c48f2e,0x1f43bbf,0x15000f6,0x0b9680f,0x050a4c1, + 0x0ca8e74,0x134be31,0x0267af4,0x0ec87d7,0x1e6751a,0x11b5001, + 0x081c969,0x0f18a37,0x00eaef1 }, + { 0x1d51f28,0x1c74fcd,0x0112ab3,0x1750e24,0x19febbd,0x1e41b29, + 0x0b4e96f,0x11f0f01,0x110e6f0,0x0451a66,0x06ac390,0x1421048, + 0x018104c,0x0c53315,0x0f9c73a,0x091ad08,0x1142320,0x1cee742, + 0x13cf461,0x14477c3,0x01fa5cb } }, + /* 212 */ + { { 0x173a15c,0x064e914,0x07ccbfa,0x1ba852f,0x06fec8d,0x157d9f3, + 0x128e42d,0x044735e,0x0ab65ef,0x1d8f21b,0x17f36c2,0x003ccd8, + 0x0b8f262,0x0d7a438,0x1ffa28d,0x09c4879,0x06f2bb4,0x132d714, + 0x07745c8,0x1c5074a,0x0114da2 }, + { 0x1e3d708,0x04d2b60,0x1e992a7,0x1e3961d,0x0fe62d3,0x143aa02, + 0x0a6125f,0x1f5e0e0,0x13cea46,0x1c5beb5,0x01898c4,0x069d071, + 0x0907806,0x18e1848,0x1a10a01,0x10c8e4f,0x1d7e583,0x1f857bc, + 0x08da899,0x10cb056,0x0104c1b } }, + /* 213 */ + { { 0x126c894,0x184f6d2,0x148ccbf,0x002958f,0x15abf12,0x0c949a4, + 0x13734f3,0x0ad6df2,0x092e6b5,0x1d57589,0x1b0c6ff,0x0dd4206, + 0x0e19379,0x183ff99,0x148df9d,0x0cf7153,0x10d829d,0x1eb2d2d, + 0x0ca4922,0x1b6aadb,0x01b348e }, + { 0x0d46575,0x0fcd96f,0x0b3dbba,0x15ff4d3,0x096ca08,0x169be8a, + 0x0ce87c5,0x003ab5d,0x1789e5d,0x1283ed8,0x1f31152,0x1c53904, + 0x1705e2c,0x14b2733,0x0db9294,0x08de453,0x0ba4c0e,0x082b1d8, + 0x0f11921,0x1848909,0x00a3e75 } }, + /* 214 */ + { { 0x0f6615d,0x1a3b7e9,0x06a43f2,0x11b31b5,0x0b7f9b7,0x1ef883a, + 0x17c734a,0x063c5fb,0x09b956f,0x1ed1843,0x1bab7ca,0x05ef6b2, + 0x18f3cca,0x1aad929,0x1027e2c,0x08db723,0x0f3c6c8,0x12379fb, + 0x085190b,0x12731c5,0x01ff9bb }, + { 0x17bd645,0x06a7ad0,0x1549446,0x17b7ada,0x17033ea,0x0684aba, + 0x01bf1cd,0x06a00fd,0x15f53c4,0x065032f,0x1f74666,0x137ffa4, + 0x0a9949d,0x14a968e,0x1138c11,0x02039bb,0x0fb81ac,0x1c2655a, + 0x095ac01,0x00f3f29,0x000346d } }, + /* 215 */ + { { 0x0bfdedd,0x1c727d3,0x1be657a,0x1cf4e98,0x193a285,0x04d1294, + 0x15344f4,0x0cf17ab,0x019a5f7,0x15085f3,0x0ecd03a,0x107c19d, + 0x03d3db0,0x0edfbd4,0x0ce9e2c,0x047c38c,0x03ec30f,0x093325e, + 
0x1e820de,0x01f1e20,0x01c9663 }, + { 0x0f86a80,0x065a5ef,0x06aeefd,0x107f04b,0x1fa4ec7,0x0a99640, + 0x1d81182,0x125497e,0x08b909e,0x0ddbd66,0x010581c,0x062e2f1, + 0x08ca1d7,0x050d5c9,0x1fc52fb,0x0ab4afe,0x16e5f84,0x0dff500, + 0x1c87a26,0x18ed737,0x002d7b8 } }, + /* 216 */ + { { 0x19f8e7d,0x102b1a5,0x02a11a1,0x0ec7f8b,0x001176b,0x176b451, + 0x169f8bf,0x121cf4b,0x0651831,0x033bb1f,0x1deb5b3,0x0205d26, + 0x017d7d0,0x1b81919,0x1f11c81,0x16a0b99,0x031534b,0x0ab9f70, + 0x1c689da,0x03df181,0x00f31bf }, + { 0x0935667,0x1ae2586,0x0e2d8d7,0x120c1a5,0x14152c3,0x01d2ba3, + 0x0b0b8df,0x19bdff5,0x00b72e0,0x0afe626,0x18091ff,0x1373e9e, + 0x13b743f,0x1cf0b79,0x10b8d51,0x1df380b,0x0473074,0x1d111a6, + 0x056ab38,0x05e4f29,0x0124409 } }, + /* 217 */ + { { 0x10f9170,0x0bc28d9,0x16c56ff,0x126ff9c,0x115aa1e,0x021bdcb, + 0x157824a,0x0e79ffa,0x1c32f12,0x056692c,0x1878d22,0x19e4917, + 0x0b5a145,0x1d2de31,0x0d02181,0x0de8c74,0x1151815,0x1b14b75, + 0x1dd3870,0x1f5a324,0x01e7397 }, + { 0x08225b5,0x1ccfa4e,0x1134d8b,0x128d6ef,0x13efce4,0x00f48d9, + 0x1d4c215,0x1268a3b,0x038f3d6,0x1e96c9a,0x1ed5382,0x05adce4, + 0x000b5de,0x1b116ca,0x164a709,0x1529685,0x12356f6,0x09b5673, + 0x132bc81,0x0319abf,0x004464a } }, + /* 218 */ + { { 0x1a95d63,0x10555d5,0x11b636f,0x02f6966,0x12780c6,0x06c0a14, + 0x1e18c38,0x098c861,0x0b56ef0,0x1adf015,0x18d8ce1,0x172af0b, + 0x04c28fe,0x009649f,0x1005e57,0x10547aa,0x1c1e36f,0x144ffa8, + 0x03babf5,0x11912a2,0x016b3c4 }, + { 0x0f064be,0x03f5d6a,0x0a65e4a,0x0aa9d7b,0x1a77d55,0x1b93f50, + 0x17bc988,0x18c8ce8,0x189f366,0x088fac8,0x15baf6a,0x0b9b8b3, + 0x137e543,0x1a92690,0x0136ba9,0x1671a75,0x11c4395,0x0e3d8ee, + 0x0a08f12,0x07ce083,0x001cca1 } }, + /* 219 */ + { { 0x14d64b0,0x0c30643,0x18318e6,0x042ca79,0x1375b09,0x108cc31, + 0x00003aa,0x0ba2ce0,0x1621cd1,0x1633c84,0x1c37358,0x1bacefa, + 0x0dbe1d7,0x182dea6,0x1c3c9c0,0x11e61df,0x021362f,0x003b763, + 0x19116de,0x00902cf,0x01d8812 }, + { 0x01f9758,0x04d070b,0x138a05d,0x1d4789f,0x060915f,0x0eec57f, + 
0x1390644,0x013ea6f,0x079a51a,0x11b5456,0x173e3bf,0x0968594, + 0x1567fb5,0x12482bf,0x172b81f,0x096c837,0x0c5a424,0x1db8ff8, + 0x0d81960,0x0b4a6c9,0x0106481 } }, + /* 220 */ + { { 0x139cc39,0x14e1f77,0x1b45e31,0x09f4c6a,0x1830456,0x17dcc84, + 0x0d50904,0x14b7a78,0x179dbb2,0x0ea98e9,0x1d78f68,0x0311cfc, + 0x114865f,0x0580a3d,0x0b13888,0x135605b,0x1ca33d2,0x1facf28, + 0x1ec1d3b,0x09effc6,0x00f1c96 }, + { 0x0301262,0x0605307,0x08b5c20,0x00a7214,0x1a45806,0x054814c, + 0x1fe6b32,0x185b4ce,0x114c0f1,0x1d7482b,0x1b67df7,0x1e2cdcc, + 0x043665f,0x03c2349,0x19b7631,0x060f990,0x18fc4cc,0x062d7f4, + 0x02fd439,0x0774c7c,0x003960e } }, + /* 221 */ + { { 0x19ecdb3,0x0289b4a,0x06f869e,0x0ff3d2b,0x089af61,0x106e441, + 0x0cae337,0x02aa28b,0x07c079e,0x1483858,0x089057f,0x09a6a1c, + 0x02f77f0,0x1ac6b6a,0x0adcdc8,0x0c53567,0x1b9ba7b,0x08a7ea0, + 0x1003f49,0x05b01ce,0x01937b3 }, + { 0x147886f,0x006a6b8,0x072b976,0x02aed90,0x008ced6,0x138bddf, + 0x01a4990,0x043c29d,0x0abb4bd,0x0e6f8cc,0x00c22e7,0x0c8cca6, + 0x07658be,0x0cce8ce,0x1c64b6b,0x1624df7,0x1b3304a,0x0aad1e8, + 0x089378c,0x1e97cbf,0x000e943 } }, + /* 222 */ + { { 0x1e9ea48,0x1202c3f,0x121b150,0x0ac36ae,0x0f24f82,0x18cba05, + 0x104f1e1,0x09b3a58,0x170eb87,0x1d4df3c,0x0e8ea89,0x11c16c5, + 0x0c43fef,0x160df85,0x08fca18,0x061c214,0x0f34af1,0x1a8e13b, + 0x19573af,0x1a3d355,0x0185f6c }, + { 0x0369093,0x17d3fa0,0x1828937,0x0cb0b03,0x11f1d9d,0x0976cf0, + 0x0fccf94,0x12d3201,0x1ed1208,0x1c5422c,0x0f0e66f,0x0abd16e, + 0x1e83245,0x07b7aa7,0x08c15a6,0x046aaa9,0x1a53c25,0x0954eb6, + 0x0824ecc,0x0df2085,0x016ae6a } }, + /* 223 */ + { { 0x12cdd35,0x091e48a,0x1bc6cb8,0x110c805,0x0e6e43a,0x072dead, + 0x1c37ee7,0x0291257,0x0758049,0x0565c25,0x0bbb0ad,0x0bffea0, + 0x0e8c7f5,0x1519f7a,0x029ee4e,0x0400339,0x157fd9d,0x1835881, + 0x0e8ef3a,0x033fe01,0x00273e3 }, + { 0x1e360a3,0x017bbd5,0x129860b,0x095bfdf,0x17ef5c8,0x05b7e62, + 0x0329994,0x005349e,0x0aaf0b2,0x1a7c72b,0x1bc558f,0x1141449, + 0x135c850,0x0f522f8,0x1d8bf64,0x0db7db1,0x1a02803,0x1f96491, + 
0x093440e,0x1949803,0x018a4a9 } }, + /* 224 */ + { { 0x048e339,0x1dbcc2a,0x05d8a8f,0x1e31473,0x1e8770c,0x148b866, + 0x15d35e9,0x15822c0,0x12b6067,0x1d82e2c,0x04e2ad2,0x1b61090, + 0x14de0d2,0x0484f3c,0x076ae49,0x02bee29,0x0b67903,0x041d19b, + 0x0cd6896,0x00e9b34,0x013ccd9 }, + { 0x01b784d,0x0e2f056,0x0b87a0e,0x0ddca4f,0x0b65c8c,0x0447605, + 0x1851a87,0x0b1a790,0x046c1bf,0x100fbc8,0x0940a88,0x0c4e7fb, + 0x0571cec,0x112dc83,0x0fe23ac,0x1bf9bfe,0x098c556,0x0360f86, + 0x013e973,0x0445549,0x00acaa3 } }, + /* 225 */ + { { 0x1b4dfd6,0x1a5e1e4,0x0a4c5f9,0x07f1cec,0x05ba805,0x061a901, + 0x1701676,0x168060f,0x0b85a20,0x0481b66,0x1c4d647,0x1e14470, + 0x0ef2c63,0x054afda,0x0676763,0x18d8c35,0x1399850,0x01ebe27, + 0x00a659a,0x12d392d,0x0169162 }, + { 0x163ee53,0x1e133e5,0x0d4df44,0x02ebd58,0x07b12e6,0x0d5fe53, + 0x0684464,0x13f666d,0x1ee1af6,0x168324e,0x10479d6,0x1e0023b, + 0x054d7a6,0x0dcfcbb,0x1c0c2e3,0x0266501,0x1a3f0ab,0x1510000, + 0x0763318,0x1931a47,0x0194e17 } }, + /* 226 */ + { { 0x18fe898,0x0c05a0e,0x14d1c83,0x0e64308,0x0d7a28b,0x190ba04, + 0x10e1413,0x15fe3e7,0x1166aa6,0x09c0e6a,0x1838d57,0x010998a, + 0x0d9cde6,0x0f30f16,0x0107c29,0x12a3596,0x0f5d9b4,0x031088b, + 0x1b8ab0b,0x1c2da6f,0x00c4509 }, + { 0x06fd79e,0x1106216,0x0c3ae0a,0x1c75ef1,0x15b7ee4,0x0c0ce54, + 0x18f06eb,0x0d27b36,0x0985525,0x06b3a6f,0x06743c4,0x0965f38, + 0x0917de6,0x03e2f35,0x0feaebd,0x1b6df40,0x0ad2ce2,0x142c5e2, + 0x1f27463,0x0470143,0x00c976c } }, + /* 227 */ + { { 0x064f114,0x18f7c58,0x1d32445,0x0a9e5e1,0x03cb156,0x19315bc, + 0x161515e,0x0d860a4,0x10f3493,0x1463380,0x107fb51,0x05fd334, + 0x09ef26d,0x13fbfb5,0x168899e,0x1f837ed,0x0dba01b,0x012b1dc, + 0x0d03b50,0x06d90b8,0x000e14b }, + { 0x1db67e6,0x1f13212,0x017d795,0x12fe5d2,0x05df4e8,0x1621344, + 0x1945009,0x126f065,0x03e8750,0x095f131,0x0e1a44c,0x17b078a, + 0x1d856b5,0x0ab9a7c,0x072b956,0x090c2b6,0x1e2d5aa,0x02d03df, + 0x1a2aed6,0x192de19,0x01d07a4 } }, + /* 228 */ + { { 0x03aa2e9,0x0a682a9,0x0181efd,0x19da7a1,0x08841e0,0x0dfdb4e, + 
0x1db89fe,0x10aad07,0x0162bdf,0x0583fa2,0x0373277,0x10720f6, + 0x0e62d17,0x12bd29b,0x12ee2ad,0x0fa7945,0x0d27cf4,0x04c5cd0, + 0x1ba98dc,0x0a9ad0b,0x01f2ff1 }, + { 0x0b232ac,0x1bb452b,0x0aad5a2,0x0c7e54a,0x0e8d6e3,0x1bfe302, + 0x1e85a20,0x12375d0,0x1d10a76,0x1e2c541,0x157efba,0x15e1f28, + 0x0ead5e4,0x1eb2a71,0x0835b0d,0x104aa34,0x0b9da7c,0x0c6207e, + 0x0366e4c,0x1679aec,0x00b26d7 } }, + /* 229 */ + { { 0x12eaf45,0x0861f5d,0x04bdec2,0x18c5ff7,0x0d24d91,0x1b791ef, + 0x0fa929c,0x1c77e54,0x16ff0fd,0x0dccf5e,0x040bd6d,0x0abb942, + 0x08bca2b,0x03f0195,0x080f360,0x02f51ec,0x048a8bf,0x0aa085a, + 0x077156c,0x0cc14fc,0x0109b86 }, + { 0x0a2fbd8,0x058ed01,0x0296c52,0x167645d,0x1ed85e8,0x095a84f, + 0x083921c,0x02c26f1,0x0c6a3e5,0x02b00a4,0x0ed40da,0x04382c6, + 0x1171009,0x12a8938,0x049450c,0x0208f27,0x1d207d3,0x1bda498, + 0x150b82e,0x1ce4570,0x00ea623 } }, + /* 230 */ + { { 0x0972688,0x011e992,0x1d88212,0x04007ea,0x18b83c1,0x06a2942, + 0x19a41b4,0x0fc329a,0x02c6f74,0x010cac2,0x1b626a1,0x05d2028, + 0x02c8f8a,0x1a28dde,0x1b0779d,0x109f453,0x0b8f7f2,0x1fb115b, + 0x0dc7913,0x03b7d2f,0x006083f }, + { 0x19dd56b,0x04999cc,0x17a6659,0x152f48f,0x0cfac0b,0x147d901, + 0x162baef,0x194ccc1,0x0f61d7b,0x1e14eec,0x1705351,0x0a3b0b5, + 0x1c6f5fb,0x07cfea0,0x16b1e21,0x07cd9cc,0x1d4ff51,0x10e734e, + 0x1f9674f,0x1cb23df,0x00231ac } }, + /* 231 */ + { { 0x1fda771,0x1d21c54,0x0038b99,0x190cc62,0x026f652,0x19f91db, + 0x0792384,0x03fbf63,0x0035d2d,0x0cfc479,0x0fa1e16,0x02251a2, + 0x071723a,0x1da8e70,0x02a8a4b,0x1750512,0x10ebbd9,0x072f9d3, + 0x1d1452d,0x104ce66,0x0155dde }, + { 0x0f59a95,0x15bbf6b,0x108022c,0x0604040,0x13f853e,0x163bcbc, + 0x0ab07ae,0x0eca44a,0x1b56b66,0x166e5cc,0x0a9401b,0x13f32e4, + 0x104abdb,0x02715d6,0x0843cfc,0x1ba9a4c,0x0ff3034,0x08652d0, + 0x0b02e03,0x1b0101b,0x0041333 } }, + /* 232 */ + { { 0x1a85a06,0x083849a,0x0d13a14,0x0c85de3,0x0e166e7,0x1d9d36a, + 0x02dc681,0x0d50952,0x030329e,0x16eb600,0x1549675,0x14ca7aa, + 0x1e20c4b,0x17c5682,0x0ec9abd,0x1999bdc,0x1412ab4,0x01071ea, + 
0x0501909,0x1312695,0x01bd797 }, + { 0x00c7ff0,0x0e8c247,0x0d03ca8,0x192a876,0x1ae85ef,0x0e98c5d, + 0x0c6bbd4,0x14dd2c8,0x075878f,0x0e9f6a7,0x057d4b9,0x13b7851, + 0x1c4d2a2,0x0f88833,0x1c9e1dc,0x09dca75,0x1649e7f,0x13666f4, + 0x15b5d36,0x111b434,0x0192351 } }, + /* 233 */ + { { 0x1d310ed,0x1909001,0x0c46c20,0x1930f60,0x120ee8c,0x02ac546, + 0x0749a13,0x1913ca9,0x0b7167e,0x112f9e7,0x156ed57,0x09e897e, + 0x17acf11,0x030e480,0x07b71dc,0x0878103,0x0e6deb3,0x0bacd22, + 0x1326d7b,0x1f3efc0,0x007858d }, + { 0x1f13222,0x03f5d9d,0x08453e9,0x1bd40fb,0x1e451dc,0x0c12178, + 0x1eb0f03,0x03c37d3,0x136eb87,0x192bea6,0x0c64364,0x0eb57d4, + 0x13f49e7,0x075f159,0x1b4647d,0x0012c80,0x13c0c11,0x033d562, + 0x0e06b1e,0x0b9f17a,0x01f4521 } }, + /* 234 */ + { { 0x0493b79,0x145477d,0x0ab0e1f,0x169d638,0x120e270,0x1911905, + 0x0fe827f,0x07b3e72,0x0a91c39,0x170dd57,0x0a36597,0x0c34271, + 0x04deda9,0x0bdea87,0x0ac8e32,0x191c0d3,0x08a2363,0x17fb46a, + 0x1931305,0x1c01cb9,0x0158af8 }, + { 0x1c509a1,0x0e78367,0x01d5b33,0x1f84d98,0x00f411e,0x0e2bf83, + 0x17f5936,0x158da19,0x132e99c,0x0a8a429,0x1a5442a,0x167b171, + 0x1d58f9a,0x1886e1f,0x1a61c26,0x06a134f,0x03d75ef,0x1c1c842, + 0x0a4c4b1,0x1993a0b,0x01b628c } }, + /* 235 */ + { { 0x141463f,0x1a78071,0x1e80764,0x1c2a1b4,0x14c8a6c,0x04aa9f8, + 0x183f104,0x123b690,0x0a93f4a,0x11def2d,0x16019f0,0x0f0e59a, + 0x009f47c,0x0219ee4,0x0cc0152,0x054fa3a,0x1f975a3,0x08605f3, + 0x031d76a,0x0eefab1,0x012e08b }, + { 0x1a10d37,0x0940bb0,0x16977f0,0x02b8a1e,0x0d7b618,0x03be307, + 0x0576de5,0x016515f,0x133c531,0x05515bb,0x06099e8,0x1570a62, + 0x1f905fa,0x15a0cac,0x03a6059,0x0ef09e8,0x05216b3,0x04e65a1, + 0x0619ab3,0x0baef8d,0x00c5683 } }, + /* 236 */ + { { 0x1450a66,0x18a6595,0x1053a75,0x18fb7fb,0x1318885,0x1350600, + 0x03616d1,0x14ccab5,0x15bdfc1,0x1510f4c,0x1e4b440,0x1931cce, + 0x177a0d7,0x1aa853c,0x006ed5e,0x1a66e54,0x0335d74,0x0a16231, + 0x036b525,0x09c3811,0x008b7be }, + { 0x1812273,0x1d81fca,0x15fc61c,0x05dc7ee,0x0e26ed3,0x1310bd1, + 
0x03ab9b6,0x09e58e2,0x0261d9f,0x1a85aba,0x0768b66,0x1f536f8, + 0x0743971,0x02542ef,0x113ee1f,0x026f645,0x051ec22,0x17b961a, + 0x1ee8649,0x0acd18e,0x0173134 } }, + /* 237 */ + { { 0x03ba183,0x1463d45,0x1e9cf8f,0x17fc713,0x0e8cebb,0x0dd307a, + 0x11a1c3e,0x1071d48,0x1cb601a,0x08bb71a,0x14b6d15,0x184c25c, + 0x11f90bd,0x07b895f,0x1e79166,0x0a99b2b,0x00fbea0,0x1cde990, + 0x157f502,0x0337edb,0x017a2cf }, + { 0x0736feb,0x1b65133,0x18bdc73,0x13bcf9f,0x1de86f4,0x1482b1d, + 0x0f3a3f0,0x09f8c15,0x0726b6e,0x17451e7,0x048d6ea,0x088a7e5, + 0x1ed2382,0x1287fd2,0x0d55fd5,0x1ee8949,0x054113e,0x150a29f, + 0x1909b74,0x0ed4a67,0x01b07c6 } }, + /* 238 */ + { { 0x1d96872,0x101f91a,0x032bd79,0x187f4b7,0x0b1a23c,0x046e2fd, + 0x01c6fa6,0x17aa8b3,0x1d430c0,0x1974244,0x16730f8,0x13c0ec9, + 0x0d7ec26,0x1960620,0x08e084b,0x10769ee,0x183887b,0x096ca30, + 0x1c62904,0x1f4ce25,0x0010281 }, + { 0x0858b37,0x00247b2,0x176600a,0x1e6afbc,0x00e149a,0x0f5d8c7, + 0x01e4586,0x1416443,0x19f2b0b,0x0810059,0x072eb88,0x15cc207, + 0x1d5a87e,0x1cabce8,0x1f7376c,0x0a2bc9d,0x0aa2788,0x10d9c47, + 0x0061e2a,0x0a58799,0x002c1a5 } }, + /* 239 */ + { { 0x0a723dc,0x1fa8007,0x08c5eb1,0x088562a,0x0a5f04f,0x042e430, + 0x05116fa,0x004c7a9,0x1ff1197,0x0fccc9f,0x1633a98,0x08b9898, + 0x16c3fba,0x1ce6b01,0x145479a,0x04777cd,0x11557b9,0x13ad1d5, + 0x1acbf51,0x00f8a59,0x01474ec }, + { 0x188239d,0x11e9976,0x1a5311a,0x0d06b5c,0x0d1b8ae,0x1759738, + 0x18c967f,0x16be9fb,0x043bc0b,0x11dfb8e,0x0a9c148,0x016f1ec, + 0x053cd22,0x0ff3ccd,0x092183a,0x0ff2644,0x10324ab,0x1ec2ac3, + 0x1652562,0x1ee6616,0x010f8e0 } }, + /* 240 */ + { { 0x067d520,0x0e3dd9e,0x07b2bcd,0x1647f95,0x18f4958,0x1d54046, + 0x1c6522e,0x15c0ef1,0x02135e8,0x0c61867,0x03bfdd0,0x1353911, + 0x0bcdd8d,0x1b98a25,0x01d77c3,0x14a68e4,0x0954506,0x0daa4e4, + 0x1eedff1,0x0712f2b,0x011c4ef }, + { 0x1f5e698,0x164d621,0x18e8ff8,0x19c714b,0x0e77fcb,0x04e170e, + 0x12438c2,0x002da0b,0x1ac1d58,0x13a79ff,0x0e74a96,0x0440703, + 0x0baeeda,0x1af9cb0,0x162c50f,0x1577db2,0x0510db7,0x032ffe8, + 
0x0816dc6,0x0fcd00f,0x00ce8e9 } }, + /* 241 */ + { { 0x0e86a83,0x0f30dc6,0x0580894,0x1f7efce,0x0604159,0x1819bbc, + 0x1f75d23,0x085f824,0x1450522,0x1e5961b,0x1a826e1,0x01e9269, + 0x01bd495,0x0233ca2,0x11b100f,0x082d4a2,0x11023ba,0x0f456a3, + 0x1d8e3ac,0x1034c15,0x01b389b }, + { 0x0150c69,0x0c9a774,0x12f39a6,0x11c4f82,0x14f7590,0x00ca7fb, + 0x0a245a8,0x0ecbb81,0x01bd51b,0x07a4e99,0x1e58c0e,0x00bc30e, + 0x086bc33,0x1e9da53,0x0bcfeff,0x1e313fc,0x177d7ca,0x18a04d9, + 0x0e3c426,0x1d42773,0x01b3029 } }, + /* 242 */ + { { 0x1a2fd88,0x09c6912,0x180fbde,0x199d740,0x090f2f7,0x136ffa4, + 0x072035e,0x10c987c,0x02883f9,0x063c79b,0x194c140,0x0b25331, + 0x13ed92b,0x192eee3,0x02a3c6c,0x0e11403,0x187d5d3,0x1b6ffec, + 0x147ca2e,0x06aa9e1,0x0059dcd }, + { 0x1a74e7d,0x1720e91,0x17d85f1,0x1cbb665,0x14b61eb,0x1ffd05c, + 0x1fe9e79,0x01a785f,0x12ebb7a,0x19b315b,0x17e70d1,0x0bdc035, + 0x04a8641,0x0a33c93,0x00b0c99,0x138ae2a,0x1492fa0,0x10b4889, + 0x11d2421,0x1e69544,0x0195897 } }, + /* 243 */ + { { 0x1adc253,0x0e9acd5,0x0579211,0x198f2f9,0x0054b92,0x10c1097, + 0x0d6f668,0x04e4553,0x0a52b88,0x1dc052f,0x0719da6,0x0f1c5cc, + 0x13ea38e,0x04587c5,0x09d2c68,0x10a99f6,0x0e3db9d,0x1db5521, + 0x1804b5c,0x044a46a,0x01638ba }, + { 0x1c8c576,0x00737ba,0x1749f3b,0x19c978f,0x0bb20e7,0x0c03935, + 0x08321a7,0x16e12b1,0x08a023e,0x0846335,0x042c56a,0x01d4ec2, + 0x06ca9f5,0x0c37b0d,0x0326650,0x0d3b0cd,0x0ed2a0a,0x1ceef91, + 0x0fe2843,0x1c312f7,0x01e0bfe } }, + /* 244 */ + { { 0x0319e4f,0x0340c24,0x1e809b6,0x0ab4b0d,0x0be6f6b,0x189932b, + 0x1621899,0x1f57deb,0x198529c,0x0129562,0x0a73eeb,0x0be2c56, + 0x0de7cc4,0x11531ac,0x0141826,0x158e1dc,0x0a42940,0x07be5ce, + 0x0216c7c,0x0955d95,0x01adfb4 }, + { 0x198678e,0x1d49b73,0x10e19ad,0x0732a80,0x0a01e10,0x14305be, + 0x078de05,0x0afe492,0x1b745d8,0x17fea41,0x017b5bb,0x0c5148e, + 0x175dbb3,0x1952e87,0x15a3526,0x1fdc6af,0x09a2389,0x168d429, + 0x09ff5a1,0x184a923,0x01addbb } }, + /* 245 */ + { { 0x09686a3,0x05d104b,0x0fd7843,0x0bc780a,0x108b1c5,0x1a38811, + 
0x0c4d09b,0x0702e25,0x1490330,0x1c8b2d8,0x0549ec7,0x002e5a0, + 0x0245b72,0x154d1a7,0x13d991e,0x06b90df,0x194b0be,0x128faa5, + 0x08578e0,0x16454ab,0x00e3fcc }, + { 0x14dc0be,0x0f2762d,0x1712a9c,0x11b639a,0x1b13624,0x170803d, + 0x1fd0c11,0x147e6d7,0x1da9c99,0x134036b,0x06f1416,0x0ddd069, + 0x109cbfc,0x109f042,0x01c79cf,0x091824d,0x02767f4,0x0af3551, + 0x169eebe,0x0ef0f85,0x01b9ba7 } }, + /* 246 */ + { { 0x1a73375,0x12c7762,0x10e06af,0x1af5158,0x175df69,0x0541ad0, + 0x0542b3b,0x01e59e6,0x1f507d3,0x03d8304,0x0c1092e,0x14578c1, + 0x0c9ae53,0x0087c87,0x0c78609,0x1137692,0x10fadd6,0x122963e, + 0x1d8c6a3,0x0a69228,0x0013ab4 }, + { 0x084f3af,0x0ec2b46,0x0cfabcb,0x043755c,0x029dc09,0x0b58384, + 0x0aa162e,0x02c8ca8,0x0e8a825,0x11306a0,0x14c8ad0,0x1b58b86, + 0x12b9e5e,0x1cf6d06,0x09e5580,0x1721579,0x1c6b962,0x1435e83, + 0x07b14c0,0x05b58f6,0x010a2e2 } }, + /* 247 */ + { { 0x19d8f0a,0x1e04e91,0x0085997,0x1957142,0x12b2e03,0x19a3bdc, + 0x05da005,0x009c86d,0x18e3616,0x19c76cf,0x0186faa,0x123b3d6, + 0x1079b00,0x1f422b3,0x1089950,0x145c19a,0x0c72fe1,0x1d07bbf, + 0x18280c3,0x0842c4e,0x00931d2 }, + { 0x0646bc3,0x1c1a67c,0x1be7ea7,0x04815d2,0x1df94a5,0x08bbe8b, + 0x0e240de,0x19b2038,0x0ffeb66,0x0fe8322,0x0491967,0x05d8ef7, + 0x0f81aec,0x06cc0ea,0x1cedfcb,0x161265b,0x169f377,0x1e4de1f, + 0x1616762,0x1e69e7b,0x0125dae } }, + /* 248 */ + { { 0x0c123bc,0x0228dd1,0x0952b02,0x101031f,0x11e83a6,0x0abdc56, + 0x15c0a62,0x02cadba,0x0f0f12f,0x03f971a,0x1e85373,0x1866153, + 0x0c1f6a9,0x197f3c1,0x1268aee,0x0a9bbdf,0x097709f,0x1e98ce3, + 0x1918294,0x047197a,0x01dc0b8 }, + { 0x0dfb6f6,0x09480a2,0x149bd92,0x08dc803,0x070d7cb,0x09bd6c1, + 0x0903921,0x1b234e1,0x170d8db,0x06b30da,0x03562e1,0x0475e2e, + 0x12ca272,0x11a270e,0x0d33c51,0x1c3f5dd,0x095ab9d,0x1912afe, + 0x0f717a9,0x1c2215b,0x01f8cd6 } }, + /* 249 */ + { { 0x0b8a0a7,0x1e35cbc,0x17a8a95,0x0dd067d,0x04b4aeb,0x089ff39, + 0x05f052f,0x1c93c8c,0x0fc2e8e,0x00c3444,0x11fbbf1,0x1493f62, + 0x1b8d398,0x1733167,0x1c647c4,0x145d9d3,0x089958b,0x0b0c391, + 
0x02e3543,0x1a1e360,0x002dbd6 }, + { 0x0c93cc9,0x07eff12,0x039e257,0x0173ce3,0x09ed778,0x1d7bf59, + 0x0e960e2,0x0d20391,0x04ddcbf,0x1129c3f,0x035aec0,0x017f430, + 0x0264b25,0x04a3e3e,0x1a39523,0x1e79ada,0x0329923,0x14153db, + 0x1440f34,0x006c265,0x000fb8f } }, + /* 250 */ + { { 0x0d9d494,0x059f846,0x07ce066,0x1329e9f,0x1b2065b,0x19c7d4c, + 0x08880f1,0x196ecc9,0x0d8d229,0x0cfa60a,0x1152cc6,0x0b898a3, + 0x12ddad7,0x0909d19,0x0cb382f,0x0f65f34,0x085888c,0x179d108, + 0x0c7fc82,0x1f46c4b,0x00d16de }, + { 0x1a296eb,0x002a40c,0x0c4d138,0x0ba3522,0x1d94ff1,0x1522a78, + 0x0b4affa,0x0ffafbd,0x14d40bd,0x132d401,0x0692beb,0x08fc300, + 0x17604f1,0x12f06f3,0x0c123e6,0x0594130,0x0a5ff57,0x1d1d8ce, + 0x0087445,0x0fb74e3,0x00e0a23 } }, + /* 251 */ + { { 0x1630ee8,0x15fc248,0x0c07b6e,0x040bd6a,0x1e6589c,0x08fa3de, + 0x0acb681,0x1033efa,0x0212bbe,0x1554fcb,0x048492b,0x1abd285, + 0x1bdced3,0x1a21af2,0x07d6e27,0x1ecded2,0x0339411,0x10cb026, + 0x0d5bc36,0x1813948,0x00e6b7f }, + { 0x14f811c,0x07209fb,0x176c4a5,0x03bf1b1,0x1a42d83,0x1a0c648, + 0x1c85e58,0x1d84fea,0x088ebcd,0x1ef290c,0x016f257,0x00ddd46, + 0x01fdd5e,0x163345b,0x0798222,0x030c3da,0x016eb81,0x0199d78, + 0x17773af,0x16325a2,0x01c95ec } }, + /* 252 */ + { { 0x0bde442,0x19bd1f0,0x1cfa49e,0x10cdef4,0x00543fe,0x0886177, + 0x074823b,0x065a61b,0x1a6617a,0x1bce1a0,0x173e2eb,0x10e1a3a, + 0x0be7367,0x11d5e7c,0x14373a7,0x0bcf605,0x0dd772b,0x0ff11e9, + 0x1ff1c31,0x19dd403,0x010b29f }, + { 0x0d803ff,0x05726b1,0x1aa4c6f,0x1fb7860,0x13ee913,0x0083314, + 0x19eaf63,0x0b15e3b,0x0e7a6d6,0x042bc15,0x1d381b5,0x125c205, + 0x0691265,0x09b7d7f,0x08c49fc,0x0242723,0x0408837,0x0235c9a, + 0x0c7858d,0x1687014,0x00ba53b } }, + /* 253 */ + { { 0x05636b0,0x08bfe65,0x171d8b9,0x02d5742,0x0296e02,0x173d96a, + 0x1f5f084,0x108b551,0x15717ad,0x08be736,0x0bcd5e5,0x10b7316, + 0x1ce762b,0x0facd83,0x1e65ad7,0x1ede085,0x0bbf37e,0x0f9b995, + 0x150ad22,0x028bd48,0x015da5d }, + { 0x07f6e3f,0x1e2af55,0x16f079d,0x0f54940,0x1f4d99a,0x0141139, + 
0x1f5dd16,0x1f74ada,0x177b748,0x1844afd,0x07d7476,0x199c0c5, + 0x1b1c484,0x1acc01f,0x0c72428,0x171a1eb,0x1291720,0x121d627, + 0x0ab04fc,0x017fd0e,0x00e98c1 } }, + /* 254 */ + { { 0x06c4fd6,0x023c2e0,0x0e76747,0x0ba4b85,0x1f4b902,0x0c17925, + 0x17ac752,0x0560826,0x0ba4fef,0x159f6e1,0x181eace,0x073f31b, + 0x1d55a52,0x04b7a5b,0x1f126ac,0x1902bab,0x1603844,0x1e28514, + 0x159daca,0x0291a02,0x0047db1 }, + { 0x0f3bad9,0x1ce6288,0x0753127,0x1804520,0x090888f,0x1da26fa, + 0x157af11,0x0d122f4,0x0f39f2b,0x05975e3,0x0658a88,0x075e09d, + 0x170c58e,0x0b9eead,0x0adf06d,0x1eed8a5,0x1d6a329,0x195aa56, + 0x0bd328e,0x15a3d70,0x010859d } }, + /* 255 */ + { { 0x182d1ad,0x0209450,0x111598b,0x1c4122d,0x1751796,0x140b23b, + 0x109cae9,0x1834ee0,0x0b92c85,0x164587d,0x0cb81fe,0x05bf5df, + 0x0d207ab,0x1c30d99,0x0d4c281,0x1a28b8e,0x16588ae,0x0b1edf6, + 0x094e927,0x179b941,0x00bd547 }, + { 0x1056b51,0x09c17c3,0x044a9f0,0x16261f3,0x03d91ed,0x002da16, + 0x1791b4e,0x12bef8f,0x1fd31a9,0x0b080f5,0x1ee2a91,0x05699a7, + 0x0e1efd2,0x0f58bde,0x0e477de,0x01865fc,0x0c6616c,0x05a6a60, + 0x046fbbd,0x00477ce,0x011219f } }, +}; + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_21(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + return sp_521_ecc_mulmod_stripe_21(r, &p521_base, p521_table, + k, map, ct, heap); +} + +#endif + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. 
+ * + * km Scalar to multiply by. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +int sp_ecc_mulmod_base_521(const mp_int* km, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[21]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 21, km); + + err = sp_521_ecc_mulmod_base_21(point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_21(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the base point of P521 by the scalar, add point a and return + * the result. If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, + int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[21 + 21 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (21 + 21 * 2 * 6), + heap, DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 21; + + sp_521_from_mp(k, 21, km); + sp_521_point_from_ecc_point_21(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_21(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_21(point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_21(point, point, addP, tmp); + + if (map) { + sp_521_map_21(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_21(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +/* Add 1 to a. (a = a + 1) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_521_add_one_21(sp_digit* a) +{ + a[0]++; + sp_521_norm_21(a); +} + +/* Read big endian unsigned byte array into r. 
+ *
+ * r     A single precision integer (array of 25-bit digits).
+ * size  Maximum number of digits to convert into.
+ * a     Byte array, big endian.
+ * n     Number of bytes in array to read.
+ */
+static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i;
+    int j = 0;
+    word32 s = 0;    /* count of bits already placed in r[j] */
+
+    r[0] = 0;
+    /* Consume bytes from least significant (a[n-1]) upward. */
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 17U) {
+            /* Digit full at 25 bits: mask it, then spill the top
+             * (s - 17) bits of this byte into the next digit. */
+            r[j] &= 0x1ffffff;
+            s = 25U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;    /* s - 17 bits now occupied in the new digit */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any digits not filled by the input bytes. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_521_ecc_gen_k_21(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[66];    /* 66 bytes = 528 bits, enough for 521-bit scalar */
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            buf[0] &= 0x1;    /* keep only 521 bits: top byte has 1 bit */
+            sp_521_from_bin(k, 21, buf, (int)sizeof(buf));
+            /* Rejection sampling: accept candidates <= p521_order2
+             * (order minus 2, defined earlier in this file) and add one,
+             * giving a scalar in [1, order-1]; otherwise draw again. */
+            if (sp_521_cmp_21(k, p521_order2) <= 0) {
+                sp_521_add_one_21(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */ +int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521 point[2]; + #else + sp_point_521 point[1]; + #endif + sp_digit k[21]; +#endif +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521* infinity = NULL; +#endif + int err = MP_OKAY; + + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, DYNAMIC_TYPE_ECC); + #else + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, DYNAMIC_TYPE_ECC); + #endif + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + infinity = point + 1; + #endif + + err = sp_521_ecc_gen_k_21(rng, k); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_21(point, k, 1, 1, NULL); + } + +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_21(infinity, point, p521_order, 1, 1, NULL); + } + if (err == MP_OKAY) { + if (sp_521_iszero_21(point->x) || sp_521_iszero_21(point->y)) { + err = ECC_INF_E; + } + } +#endif + + if (err == MP_OKAY) { + err = sp_521_to_mp(k, priv); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_21(point, pub); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) { + /* point is not sensitive, so no need to zeroize */ + XFREE(point, heap, DYNAMIC_TYPE_ECC); + } +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_521_ctx { + int state; + sp_521_ecc_mulmod_21_ctx mulmod_ctx; + sp_digit k[21]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521 point[2]; +#else + sp_point_521 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} 
sp_ecc_key_gen_521_ctx; + +int sp_ecc_make_key_521_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_521_ctx* ctx = (sp_ecc_key_gen_521_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_521* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_521_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_521_ecc_gen_k_21(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_521_ecc_mulmod_base_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_521_ecc_mulmod_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p521_order, 1, 1); + if (err == MP_OKAY) { + if (sp_521_iszero_21(ctx->point->x) || + sp_521_iszero_21(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_521_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_21(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +#ifdef HAVE_ECC_DHE +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 66 + * + * r A single precision integer. + * a Byte array. 
+ */
+static void sp_521_to_bin_21(sp_digit* r, byte* a)
+{
+    int i;
+    int j;
+    int s = 0;    /* bits of r[i] already emitted into the current byte */
+    int b;
+
+    /* Normalize: propagate carries so each digit is a clean 25 bits. */
+    for (i=0; i<20; i++) {
+        r[i+1] += r[i] >> 25;
+        r[i] &= 0x1ffffff;
+    }
+    /* Write 66 bytes (528 bits), from least significant byte backwards. */
+    j = 528 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<21 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the remaining whole bytes of this 25-bit digit. */
+        while (b < 25) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* Leftover bits of this digit share the next byte with the
+         * following digit; back up one byte when there are any. */
+        s = 8 - (b - 25);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for the output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_521(const mp_int* priv, const ecc_point* pub, byte* out,
+    word32* outLen, void* heap)
+{
+#ifdef WOLFSSL_SP_SMALL_STACK
+    sp_point_521* point = NULL;
+    sp_digit* k = NULL;
+#else
+    sp_point_521 point[1];
+    sp_digit k[21];
+#endif
+    int err = MP_OKAY;
+
+    /* The X ordinate of P-521 is 66 bytes (521 bits) and sp_521_to_bin_21()
+     * below writes all 66; a bound of 65 would allow a one byte overflow. */
+    if (*outLen < 66U) {
+        err = BUFFER_E;
+    }
+
+#ifdef WOLFSSL_SP_SMALL_STACK
+    if (err == MP_OKAY) {
+        point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap,
+                                         DYNAMIC_TYPE_ECC);
+        if (point == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_521_from_mp(k, 21, priv);
+        sp_521_point_from_ecc_point_21(point, pub);
+            err = sp_521_ecc_mulmod_21(point, point, k, 1, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_521_to_bin_21(point->x, out);
+        *outLen = 66;
+    }
+
+#ifdef WOLFSSL_SP_SMALL_STACK
+    if (k != NULL)
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    if (point != NULL)
+        XFREE(point, heap, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_NONBLOCK
+typedef struct sp_ecc_sec_gen_521_ctx {
+    int state;
+    union {
+        sp_521_ecc_mulmod_21_ctx mulmod_ctx;
+    };
+    sp_digit k[21];
+    sp_point_521 point;
+} sp_ecc_sec_gen_521_ctx;
+
+int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv,
+    const ecc_point* pub, byte* out, word32* outLen, void* heap)
+{
+    int err = FP_WOULDBLOCK;
+    sp_ecc_sec_gen_521_ctx* ctx = (sp_ecc_sec_gen_521_ctx*)sp_ctx->data;
+
+    typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_521_ctx) >= sizeof(*sp_ctx) ?
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_521_from_mp(ctx->k, 21, priv); + sp_521_point_from_ecc_point_21(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_521_ecc_mulmod_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_521_to_bin_21(ctx->point.x, out); + *outLen = 66; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +SP_NOINLINE static void sp_521_rshift_21(sp_digit* r, const sp_digit* a, + byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<20; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (25 - n))) & 0x1ffffff; + } +#else + for (i=0; i<16; i += 8) { + r[i+0] = (a[i+0] >> n) | ((a[i+1] << (25 - n)) & 0x1ffffff); + r[i+1] = (a[i+1] >> n) | ((a[i+2] << (25 - n)) & 0x1ffffff); + r[i+2] = (a[i+2] >> n) | ((a[i+3] << (25 - n)) & 0x1ffffff); + r[i+3] = (a[i+3] >> n) | ((a[i+4] << (25 - n)) & 0x1ffffff); + r[i+4] = (a[i+4] >> n) | ((a[i+5] << (25 - n)) & 0x1ffffff); + r[i+5] = (a[i+5] >> n) | ((a[i+6] << (25 - n)) & 0x1ffffff); + r[i+6] = (a[i+6] >> n) | ((a[i+7] << (25 - n)) & 0x1ffffff); + r[i+7] = (a[i+7] >> n) | ((a[i+8] << (25 - n)) & 0x1ffffff); + } + r[16] = (a[16] >> n) | ((a[17] << (25 - n)) & 0x1ffffff); + r[17] = (a[17] >> n) | ((a[18] << (25 - n)) & 0x1ffffff); + r[18] = (a[18] >> n) | ((a[19] << (25 - n)) & 0x1ffffff); + r[19] = (a[19] >> n) | ((a[20] << (25 - n)) & 0x1ffffff); +#endif /* WOLFSSL_SP_SMALL */ + r[20] = a[20] >> n; +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. 
+ * b A scalar. + */ +SP_NOINLINE static void sp_521_mul_d_21(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + sp_int64 tb = b; + sp_int64 t = 0; + int i; + + for (i = 0; i < 21; i++) { + t += tb * a[i]; + r[i] = (sp_digit)(t & 0x1ffffff); + t >>= 25; + } + r[21] = (sp_digit)t; +#else + sp_int64 tb = b; + sp_int64 t = 0; + sp_digit t2; + sp_int64 p[4]; + int i; + + for (i = 0; i < 20; i += 4) { + p[0] = tb * a[i + 0]; + p[1] = tb * a[i + 1]; + p[2] = tb * a[i + 2]; + p[3] = tb * a[i + 3]; + t += p[0]; + t2 = (sp_digit)(t & 0x1ffffff); + t >>= 25; + r[i + 0] = (sp_digit)t2; + t += p[1]; + t2 = (sp_digit)(t & 0x1ffffff); + t >>= 25; + r[i + 1] = (sp_digit)t2; + t += p[2]; + t2 = (sp_digit)(t & 0x1ffffff); + t >>= 25; + r[i + 2] = (sp_digit)t2; + t += p[3]; + t2 = (sp_digit)(t & 0x1ffffff); + t >>= 25; + r[i + 3] = (sp_digit)t2; + } + t += tb * a[20]; + r[20] = (sp_digit)(t & 0x1ffffff); + t >>= 25; + r[21] = (sp_digit)(t & 0x1ffffff); +#endif /* WOLFSSL_SP_SMALL */ +} + +SP_NOINLINE static void sp_521_lshift_42(sp_digit* r, const sp_digit* a, + byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[42] = a[41] >> (25 - n); + for (i=41; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (25 - n))) & 0x1ffffff; + } +#else + sp_int_digit s; + sp_int_digit t; + + s = (sp_int_digit)a[41]; + r[42] = s >> (25U - n); + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (25U - n))) & 
0x1ffffff; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = 
(sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (25U - n))) & 0x1ffffff; +#endif /* WOLFSSL_SP_SMALL */ + r[0] = (a[0] << n) & 0x1ffffff; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this 
time. + * + * Simplified based on top word of divisor being (1 << 25) - 1 + * + * a Number to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_521_div_21(const sp_digit* a, const sp_digit* d, + const sp_digit* m, sp_digit* r) +{ + int i; + sp_digit r1; + sp_digit mask; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[4 * 21 + 3]; +#endif + sp_digit* t2 = NULL; + sp_digit* sd = NULL; + int err = MP_OKAY; + + (void)m; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 21 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (t1 == NULL) + err = MEMORY_E; +#endif + + (void)m; + + if (err == MP_OKAY) { + t2 = t1 + 42 + 1; + sd = t2 + 21 + 1; + + sp_521_mul_d_21(sd, d, (sp_digit)1 << 4); + sp_521_lshift_42(t1, a, 4); + t1[21 + 21] += t1[21 + 21 - 1] >> 25; + t1[21 + 21 - 1] &= 0x1ffffff; + for (i=20; i>=0; i--) { + r1 = t1[21 + i]; + sp_521_mul_d_21(t2, sd, r1); + (void)sp_521_sub_21(&t1[i], &t1[i], t2); + t1[21 + i] -= t2[21]; + sp_521_norm_21(&t1[i + 1]); + + mask = ~((t1[21 + i] - 1) >> 31); + sp_521_cond_sub_21(t1 + i, t1 + i, sd, mask); + sp_521_norm_21(&t1[i + 1]); + } + sp_521_norm_21(t1); + sp_521_rshift_21(r, t1, 4); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. 
+ */ +static int sp_521_mod_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_521_div_21(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P521 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_521_mont_mul_order_21(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_521_mul_21(r, a, b); + sp_521_mont_reduce_order_21(r, p521_order, p521_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P521 curve. */ +static const uint32_t p521_order_minus_2[17] = { + 0x91386407U,0xbb6fb71eU,0x899c47aeU,0x3bb5c9b8U,0xf709a5d0U,0x7fcc0148U, + 0xbf2f966bU,0x51868783U,0xfffffffaU,0xffffffffU,0xffffffffU,0xffffffffU, + 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0x000001ffU +}; +#else +/* The low half of the order-2 of the P521 curve. */ +static const uint32_t p521_order_low[9] = { + 0x91386407U,0xbb6fb71eU,0x899c47aeU,0x3bb5c9b8U,0xf709a5d0U,0x7fcc0148U, + 0xbf2f966bU,0x51868783U,0xfffffffaU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Square number mod the order of P521 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_order_21(sp_digit* r, const sp_digit* a) +{ + sp_521_sqr_21(r, a); + sp_521_mont_reduce_order_21(r, p521_order, p521_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P521 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_n_order_21(sp_digit* r, const sp_digit* a, int n) +{ + int i; + + sp_521_mont_sqr_order_21(r, a); + for (i=1; i= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + XMEMCPY(t, a, sizeof(sp_digit) * 21); + ctx->i = 519; + ctx->state = 1; + break; + case 1: + sp_521_mont_sqr_order_21(t, t); + ctx->state = 2; + break; + case 2: + if ((p521_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) { + sp_521_mont_mul_order_21(t, t, a); + } + ctx->i--; + ctx->state = (ctx->i == 0) ? 3 : 1; + break; + case 3: + XMEMCPY(r, t, sizeof(sp_digit) * 21U); + err = MP_OKAY; + break; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +static void sp_521_mont_inv_order_21(sp_digit* r, const sp_digit* a, + sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 21); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_order_21(t, t); + if ((p521_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_521_mont_mul_order_21(t, t, a); + } + } + XMEMCPY(r, t, sizeof(sp_digit) * 21U); +#else + sp_digit* t = td; + sp_digit* t2 = td + 2 * 21; + sp_digit* t3 = td + 4 * 21; + int i; + + /* t = a^2 */ + sp_521_mont_sqr_order_21(t, a); + /* t = a^3 = t * a */ + sp_521_mont_mul_order_21(t, t, a); + /* t= a^c = t ^ 2 ^ 2 */ + sp_521_mont_sqr_n_order_21(t2, t, 2); + /* t = a^f = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + + /* t3 = a^1e */ + sp_521_mont_sqr_order_21(t3, t); + /* t3 = a^1f = t3 * a */ + sp_521_mont_mul_order_21(t3, t3, a); + + /* t2= a^f0 = t ^ 2 ^ 4 */ + sp_521_mont_sqr_n_order_21(t2, t, 4); + /* t = a^ff = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + /* t2= a^ff00 = t ^ 2 ^ 8 */ + sp_521_mont_sqr_n_order_21(t2, t, 8); + /* t3= a^ffff = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + /* t2= a^ffff0000 = t ^ 2 ^ 16 */ + sp_521_mont_sqr_n_order_21(t2, t, 16); + /* t = a^ffffffff = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + + /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */ + sp_521_mont_sqr_n_order_21(t2, t, 32); + /* t = a^ffffffffffffffff = t2 * t */ + 
sp_521_mont_mul_order_21(t, t2, t); + /* t2= a^ffffffffffffffff0000000000000000 = t ^ 2 ^ 64 */ + sp_521_mont_sqr_n_order_21(t2, t, 64); + /* t = a^ffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + /* t2= a^ffffffffffffffffffffffffffffffff00000000000000000000000000000000 = t ^ 2 ^ 128 */ + sp_521_mont_sqr_n_order_21(t2, t, 128); + /* t = a^ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_21(t, t2, t); + + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 */ + sp_521_mont_sqr_n_order_21(t2, t, 5); + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t * t3 */ + sp_521_mont_mul_order_21(t2, t2, t3); + + for (i=259; i>=1; i--) { + sp_521_mont_sqr_order_21(t2, t2); + if ((p521_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) { + sp_521_mont_mul_order_21(t2, t2, a); + } + } + sp_521_mont_sqr_order_21(t2, t2); + sp_521_mont_mul_order_21(r, t2, a); +#endif /* WOLFSSL_SP_SMALL */ +} + +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ +#endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +#ifdef HAVE_ECC_SIGN +#ifndef SP_ECC_MAX_SIG_GEN +#define SP_ECC_MAX_SIG_GEN 64 +#endif + +/* Calculate second signature value S from R, k and private value. + * + * s = (r * x + e) / k + * + * s Signature value. + * r First signature value. + * k Ephemeral private key. + * x Private key as a number. + * e Hash of message as a number. + * tmp Temporary storage for intermediate numbers. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */
+static int sp_521_calc_s_21(sp_digit* s, const sp_digit* r, sp_digit* k,
+    sp_digit* x, const sp_digit* e, sp_digit* tmp)
+{
+    int err;
+    sp_digit carry;
+    sp_int32 c;
+    sp_digit* kInv = k;    /* k's storage is reused to hold k^-1 */
+
+    /* Conv k to Montgomery form (mod order) */
+    sp_521_mul_21(k, k, p521_norm_order);
+    err = sp_521_mod_21(k, k, p521_order);
+    if (err == MP_OKAY) {
+        sp_521_norm_21(k);
+
+        /* kInv = 1/k mod order */
+        sp_521_mont_inv_order_21(kInv, k, tmp);
+        sp_521_norm_21(kInv);
+
+        /* s = r * x + e */
+        sp_521_mul_21(x, x, r);
+        err = sp_521_mod_21(x, x, p521_order);
+    }
+    if (err == MP_OKAY) {
+        sp_521_norm_21(x);
+        carry = sp_521_add_21(s, e, x);
+        /* Conditionally (constant time) reduce s into [0, order). */
+        sp_521_cond_sub_21(s, s, p521_order, 0 - carry);
+        sp_521_norm_21(s);
+        c = sp_521_cmp_21(s, p521_order);
+        sp_521_cond_sub_21(s, s, p521_order,
+            (sp_digit)0 - (sp_digit)(c >= 0));
+        sp_521_norm_21(s);
+
+        /* s = s * k^-1 mod order */
+        sp_521_mont_mul_order_21(s, s, kInv);
+        sp_521_norm_21(s);
+    }
+
+    return err;
+}
+
+/* Sign the hash using the private key.
+ * e = [hash, 521 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 521 bits.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Ephemeral scalar to use; when NULL or zero a random scalar is
+ *          generated instead. Zeroed after use.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */ +int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_521* point = NULL; +#else + sp_digit e[7 * 2 * 21]; + sp_point_521 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int32 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 21, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 21; + k = e + 4 * 21; + r = e + 6 * 21; + tmp = e + 8 * 21; + s = e; + + if (hashLen > 66U) { + hashLen = 66U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_21(rng, k); + } + else { + sp_521_from_mp(k, 21, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_21(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 21U); + sp_521_norm_21(r); + c = sp_521_cmp_21(r, p521_order); + sp_521_cond_sub_21(r, r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_21(r); + + if (!sp_521_iszero_21(r)) { + /* x is modified in calculation of s. */ + sp_521_from_mp(x, 21, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_521_from_bin(e, 21, hash, (int)hashLen); + + /* Take 521 leftmost bits of hash. */ + if (hashLen == 66U) { + sp_521_rshift_21(e, e, 7); + e[20] |= ((sp_digit)hash[0]) << 13; + } + + err = sp_521_calc_s_21(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_521_iszero_21(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 21); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_521)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sign_521_ctx { + int state; + union { + sp_521_ecc_mulmod_21_ctx mulmod_ctx; + sp_521_mont_inv_order_21_ctx mont_inv_order_ctx; + }; + sp_digit e[2*21]; + sp_digit x[2*21]; + sp_digit k[2*21]; + sp_digit r[2*21]; + sp_digit tmp[3 * 2*21]; + sp_point_521 point; + sp_digit* s; + sp_digit* kInv; + int i; +} sp_ecc_sign_521_ctx; + +int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, + mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sign_521_ctx* ctx = (sp_ecc_sign_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sign_521_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->s = ctx->e; + ctx->kInv = ctx->k; + + ctx->i = SP_ECC_MAX_SIG_GEN; + ctx->state = 1; + break; + case 1: /* GEN */ + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_21(rng, ctx->k); + } + else { + sp_521_from_mp(ctx->k, 21, km); + mp_zero(km); + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + break; + case 2: /* MULMOD */ + err = sp_521_ecc_mulmod_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &p521_base, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + ctx->state = 3; + } + break; + case 3: /* MODORDER */ + { + sp_int32 c; + /* r = point->x mod order */ + XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 21U); + sp_521_norm_21(ctx->r); + c = sp_521_cmp_21(ctx->r, p521_order); + sp_521_cond_sub_21(ctx->r, ctx->r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_21(ctx->r); + + if (hashLen > 66U) { + hashLen = 66U; + } + sp_521_from_mp(ctx->x, 21, priv); + sp_521_from_bin(ctx->e, 21, hash, (int)hashLen); + if (hashLen == 66U) { + sp_521_rshift_21(ctx->e, ctx->e, 7); + ctx->e[20] |= ((sp_digit)hash[0]) << 13; + } + ctx->state = 4; + break; + } + case 4: /* KMODORDER */ + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_21(ctx->k, ctx->k, p521_norm_order); + err = sp_521_mod_21(ctx->k, ctx->k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_21(ctx->k); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 5; + } + break; + case 5: /* KINV */ + /* kInv = 1/k mod order */ + err = sp_521_mont_inv_order_21_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp); + if (err == MP_OKAY) { + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 6; + } + break; + case 6: /* KINVNORM */ + sp_521_norm_21(ctx->kInv); + ctx->state = 7; + break; + case 7: /* R */ + /* s = r * x + e */ + sp_521_mul_21(ctx->x, ctx->x, ctx->r); + ctx->state = 8; + break; + case 8: /* S1 */ + err = sp_521_mod_21(ctx->x, ctx->x, p521_order); + if (err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* S2 */ + { + sp_digit carry; + sp_int32 c; + 
sp_521_norm_21(ctx->x); + carry = sp_521_add_21(ctx->s, ctx->e, ctx->x); + sp_521_cond_sub_21(ctx->s, ctx->s, + p521_order, 0 - carry); + sp_521_norm_21(ctx->s); + c = sp_521_cmp_21(ctx->s, p521_order); + sp_521_cond_sub_21(ctx->s, ctx->s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_21(ctx->s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_21(ctx->s, ctx->s, ctx->kInv); + sp_521_norm_21(ctx->s); + + /* Check that signature is usable. */ + if (sp_521_iszero_21(ctx->s) == 0) { + ctx->state = 10; + break; + } + #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + ctx->i = 1; + #endif + + /* not usable gen, try again */ + ctx->i--; + if (ctx->i == 0) { + err = RNG_FAILURE_E; + } + ctx->state = 1; + break; + } + case 10: /* RES */ + err = sp_521_to_mp(ctx->r, rm); + if (err == MP_OKAY) { + err = sp_521_to_mp(ctx->s, sm); + } + break; + } + + if (err == MP_OKAY && ctx->state != 10) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 21U); + XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 21U); + XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 21U); + XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 21U); + XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 21U); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_SIGN */ + +#ifndef WOLFSSL_SP_SMALL +static const char sp_521_tab32_21[32] = { + 1, 10, 2, 11, 14, 22, 3, 30, + 12, 15, 17, 19, 23, 26, 4, 31, + 9, 13, 21, 29, 16, 18, 25, 8, + 20, 28, 24, 7, 27, 6, 5, 32}; + +static int sp_521_num_bits_25_21(sp_digit v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return sp_521_tab32_21[(uint32_t)(v*0x07C4ACDD) >> 27]; +} + +static int sp_521_num_bits_21(const sp_digit* a) +{ + int i; + int r = 0; + + for (i = 20; i >= 0; i--) { + if (a[i] != 0) { + r = sp_521_num_bits_25_21(a[i]); + r += i * 25; + break; + } + } + + return r; +} + +/* Non-constant time modular inversion. + * + * @param [out] r Resulting number. 
+ * @param [in] a Number to invert. + * @param [in] m Modulus. + * @return MP_OKAY on success. + * @return MEMORY_E when dynamic memory allocation fails. + */ +static int sp_521_mod_inv_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u = NULL; +#else + sp_digit u[21 * 4]; +#endif + sp_digit* v = NULL; + sp_digit* b = NULL; + sp_digit* d = NULL; + int ut; + int vt; + +#ifdef WOLFSSL_SP_SMALL_STACK + u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (u == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + v = u + 21; + b = u + 2 * 21; + d = u + 3 * 21; + + XMEMCPY(u, m, sizeof(sp_digit) * 21); + XMEMCPY(v, a, sizeof(sp_digit) * 21); + + ut = sp_521_num_bits_21(u); + vt = sp_521_num_bits_21(v); + + XMEMSET(b, 0, sizeof(sp_digit) * 21); + if ((v[0] & 1) == 0) { + sp_521_rshift1_21(v, v); + XMEMCPY(d, m, sizeof(sp_digit) * 21); + d[0]++; + sp_521_rshift1_21(d, d); + vt--; + + while ((v[0] & 1) == 0) { + sp_521_rshift1_21(v, v); + if (d[0] & 1) + sp_521_add_21(d, d, m); + sp_521_rshift1_21(d, d); + vt--; + } + } + else { + XMEMSET(d+1, 0, sizeof(sp_digit) * (21 - 1)); + d[0] = 1; + } + + while (ut > 1 && vt > 1) { + if ((ut > vt) || ((ut == vt) && + (sp_521_cmp_21(u, v) >= 0))) { + sp_521_sub_21(u, u, v); + sp_521_norm_21(u); + + sp_521_sub_21(b, b, d); + sp_521_norm_21(b); + if (b[20] < 0) + sp_521_add_21(b, b, m); + sp_521_norm_21(b); + ut = sp_521_num_bits_21(u); + + do { + sp_521_rshift1_21(u, u); + if (b[0] & 1) + sp_521_add_21(b, b, m); + sp_521_rshift1_21(b, b); + ut--; + } + while (ut > 0 && (u[0] & 1) == 0); + } + else { + sp_521_sub_21(v, v, u); + sp_521_norm_21(v); + + sp_521_sub_21(d, d, b); + sp_521_norm_21(d); + if (d[20] < 0) + sp_521_add_21(d, d, m); + sp_521_norm_21(d); + vt = sp_521_num_bits_21(v); + + do { + sp_521_rshift1_21(v, v); + if (d[0] & 1) + sp_521_add_21(d, d, m); + sp_521_rshift1_21(d, d); + vt--; + } + while (vt > 0 && (v[0] & 
1) == 0); + } + } + + if (ut == 1) + XMEMCPY(r, b, sizeof(sp_digit) * 21); + else + XMEMCPY(r, d, sizeof(sp_digit) * 21); + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (u != NULL) + XFREE(u, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* WOLFSSL_SP_SMALL */ + +/* Add point p1 into point p2. Handles p1 == p2 and result at infinity. + * + * p1 First point to add and holds result. + * p2 Second point to add. + * tmp Temporary storage for intermediate numbers. + */ +static void sp_521_add_points_21(sp_point_521* p1, const sp_point_521* p2, + sp_digit* tmp) +{ + + sp_521_proj_point_add_21(p1, p1, p2, tmp); + if (sp_521_iszero_21(p1->z)) { + if (sp_521_iszero_21(p1->x) && sp_521_iszero_21(p1->y)) { + sp_521_proj_point_dbl_21(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + p1->x[4] = 0; + p1->x[5] = 0; + p1->x[6] = 0; + p1->x[7] = 0; + p1->x[8] = 0; + p1->x[9] = 0; + p1->x[10] = 0; + p1->x[11] = 0; + p1->x[12] = 0; + p1->x[13] = 0; + p1->x[14] = 0; + p1->x[15] = 0; + p1->x[16] = 0; + p1->x[17] = 0; + p1->x[18] = 0; + p1->x[19] = 0; + p1->x[20] = 0; + XMEMCPY(p1->z, p521_norm_mod, sizeof(p521_norm_mod)); + } + } +} + +/* Calculate the verification point: [e/s]G + [r/s]Q + * + * p1 Calculated point. + * p2 Public point and temporary. + * s Second part of signature as a number. + * u1 Temporary number. + * u2 Temporary number. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_calc_vfy_point_21(sp_point_521* p1, sp_point_521* p2, + sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap) +{ + int err; + +#ifndef WOLFSSL_SP_SMALL + err = sp_521_mod_inv_21(s, s, p521_order); + if (err == MP_OKAY) +#endif /* !WOLFSSL_SP_SMALL */ + { + sp_521_mul_21(s, s, p521_norm_order); + err = sp_521_mod_21(s, s, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_21(s); +#ifdef WOLFSSL_SP_SMALL + { + sp_521_mont_inv_order_21(s, s, tmp); + sp_521_mont_mul_order_21(u1, u1, s); + sp_521_mont_mul_order_21(u2, u2, s); + } +#else + { + sp_521_mont_mul_order_21(u1, u1, s); + sp_521_mont_mul_order_21(u2, u2, s); + } +#endif /* WOLFSSL_SP_SMALL */ + { + err = sp_521_ecc_mulmod_base_21(p1, u1, 0, 0, heap); + } + } + if ((err == MP_OKAY) && sp_521_iszero_21(p1->z)) { + p1->infinity = 1; + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_21(p2, p2, u2, 0, 0, heap); + } + if ((err == MP_OKAY) && sp_521_iszero_21(p2->z)) { + p2->infinity = 1; + } + + if (err == MP_OKAY) { + sp_521_add_points_21(p1, p2, tmp); + } + + return err; +} + +#ifdef HAVE_ECC_VERIFY +/* Verify the signature values with the hash and public key. + * e = Truncate(hash, 521) + * u1 = e/s mod order + * u2 = r/s mod order + * r == (u1.G + u2.Q)->x mod order + * Optimization: Leave point in projective form. + * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z') + * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' + * The hash is truncated to the first 521 bits. + * + * hash Hash of the message to verify. + * hashLen Length of the hash data. + * pX, pY, pZ Ordinates of the public key point. + * res Output: 1 when the signature is valid, 0 otherwise. + * rm First part of the signature as an mp_int. + * sm Second part of the signature as an mp_int. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_verify_521(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_521* p1 = NULL; +#else + sp_digit u1[18 * 21]; + sp_point_521 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_521* p2 = NULL; + sp_digit carry; + sp_int32 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 21, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 21; + s = u1 + 4 * 21; + tmp = u1 + 6 * 21; + p2 = p1 + 1; + + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(u1, 21, hash, (int)hashLen); + sp_521_from_mp(u2, 21, rm); + sp_521_from_mp(s, 21, sm); + sp_521_from_mp(p2->x, 21, pX); + sp_521_from_mp(p2->y, 21, pY); + sp_521_from_mp(p2->z, 21, pZ); + + if (hashLen == 66U) { + sp_521_rshift_21(u1, u1, 7); + u1[20] |= ((sp_digit)hash[0]) << 13; + } + + err = sp_521_calc_vfy_point_21(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(u2, 21, rm); + err = sp_521_mod_mul_norm_21(u2, u2, p521_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_21(p1->z, p1->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(u1, u2, p1->z, p521_mod, p521_mp_mod); + *res = (int)(sp_521_cmp_21(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_521_from_mp(u2, 21, rm); + carry = sp_521_add_21(u2, u2, p521_order); + /* Carry means result is greater than mod and is not valid. 
*/ + if (carry == 0) { + sp_521_norm_21(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_21(u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_21(u2, u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_521_mont_mul_21(u1, u2, p1->z, p521_mod, p521_mp_mod); + } + *res = (sp_521_cmp_21(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_verify_521_ctx { + int state; + union { + sp_521_ecc_mulmod_21_ctx mulmod_ctx; + sp_521_mont_inv_order_21_ctx mont_inv_order_ctx; + sp_521_proj_point_dbl_21_ctx dbl_ctx; + sp_521_proj_point_add_21_ctx add_ctx; + }; + sp_digit u1[2*21]; + sp_digit u2[2*21]; + sp_digit s[2*21]; + sp_digit tmp[2*21 * 6]; + sp_point_521 p1; + sp_point_521 p2; +} sp_ecc_verify_521_ctx; + +int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, + word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ, + const mp_int* rm, const mp_int* sm, int* res, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_verify_521_ctx* ctx = (sp_ecc_verify_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_verify_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(ctx->u1, 21, hash, (int)hashLen); + sp_521_from_mp(ctx->u2, 21, rm); + sp_521_from_mp(ctx->s, 21, sm); + sp_521_from_mp(ctx->p2.x, 21, pX); + sp_521_from_mp(ctx->p2.y, 21, pY); + sp_521_from_mp(ctx->p2.z, 21, pZ); + if (hashLen == 66U) { + sp_521_rshift_21(ctx->u1, ctx->u1, 7); + ctx->u1[20] |= ((sp_digit)hash[0]) << 13; + } + ctx->state = 1; + break; + case 1: /* NORMS0 */ + sp_521_mul_21(ctx->s, ctx->s, p521_norm_order); + err = sp_521_mod_21(ctx->s, ctx->s, p521_order); + if (err == MP_OKAY) + ctx->state = 2; + break; + case 2: /* NORMS1 */ + sp_521_norm_21(ctx->s); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 3; + break; + case 3: /* NORMS2 */ + err = sp_521_mont_inv_order_21_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp); + if (err == MP_OKAY) { + ctx->state = 4; + } + break; + case 4: /* NORMS3 */ + sp_521_mont_mul_order_21(ctx->u1, ctx->u1, ctx->s); + ctx->state = 5; + break; + case 5: /* NORMS4 */ + sp_521_mont_mul_order_21(ctx->u2, ctx->u2, ctx->s); + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 6; + break; + case 6: /* MULBASE */ + err = sp_521_ecc_mulmod_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p521_base, ctx->u1, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_21(ctx->p1.z)) { + ctx->p1.infinity = 1; + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 7; + } + break; + case 7: /* MULMOD */ + err = sp_521_ecc_mulmod_21_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_21(ctx->p2.z)) { + ctx->p2.infinity = 1; + } + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 8; + } + break; + case 8: /* ADD */ + err = sp_521_proj_point_add_21_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, 
&ctx->p2, ctx->tmp); + if (err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* MONT */ + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(ctx->u2, 21, rm); + err = sp_521_mod_mul_norm_21(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) + ctx->state = 10; + break; + case 10: /* SQR */ + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_21(ctx->p1.z, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: /* MUL */ + sp_521_mont_mul_21(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 12; + break; + case 12: /* RES */ + { + sp_int32 c = 0; + err = MP_OKAY; /* math okay, now check result */ + *res = (int)(sp_521_cmp_21(ctx->p1.x, ctx->u1) == 0); + if (*res == 0) { + sp_digit carry; + + /* Reload r and add order. */ + sp_521_from_mp(ctx->u2, 21, rm); + carry = sp_521_add_21(ctx->u2, ctx->u2, p521_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_521_norm_21(ctx->u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_21(ctx->u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_21(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + sp_521_mont_mul_21(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, + p521_mp_mod); + *res = (int)(sp_521_cmp_21(ctx->p1.x, ctx->u1) == 0); + } + } + break; + } + } /* switch */ + + if (err == MP_OKAY && ctx->state != 12) { + err = FP_WOULDBLOCK; + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_VERIFY */ + +#ifdef HAVE_ECC_CHECK_KEY +/* Check that the x and y ordinates are a valid point on the curve. + * + * point EC point. + * heap Heap to use if dynamically allocating. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +static int sp_521_ecc_is_point_21(const sp_point_521* point, + void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[21 * 4]; +#endif + sp_digit* t2 = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21 * 4, heap, DYNAMIC_TYPE_ECC); + if (t1 == NULL) + err = MEMORY_E; +#endif + (void)heap; + + if (err == MP_OKAY) { + t2 = t1 + 2 * 21; + + /* y^2 - x^3 - a.x = b */ + sp_521_sqr_21(t1, point->y); + (void)sp_521_mod_21(t1, t1, p521_mod); + sp_521_sqr_21(t2, point->x); + (void)sp_521_mod_21(t2, t2, p521_mod); + sp_521_mul_21(t2, t2, point->x); + (void)sp_521_mod_21(t2, t2, p521_mod); + sp_521_mont_sub_21(t1, t1, t2, p521_mod); + + /* y^2 - x^3 + 3.x = b, when a = -3 */ + sp_521_mont_add_21(t1, t1, point->x, p521_mod); + sp_521_mont_add_21(t1, t1, point->x, p521_mod); + sp_521_mont_add_21(t1, t1, point->x, p521_mod); + + + if (sp_521_cmp_21(t1, p521_b) != 0) { + err = MP_VAL; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the x and y ordinates are a valid point on the curve. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +int sp_ecc_is_point_521(const mp_int* pX, const mp_int* pY) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* pub = NULL; +#else + sp_point_521 pub[1]; +#endif + const byte one[1] = { 1 }; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(pub->x, 21, pX); + sp_521_from_mp(pub->y, 21, pY); + sp_521_from_bin(pub->z, 21, one, (int)sizeof(one)); + + err = sp_521_ecc_is_point_21(pub, NULL); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the private scalar generates the EC point (px, py), the point is + * on the curve and the point has the correct order. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * privm Private scalar that generates EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve, ECC_INF_E if the point does not have the correct order, + * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and + * MP_OKAY otherwise. + */ +int sp_ecc_check_key_521(const mp_int* pX, const mp_int* pY, + const mp_int* privm, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* priv = NULL; + sp_point_521* pub = NULL; +#else + sp_digit priv[21]; + sp_point_521 pub[2]; +#endif + sp_point_521* p = NULL; + const byte one[1] = { 1 }; + int err = MP_OKAY; + + + /* Quick check the lengths of public key ordinates and private key are in + * range. Proper check later. 
+ */ + if (((mp_count_bits(pX) > 521) || + (mp_count_bits(pY) > 521) || + ((privm != NULL) && (mp_count_bits(privm) > 521)))) { + err = ECC_OUT_OF_RANGE_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY && privm) { + priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 21, heap, + DYNAMIC_TYPE_ECC); + if (priv == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = pub + 1; + + sp_521_from_mp(pub->x, 21, pX); + sp_521_from_mp(pub->y, 21, pY); + sp_521_from_bin(pub->z, 21, one, (int)sizeof(one)); + if (privm) + sp_521_from_mp(priv, 21, privm); + + /* Check point at infinity. */ + if ((sp_521_iszero_21(pub->x) != 0) && + (sp_521_iszero_21(pub->y) != 0)) { + err = ECC_INF_E; + } + } + + /* Check range of X and Y */ + if ((err == MP_OKAY) && + ((sp_521_cmp_21(pub->x, p521_mod) >= 0) || + (sp_521_cmp_21(pub->y, p521_mod) >= 0))) { + err = ECC_OUT_OF_RANGE_E; + } + + if (err == MP_OKAY) { + /* Check point is on curve */ + err = sp_521_ecc_is_point_21(pub, heap); + } + + if (err == MP_OKAY) { + /* Point * order = infinity */ + err = sp_521_ecc_mulmod_21(p, pub, p521_order, 1, 1, heap); + } + /* Check result is infinity */ + if ((err == MP_OKAY) && ((sp_521_iszero_21(p->x) == 0) || + (sp_521_iszero_21(p->y) == 0))) { + err = ECC_INF_E; + } + + if (privm) { + if (err == MP_OKAY) { + /* Base * private = point */ + err = sp_521_ecc_mulmod_base_21(p, priv, 1, 1, heap); + } + /* Check result is public key */ + if ((err == MP_OKAY) && + ((sp_521_cmp_21(p->x, pub->x) != 0) || + (sp_521_cmp_21(p->y, pub->y) != 0))) { + err = ECC_PRIV_KEY_E; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, heap, DYNAMIC_TYPE_ECC); + if (priv != NULL) + XFREE(priv, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL +/* Add two projective EC points together. 
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 21 * 6]; + sp_point_521 p[2]; +#endif + sp_point_521* q = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { + q = p + 1; + + sp_521_from_mp(p->x, 21, pX); + sp_521_from_mp(p->y, 21, pY); + sp_521_from_mp(p->z, 21, pZ); + sp_521_from_mp(q->x, 21, qX); + sp_521_from_mp(q->y, 21, qY); + sp_521_from_mp(q->z, 21, qZ); + p->infinity = sp_521_iszero_21(p->x) & + sp_521_iszero_21(p->y); + q->infinity = sp_521_iszero_21(q->x) & + sp_521_iszero_21(q->y); + + sp_521_proj_point_add_21(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Double a projective EC point. 
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 21 * 2]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 21, pX); + sp_521_from_mp(p->y, 21, pY); + sp_521_from_mp(p->z, 21, pZ); + p->infinity = sp_521_iszero_21(p->x) & + sp_521_iszero_21(p->y); + + sp_521_proj_point_dbl_21(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_map_521(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 21 * 5]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 21, pX); + sp_521_from_mp(p->y, 21, pY); + sp_521_from_mp(p->z, 21, pZ); + p->infinity = sp_521_iszero_21(p->x) & + sp_521_iszero_21(p->y); + + sp_521_map_21(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Square root power for the P521 curve. */ +static const uint32_t p521_sqrt_power[17] = { + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000, + 0x00000000,0x00000000,0x00000080 +}; + +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_521_mont_sqrt_21(sp_digit* y) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t = NULL; +#else + sp_digit t[2 * 21]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 21, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + + { + int i; + + XMEMCPY(t, y, sizeof(sp_digit) * 21); + for (i=518; i>=0; i--) { + sp_521_mont_sqr_21(t, t, p521_mod, p521_mp_mod); + if (p521_sqrt_power[i / 32] & ((sp_digit)1 << (i % 32))) + sp_521_mont_mul_21(t, t, y, p521_mod, p521_mp_mod); + } + XMEMCPY(y, t, sizeof(sp_digit) * 21); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_521(mp_int* xm, int odd, mp_int* ym) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* x = NULL; +#else + sp_digit x[4 * 21]; +#endif + sp_digit* y = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 21, NULL, DYNAMIC_TYPE_ECC); + if (x == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + y = x + 2 * 21; + + sp_521_from_mp(x, 21, xm); + err = sp_521_mod_mul_norm_21(x, x, p521_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_521_mont_sqr_21(y, x, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(y, y, x, p521_mod, p521_mp_mod); + } + /* y = x^3 - 3x */ + sp_521_mont_sub_21(y, y, x, p521_mod); + sp_521_mont_sub_21(y, y, x, p521_mod); + sp_521_mont_sub_21(y, y, x, p521_mod); + /* y = x^3 - 3x + b */ + err = sp_521_mod_mul_norm_21(x, p521_b, p521_mod); + } + if (err == MP_OKAY) { + sp_521_mont_add_21(y, y, x, p521_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_521_mont_sqrt_21(y); + } + if (err == MP_OKAY) { + 
XMEMSET(y + 21, 0, 21U * sizeof(sp_digit)); + sp_521_mont_reduce_21(y, p521_mod, p521_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_521_mont_sub_21(y, p521_mod, y, p521_mod); + } + + err = sp_521_to_mp(y, ym); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (x != NULL) + XFREE(x, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_521 */ +#ifdef WOLFCRYPT_HAVE_SAKKE #ifdef WOLFSSL_SP_1024 /* Point structure to use. */ @@ -35406,6 +44074,7 @@ SP_NOINLINE static void sp_1024_mul_d_84(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -35417,13 +44086,26 @@ SP_NOINLINE static void sp_1024_mul_d_84(sp_digit* r, const sp_digit* a, static void sp_1024_cond_add_42(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 41; i++) { + for (i = 0; i < 42; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_1024_cond_add_42(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ int i; for (i = 0; i < 40; i += 8) { @@ -35437,8 +44119,9 @@ static void sp_1024_cond_add_42(sp_digit* r, const sp_digit* a, r[i + 7] = a[i + 7] + (b[i + 7] & m); } r[40] = a[40] + (b[40] & m); -#endif /* WOLFSSL_SP_SMALL */ + r[41] = a[41] + (b[41] & m); } +#endif /* !WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Sub b from a into r. 
(r = a - b) @@ -35505,55 +44188,96 @@ SP_NOINLINE static void sp_1024_rshift_42(sp_digit* r, const sp_digit* a, r[41] = a[41] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 25) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 25); + sp_digit t0 = (sp_digit)(d & 0x1ffffff); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int64 m; + + r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + t1 -= dv & (0 - r); + for (i = 23; i >= 1; i--) { + t1 += t1 + (((sp_uint32)t0 >> 24) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 25); + m = d - ((sp_int64)r * div); + r += (sp_digit)(m >> 50) - (sp_digit)(d >> 50); + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + + m = d - ((sp_int64)r * div); + sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31); + r += sign * t2; + return r; +#else + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 10) + 1; - /* All 25 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 19); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 13) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 20); + t = (t / dv) << 10; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 7) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 5); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 1) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 25 bits from d1 and top 6 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_1024_word_div_word_42(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint32)(div - d) >> 31); +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -35570,11 +44294,10 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 42 + 3]; @@ -35585,7 +44308,7 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 42 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -35604,28 +44327,14 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, t1[41 + 41] += t1[41 + 41 - 1] >> 25; t1[41 + 41 - 1] &= 0x1ffffff; for (i=41; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[41 + i]; - d1 <<= 25; - d1 += t1[41 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_42(t1[41 + i], t1[41 + i - 1], dv); -#endif sp_1024_mul_d_42(t2, sd, r1); (void)sp_1024_sub_42(&t1[i], &t1[i], t2); sp_1024_norm_41(&t1[i]); t1[41 + i] += t1[41 + i - 1] >> 25; t1[41 + i - 1] &= 0x1ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[41 + i]; - d1 <<= 25; - d1 -= t1[41 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_42(-t1[41 + i], -t1[41 + i - 1], dv); -#endif r1 -= t1[41 + i]; sp_1024_mul_d_42(t2, sd, r1); (void)sp_1024_add_42(&t1[i], &t1[i], t2); @@ -35634,7 +44343,7 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, } t1[41 - 1] += t1[41 - 2] >> 25; t1[41 - 2] &= 0x1ffffff; - r1 = t1[41 - 1] / dv; + r1 = sp_1024_word_div_word_42(t1[41 - 1], dv); sp_1024_mul_d_42(t2, sd, r1); sp_1024_sub_42(t1, t1, t2); @@ -35643,15 +44352,14 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 25; r[i] &= 0x1ffffff; } - sp_1024_cond_add_42(r, r, sd, 0 - ((r[40] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_add_42(r, r, sd, r[40] >> 31); sp_1024_norm_41(r); sp_1024_rshift_42(r, r, 1); r[41] = 0; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -35699,7 +44407,8 @@ static int sp_1024_point_new_ex_42(void* heap, sp_point_1024* sp, { int ret = MP_OKAY; (void)heap; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) (void)sp; *p = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); #else @@ -35711,7 +44420,8 @@ static int sp_1024_point_new_ex_42(void* heap, sp_point_1024* sp, return ret; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* Allocate memory for point and return error. */ #define sp_1024_point_new_42(heap, sp, p) sp_1024_point_new_ex_42((heap), NULL, &(p)) #else @@ -35728,7 +44438,8 @@ static int sp_1024_point_new_ex_42(void* heap, sp_point_1024* sp, */ static void sp_1024_point_free_42(sp_point_1024* p, int clear, void* heap) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* If valid pointer then clear point data if requested and free data. 
*/ if (p != NULL) { if (clear != 0) { @@ -35755,20 +44466,23 @@ static void sp_1024_point_free_42(sp_point_1024* p, int clear, void* heap) static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 25 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 24); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 24); } #elif DIGIT_BIT > 25 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1ffffff; s = 25U - s; @@ -35798,12 +44512,12 @@ static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 25) { r[j] &= 0x1ffffff; @@ -35860,8 +44574,8 @@ static int sp_1024_to_mp(const sp_digit* a, mp_int* r) err = mp_grow(r, (1024 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ #if DIGIT_BIT == 25 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 42); - r->used = 42; + XMEMCPY(r->dp, a, sizeof(sp_digit) * 41); + r->used = 41; mp_clamp(r); #elif DIGIT_BIT < 25 int i; @@ -35869,7 +44583,7 @@ static int sp_1024_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 42; i++) { + for (i = 0; i < 41; i++) { r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1; s = DIGIT_BIT - s; @@ -35894,7 +44608,7 @@ static int sp_1024_to_mp(const sp_digit* a, mp_int* r) int s = 0; r->dp[0] = 0; - for (i = 0; i < 42; i++) { + for (i = 0; i < 41; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; if 
(s + 25 >= DIGIT_BIT) { #if DIGIT_BIT != 32 && DIGIT_BIT != 64 @@ -35952,22 +44666,22 @@ static sp_digit sp_1024_cmp_42(const sp_digit* a, const sp_digit* b) int i; for (i=41; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 24); } #else int i; r |= (a[41] - b[41]) & (0 - (sp_digit)1); - r |= (a[40] - b[40]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[40] - b[40]) & ~(((sp_digit)0 - r) >> 24); for (i = 32; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 24); } #endif /* WOLFSSL_SP_SMALL */ @@ -36152,10 +44866,10 @@ static void sp_1024_mont_reduce_42(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_1024_norm_42(a + 41); -#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<40; i++) { mu = (a[i] * mp) & 0x1ffffff; @@ -36178,21 +44892,10 @@ static void sp_1024_mont_reduce_42(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 25; a[i] &= 0x1ffffff; } -#else - for (i=0; i<40; i++) { - mu = (a[i] * mp) & 0x1ffffff; - sp_1024_mul_add_42(a+i, m, mu); - a[i+1] += a[i] >> 25; - } - mu = (a[i] * mp) & 0xffffffL; - sp_1024_mul_add_42(a+i, m, mu); - a[i+1] += a[i] >> 25; - a[i] &= 0x1ffffff; -#endif sp_1024_norm_42(a + 41); sp_1024_mont_shift_42(a, a); - sp_1024_cond_sub_42(a, a, m, 0 - (((a[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[40] - m[40]; + sp_1024_cond_sub_42(a, a, m, ~((over - 1) >> 31)); sp_1024_norm_42(a); } @@ -36203,9 +44906,9 @@ static void sp_1024_mont_reduce_42(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_1024_mont_mul_42(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_mul_42(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_1024_mul_42(r, a, b); @@ -36217,9 +44920,9 @@ static void sp_1024_mont_mul_42(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_1024_mont_sqr_42(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_sqr_42(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_1024_sqr_42(r, a); @@ -36259,11 +44962,14 @@ static const uint8_t p1024_mod_minus_2[] = { static void sp_1024_mont_inv_42(sp_digit* r, const sp_digit* a, sp_digit* td) { - sp_digit* t = td; + sp_digit* t = &td[32 * 2 * 42]; int i; int j; - sp_digit table[32][2 * 42]; + sp_digit* table[32]; + for (i = 0; i < 32; i++) { + table[i] = &td[2 * 42 * i]; + } XMEMCPY(table[0], a, sizeof(sp_digit) * 42); for (i = 1; i < 6; i++) { sp_1024_mont_sqr_42(table[0], table[0], p1024_mod, p1024_mp_mod); @@ -36304,27 +45010,24 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_42(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 42, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 42, 0, sizeof(sp_digit) * 42U); sp_1024_mont_reduce_42(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_42(r->x, p1024_mod); - sp_1024_cond_sub_42(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(r->x, r->x, p1024_mod, ~(n >> 24)); sp_1024_norm_42(r->x); /* y /= z^3 */ sp_1024_mont_mul_42(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 42, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 42, 0, sizeof(sp_digit) * 42U); sp_1024_mont_reduce_42(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_42(r->y, p1024_mod); - sp_1024_cond_sub_42(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(r->y, r->y, p1024_mod, ~(n >> 24)); sp_1024_norm_42(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -36337,10 +45040,11 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, static void sp_1024_mont_add_42(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, b); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -36352,10 +45056,11 @@ static void sp_1024_mont_add_42(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_dbl_42(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -36367,15 +45072,16 @@ static void sp_1024_mont_dbl_42(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_tpl_42(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); (void)sp_1024_add_42(r, r, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -36461,7 +45167,8 @@ SP_NOINLINE static void sp_1024_rshift1_42(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_1024_div2_42(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_42(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_1024_cond_add_42(r, a, m, 0 - (a[0] & 1)); sp_1024_norm_42(r); @@ -36474,6 +45181,61 @@ static void sp_1024_div2_42(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_42(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*42; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_42(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_42(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_42(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_42(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_42(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_42(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_42(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_42(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_42(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_mont_div2_42(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_42(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_42(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_42(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_42(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_42(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_42(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_42(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_42_ctx { int state; @@ -36484,7 +45246,14 @@ typedef struct sp_1024_proj_point_dbl_42_ctx { sp_digit* z; } sp_1024_proj_point_dbl_42_ctx; -static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_42_ctx* ctx = (sp_1024_proj_point_dbl_42_ctx*)sp_ctx->data; @@ -36558,7 +45327,7 @@ static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_42(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_42(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -36608,61 +45377,6 @@ static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_42(sp_point_1024* r, const sp_point_1024* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*42; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_42(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_42(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_42(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_42(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_42(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_42(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_42(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_42(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_42(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_42(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_42(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_42(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_42(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_42(x, x, y, p1024_mod); - /* 
Y = Y - X */ - sp_1024_mont_sub_42(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_42(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_42(y, y, t2, p1024_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -36688,6 +45402,23 @@ static int sp_1024_cmp_equal_42(const sp_digit* a, const sp_digit* b) (a[39] ^ b[39]) | (a[40] ^ b[40]) | (a[41] ^ b[41])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_42(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31] | + a[32] | a[33] | a[34] | a[35] | a[36] | a[37] | a[38] | a[39] | + a[40] | a[41]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -36695,6 +45426,84 @@ static int sp_1024_cmp_equal_42(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. 
*/ +static void sp_1024_proj_point_add_42(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*42; + sp_digit* t2 = t + 4*42; + sp_digit* t3 = t + 6*42; + sp_digit* t4 = t + 8*42; + sp_digit* t5 = t + 10*42; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_42(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(t2, t1) & + sp_1024_cmp_equal_42(t4, t3)) { + sp_1024_proj_point_dbl_42(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(x, x, t5, p1024_mod); + sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_42(t3, y, p1024_mod); + sp_1024_mont_sub_42(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_42(y, y, x, p1024_mod); + sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(y, y, t5, 
p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 42; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_42_ctx { @@ -36707,11 +45516,19 @@ typedef struct sp_1024_proj_point_add_42_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_1024_proj_point_add_42_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_1024_proj_point_add_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -36730,261 +45547,168 @@ static int sp_1024_proj_point_add_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*42; - ctx->t3 = t + 4*42; - ctx->t4 = t + 6*42; - ctx->t5 = t + 8*42; + ctx->t6 = t; + ctx->t1 = t + 2*42; + ctx->t2 = t + 4*42; + ctx->t3 = t + 6*42; + ctx->t4 = t + 8*42; + ctx->t5 = t + 10*42; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_42(ctx->t1, p1024_mod, q->y); - sp_1024_norm_42(ctx->t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_1024_proj_point_dbl_42_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_1024)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<42; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<42; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<42; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_42(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; + break; + case 2: + sp_1024_mont_mul_42(ctx->t3, ctx->t1, q->z, p1024_mod, 
p1024_mp_mod); + ctx->state = 3; + break; + case 3: + sp_1024_mont_mul_42(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_42(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_42(ctx->t1, ctx->t1, ctx->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_42(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_42(ctx->t4, ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_42(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_42(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_42(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_42(ctx->t3, ctx->t3, ctx->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_42(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_42(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_42(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_42(ctx->t5, 
ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_42(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_42(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_42(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_42(ctx->z, ctx->z, ctx->t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_42(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - sp_1024_mont_sqr_42(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_42(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_42(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_dbl_42(ctx->t1, ctx->y, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t1, p1024_mod); + sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->x, 
p1024_mod); + sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 42; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_42(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*42; - sp_digit* t3 = t + 4*42; - sp_digit* t4 = t + 6*42; - sp_digit* t5 = t + 8*42; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_1024* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_42(t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_42(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<42; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<42; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<42; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t1, t1, x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_42(t3, t3, y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(z, z, t2, p1024_mod, p1024_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, t2, 
p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(x, x, t5, p1024_mod); - sp_1024_mont_dbl_42(t1, y, p1024_mod); - sp_1024_mont_sub_42(x, x, t1, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_42(y, y, x, p1024_mod); - sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(y, y, t5, p1024_mod); - } -} - #ifdef WOLFSSL_SP_SMALL /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. @@ -37001,6 +45725,108 @@ static void sp_1024_proj_point_add_42(sp_point_1024* r, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_1024* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_1024 t[3]; + sp_digit tmp[2 * 42 * 37]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 37, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_1024) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_1024_mod_mul_norm_42(t[1].x, g->x, p1024_mod); + } + if (err == MP_OKAY) + err = sp_1024_mod_mul_norm_42(t[1].y, g->y, p1024_mod); + if (err == MP_OKAY) + err = sp_1024_mod_mul_norm_42(t[1].z, g->z, p1024_mod); + + if (err == MP_OKAY) { + i = 40; + c = 24; + n = k[i--] << (25 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 25; + } + + y = (n >> 24) & 1; + n <<= 1; + + sp_1024_proj_point_add_42(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_1024)); + sp_1024_proj_point_dbl_42(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_1024)); + } + + if (map != 0) { + sp_1024_map_42(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_1024)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 42 * 37); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_1024) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_ecc_mulmod_42_ctx { @@ -37010,7 +45836,7 @@ typedef struct sp_1024_ecc_mulmod_42_ctx { sp_1024_proj_point_add_42_ctx add_ctx; }; sp_point_1024 t[3]; - 
sp_digit tmp[2 * 42 * 5]; + sp_digit tmp[2 * 42 * 37]; sp_digit n; int i; int c; @@ -37116,109 +45942,6 @@ static int sp_1024_ecc_mulmod_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_1024* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_1024 t[3]; - sp_digit tmp[2 * 42 * 5]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 5, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_1024) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_1024_mod_mul_norm_42(t[1].x, g->x, p1024_mod); - } - if (err == MP_OKAY) - err = sp_1024_mod_mul_norm_42(t[1].y, g->y, p1024_mod); - if (err == MP_OKAY) - err = sp_1024_mod_mul_norm_42(t[1].z, g->z, p1024_mod); - - if (err == MP_OKAY) { - i = 40; - c = 24; - n = k[i--] << (25 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 25; - } - - y = (n >> 24) & 1; - n <<= 1; - - sp_1024_proj_point_add_42(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_1024)); - sp_1024_proj_point_dbl_42(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_1024)); - } - - if (map != 0) { - sp_1024_map_42(r, &t[0], tmp); - 
} - else { - XMEMCPY(r, &t[0], sizeof(sp_point_1024)); - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 42 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_1024) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_1024 { @@ -37340,7 +46063,7 @@ static void sp_1024_cond_copy_42(sp_digit* r, const sp_digit* a, const sp_digit * n Number of times to double * t Temporary ordinate data. */ -static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, +static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, sp_digit* t) { sp_digit* w = t; @@ -37351,6 +46074,7 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -37361,7 +46085,6 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, /* W = Z^4 */ sp_1024_mont_sqr_42(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_42(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -37379,9 +46102,12 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_42(t2, b, p1024_mod); sp_1024_mont_sub_42(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_42(t2, b, x, p1024_mod); + sp_1024_mont_dbl_42(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_42(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_42(t1, t1, p1024_mod, p1024_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -37391,9 +46117,7 
@@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, sp_1024_mont_mul_42(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_42(y, b, x, p1024_mod); - sp_1024_mont_mul_42(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(y, y, p1024_mod); + sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(y, y, t1, p1024_mod); } #ifndef WOLFSSL_SP_SMALL @@ -37408,18 +46132,19 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int n, sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_42(t2, b, p1024_mod); sp_1024_mont_sub_42(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_42(t2, b, x, p1024_mod); + sp_1024_mont_dbl_42(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_42(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_42(t1, t1, p1024_mod, p1024_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_42(y, b, x, p1024_mod); - sp_1024_mont_mul_42(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(y, y, p1024_mod); + sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_42(y, y, p1024_mod); + sp_1024_mont_div2_42(y, y, p1024_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -37467,30 +46192,30 @@ static void sp_1024_proj_point_dbl_n_store_42(sp_point_1024* r, sp_1024_mont_sub_42(t1, t1, w, p1024_mod); sp_1024_mont_tpl_42(a, t1, p1024_mod); /* B = X*Y^2 */ - sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(b, t2, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(t1, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(b, t1, x, p1024_mod, p1024_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(t1, b, p1024_mod); - sp_1024_mont_sub_42(x, x, t1, p1024_mod); + sp_1024_mont_dbl_42(t2, b, p1024_mod); + sp_1024_mont_sub_42(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_42(t2, b, x, p1024_mod); + sp_1024_mont_dbl_42(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_42(r[j].z, z, y, p1024_mod, p1024_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_1024_mont_sqr_42(t2, t2, p1024_mod, p1024_mp_mod); + /* t1 = Y^4 */ + sp_1024_mont_sqr_42(t1, t1, p1024_mod, p1024_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_1024_mont_mul_42(w, w, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_42(y, b, x, p1024_mod); - sp_1024_mont_mul_42(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(y, y, p1024_mod); - sp_1024_mont_sub_42(y, y, t2, p1024_mod); - + sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_42(r[j].y, y, p1024_mod); + sp_1024_mont_div2_42(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -37513,30 +46238,30 @@ static void sp_1024_proj_point_add_sub_42(sp_point_1024* ra, sp_digit* t4 = t + 6*42; sp_digit* t5 = t + 8*42; sp_digit* t6 = t + 10*42; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 
2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t1, t1, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t1, t1, xa, p1024_mod, p1024_mp_mod); /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t4, t2, z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(t2, za, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t4, t2, za, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_42(t3, t3, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t3, t3, ya, p1024_mod, p1024_mp_mod); /* S2 = Y2*Z1^3 */ sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - U1 */ @@ -37547,30 +46272,30 @@ static void sp_1024_proj_point_add_sub_42(sp_point_1024* ra, sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(z, z, t2, p1024_mod, p1024_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_1024_mont_mul_42(za, za, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(za, za, t2, p1024_mod, p1024_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(xa, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_42(xs, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ya, t1, t5, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(x, x, 
t5, p1024_mod); + sp_1024_mont_sub_42(xa, xa, t5, p1024_mod); sp_1024_mont_sub_42(xs, xs, t5, p1024_mod); - sp_1024_mont_dbl_42(t1, y, p1024_mod); - sp_1024_mont_sub_42(x, x, t1, p1024_mod); + sp_1024_mont_dbl_42(t1, ya, p1024_mod); + sp_1024_mont_sub_42(xa, xa, t1, p1024_mod); sp_1024_mont_sub_42(xs, xs, t1, p1024_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_1024_mont_sub_42(ys, y, xs, p1024_mod); - sp_1024_mont_sub_42(y, y, x, p1024_mod); - sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(ys, ya, xs, p1024_mod); + sp_1024_mont_sub_42(ya, ya, xa, p1024_mod); + sp_1024_mont_mul_42(ya, ya, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(t6, p1024_mod, t6, p1024_mod); sp_1024_mont_mul_42(ys, ys, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(y, y, t5, p1024_mod); + sp_1024_mont_sub_42(ya, ya, t5, p1024_mod); sp_1024_mont_sub_42(ys, ys, t5, p1024_mod); } @@ -37674,12 +46399,12 @@ static void sp_1024_ecc_recode_7_42(const sp_digit* k, ecc_recode_1024* v) static int sp_1024_ecc_mulmod_win_add_sub_42(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; sp_digit* tmp = NULL; #else sp_point_1024 t[65+2]; - sp_digit tmp[2 * 42 * 6]; + sp_digit tmp[2 * 42 * 37]; #endif sp_point_1024* rt = NULL; sp_point_1024* p = NULL; @@ -37692,13 +46417,13 @@ static int sp_1024_ecc_mulmod_win_add_sub_42(sp_point_1024* r, const sp_point_10 (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * (65+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 
42 * 6, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 37, heap, DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; @@ -37792,7 +46517,7 @@ static int sp_1024_ecc_mulmod_win_add_sub_42(sp_point_1024* r, const sp_point_10 } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -37813,76 +46538,75 @@ static int sp_1024_ecc_mulmod_win_add_sub_42(sp_point_1024* r, const sp_point_10 * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*42; - sp_digit* t3 = t + 4*42; - sp_digit* t4 = t + 6*42; - sp_digit* t5 = t + 8*42; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*42; + sp_digit* t6 = t + 4*42; + sp_digit* t1 = t + 6*42; + sp_digit* t4 = t + 8*42; + sp_digit* t5 = t + 10*42; - /* Check double */ - (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_42(t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(p->x, t2) & + sp_1024_cmp_equal_42(p->y, t4)) { sp_1024_proj_point_dbl_42(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<42; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<42; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<42; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ - sp_1024_mont_sub_42(t2, t2, x, p1024_mod); + sp_1024_mont_sub_42(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ - sp_1024_mont_sub_42(t4, t4, y, p1024_mod); + sp_1024_mont_sub_42(t4, t4, p->y, p1024_mod); /* Z3 = H*Z1 */ - sp_1024_mont_mul_42(z, z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_1024_mont_sqr_42(t1, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t3, x, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(x, t1, t5, p1024_mod); - 
sp_1024_mont_dbl_42(t1, t3, p1024_mod); - sp_1024_mont_sub_42(x, x, t1, p1024_mod); + sp_1024_mont_sqr_42(t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t3, p->x, t1, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t1, t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(t2, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); + sp_1024_mont_dbl_42(t5, t3, p1024_mod); + sp_1024_mont_sub_42(x, t2, t5, p1024_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_1024_mont_sub_42(t3, t3, x, p1024_mod); sp_1024_mont_mul_42(t3, t3, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(y, t3, t5, p1024_mod); + sp_1024_mont_mul_42(t1, t1, p->y, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(y, t3, t1, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 42; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -37923,7 +46647,7 @@ static void sp_1024_proj_to_affine_42(sp_point_1024* a, sp_digit* t) static int sp_1024_gen_stripe_table_42(const sp_point_1024* a, sp_table_entry_1024* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; #else sp_point_1024 t[3]; @@ -37936,7 +46660,7 @@ static int sp_1024_gen_stripe_table_42(const sp_point_1024* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, 
heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -37991,7 +46715,7 @@ static int sp_1024_gen_stripe_table_42(const sp_point_1024* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -38020,12 +46744,12 @@ static int sp_1024_ecc_mulmod_stripe_42(sp_point_1024* r, const sp_point_1024* g const sp_table_entry_1024* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* rt = NULL; sp_digit* t = NULL; #else sp_point_1024 rt[2]; - sp_digit t[2 * 42 * 5]; + sp_digit t[2 * 42 * 37]; #endif sp_point_1024* p = NULL; int i; @@ -38040,13 +46764,13 @@ static int sp_1024_ecc_mulmod_stripe_42(sp_point_1024* r, const sp_point_1024* g (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 37, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -38091,7 +46815,7 @@ static int sp_1024_ecc_mulmod_stripe_42(sp_point_1024* r, const sp_point_1024* g } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -38134,7 +46858,7 @@ static THREAD_LS_T int sp_cache_1024_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache) @@ -38205,23 +46929,36 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_42(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 42 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 42 * 38]; +#endif sp_cache_1024_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_1024 == 0) { - wc_InitMutex(&sp_cache_1024_lock); - initCacheMutex_1024 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 42 * 38, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_1024 == 0) { + wc_InitMutex(&sp_cache_1024_lock); + initCacheMutex_1024 = 1; + } + if (wc_LockMutex(&sp_cache_1024_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_1024_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -38242,6 +46979,9 @@ static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, const } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -38260,7 +47000,7 @@ static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, const int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -38269,7 +47009,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK 
point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -38292,7 +47032,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_1024_point_to_ecc_point_42(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -38319,6 +47059,16 @@ static int sp_1024_ecc_mulmod_base_42(sp_point_1024* r, const sp_digit* k, return sp_1024_ecc_mulmod_42(r, &p1024_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_1024_ecc_mulmod_base_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_1024_ecc_mulmod_42_nb(sp_ctx, r, &p1024_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -42196,7 +50946,7 @@ static int sp_1024_ecc_mulmod_base_42(sp_point_1024* r, const sp_digit* k, */ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -42205,7 +50955,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -42227,7 +50977,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_1024_point_to_ecc_point_42(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42241,7 +50991,7 @@ int 
sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -42251,25 +51001,25 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else sp_point_1024 point[2]; - sp_digit k[42 + 42 * 2 * 5]; + sp_digit k[42 + 42 * 2 * 37]; #endif sp_point_1024* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (42 + 42 * 2 * 5), + sizeof(sp_digit) * (42 + 42 * 2 * 37), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -42305,7 +51055,7 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, err = sp_1024_point_to_ecc_point_42(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -42328,12 +51078,12 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK 
sp_point_1024* point = NULL; sp_digit* t = NULL; #else sp_point_1024 point[1]; - sp_digit t[5 * 2 * 42]; + sp_digit t[38 * 2 * 42]; #endif int err = MP_OKAY; @@ -42349,7 +51099,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); @@ -42357,7 +51107,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = MEMORY_E; } if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 42, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 2 * 42, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -42373,7 +51123,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, *len = sizeof(sp_table_entry_1024) * 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42431,7 +51181,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -42440,7 +51190,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) { @@ -42469,7 +51219,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, err = sp_1024_point_to_ecc_point_42(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42479,7 +51229,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, return err; } -/* Multiply p* in projective co-ordinates by q*. +/* Multiply p* in projective coordinates by q*. * * r.x = p.x - (p.y * q.y) * r.y = (p.x * q.y) + p.y @@ -42505,7 +51255,7 @@ static void sp_1024_proj_mul_qx1_42(sp_digit* px, sp_digit* py, sp_1024_mont_add_42(py, t1, py, p1024_mod); } -/* Square p* in projective co-ordinates. +/* Square p* in projective coordinates. * * px' = (p.x + p.y) * (p.x - p.y) = p.x^2 - p.y^2 * py' = 2 * p.x * p.y @@ -42544,8 +51294,8 @@ static void sp_1024_proj_sqr_42(sp_digit* px, sp_digit* py, sp_digit* t) */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; @@ -42553,7 +51303,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) sp_digit* b; sp_digit* e; #else - sp_digit t[4 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_digit tx[2 * 42]; sp_digit ty[2 * 42]; sp_digit b[2 * 42]; @@ -42564,9 +51314,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) int bits; int i; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 42 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 40 * 42 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -42574,13 +51324,13 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { 
-#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 42 * 2; - ty = td + 5 * 42 * 2; - b = td + 6 * 42 * 2; - e = td + 7 * 42 * 2; + tx = td + 36 * 42 * 2; + ty = td + 37 * 42 * 2; + b = td + 38 * 42 * 2; + e = td + 39 * 42 * 2; #endif r = ty; @@ -42618,8 +51368,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -44443,14 +53193,14 @@ static const sp_digit sp_1024_g_table[256][42] = { */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; sp_digit* ty; #else - sp_digit t[4 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_digit tx[2 * 42]; sp_digit ty[2 * 42]; #endif @@ -44462,9 +53212,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) (void)base; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 42 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 42 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -44472,11 +53222,11 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if 
(defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 42 * 2; - ty = td + 5 * 42 * 2; + tx = td + 36 * 42 * 2; + ty = td + 37 * 42 * 2; #endif r = ty; @@ -44516,8 +53266,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -44526,7 +53276,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) } #endif /* WOLFSSL_SP_SMALL */ -/* Multiply p* by q* in projective co-ordinates. +/* Multiply p* by q* in projective coordinates. * * p.x' = (p.x * q.x) - (p.y * q.y) * p.y' = (p.x * q.y) + (p.y * q.x) @@ -44641,7 +53391,7 @@ static void sp_1024_accumulate_line_dbl_42(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_42(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_42(t1, ty, p1024_mod); + sp_1024_mont_div2_42(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_42(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -44661,7 +53411,7 @@ static void sp_1024_accumulate_line_dbl_42(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_42(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_42(t1, t1, p1024_mod); + sp_1024_mont_div2_42(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_42(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -44787,15 +53537,15 @@ static void sp_1024_accumulate_line_add_one_42(sp_digit* vx, sp_digit* vy, int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err 
= MP_OKAY; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; sp_digit* vy; sp_digit* qx_px; #else - sp_digit t[6 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_digit vx[2 * 42]; sp_digit vy[2 * 42]; sp_digit qx_px[2 * 42]; @@ -44817,10 +53567,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_42(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 42 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 42 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -44829,12 +53579,12 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 42 * 2; - vy = td + 7 * 42 * 2; - qx_px = td + 8 * 42 * 2; + vx = td + 36 * 42 * 2; + vy = td + 37 * 42 * 2; + qx_px = td + 38 * 42 * 2; #endif r = vy; @@ -44886,8 +53636,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45079,7 +53829,7 @@ static void sp_1024_accumulate_line_dbl_n_42(sp_digit* vx, sp_digit* vy, 
/* ty = py ^ 2 */ sp_1024_mont_sqr_42(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_42(t1, ty, p1024_mod); + sp_1024_mont_div2_42(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_42(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -45117,7 +53867,7 @@ static void sp_1024_accumulate_line_dbl_n_42(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_42(p->y, p->y, p1024_mod); + sp_1024_mont_div2_42(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -45165,8 +53915,8 @@ static const signed char sp_1024_order_op[] = { int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -45176,7 +53926,7 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) sp_digit (*pre_nvy)[84]; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_digit vx[2 * 42]; sp_digit vy[2 * 42]; sp_digit pre_vx[16][84]; @@ -45202,10 +53952,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_42(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 42 * 2 + 16 * sizeof(sp_point_1024), NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 42 * 2 + 16 * sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -45214,15 +53964,15 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || 
defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 42 * 2; - vy = td + 7 * 42 * 2; - pre_vx = (sp_digit(*)[84])(td + 8 * 42 * 2); - pre_vy = (sp_digit(*)[84])(td + 24 * 42 * 2); - pre_nvy = (sp_digit(*)[84])(td + 40 * 42 * 2); - pre_p = (sp_point_1024*)(td + 56 * 42 * 2); + vx = td + 36 * 42 * 2; + vy = td + 37 * 42 * 2; + pre_vx = (sp_digit(*)[84])(td + 38 * 42 * 2); + pre_vy = (sp_digit(*)[84])(td + 54 * 42 * 2); + pre_nvy = (sp_digit(*)[84])(td + 70 * 42 * 2); + pre_p = (sp_point_1024*)(td + 86 * 42 * 2); #endif r = vy; @@ -45313,8 +54063,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45397,10 +54147,9 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, static void sp_1024_accum_dbl_calc_lc_42(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 42; - sp_digit* t2 = t + 2 * 2 * 42; - sp_digit* l = t + 4 * 2 * 42; - + sp_digit* t1 = t + 33 * 2 * 42; + sp_digit* t2 = t + 34 * 2 * 42; + sp_digit* l = t + 35 * 2 * 42; /* l = 1 / 2 * p.y */ sp_1024_mont_dbl_42(l, py, p1024_mod); @@ -45442,10 +54191,9 @@ static void sp_1024_accum_add_calc_lc_42(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, const sp_digit* cx, const sp_digit* cy, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 42; - sp_digit* c = t + 2 * 2 * 42; - sp_digit* l = t + 4 * 2 * 42; - + sp_digit* t1 = t + 33 * 2 * 42; + sp_digit* c = t + 34 * 2 * 42; + sp_digit* l = t + 35 * 2 * 42; /* l = 1 / (c.x - p.x) */ sp_1024_mont_sub_42(l, cx, 
px, p1024_mod); @@ -45556,13 +54304,13 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_point_1024 pre_p[16]; sp_point_1024 pd; sp_point_1024 cd; @@ -45596,11 +54344,11 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, err = sp_1024_point_new_42(NULL, negd, neg); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 42 * 2 + 16 * sizeof(sp_point_1024), NULL, - DYNAMIC_TYPE_TMP_BUFFER); + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 42 * 2 + 16 * + sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; } @@ -45608,10 +54356,10 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - pre_p = (sp_point_1024*)(td + 6 * 42 * 2); + pre_p = (sp_point_1024*)(td + 36 * 42 * 2); #endif sp_1024_point_from_ecc_point_42(p, pm); @@ -45642,7 +54390,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024)); for (j = 0; j < sp_1024_order_op_pre[1]; j++) { - sp_1024_accum_dbl_calc_lc_42(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_42(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; 
sp_1024_proj_point_dbl_42(c, c, t); sp_1024_mont_map_42(c, t); @@ -45671,7 +54420,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, } for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) { - sp_1024_accum_dbl_calc_lc_42(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_42(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_42(c, c, t); sp_1024_mont_map_42(c, t); @@ -45681,8 +54431,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, *len = sizeof(sp_table_entry_1024) * 1167; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45716,8 +54466,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res, const byte* table, word32 len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -45726,7 +54476,7 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, sp_digit (*pre_vy)[84]; sp_digit (*pre_nvy)[84]; #else - sp_digit t[6 * 2 * 42]; + sp_digit t[36 * 2 * 42]; sp_digit vx[2 * 42]; sp_digit vy[2 * 42]; sp_digit pre_vx[16][84]; @@ -45759,10 +54509,10 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_point_new_42(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 42 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 42 
* 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -45771,14 +54521,14 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 42 * 2; - vy = td + 7 * 42 * 2; - pre_vx = (sp_digit(*)[84])(td + 8 * 42 * 2); - pre_vy = (sp_digit(*)[84])(td + 24 * 42 * 2); - pre_nvy = (sp_digit(*)[84])(td + 40 * 42 * 2); + vx = td + 36 * 42 * 2; + vy = td + 37 * 42 * 2; + pre_vx = (sp_digit(*)[84])(td + 38 * 42 * 2); + pre_vy = (sp_digit(*)[84])(td + 54 * 42 * 2); + pre_nvy = (sp_digit(*)[84])(td + 70 * 42 * 2); #endif r = vy; @@ -45876,8 +54626,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45889,22 +54639,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_42(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31] | - a[32] | a[33] | a[34] | a[35] | a[36] | a[37] | a[38] | a[39] | - a[40] | a[41]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* @@ -45941,7 +54675,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -45951,7 +54685,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) static int sp_1024_ecc_is_point_42(const sp_point_1024* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[42 * 4]; @@ -45960,7 +54694,7 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, sp_int32 n; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 42 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -45970,29 +54704,30 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 42; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_42(t1, point->y); (void)sp_1024_mod_42(t1, t1, p1024_mod); sp_1024_sqr_42(t2, point->x); (void)sp_1024_mod_42(t2, t2, p1024_mod); sp_1024_mul_42(t2, t2, point->x); (void)sp_1024_mod_42(t2, t2, p1024_mod); - (void)sp_1024_sub_42(t2, p1024_mod, t2); - sp_1024_mont_add_42(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_42(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_42(t1, p1024_mod); - sp_1024_cond_sub_42(t1, t1, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(t1, t1, p1024_mod, ~(n >> 24)); sp_1024_norm_42(t1); if (!sp_1024_iszero_42(t1)) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -46000,7 +54735,7 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -46009,7 +54744,7 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, */ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* pub = NULL; #else sp_point_1024 pub[1]; @@ -46017,7 +54752,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -46032,7 +54767,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) err = sp_1024_ecc_is_point_42(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -46054,7 +54789,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_1024* pub = NULL; #else @@ -46075,7 +54810,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); @@ -46141,7 +54876,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -46152,6 +54887,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } #endif #endif /* WOLFSSL_SP_1024 */ +#endif /* WOLFCRYPT_HAVE_SAKKE */ #endif /* WOLFSSL_HAVE_SP_ECC */ #endif /* SP_WORD_SIZE == 32 */ #endif /* !WOLFSSL_SP_ASM */ diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 67ad87b03..5619989b6 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -1,22 +1,12 @@ /* sp.c * - * Copyright (C) 2006-2021 wolfSSL Inc. + * Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved. * * This file is part of wolfSSL. * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Contact licensing@wolfssl.com with any questions or comments. * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * https://www.wolfssl.com */ /* Implementation by Sean Parkinson. 
*/ @@ -49,37 +39,59 @@ #endif #endif +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#undef WOLFSSL_SP_SMALL_STACK +#define WOLFSSL_SP_SMALL_STACK +#endif + #include +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif + #ifndef WOLFSSL_SP_ASM #if SP_WORD_SIZE == 64 #define SP_PRINT_NUM(var, name, total, words, bits) \ do { \ int ii; \ - byte n[bits / 8]; \ - sp_digit s[words]; \ - XMEMCPY(s, var, sizeof(s)); \ - sp_##total##_norm_##words(s); \ - sp_##total##_to_bin_##words(s, n); \ + byte nb[(bits + 7) / 8]; \ + sp_digit _s[words]; \ + XMEMCPY(_s, var, sizeof(_s)); \ + sp_##total##_norm_##words(_s); \ + sp_##total##_to_bin_##words(_s, nb); \ fprintf(stderr, name "=0x"); \ - for (ii=0; iiused; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 60); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 60); } #elif DIGIT_BIT > 61 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffffffffffffL; s = 61U - s; @@ -173,12 +188,12 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 61) { r[j] &= 0x1fffffffffffffffL; @@ -223,7 +238,7 @@ static void sp_2048_to_bin_34(sp_digit* r, byte* a) r[i+1] += r[i] >> 61; r[i] &= 0x1fffffffffffffffL; } - j = 2048 / 8 - 1; + j = 2055 / 8 - 1; a[j] = 0; for (i=0; i<34 && j>=0; i++) { b = 0; @@ -397,7 
+412,7 @@ SP_NOINLINE static void sp_2048_sqr_34(sp_digit* r, const sp_digit* a) r[0] = (sp_digit)(c >> 61); } -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -495,7 +510,7 @@ static sp_digit sp_2048_cmp_17(const sp_digit* a, const sp_digit* b) int i; for (i=16; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 60); } return r; @@ -582,21 +597,22 @@ static void sp_2048_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_17(a + 17); for (i=0; i<16; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffffL; sp_2048_mul_add_17(a+i, m, mu); a[i+1] += a[i] >> 61; } - mu = (a[i] * mp) & 0xffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xffffffffffffL; sp_2048_mul_add_17(a+i, m, mu); a[i+1] += a[i] >> 61; a[i] &= 0x1fffffffffffffffL; sp_2048_mont_shift_17(a, a); - sp_2048_cond_sub_17(a, a, m, 0 - (((a[16] - m[16]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[16] - m[16]; + sp_2048_cond_sub_17(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_17(a); } @@ -662,9 +678,9 @@ SP_NOINLINE static void sp_2048_mul_17(sp_digit* r, const sp_digit* a, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_17(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_17(r, a, b); @@ -740,9 +756,9 @@ SP_NOINLINE static void sp_2048_sqr_17(sp_digit* r, const sp_digit* a) * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_2048_mont_sqr_17(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_17(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_17(r, a); @@ -770,6 +786,7 @@ SP_NOINLINE static void sp_2048_mul_d_17(sp_digit* r, const sp_digit* a, r[17] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -787,6 +804,7 @@ static void sp_2048_cond_add_17(sp_digit* r, const sp_digit* a, r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -817,263 +835,96 @@ SP_NOINLINE static void sp_2048_rshift_17(sp_digit* r, const sp_digit* a, r[16] = a[16] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 61) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 61); + sp_digit t0 = (sp_digit)(d & 0x1fffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 59; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 60) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 61); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 122) - (sp_digit)(d >> 122); + + m = d - ((sp_int128)r * div); + sign = 
(sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 30) + 1; - /* All 61 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 59); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 57) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 60); + t = (t / dv) << 30; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 55) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 29); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 53) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 51) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 49) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 47) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 45) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 43) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 41) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 39) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 37) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 35) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 33) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 31) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 29) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 27) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 35 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 39 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 47 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 59 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 61 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 61 bits from d1 and top 2 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_17(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -1090,11 +941,10 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 17 + 3]; @@ -1105,7 +955,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 17 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -1124,14 +974,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, t1[17 + 17] += t1[17 + 17 - 1] >> 61; t1[17 + 17 - 1] &= 0x1fffffffffffffffL; for (i=17; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[17 + i]; - d1 <<= 61; - d1 += t1[17 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_17(t1[17 + i], t1[17 + i - 1], dv); -#endif sp_2048_mul_d_17(t2, sd, r1); (void)sp_2048_sub_17(&t1[i], &t1[i], t2); @@ -1139,14 +982,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, t1[17 + i] -= t2[17]; t1[17 + i] += t1[17 + i - 1] >> 61; t1[17 + i - 1] &= 0x1fffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[17 + i]; - d1 <<= 61; - d1 -= t1[17 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_17(-t1[17 + i], -t1[17 + i - 1], dv); -#endif r1 -= t1[17 + i]; sp_2048_mul_d_17(t2, sd, r1); (void)sp_2048_add_17(&t1[i], &t1[i], t2); @@ -1155,7 +991,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, } t1[17 - 1] += t1[17 - 2] >> 61; t1[17 - 2] &= 0x1fffffffffffffffL; - r1 = t1[17 - 1] / dv; + r1 = sp_2048_word_div_word_17(t1[17 - 1], dv); sp_2048_mul_d_17(t2, sd, r1); sp_2048_sub_17(t1, t1, t2); @@ -1164,14 +1000,13 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 61; r[i] &= 0x1fffffffffffffffL; } - sp_2048_cond_add_17(r, r, sd, 0 - ((r[16] < 
0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_17(r, r, sd, r[16] >> 63); sp_2048_norm_17(r); sp_2048_rshift_17(r, r, 13); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -1198,13 +1033,15 @@ static int sp_2048_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 34]; @@ -1218,11 +1055,17 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 17 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 17 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -1277,20 +1120,19 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(t[0], m, mp); n = sp_2048_cmp_17(t[0], m); - sp_2048_cond_sub_17(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 17 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 34]; @@ -1304,11 +1146,17 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 17 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 17 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -1363,19 +1211,18 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(t[0], m, mp); n = sp_2048_cmp_17(t[0], m); - sp_2048_cond_sub_17(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 17 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 34) + 34]; @@ -1390,11 +1237,17 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 34) + 34), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 34) + 34), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -1504,12 +1357,11 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(rt, m, mp); n = sp_2048_cmp_17(rt, m); - sp_2048_cond_sub_17(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 34); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -1574,7 +1426,7 @@ static sp_digit sp_2048_cmp_34(const sp_digit* a, const sp_digit* b) int i; for (i=33; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 60); } return r; @@ -1664,17 +1516,18 @@ static void sp_2048_mont_reduce_34(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_34(a + 34); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<33; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffffL; sp_2048_mul_add_34(a+i, m, mu); a[i+1] += a[i] >> 61; } - mu = (a[i] * mp) & 0x7ffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffL; sp_2048_mul_add_34(a+i, m, mu); a[i+1] += a[i] >> 61; a[i] &= 0x1fffffffffffffffL; @@ -1692,18 +1545,18 @@ static void sp_2048_mont_reduce_34(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<33; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffffL; sp_2048_mul_add_34(a+i, m, mu); a[i+1] += a[i] >> 61; } - mu = (a[i] * mp) & 0x7ffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffL; sp_2048_mul_add_34(a+i, m, mu); a[i+1] += a[i] >> 61; a[i] &= 0x1fffffffffffffffL; #endif sp_2048_mont_shift_34(a, a); - sp_2048_cond_sub_34(a, a, m, 0 - (((a[33] - m[33]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[33] - m[33]; + sp_2048_cond_sub_34(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_34(a); } @@ -1714,9 +1567,9 @@ static void sp_2048_mont_reduce_34(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_34(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_34(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_34(r, a, b); @@ -1728,9 +1581,9 @@ static void sp_2048_mont_mul_34(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. 
* m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_34(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_34(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_34(r, a); @@ -1758,6 +1611,7 @@ SP_NOINLINE static void sp_2048_mul_d_68(sp_digit* r, const sp_digit* a, r[68] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -1771,10 +1625,11 @@ static void sp_2048_cond_add_34(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 17; i++) { + for (i = 0; i < 34; i++) { r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -1805,263 +1660,96 @@ SP_NOINLINE static void sp_2048_rshift_34(sp_digit* r, const sp_digit* a, r[33] = a[33] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 61) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 61); + sp_digit t0 = (sp_digit)(d & 0x1fffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 59; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 60) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += 
(sp_digit)(m >> 61); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 122) - (sp_digit)(d >> 122); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 30) + 1; - /* All 61 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 59); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 57) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 60); + t = (t / dv) << 30; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 55) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 29); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 53) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 51) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 49) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 47) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 45) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 43) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 41) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 39) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 37) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 35) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 33) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 31) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 29) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 27) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 35 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 39 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 47 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 59 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 61 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 61 bits from d1 and top 2 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_34(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -2078,11 +1766,10 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 34 + 3]; @@ -2093,7 +1780,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 34 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -2112,14 +1799,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, t1[34 + 34] += t1[34 + 34 - 1] >> 61; t1[34 + 34 - 1] &= 0x1fffffffffffffffL; for (i=34; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[34 + i]; - d1 <<= 61; - d1 += t1[34 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_34(t1[34 + i], t1[34 + i - 1], dv); -#endif sp_2048_mul_d_34(t2, sd, r1); (void)sp_2048_sub_34(&t1[i], &t1[i], t2); @@ -2127,14 +1807,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, t1[34 + i] -= t2[34]; t1[34 + i] += t1[34 + i - 1] >> 61; t1[34 + i - 1] &= 0x1fffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[34 + i]; - d1 <<= 61; - d1 -= t1[34 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_34(-t1[34 + i], -t1[34 + i - 1], dv); -#endif r1 -= t1[34 + i]; sp_2048_mul_d_34(t2, sd, r1); (void)sp_2048_add_34(&t1[i], &t1[i], t2); @@ -2143,7 +1816,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, } t1[34 - 1] += t1[34 - 2] >> 61; t1[34 - 2] &= 0x1fffffffffffffffL; - r1 = t1[34 - 1] / dv; + r1 = sp_2048_word_div_word_34(t1[34 - 1], dv); sp_2048_mul_d_34(t2, sd, r1); sp_2048_sub_34(t1, t1, t2); @@ -2152,14 +1825,13 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 61; r[i] &= 0x1fffffffffffffffL; } - sp_2048_cond_add_34(r, r, sd, 0 - 
((r[33] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_34(r, r, sd, r[33] >> 63); sp_2048_norm_34(r); sp_2048_rshift_34(r, r, 26); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -2187,13 +1859,15 @@ static int sp_2048_mod_34(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 68]; @@ -2207,11 +1881,17 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 34 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 34 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2266,20 +1946,19 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(t[0], m, mp); n = sp_2048_cmp_34(t[0], m); - sp_2048_cond_sub_34(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 34 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 68]; @@ -2293,11 +1972,17 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 34 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 34 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2352,19 +2037,18 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(t[0], m, mp); n = sp_2048_cmp_34(t[0], m); - sp_2048_cond_sub_34(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 34 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 68) + 68]; @@ -2379,11 +2063,17 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 68) + 68), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 68) + 68), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -2476,12 +2166,11 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(rt, m, mp); n = sp_2048_cmp_34(rt, m); - sp_2048_cond_sub_34(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 68); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -2508,7 +2197,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[34 * 5]; @@ -2516,8 +2205,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -2526,7 +2215,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 61) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -2540,7 +2229,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -2555,12 +2244,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_2048_from_bin(a, 34, in, inLen); -#if DIGIT_BIT >= 61 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -2579,7 +2268,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_34(a, a, m); } if (err == MP_OKAY) { - for (i=60; i>=0; i--) { + for 
(i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -2595,21 +2284,20 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_34(r, m, mp); mp = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_34(r, r, m, ~(mp >> 63)); sp_2048_to_bin_34(r, out); *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[34 * 5]; @@ -2617,14 +2305,14 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 256U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 61) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -2638,7 +2326,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -2653,12 +2341,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, m = r + 34 * 2; sp_2048_from_bin(a, 34, in, inLen); -#if DIGIT_BIT >= 61 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -2688,7 +2376,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_34(a, a, m); if (err == MP_OKAY) { - for (i=60; i>=0; i--) 
{ + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -2704,8 +2392,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_34(r, m, mp); mp = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(mp >> 63)); } } } @@ -2715,7 +2402,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -2750,7 +2437,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[34 * 4]; @@ -2784,7 +2471,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -2809,21 +2496,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 34); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[34 * 4]; @@ -2857,7 +2544,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -2882,14 +2569,14 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 34); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -2898,7 +2585,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[17 * 8]; @@ -2928,9 +2615,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -2961,6 +2654,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_17(tmpa); sp_2048_cond_add_17(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[16] >> 63)); sp_2048_cond_add_17(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[16] >> 63)); + sp_2048_norm_17(tmpa); sp_2048_from_mp(qi, 17, qim); sp_2048_mul_17(tmpa, tmpa, qi); @@ -2977,19 +2671,19 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) 
#endif { ForceZero(a, sizeof(sp_digit) * 17 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[17 * 13]; @@ -3020,9 +2714,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -3059,6 +2759,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_17(tmpa); sp_2048_cond_add_17(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[16] >> 63)); sp_2048_cond_add_17(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[16] >> 63)); + sp_2048_norm_17(tmpa); sp_2048_mul_17(tmpa, tmpa, qi); err = sp_2048_mod_17(tmpa, tmpa, p); } @@ -3072,12 +2773,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 17 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -3173,7 +2874,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[34 * 4]; @@ -3196,7 +2897,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 4, NULL, DYNAMIC_TYPE_DH); @@ -3221,20 +2922,20 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 34U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[34 * 4]; @@ -3258,7 +2959,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -3283,14 +2984,14 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 34U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -3320,11 +3021,13 @@ SP_NOINLINE static void sp_2048_lshift_34(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. 
+ * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[103]; @@ -3339,11 +3042,17 @@ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 103, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 103, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -3409,17 +3118,15 @@ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_34(r, r, tmp); sp_2048_norm_34(r); o = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(o >> 63)); } sp_2048_mont_reduce_34(r, m, mp); n = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -3444,7 +3151,7 @@ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[34 * 4]; @@ -3468,7 +3175,7 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 34 * 4, NULL, DYNAMIC_TYPE_DH); @@ -3509,14 +3216,14 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 34U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -3539,7 +3246,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[17 * 4]; @@ -3562,7 +3269,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17 * 4, NULL, 
DYNAMIC_TYPE_DH); @@ -3588,20 +3295,20 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 34U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[17 * 4]; @@ -3625,7 +3332,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 17 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -3651,14 +3358,14 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 34U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -3714,20 +3421,23 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 57 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 56); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 56); } #elif DIGIT_BIT > 57 - int 
i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1ffffffffffffffL; s = 57U - s; @@ -3757,12 +3467,12 @@ static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 57) { r[j] &= 0x1ffffffffffffffL; @@ -3807,7 +3517,7 @@ static void sp_2048_to_bin_36(sp_digit* r, byte* a) r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - j = 2048 / 8 - 1; + j = 2055 / 8 - 1; a[j] = 0; for (i=0; i<36 && j>=0; i++) { b = 0; @@ -3888,179 +3598,110 @@ static void sp_2048_norm_36(sp_digit* a) SP_NOINLINE static void sp_2048_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint128 t0 = ((sp_uint128)a[ 0]) * b[ 0]; - sp_uint128 t1 = ((sp_uint128)a[ 0]) * b[ 1] - + ((sp_uint128)a[ 1]) * b[ 0]; - sp_uint128 t2 = ((sp_uint128)a[ 0]) * b[ 2] - + ((sp_uint128)a[ 1]) * b[ 1] - + ((sp_uint128)a[ 2]) * b[ 0]; - sp_uint128 t3 = ((sp_uint128)a[ 0]) * b[ 3] - + ((sp_uint128)a[ 1]) * b[ 2] - + ((sp_uint128)a[ 2]) * b[ 1] - + ((sp_uint128)a[ 3]) * b[ 0]; - sp_uint128 t4 = ((sp_uint128)a[ 0]) * b[ 4] - + ((sp_uint128)a[ 1]) * b[ 3] - + ((sp_uint128)a[ 2]) * b[ 2] - + ((sp_uint128)a[ 3]) * b[ 1] - + ((sp_uint128)a[ 4]) * b[ 0]; - sp_uint128 t5 = ((sp_uint128)a[ 0]) * b[ 5] - + ((sp_uint128)a[ 1]) * b[ 4] - + ((sp_uint128)a[ 2]) * b[ 3] - + ((sp_uint128)a[ 3]) * b[ 2] - + ((sp_uint128)a[ 4]) * b[ 1] - + ((sp_uint128)a[ 5]) * b[ 0]; - sp_uint128 t6 = ((sp_uint128)a[ 0]) * b[ 6] - + ((sp_uint128)a[ 1]) * b[ 5] - + ((sp_uint128)a[ 2]) * b[ 4] - + ((sp_uint128)a[ 3]) * b[ 3] - + ((sp_uint128)a[ 4]) * b[ 2] - + ((sp_uint128)a[ 5]) * b[ 1] - + ((sp_uint128)a[ 6]) * b[ 0]; - sp_uint128 t7 = 
((sp_uint128)a[ 0]) * b[ 7] - + ((sp_uint128)a[ 1]) * b[ 6] - + ((sp_uint128)a[ 2]) * b[ 5] - + ((sp_uint128)a[ 3]) * b[ 4] - + ((sp_uint128)a[ 4]) * b[ 3] - + ((sp_uint128)a[ 5]) * b[ 2] - + ((sp_uint128)a[ 6]) * b[ 1] - + ((sp_uint128)a[ 7]) * b[ 0]; - sp_uint128 t8 = ((sp_uint128)a[ 0]) * b[ 8] - + ((sp_uint128)a[ 1]) * b[ 7] - + ((sp_uint128)a[ 2]) * b[ 6] - + ((sp_uint128)a[ 3]) * b[ 5] - + ((sp_uint128)a[ 4]) * b[ 4] - + ((sp_uint128)a[ 5]) * b[ 3] - + ((sp_uint128)a[ 6]) * b[ 2] - + ((sp_uint128)a[ 7]) * b[ 1] - + ((sp_uint128)a[ 8]) * b[ 0]; - sp_uint128 t9 = ((sp_uint128)a[ 1]) * b[ 8] - + ((sp_uint128)a[ 2]) * b[ 7] - + ((sp_uint128)a[ 3]) * b[ 6] - + ((sp_uint128)a[ 4]) * b[ 5] - + ((sp_uint128)a[ 5]) * b[ 4] - + ((sp_uint128)a[ 6]) * b[ 3] - + ((sp_uint128)a[ 7]) * b[ 2] - + ((sp_uint128)a[ 8]) * b[ 1]; - sp_uint128 t10 = ((sp_uint128)a[ 2]) * b[ 8] - + ((sp_uint128)a[ 3]) * b[ 7] - + ((sp_uint128)a[ 4]) * b[ 6] - + ((sp_uint128)a[ 5]) * b[ 5] - + ((sp_uint128)a[ 6]) * b[ 4] - + ((sp_uint128)a[ 7]) * b[ 3] - + ((sp_uint128)a[ 8]) * b[ 2]; - sp_uint128 t11 = ((sp_uint128)a[ 3]) * b[ 8] - + ((sp_uint128)a[ 4]) * b[ 7] - + ((sp_uint128)a[ 5]) * b[ 6] - + ((sp_uint128)a[ 6]) * b[ 5] - + ((sp_uint128)a[ 7]) * b[ 4] - + ((sp_uint128)a[ 8]) * b[ 3]; - sp_uint128 t12 = ((sp_uint128)a[ 4]) * b[ 8] - + ((sp_uint128)a[ 5]) * b[ 7] - + ((sp_uint128)a[ 6]) * b[ 6] - + ((sp_uint128)a[ 7]) * b[ 5] - + ((sp_uint128)a[ 8]) * b[ 4]; - sp_uint128 t13 = ((sp_uint128)a[ 5]) * b[ 8] - + ((sp_uint128)a[ 6]) * b[ 7] - + ((sp_uint128)a[ 7]) * b[ 6] - + ((sp_uint128)a[ 8]) * b[ 5]; - sp_uint128 t14 = ((sp_uint128)a[ 6]) * b[ 8] - + ((sp_uint128)a[ 7]) * b[ 7] - + ((sp_uint128)a[ 8]) * b[ 6]; - sp_uint128 t15 = ((sp_uint128)a[ 7]) * b[ 8] - + ((sp_uint128)a[ 8]) * b[ 7]; - sp_uint128 t16 = ((sp_uint128)a[ 8]) * b[ 8]; + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[9]; - t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; - t3 += t2 
>> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_9(sp_digit* r, const sp_digit* a) -{ - sp_uint128 t0 = ((sp_uint128)a[ 0]) * a[ 0]; - sp_uint128 t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; - sp_uint128 t2 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 - + ((sp_uint128)a[ 1]) * a[ 1]; - sp_uint128 t3 = (((sp_uint128)a[ 0]) * a[ 3] - + ((sp_uint128)a[ 1]) * a[ 2]) * 2; - sp_uint128 t4 = (((sp_uint128)a[ 0]) * a[ 4] - + ((sp_uint128)a[ 1]) * a[ 3]) * 2 - + ((sp_uint128)a[ 2]) * a[ 2]; - sp_uint128 t5 = (((sp_uint128)a[ 0]) * a[ 5] - + ((sp_uint128)a[ 1]) * a[ 4] - + ((sp_uint128)a[ 2]) * a[ 3]) * 2; - sp_uint128 t6 = (((sp_uint128)a[ 0]) * a[ 6] - + ((sp_uint128)a[ 1]) * a[ 5] - + ((sp_uint128)a[ 2]) * a[ 4]) * 2 - + ((sp_uint128)a[ 3]) * a[ 3]; - sp_uint128 t7 = (((sp_uint128)a[ 0]) * a[ 7] - + ((sp_uint128)a[ 1]) * a[ 6] - + ((sp_uint128)a[ 2]) * a[ 5] - + ((sp_uint128)a[ 3]) * a[ 4]) * 2; - sp_uint128 t8 = (((sp_uint128)a[ 0]) * a[ 8] - + ((sp_uint128)a[ 1]) * a[ 7] - + ((sp_uint128)a[ 2]) * a[ 6] - + ((sp_uint128)a[ 3]) * a[ 5]) * 2 - + ((sp_uint128)a[ 4]) * a[ 4]; - sp_uint128 t9 = 
(((sp_uint128)a[ 1]) * a[ 8] - + ((sp_uint128)a[ 2]) * a[ 7] - + ((sp_uint128)a[ 3]) * a[ 6] - + ((sp_uint128)a[ 4]) * a[ 5]) * 2; - sp_uint128 t10 = (((sp_uint128)a[ 2]) * a[ 8] - + ((sp_uint128)a[ 3]) * a[ 7] - + ((sp_uint128)a[ 4]) * a[ 6]) * 2 - + ((sp_uint128)a[ 5]) * a[ 5]; - sp_uint128 t11 = (((sp_uint128)a[ 3]) * a[ 8] - + ((sp_uint128)a[ 4]) * a[ 7] - + ((sp_uint128)a[ 5]) * a[ 6]) * 2; - sp_uint128 t12 = (((sp_uint128)a[ 4]) * a[ 8] - + ((sp_uint128)a[ 5]) * a[ 7]) * 2 - + ((sp_uint128)a[ 6]) * a[ 6]; - sp_uint128 t13 = (((sp_uint128)a[ 5]) * a[ 8] - + ((sp_uint128)a[ 6]) * a[ 7]) * 2; - sp_uint128 t14 = (((sp_uint128)a[ 6]) * a[ 8]) * 2 - + ((sp_uint128)a[ 7]) * a[ 7]; - sp_uint128 t15 = (((sp_uint128)a[ 7]) * a[ 8]) * 2; - sp_uint128 t16 = ((sp_uint128)a[ 8]) * a[ 8]; - - t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; - t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; + t0 = ((sp_uint128)a[ 0]) * b[ 0]; + t1 = ((sp_uint128)a[ 0]) * b[ 1] + + ((sp_uint128)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 2] + + ((sp_uint128)a[ 1]) * b[ 1] + + ((sp_uint128)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 
57; + t1 = ((sp_uint128)a[ 0]) * b[ 3] + + ((sp_uint128)a[ 1]) * b[ 2] + + ((sp_uint128)a[ 2]) * b[ 1] + + ((sp_uint128)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 4] + + ((sp_uint128)a[ 1]) * b[ 3] + + ((sp_uint128)a[ 2]) * b[ 2] + + ((sp_uint128)a[ 3]) * b[ 1] + + ((sp_uint128)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 0]) * b[ 5] + + ((sp_uint128)a[ 1]) * b[ 4] + + ((sp_uint128)a[ 2]) * b[ 3] + + ((sp_uint128)a[ 3]) * b[ 2] + + ((sp_uint128)a[ 4]) * b[ 1] + + ((sp_uint128)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 6] + + ((sp_uint128)a[ 1]) * b[ 5] + + ((sp_uint128)a[ 2]) * b[ 4] + + ((sp_uint128)a[ 3]) * b[ 3] + + ((sp_uint128)a[ 4]) * b[ 2] + + ((sp_uint128)a[ 5]) * b[ 1] + + ((sp_uint128)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 0]) * b[ 7] + + ((sp_uint128)a[ 1]) * b[ 6] + + ((sp_uint128)a[ 2]) * b[ 5] + + ((sp_uint128)a[ 3]) * b[ 4] + + ((sp_uint128)a[ 4]) * b[ 3] + + ((sp_uint128)a[ 5]) * b[ 2] + + ((sp_uint128)a[ 6]) * b[ 1] + + ((sp_uint128)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 8] + + ((sp_uint128)a[ 1]) * b[ 7] + + ((sp_uint128)a[ 2]) * b[ 6] + + ((sp_uint128)a[ 3]) * b[ 5] + + ((sp_uint128)a[ 4]) * b[ 4] + + ((sp_uint128)a[ 5]) * b[ 3] + + ((sp_uint128)a[ 6]) * b[ 2] + + ((sp_uint128)a[ 7]) * b[ 1] + + ((sp_uint128)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 1]) * b[ 8] + + ((sp_uint128)a[ 2]) * b[ 7] + + ((sp_uint128)a[ 3]) * b[ 6] + + ((sp_uint128)a[ 4]) * b[ 5] + + ((sp_uint128)a[ 5]) * b[ 4] + + ((sp_uint128)a[ 6]) * b[ 3] + + ((sp_uint128)a[ 7]) * b[ 2] + + ((sp_uint128)a[ 8]) * b[ 1]; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 2]) * b[ 8] + + ((sp_uint128)a[ 3]) * b[ 7] + + ((sp_uint128)a[ 4]) * b[ 6] + + 
((sp_uint128)a[ 5]) * b[ 5] + + ((sp_uint128)a[ 6]) * b[ 4] + + ((sp_uint128)a[ 7]) * b[ 3] + + ((sp_uint128)a[ 8]) * b[ 2]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 3]) * b[ 8] + + ((sp_uint128)a[ 4]) * b[ 7] + + ((sp_uint128)a[ 5]) * b[ 6] + + ((sp_uint128)a[ 6]) * b[ 5] + + ((sp_uint128)a[ 7]) * b[ 4] + + ((sp_uint128)a[ 8]) * b[ 3]; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 4]) * b[ 8] + + ((sp_uint128)a[ 5]) * b[ 7] + + ((sp_uint128)a[ 6]) * b[ 6] + + ((sp_uint128)a[ 7]) * b[ 5] + + ((sp_uint128)a[ 8]) * b[ 4]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 5]) * b[ 8] + + ((sp_uint128)a[ 6]) * b[ 7] + + ((sp_uint128)a[ 7]) * b[ 6] + + ((sp_uint128)a[ 8]) * b[ 5]; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 6]) * b[ 8] + + ((sp_uint128)a[ 7]) * b[ 7] + + ((sp_uint128)a[ 8]) * b[ 6]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 7]) * b[ 8] + + ((sp_uint128)a[ 8]) * b[ 7]; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 8]) * b[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. (r = a + b) @@ -4163,26 +3804,6 @@ SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a, (void)sp_2048_add_18(r + 9, r + 9, z1); } -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z1[18]; - sp_digit* a1 = z1; - sp_digit* z2 = r + 18; - (void)sp_2048_add_9(a1, a, &a[9]); - sp_2048_sqr_9(z2, &a[9]); - sp_2048_sqr_9(z0, a); - sp_2048_sqr_9(z1, a1); - (void)sp_2048_sub_18(z1, z1, z2); - (void)sp_2048_sub_18(z1, z1, z0); - (void)sp_2048_add_18(r + 9, r + 9, z1); -} - /* Add b to a into r. 
(r = a + b) * * r A single precision integer. @@ -4265,6 +3886,103 @@ SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a, (void)sp_2048_add_36(r + 18, r + 18, z1); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_9(sp_digit* r, const sp_digit* a) +{ + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[9]; + + t0 = ((sp_uint128)a[ 0]) * a[ 0]; + t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 + + ((sp_uint128)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 3] + + ((sp_uint128)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 4] + + ((sp_uint128)a[ 1]) * a[ 3]) * 2 + + ((sp_uint128)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 5] + + ((sp_uint128)a[ 1]) * a[ 4] + + ((sp_uint128)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 6] + + ((sp_uint128)a[ 1]) * a[ 5] + + ((sp_uint128)a[ 2]) * a[ 4]) * 2 + + ((sp_uint128)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 7] + + ((sp_uint128)a[ 1]) * a[ 6] + + ((sp_uint128)a[ 2]) * a[ 5] + + ((sp_uint128)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 8] + + ((sp_uint128)a[ 1]) * a[ 7] + + ((sp_uint128)a[ 2]) * a[ 6] + + ((sp_uint128)a[ 3]) * a[ 5]) * 2 + + ((sp_uint128)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 1]) * a[ 8] + + ((sp_uint128)a[ 2]) * a[ 7] + + ((sp_uint128)a[ 3]) * a[ 6] + + ((sp_uint128)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 2]) * a[ 8] + + ((sp_uint128)a[ 3]) * a[ 7] + + 
((sp_uint128)a[ 4]) * a[ 6]) * 2 + + ((sp_uint128)a[ 5]) * a[ 5]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 3]) * a[ 8] + + ((sp_uint128)a[ 4]) * a[ 7] + + ((sp_uint128)a[ 5]) * a[ 6]) * 2; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 4]) * a[ 8] + + ((sp_uint128)a[ 5]) * a[ 7]) * 2 + + ((sp_uint128)a[ 6]) * a[ 6]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 5]) * a[ 8] + + ((sp_uint128)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 6]) * a[ 8]) * 2 + + ((sp_uint128)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[18]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 18; + (void)sp_2048_add_9(a1, a, &a[9]); + sp_2048_sqr_9(z2, &a[9]); + sp_2048_sqr_9(z0, a); + sp_2048_sqr_9(z1, a1); + (void)sp_2048_sub_18(z1, z1, z2); + (void)sp_2048_sub_18(z1, z1, z0); + (void)sp_2048_add_18(r + 9, r + 9, z1); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. @@ -4286,7 +4004,7 @@ SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) } #endif /* !WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. 
@@ -4393,16 +4111,16 @@ static sp_digit sp_2048_cmp_18(const sp_digit* a, const sp_digit* b) int i; r |= (a[17] - b[17]) & (0 - (sp_digit)1); - r |= (a[16] - b[16]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 56); for (i = 8; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -4509,21 +4227,22 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_18(a + 18); for (i=0; i<17; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; sp_2048_mul_add_18(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x7fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7fffffffffffffL; sp_2048_mul_add_18(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; sp_2048_mont_shift_18(a, a); - sp_2048_cond_sub_18(a, a, m, 0 - (((a[17] - m[17]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[17] - m[17]; + sp_2048_cond_sub_18(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_18(a); } @@ -4534,9 +4253,9 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_18(r, a, b); @@ -4548,9 +4267,9 @@ static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_18(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_18(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_18(r, a); @@ -4603,6 +4322,7 @@ SP_NOINLINE static void sp_2048_mul_d_18(sp_digit* r, const sp_digit* a, r[18] = (sp_digit)(t & 0x1ffffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -4629,6 +4349,7 @@ static void sp_2048_cond_add_18(sp_digit* r, const sp_digit* a, r[16] = a[16] + (b[16] & m); r[17] = a[17] + (b[17] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_2048_rshift_18(sp_digit* r, const sp_digit* a, byte n) @@ -4649,95 +4370,96 @@ SP_NOINLINE static void sp_2048_rshift_18(sp_digit* r, const sp_digit* a, r[17] = a[17] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 57); + sp_digit t0 = (sp_digit)(d & 0x1ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 55; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 56) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 57); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 114) - (sp_digit)(d >> 114); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 57) + 
d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_18(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -4754,11 +4476,10 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 18 + 3]; @@ -4769,7 +4490,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 18 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -4788,14 +4509,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, t1[18 + 18] += t1[18 + 18 - 1] >> 57; t1[18 + 18 - 1] &= 0x1ffffffffffffffL; for (i=18; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[18 + i]; - d1 <<= 57; - d1 += t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_18(t1[18 + i], t1[18 + i - 1], dv); -#endif sp_2048_mul_d_18(t2, sd, r1); (void)sp_2048_sub_18(&t1[i], &t1[i], t2); @@ -4803,14 +4517,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, t1[18 + i] -= t2[18]; t1[18 + i] += t1[18 + i - 1] >> 57; t1[18 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[18 + i]; - d1 <<= 57; - d1 -= t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_18(-t1[18 + i], -t1[18 + i - 1], dv); -#endif r1 -= t1[18 + i]; sp_2048_mul_d_18(t2, sd, r1); (void)sp_2048_add_18(&t1[i], &t1[i], t2); @@ 
-4819,7 +4526,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, } t1[18 - 1] += t1[18 - 2] >> 57; t1[18 - 2] &= 0x1ffffffffffffffL; - r1 = t1[18 - 1] / dv; + r1 = sp_2048_word_div_word_18(t1[18 - 1], dv); sp_2048_mul_d_18(t2, sd, r1); sp_2048_sub_18(t1, t1, t2); @@ -4828,14 +4535,13 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_2048_cond_add_18(r, r, sd, 0 - ((r[17] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_18(r, r, sd, r[17] >> 63); sp_2048_norm_18(r); sp_2048_rshift_18(r, r, 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -4862,13 +4568,15 @@ static int sp_2048_mod_18(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 36]; @@ -4882,11 +4590,17 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 18 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 18 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -4941,20 +4655,19 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(t[0], m, mp); n = sp_2048_cmp_18(t[0], m); - sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 18 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 36]; @@ -4968,11 +4681,17 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 18 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 18 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5027,19 +4746,18 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(t[0], m, mp); n = sp_2048_cmp_18(t[0], m); - sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 18 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 36) + 36]; @@ -5054,11 +4772,17 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 36) + 36), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 36) + 36), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5168,12 +4892,11 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(rt, m, mp); n = sp_2048_cmp_18(rt, m); - sp_2048_cond_sub_18(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 36); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -5230,18 +4953,18 @@ static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b) int i; r |= (a[35] - b[35]) & (0 - (sp_digit)1); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 56); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -5367,17 +5090,18 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_36(a + 36); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<35; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x1fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffL; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; @@ -5395,18 +5119,18 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<35; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * 
(sp_uint64)mp) & 0x1ffffffffffffffL; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x1fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffL; sp_2048_mul_add_36(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; #endif sp_2048_mont_shift_36(a, a); - sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] - m[35]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[35] - m[35]; + sp_2048_cond_sub_36(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_36(a); } @@ -5417,9 +5141,9 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_2048_mul_36(r, a, b); @@ -5431,9 +5155,9 @@ static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_2048_sqr_36(r, a); @@ -5480,6 +5204,7 @@ SP_NOINLINE static void sp_2048_mul_d_72(sp_digit* r, const sp_digit* a, r[72] = (sp_digit)(t & 0x1ffffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -5508,6 +5233,7 @@ static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a, r[34] = a[34] + (b[34] & m); r[35] = a[35] + (b[35] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, byte n) @@ -5530,95 +5256,96 @@ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, r[35] = a[35] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 57); + sp_digit t0 = (sp_digit)(d & 0x1ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 55; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 56) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 57); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 114) - (sp_digit)(d >> 114); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 57) + 
d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_2048_word_div_word_36(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -5635,11 +5362,10 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 36 + 3]; @@ -5650,7 +5376,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 36 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -5669,14 +5395,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + 36] += t1[36 + 36 - 1] >> 57; t1[36 + 36 - 1] &= 0x1ffffffffffffffL; for (i=36; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[36 + i]; - d1 <<= 57; - d1 += t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv); -#endif sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_sub_36(&t1[i], &t1[i], t2); @@ -5684,14 +5403,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + i] -= t2[36]; t1[36 + i] += t1[36 + i - 1] >> 57; t1[36 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[36 + i]; - d1 <<= 57; - d1 -= t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(-t1[36 + i], -t1[36 + i - 1], dv); -#endif r1 -= t1[36 + i]; sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_add_36(&t1[i], &t1[i], t2); @@ 
-5700,7 +5412,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, } t1[36 - 1] += t1[36 - 2] >> 57; t1[36 - 2] &= 0x1ffffffffffffffL; - r1 = t1[36 - 1] / dv; + r1 = sp_2048_word_div_word_36(t1[36 - 1], dv); sp_2048_mul_d_36(t2, sd, r1); sp_2048_sub_36(t1, t1, t2); @@ -5709,14 +5421,13 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_2048_cond_add_36(r, r, sd, 0 - ((r[35] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_36(r, r, sd, r[35] >> 63); sp_2048_norm_36(r); sp_2048_rshift_36(r, r, 4); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -5746,13 +5457,15 @@ static int sp_2048_mod_36(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 72]; @@ -5766,11 +5479,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5825,20 +5544,19 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 72]; @@ -5852,11 +5570,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 36 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -5911,19 +5635,18 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 72) + 72]; @@ -5938,11 +5661,17 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 72) + 72), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 72) + 72), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -6035,12 +5764,11 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(rt, m, mp); n = sp_2048_cmp_36(rt, m); - sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 72); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -6069,7 +5797,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[36 * 5]; @@ -6077,8 +5805,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -6087,7 +5815,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 57) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -6101,7 +5829,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -6116,12 +5844,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_2048_from_bin(a, 36, in, inLen); -#if DIGIT_BIT >= 57 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -6140,7 +5868,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_36(a, a, m); } if (err == MP_OKAY) { - for (i=56; i>=0; i--) { + for 
(i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -6156,21 +5884,20 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_36(r, m, mp); mp = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_36(r, r, m, ~(mp >> 63)); sp_2048_to_bin_36(r, out); *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[36 * 5]; @@ -6178,14 +5905,14 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 256U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 57) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 256U) { @@ -6199,7 +5926,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -6214,12 +5941,12 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, m = r + 36 * 2; sp_2048_from_bin(a, 36, in, inLen); -#if DIGIT_BIT >= 57 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -6249,7 +5976,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, err = sp_2048_mod_36(a, a, m); if (err == MP_OKAY) { - for (i=56; i>=0; i--) 
{ + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -6265,8 +5992,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_36(r, m, mp); mp = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(mp >> 63)); } } } @@ -6276,7 +6002,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -6311,7 +6037,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[36 * 4]; @@ -6345,7 +6071,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -6370,21 +6096,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 36); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[36 * 4]; @@ -6418,7 +6144,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -6443,14 +6169,14 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 36); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -6459,7 +6185,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[18 * 8]; @@ -6489,9 +6215,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -6522,6 +6254,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_18(tmpa); sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_norm_18(tmpa); sp_2048_from_mp(qi, 18, qim); sp_2048_mul_18(tmpa, tmpa, qi); @@ -6538,19 +6271,19 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) 
#endif { ForceZero(a, sizeof(sp_digit) * 18 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[18 * 13]; @@ -6581,9 +6314,15 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -6620,6 +6359,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_norm_18(tmpa); sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_norm_18(tmpa); sp_2048_mul_18(tmpa, tmpa, qi); err = sp_2048_mod_18(tmpa, tmpa, p); } @@ -6633,12 +6373,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 18 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -6734,7 +6474,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[36 * 4]; @@ -6757,7 +6497,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_DH); @@ -6782,20 +6522,20 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 36U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[36 * 4]; @@ -6819,7 +6559,7 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -6844,14 +6584,14 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 36U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -6950,11 +6690,13 @@ SP_NOINLINE static void sp_2048_lshift_36(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. 
+ * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[109]; @@ -6969,11 +6711,17 @@ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -7039,17 +6787,15 @@ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_36(r, r, tmp); sp_2048_norm_36(r); o = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(o >> 63)); } sp_2048_mont_reduce_36(r, m, mp); n = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -7074,7 +6820,7 @@ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[36 * 4]; @@ -7098,7 +6844,7 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL, DYNAMIC_TYPE_DH); @@ -7139,14 +6885,14 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 36U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -7169,7 +6915,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[18 * 4]; @@ -7192,7 +6938,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 4, NULL, 
DYNAMIC_TYPE_DH); @@ -7218,20 +6964,20 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_2048_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 36U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[18 * 4]; @@ -7255,7 +7001,7 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -7281,14 +7027,14 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 36U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -7348,20 +7094,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 60 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 59); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 59); } #elif DIGIT_BIT > 60 - int 
i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xfffffffffffffffL; s = 60U - s; @@ -7391,12 +7140,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 60) { r[j] &= 0xfffffffffffffffL; @@ -7441,7 +7190,7 @@ static void sp_3072_to_bin_52(sp_digit* r, byte* a) r[i+1] += r[i] >> 60; r[i] &= 0xfffffffffffffffL; } - j = 3072 / 8 - 1; + j = 3079 / 8 - 1; a[j] = 0; for (i=0; i<52 && j>=0; i++) { b = 0; @@ -7576,7 +7325,7 @@ SP_NOINLINE static void sp_3072_sqr_52(sp_digit* r, const sp_digit* a) r[0] = (sp_digit)(c >> 60); } -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -7674,7 +7423,7 @@ static sp_digit sp_3072_cmp_26(const sp_digit* a, const sp_digit* b) int i; for (i=25; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 59); } return r; @@ -7764,21 +7513,22 @@ static void sp_3072_mont_reduce_26(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_26(a + 26); for (i=0; i<25; i++) { - mu = (a[i] * mp) & 0xfffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffffffffffffffL; sp_3072_mul_add_26(a+i, m, mu); a[i+1] += a[i] >> 60; } - mu = (a[i] * mp) & 0xfffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffffffffL; sp_3072_mul_add_26(a+i, m, mu); a[i+1] += a[i] >> 60; a[i] &= 0xfffffffffffffffL; sp_3072_mont_shift_26(a, a); - sp_3072_cond_sub_26(a, a, m, 0 - (((a[25] - m[25]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[25] - m[25]; + sp_3072_cond_sub_26(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_26(a); } @@ -7828,9 +7578,9 @@ SP_NOINLINE static void sp_3072_mul_26(sp_digit* r, const sp_digit* a, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_26(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_26(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_26(r, a, b); @@ -7883,9 +7633,9 @@ SP_NOINLINE static void sp_3072_sqr_26(sp_digit* r, const sp_digit* a) * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_26(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_26(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_26(r, a); @@ -7913,6 +7663,7 @@ SP_NOINLINE static void sp_3072_mul_d_26(sp_digit* r, const sp_digit* a, r[26] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -7930,6 +7681,7 @@ static void sp_3072_cond_add_26(sp_digit* r, const sp_digit* a, r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. 
(r = a + b) * @@ -7960,175 +7712,96 @@ SP_NOINLINE static void sp_3072_rshift_26(sp_digit* r, const sp_digit* a, r[25] = a[25] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 60) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 60); + sp_digit t0 = (sp_digit)(d & 0xfffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 58; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 59) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 60); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 120) - (sp_digit)(d >> 120); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 29) + 1; - /* All 60 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 57); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 54) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 58); + t = (t / dv) << 29; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 51) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 27); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 48) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 45) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 42) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 39) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 36) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 33) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 30) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 27) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 34 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 24) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 21) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 40 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 18) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 15) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 46 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 12) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 9) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 52 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 6) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 3) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 58 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 60 bits from d1 and top 3 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_26(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -8145,11 +7818,10 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 26 + 3]; @@ -8160,7 +7832,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 26 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -8179,14 +7851,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, t1[26 + 26] += t1[26 + 26 - 1] >> 60; t1[26 + 26 - 1] &= 0xfffffffffffffffL; for (i=26; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[26 + i]; - d1 <<= 60; - d1 += t1[26 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_26(t1[26 + i], t1[26 + i - 1], dv); -#endif sp_3072_mul_d_26(t2, sd, r1); (void)sp_3072_sub_26(&t1[i], &t1[i], t2); @@ -8194,14 +7859,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, t1[26 + i] -= t2[26]; t1[26 + i] += t1[26 + i - 1] >> 60; t1[26 + i - 1] &= 0xfffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[26 + i]; - d1 <<= 60; - d1 -= t1[26 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_26(-t1[26 + i], -t1[26 + i - 1], dv); -#endif r1 -= t1[26 + i]; sp_3072_mul_d_26(t2, sd, r1); (void)sp_3072_add_26(&t1[i], &t1[i], t2); @@ -8210,7 +7868,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, } t1[26 - 1] += t1[26 - 2] >> 60; t1[26 - 2] &= 0xfffffffffffffffL; - r1 = t1[26 - 1] / dv; + r1 = sp_3072_word_div_word_26(t1[26 - 1], dv); sp_3072_mul_d_26(t2, sd, r1); sp_3072_sub_26(t1, t1, t2); @@ -8219,14 +7877,13 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 60; r[i] &= 0xfffffffffffffffL; } - sp_3072_cond_add_26(r, r, sd, 0 - ((r[25] < 
0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_26(r, r, sd, r[25] >> 63); sp_3072_norm_26(r); sp_3072_rshift_26(r, r, 24); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -8253,13 +7910,15 @@ static int sp_3072_mod_26(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 52]; @@ -8273,11 +7932,17 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 26 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 26 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -8332,20 +7997,19 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(t[0], m, mp); n = sp_3072_cmp_26(t[0], m); - sp_3072_cond_sub_26(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 26 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 52]; @@ -8359,11 +8023,17 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 26 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 26 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -8418,19 +8088,18 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(t[0], m, mp); n = sp_3072_cmp_26(t[0], m); - sp_3072_cond_sub_26(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 26 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 52) + 52]; @@ -8445,11 +8114,17 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 52) + 52), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 52) + 52), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -8559,12 +8234,11 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(rt, m, mp); n = sp_3072_cmp_26(rt, m); - sp_3072_cond_sub_26(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 52); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -8629,7 +8303,7 @@ static sp_digit sp_3072_cmp_52(const sp_digit* a, const sp_digit* b) int i; for (i=51; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 59); } return r; @@ -8725,17 +8399,18 @@ static void sp_3072_mont_reduce_52(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_52(a + 52); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<51; i++) { - mu = (a[i] * mp) & 0xfffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffffffffffffffL; sp_3072_mul_add_52(a+i, m, mu); a[i+1] += a[i] >> 60; } - mu = (a[i] * mp) & 0xfffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffL; sp_3072_mul_add_52(a+i, m, mu); a[i+1] += a[i] >> 60; a[i] &= 0xfffffffffffffffL; @@ -8753,18 +8428,18 @@ static void sp_3072_mont_reduce_52(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<51; i++) { - mu = (a[i] * mp) & 0xfffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffffffffffffffL; sp_3072_mul_add_52(a+i, m, mu); a[i+1] += a[i] >> 60; } - mu = (a[i] * mp) & 0xfffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffL; sp_3072_mul_add_52(a+i, m, mu); a[i+1] += a[i] >> 60; a[i] &= 0xfffffffffffffffL; #endif sp_3072_mont_shift_52(a, a); - sp_3072_cond_sub_52(a, a, m, 0 - (((a[51] - m[51]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[51] - m[51]; + sp_3072_cond_sub_52(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_52(a); } @@ -8775,9 +8450,9 @@ static void sp_3072_mont_reduce_52(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_52(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_52(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_52(r, a, b); @@ -8789,9 +8464,9 @@ static void sp_3072_mont_mul_52(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). 
- * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_52(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_52(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_52(r, a); @@ -8819,6 +8494,7 @@ SP_NOINLINE static void sp_3072_mul_d_104(sp_digit* r, const sp_digit* a, r[104] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -8832,10 +8508,11 @@ static void sp_3072_cond_add_52(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 26; i++) { + for (i = 0; i < 52; i++) { r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -8866,175 +8543,96 @@ SP_NOINLINE static void sp_3072_rshift_52(sp_digit* r, const sp_digit* a, r[51] = a[51] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 60) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 60); + sp_digit t0 = (sp_digit)(d & 0xfffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 58; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 59) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 60); + 
m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 120) - (sp_digit)(d >> 120); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 29) + 1; - /* All 60 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 57); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 54) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 58); + t = (t / dv) << 29; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 51) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 27); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 48) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 45) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 42) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 39) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 36) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 33) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 30) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 27) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 34 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 24) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 21) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 40 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 18) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 15) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 46 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 12) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 9) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 52 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 6) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 3) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 58 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 60 bits from d1 and top 3 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_52(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -9051,11 +8649,10 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 52 + 3]; @@ -9066,7 +8663,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 52 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -9085,14 +8682,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, t1[52 + 52] += t1[52 + 52 - 1] >> 60; t1[52 + 52 - 1] &= 0xfffffffffffffffL; for (i=52; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[52 + i]; - d1 <<= 60; - d1 += t1[52 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_52(t1[52 + i], t1[52 + i - 1], dv); -#endif sp_3072_mul_d_52(t2, sd, r1); (void)sp_3072_sub_52(&t1[i], &t1[i], t2); @@ -9100,14 +8690,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, t1[52 + i] -= t2[52]; t1[52 + i] += t1[52 + i - 1] >> 60; t1[52 + i - 1] &= 0xfffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[52 + i]; - d1 <<= 60; - d1 -= t1[52 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_52(-t1[52 + i], -t1[52 + i - 1], dv); -#endif r1 -= t1[52 + i]; sp_3072_mul_d_52(t2, sd, r1); (void)sp_3072_add_52(&t1[i], &t1[i], t2); @@ 
-9116,7 +8699,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, } t1[52 - 1] += t1[52 - 2] >> 60; t1[52 - 2] &= 0xfffffffffffffffL; - r1 = t1[52 - 1] / dv; + r1 = sp_3072_word_div_word_52(t1[52 - 1], dv); sp_3072_mul_d_52(t2, sd, r1); sp_3072_sub_52(t1, t1, t2); @@ -9125,14 +8708,13 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 60; r[i] &= 0xfffffffffffffffL; } - sp_3072_cond_add_52(r, r, sd, 0 - ((r[51] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_52(r, r, sd, r[51] >> 63); sp_3072_norm_52(r); sp_3072_rshift_52(r, r, 48); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -9160,13 +8742,15 @@ static int sp_3072_mod_52(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 104]; @@ -9180,11 +8764,17 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 52 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 52 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -9239,20 +8829,19 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(t[0], m, mp); n = sp_3072_cmp_52(t[0], m); - sp_3072_cond_sub_52(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 52 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 104]; @@ -9266,11 +8855,17 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 52 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 52 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -9325,19 +8920,18 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(t[0], m, mp); n = sp_3072_cmp_52(t[0], m); - sp_3072_cond_sub_52(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 52 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 104) + 104]; @@ -9352,11 +8946,17 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 104) + 104), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 104) + 104), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -9449,12 +9049,11 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(rt, m, mp); n = sp_3072_cmp_52(rt, m); - sp_3072_cond_sub_52(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 104); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -9481,7 +9080,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[52 * 5]; @@ -9489,8 +9088,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -9499,7 +9098,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 60) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -9513,7 +9112,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -9528,12 +9127,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_3072_from_bin(a, 52, in, inLen); -#if DIGIT_BIT >= 60 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -9552,7 +9151,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_52(a, a, m); } if (err == MP_OKAY) { - for (i=59; i>=0; i--) { + 
for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -9568,21 +9167,20 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_52(r, m, mp); mp = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_52(r, r, m, ~(mp >> 63)); sp_3072_to_bin_52(r, out); *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[52 * 5]; @@ -9590,14 +9188,14 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 384U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 60) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -9611,7 +9209,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -9626,12 +9224,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, m = r + 52 * 2; sp_3072_from_bin(a, 52, in, inLen); -#if DIGIT_BIT >= 60 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -9661,7 +9259,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_52(a, a, m); if (err == MP_OKAY) { - for (i=59; i>=0; 
i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -9677,8 +9275,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_52(r, m, mp); mp = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(mp >> 63)); } } } @@ -9688,7 +9285,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -9723,7 +9320,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[52 * 4]; @@ -9757,7 +9354,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -9782,21 +9379,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 52); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[52 * 4]; @@ -9830,7 +9427,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -9855,14 +9452,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 52); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -9871,7 +9468,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[26 * 8]; @@ -9901,9 +9498,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 26 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -9934,6 +9537,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_26(tmpa); sp_3072_cond_add_26(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[25] >> 63)); sp_3072_cond_add_26(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[25] >> 63)); + sp_3072_norm_26(tmpa); sp_3072_from_mp(qi, 26, qim); sp_3072_mul_26(tmpa, tmpa, qi); @@ -9950,19 +9554,19 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) 
#endif { ForceZero(a, sizeof(sp_digit) * 26 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[26 * 13]; @@ -9993,9 +9597,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 26 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -10032,6 +9642,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_26(tmpa); sp_3072_cond_add_26(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[25] >> 63)); sp_3072_cond_add_26(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[25] >> 63)); + sp_3072_norm_26(tmpa); sp_3072_mul_26(tmpa, tmpa, qi); err = sp_3072_mod_26(tmpa, tmpa, p); } @@ -10045,12 +9656,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 26 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -10146,7 +9757,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[52 * 4]; @@ -10169,7 +9780,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 4, NULL, DYNAMIC_TYPE_DH); @@ -10194,20 +9805,20 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 52U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[52 * 4]; @@ -10231,7 +9842,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -10256,14 +9867,14 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 52U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -10293,11 +9904,13 @@ SP_NOINLINE static void sp_3072_lshift_52(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. 
+ * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[157]; @@ -10312,11 +9925,17 @@ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 157, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 157, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -10382,17 +10001,15 @@ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_52(r, r, tmp); sp_3072_norm_52(r); o = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(o >> 63)); } sp_3072_mont_reduce_52(r, m, mp); n = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -10417,7 +10034,7 @@ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[52 * 4]; @@ -10441,7 +10058,7 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 52 * 4, NULL, DYNAMIC_TYPE_DH); @@ -10482,14 +10099,14 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 52U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -10512,7 +10129,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[26 * 4]; @@ -10535,7 +10152,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 26 * 4, NULL, 
DYNAMIC_TYPE_DH); @@ -10561,20 +10178,20 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 52U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[26 * 4]; @@ -10598,7 +10215,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 26 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -10624,14 +10241,14 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 52U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -10687,20 +10304,23 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 57 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 56); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 56); } #elif DIGIT_BIT > 
57 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1ffffffffffffffL; s = 57U - s; @@ -10730,12 +10350,12 @@ static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 57) { r[j] &= 0x1ffffffffffffffL; @@ -10780,7 +10400,7 @@ static void sp_3072_to_bin_54(sp_digit* r, byte* a) r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - j = 3072 / 8 - 1; + j = 3079 / 8 - 1; a[j] = 0; for (i=0; i<54 && j>=0; i++) { b = 0; @@ -10864,179 +10484,110 @@ static void sp_3072_norm_54(sp_digit* a) SP_NOINLINE static void sp_3072_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint128 t0 = ((sp_uint128)a[ 0]) * b[ 0]; - sp_uint128 t1 = ((sp_uint128)a[ 0]) * b[ 1] - + ((sp_uint128)a[ 1]) * b[ 0]; - sp_uint128 t2 = ((sp_uint128)a[ 0]) * b[ 2] - + ((sp_uint128)a[ 1]) * b[ 1] - + ((sp_uint128)a[ 2]) * b[ 0]; - sp_uint128 t3 = ((sp_uint128)a[ 0]) * b[ 3] - + ((sp_uint128)a[ 1]) * b[ 2] - + ((sp_uint128)a[ 2]) * b[ 1] - + ((sp_uint128)a[ 3]) * b[ 0]; - sp_uint128 t4 = ((sp_uint128)a[ 0]) * b[ 4] - + ((sp_uint128)a[ 1]) * b[ 3] - + ((sp_uint128)a[ 2]) * b[ 2] - + ((sp_uint128)a[ 3]) * b[ 1] - + ((sp_uint128)a[ 4]) * b[ 0]; - sp_uint128 t5 = ((sp_uint128)a[ 0]) * b[ 5] - + ((sp_uint128)a[ 1]) * b[ 4] - + ((sp_uint128)a[ 2]) * b[ 3] - + ((sp_uint128)a[ 3]) * b[ 2] - + ((sp_uint128)a[ 4]) * b[ 1] - + ((sp_uint128)a[ 5]) * b[ 0]; - sp_uint128 t6 = ((sp_uint128)a[ 0]) * b[ 6] - + ((sp_uint128)a[ 1]) * b[ 5] - + ((sp_uint128)a[ 2]) * b[ 4] - + ((sp_uint128)a[ 3]) * b[ 3] - + ((sp_uint128)a[ 4]) * b[ 2] - + ((sp_uint128)a[ 5]) * b[ 1] - + ((sp_uint128)a[ 6]) * b[ 0]; - 
sp_uint128 t7 = ((sp_uint128)a[ 0]) * b[ 7] - + ((sp_uint128)a[ 1]) * b[ 6] - + ((sp_uint128)a[ 2]) * b[ 5] - + ((sp_uint128)a[ 3]) * b[ 4] - + ((sp_uint128)a[ 4]) * b[ 3] - + ((sp_uint128)a[ 5]) * b[ 2] - + ((sp_uint128)a[ 6]) * b[ 1] - + ((sp_uint128)a[ 7]) * b[ 0]; - sp_uint128 t8 = ((sp_uint128)a[ 0]) * b[ 8] - + ((sp_uint128)a[ 1]) * b[ 7] - + ((sp_uint128)a[ 2]) * b[ 6] - + ((sp_uint128)a[ 3]) * b[ 5] - + ((sp_uint128)a[ 4]) * b[ 4] - + ((sp_uint128)a[ 5]) * b[ 3] - + ((sp_uint128)a[ 6]) * b[ 2] - + ((sp_uint128)a[ 7]) * b[ 1] - + ((sp_uint128)a[ 8]) * b[ 0]; - sp_uint128 t9 = ((sp_uint128)a[ 1]) * b[ 8] - + ((sp_uint128)a[ 2]) * b[ 7] - + ((sp_uint128)a[ 3]) * b[ 6] - + ((sp_uint128)a[ 4]) * b[ 5] - + ((sp_uint128)a[ 5]) * b[ 4] - + ((sp_uint128)a[ 6]) * b[ 3] - + ((sp_uint128)a[ 7]) * b[ 2] - + ((sp_uint128)a[ 8]) * b[ 1]; - sp_uint128 t10 = ((sp_uint128)a[ 2]) * b[ 8] - + ((sp_uint128)a[ 3]) * b[ 7] - + ((sp_uint128)a[ 4]) * b[ 6] - + ((sp_uint128)a[ 5]) * b[ 5] - + ((sp_uint128)a[ 6]) * b[ 4] - + ((sp_uint128)a[ 7]) * b[ 3] - + ((sp_uint128)a[ 8]) * b[ 2]; - sp_uint128 t11 = ((sp_uint128)a[ 3]) * b[ 8] - + ((sp_uint128)a[ 4]) * b[ 7] - + ((sp_uint128)a[ 5]) * b[ 6] - + ((sp_uint128)a[ 6]) * b[ 5] - + ((sp_uint128)a[ 7]) * b[ 4] - + ((sp_uint128)a[ 8]) * b[ 3]; - sp_uint128 t12 = ((sp_uint128)a[ 4]) * b[ 8] - + ((sp_uint128)a[ 5]) * b[ 7] - + ((sp_uint128)a[ 6]) * b[ 6] - + ((sp_uint128)a[ 7]) * b[ 5] - + ((sp_uint128)a[ 8]) * b[ 4]; - sp_uint128 t13 = ((sp_uint128)a[ 5]) * b[ 8] - + ((sp_uint128)a[ 6]) * b[ 7] - + ((sp_uint128)a[ 7]) * b[ 6] - + ((sp_uint128)a[ 8]) * b[ 5]; - sp_uint128 t14 = ((sp_uint128)a[ 6]) * b[ 8] - + ((sp_uint128)a[ 7]) * b[ 7] - + ((sp_uint128)a[ 8]) * b[ 6]; - sp_uint128 t15 = ((sp_uint128)a[ 7]) * b[ 8] - + ((sp_uint128)a[ 8]) * b[ 7]; - sp_uint128 t16 = ((sp_uint128)a[ 8]) * b[ 8]; + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[9]; - t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 
0x1ffffffffffffffL; - t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_9(sp_digit* r, const sp_digit* a) -{ - sp_uint128 t0 = ((sp_uint128)a[ 0]) * a[ 0]; - sp_uint128 t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; - sp_uint128 t2 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 - + ((sp_uint128)a[ 1]) * a[ 1]; - sp_uint128 t3 = (((sp_uint128)a[ 0]) * a[ 3] - + ((sp_uint128)a[ 1]) * a[ 2]) * 2; - sp_uint128 t4 = (((sp_uint128)a[ 0]) * a[ 4] - + ((sp_uint128)a[ 1]) * a[ 3]) * 2 - + ((sp_uint128)a[ 2]) * a[ 2]; - sp_uint128 t5 = (((sp_uint128)a[ 0]) * a[ 5] - + ((sp_uint128)a[ 1]) * a[ 4] - + ((sp_uint128)a[ 2]) * a[ 3]) * 2; - sp_uint128 t6 = (((sp_uint128)a[ 0]) * a[ 6] - + ((sp_uint128)a[ 1]) * a[ 5] - + ((sp_uint128)a[ 2]) * a[ 4]) * 2 - + ((sp_uint128)a[ 3]) * a[ 3]; - sp_uint128 t7 = (((sp_uint128)a[ 0]) * a[ 7] - + ((sp_uint128)a[ 1]) * a[ 6] - + ((sp_uint128)a[ 2]) * a[ 5] - + ((sp_uint128)a[ 3]) * a[ 4]) * 2; - sp_uint128 t8 = (((sp_uint128)a[ 0]) * a[ 8] - + ((sp_uint128)a[ 1]) * a[ 7] - + ((sp_uint128)a[ 2]) * a[ 6] - + ((sp_uint128)a[ 3]) * a[ 5]) * 2 - + ((sp_uint128)a[ 4]) * 
a[ 4]; - sp_uint128 t9 = (((sp_uint128)a[ 1]) * a[ 8] - + ((sp_uint128)a[ 2]) * a[ 7] - + ((sp_uint128)a[ 3]) * a[ 6] - + ((sp_uint128)a[ 4]) * a[ 5]) * 2; - sp_uint128 t10 = (((sp_uint128)a[ 2]) * a[ 8] - + ((sp_uint128)a[ 3]) * a[ 7] - + ((sp_uint128)a[ 4]) * a[ 6]) * 2 - + ((sp_uint128)a[ 5]) * a[ 5]; - sp_uint128 t11 = (((sp_uint128)a[ 3]) * a[ 8] - + ((sp_uint128)a[ 4]) * a[ 7] - + ((sp_uint128)a[ 5]) * a[ 6]) * 2; - sp_uint128 t12 = (((sp_uint128)a[ 4]) * a[ 8] - + ((sp_uint128)a[ 5]) * a[ 7]) * 2 - + ((sp_uint128)a[ 6]) * a[ 6]; - sp_uint128 t13 = (((sp_uint128)a[ 5]) * a[ 8] - + ((sp_uint128)a[ 6]) * a[ 7]) * 2; - sp_uint128 t14 = (((sp_uint128)a[ 6]) * a[ 8]) * 2 - + ((sp_uint128)a[ 7]) * a[ 7]; - sp_uint128 t15 = (((sp_uint128)a[ 7]) * a[ 8]) * 2; - sp_uint128 t16 = ((sp_uint128)a[ 8]) * a[ 8]; - - t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; - t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; + t0 = ((sp_uint128)a[ 0]) * b[ 0]; + t1 = ((sp_uint128)a[ 0]) * b[ 1] + + ((sp_uint128)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 2] + + ((sp_uint128)a[ 1]) * b[ 1] + + ((sp_uint128)a[ 2]) * b[ 0]; + t[ 1] = t1 & 
0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 0]) * b[ 3] + + ((sp_uint128)a[ 1]) * b[ 2] + + ((sp_uint128)a[ 2]) * b[ 1] + + ((sp_uint128)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 4] + + ((sp_uint128)a[ 1]) * b[ 3] + + ((sp_uint128)a[ 2]) * b[ 2] + + ((sp_uint128)a[ 3]) * b[ 1] + + ((sp_uint128)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 0]) * b[ 5] + + ((sp_uint128)a[ 1]) * b[ 4] + + ((sp_uint128)a[ 2]) * b[ 3] + + ((sp_uint128)a[ 3]) * b[ 2] + + ((sp_uint128)a[ 4]) * b[ 1] + + ((sp_uint128)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 6] + + ((sp_uint128)a[ 1]) * b[ 5] + + ((sp_uint128)a[ 2]) * b[ 4] + + ((sp_uint128)a[ 3]) * b[ 3] + + ((sp_uint128)a[ 4]) * b[ 2] + + ((sp_uint128)a[ 5]) * b[ 1] + + ((sp_uint128)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 0]) * b[ 7] + + ((sp_uint128)a[ 1]) * b[ 6] + + ((sp_uint128)a[ 2]) * b[ 5] + + ((sp_uint128)a[ 3]) * b[ 4] + + ((sp_uint128)a[ 4]) * b[ 3] + + ((sp_uint128)a[ 5]) * b[ 2] + + ((sp_uint128)a[ 6]) * b[ 1] + + ((sp_uint128)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 0]) * b[ 8] + + ((sp_uint128)a[ 1]) * b[ 7] + + ((sp_uint128)a[ 2]) * b[ 6] + + ((sp_uint128)a[ 3]) * b[ 5] + + ((sp_uint128)a[ 4]) * b[ 4] + + ((sp_uint128)a[ 5]) * b[ 3] + + ((sp_uint128)a[ 6]) * b[ 2] + + ((sp_uint128)a[ 7]) * b[ 1] + + ((sp_uint128)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 1]) * b[ 8] + + ((sp_uint128)a[ 2]) * b[ 7] + + ((sp_uint128)a[ 3]) * b[ 6] + + ((sp_uint128)a[ 4]) * b[ 5] + + ((sp_uint128)a[ 5]) * b[ 4] + + ((sp_uint128)a[ 6]) * b[ 3] + + ((sp_uint128)a[ 7]) * b[ 2] + + ((sp_uint128)a[ 8]) * b[ 1]; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 2]) * b[ 8] + + ((sp_uint128)a[ 3]) * b[ 7] + + 
((sp_uint128)a[ 4]) * b[ 6] + + ((sp_uint128)a[ 5]) * b[ 5] + + ((sp_uint128)a[ 6]) * b[ 4] + + ((sp_uint128)a[ 7]) * b[ 3] + + ((sp_uint128)a[ 8]) * b[ 2]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 3]) * b[ 8] + + ((sp_uint128)a[ 4]) * b[ 7] + + ((sp_uint128)a[ 5]) * b[ 6] + + ((sp_uint128)a[ 6]) * b[ 5] + + ((sp_uint128)a[ 7]) * b[ 4] + + ((sp_uint128)a[ 8]) * b[ 3]; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 4]) * b[ 8] + + ((sp_uint128)a[ 5]) * b[ 7] + + ((sp_uint128)a[ 6]) * b[ 6] + + ((sp_uint128)a[ 7]) * b[ 5] + + ((sp_uint128)a[ 8]) * b[ 4]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 5]) * b[ 8] + + ((sp_uint128)a[ 6]) * b[ 7] + + ((sp_uint128)a[ 7]) * b[ 6] + + ((sp_uint128)a[ 8]) * b[ 5]; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 6]) * b[ 8] + + ((sp_uint128)a[ 7]) * b[ 7] + + ((sp_uint128)a[ 8]) * b[ 6]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_uint128)a[ 7]) * b[ 8] + + ((sp_uint128)a[ 8]) * b[ 7]; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 8]) * b[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. (r = a + b) @@ -11165,48 +10716,6 @@ SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a, (void)sp_3072_add_18(&r[36], &r[36], p4); } -/* Square a into r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. 
- */ -SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a) -{ - sp_digit p0[18]; - sp_digit p1[18]; - sp_digit p2[18]; - sp_digit p3[18]; - sp_digit p4[18]; - sp_digit p5[18]; - sp_digit t0[18]; - sp_digit t1[18]; - sp_digit t2[18]; - sp_digit a0[9]; - sp_digit a1[9]; - sp_digit a2[9]; - (void)sp_3072_add_9(a0, a, &a[9]); - (void)sp_3072_add_9(a1, &a[9], &a[18]); - (void)sp_3072_add_9(a2, a0, &a[18]); - sp_3072_sqr_9(p0, a); - sp_3072_sqr_9(p2, &a[9]); - sp_3072_sqr_9(p4, &a[18]); - sp_3072_sqr_9(p1, a0); - sp_3072_sqr_9(p3, a1); - sp_3072_sqr_9(p5, a2); - XMEMSET(r, 0, sizeof(*r)*2U*27U); - (void)sp_3072_sub_18(t0, p3, p2); - (void)sp_3072_sub_18(t1, p1, p2); - (void)sp_3072_sub_18(t2, p5, t0); - (void)sp_3072_sub_18(t2, t2, t1); - (void)sp_3072_sub_18(t0, t0, p4); - (void)sp_3072_sub_18(t1, t1, p0); - (void)sp_3072_add_18(r, r, p0); - (void)sp_3072_add_18(&r[9], &r[9], t1); - (void)sp_3072_add_18(&r[18], &r[18], t2); - (void)sp_3072_add_18(&r[27], &r[27], t0); - (void)sp_3072_add_18(&r[36], &r[36], p4); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -11321,6 +10830,125 @@ SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a, (void)sp_3072_add_54(r + 27, r + 27, z1); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_3072_sqr_9(sp_digit* r, const sp_digit* a) +{ + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[9]; + + t0 = ((sp_uint128)a[ 0]) * a[ 0]; + t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 + + ((sp_uint128)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 3] + + ((sp_uint128)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 4] + + ((sp_uint128)a[ 1]) * a[ 3]) * 2 + + ((sp_uint128)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 5] + + ((sp_uint128)a[ 1]) * a[ 4] + + ((sp_uint128)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 6] + + ((sp_uint128)a[ 1]) * a[ 5] + + ((sp_uint128)a[ 2]) * a[ 4]) * 2 + + ((sp_uint128)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 0]) * a[ 7] + + ((sp_uint128)a[ 1]) * a[ 6] + + ((sp_uint128)a[ 2]) * a[ 5] + + ((sp_uint128)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 0]) * a[ 8] + + ((sp_uint128)a[ 1]) * a[ 7] + + ((sp_uint128)a[ 2]) * a[ 6] + + ((sp_uint128)a[ 3]) * a[ 5]) * 2 + + ((sp_uint128)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 1]) * a[ 8] + + ((sp_uint128)a[ 2]) * a[ 7] + + ((sp_uint128)a[ 3]) * a[ 6] + + ((sp_uint128)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 2]) * a[ 8] + + ((sp_uint128)a[ 3]) * a[ 7] + + ((sp_uint128)a[ 4]) * a[ 6]) * 2 + + ((sp_uint128)a[ 5]) * a[ 5]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 3]) * a[ 8] + + ((sp_uint128)a[ 4]) * a[ 7] + + ((sp_uint128)a[ 5]) * a[ 6]) * 2; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 4]) * a[ 8] + + 
((sp_uint128)a[ 5]) * a[ 7]) * 2 + + ((sp_uint128)a[ 6]) * a[ 6]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 5]) * a[ 8] + + ((sp_uint128)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_uint128)a[ 6]) * a[ 8]) * 2 + + ((sp_uint128)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_uint128)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_uint128)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[18]; + sp_digit p1[18]; + sp_digit p2[18]; + sp_digit p3[18]; + sp_digit p4[18]; + sp_digit p5[18]; + sp_digit t0[18]; + sp_digit t1[18]; + sp_digit t2[18]; + sp_digit a0[9]; + sp_digit a1[9]; + sp_digit a2[9]; + (void)sp_3072_add_9(a0, a, &a[9]); + (void)sp_3072_add_9(a1, &a[9], &a[18]); + (void)sp_3072_add_9(a2, a0, &a[18]); + sp_3072_sqr_9(p0, a); + sp_3072_sqr_9(p2, &a[9]); + sp_3072_sqr_9(p4, &a[18]); + sp_3072_sqr_9(p1, a0); + sp_3072_sqr_9(p3, a1); + sp_3072_sqr_9(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*27U); + (void)sp_3072_sub_18(t0, p3, p2); + (void)sp_3072_sub_18(t1, p1, p2); + (void)sp_3072_sub_18(t2, p5, t0); + (void)sp_3072_sub_18(t2, t2, t1); + (void)sp_3072_sub_18(t0, t0, p4); + (void)sp_3072_sub_18(t1, t1, p0); + (void)sp_3072_add_18(r, r, p0); + (void)sp_3072_add_18(&r[9], &r[9], t1); + (void)sp_3072_add_18(&r[18], &r[18], t2); + (void)sp_3072_add_18(&r[27], &r[27], t0); + (void)sp_3072_add_18(&r[36], &r[36], p4); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. 
@@ -11342,7 +10970,7 @@ SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a) } #endif /* !WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -11484,17 +11112,17 @@ static sp_digit sp_3072_cmp_27(const sp_digit* a, const sp_digit* b) int i; r |= (a[26] - b[26]) & (0 - (sp_digit)1); - r |= (a[25] - b[25]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[24] - b[24]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[25] - b[25]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[24] - b[24]) & ~(((sp_digit)0 - r) >> 56); for (i = 16; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -11615,21 +11243,22 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_27(a + 27); for (i=0; i<26; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; sp_3072_mul_add_27(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x3fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x3fffffffffffffL; sp_3072_mul_add_27(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; sp_3072_mont_shift_27(a, a); - sp_3072_cond_sub_27(a, a, m, 0 - (((a[26] - m[26]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[26] - m[26]; + sp_3072_cond_sub_27(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_27(a); } @@ -11640,9 +11269,9 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_27(r, a, b); @@ -11654,9 +11283,9 @@ static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_3072_mont_sqr_27(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_27(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_27(r, a); @@ -11712,6 +11341,7 @@ SP_NOINLINE static void sp_3072_mul_d_27(sp_digit* r, const sp_digit* a, r[27] = (sp_digit)(t & 0x1ffffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -11739,6 +11369,7 @@ static void sp_3072_cond_add_27(sp_digit* r, const sp_digit* a, r[25] = a[25] + (b[25] & m); r[26] = a[26] + (b[26] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_3072_rshift_27(sp_digit* r, const sp_digit* a, byte n) @@ -11760,95 +11391,96 @@ SP_NOINLINE static void sp_3072_rshift_27(sp_digit* r, const sp_digit* a, r[26] = a[26] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 57); + sp_digit t0 = (sp_digit)(d & 0x1ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 55; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 56) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 57); + m = d - ((sp_int128)r * 
div); + r += (sp_digit)(m >> 114) - (sp_digit)(d >> 114); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_27(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -11865,11 +11497,10 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 27 + 3]; @@ -11880,7 +11511,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 27 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -11899,14 +11530,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, t1[27 + 27] += t1[27 + 27 - 1] >> 57; t1[27 + 27 - 1] &= 0x1ffffffffffffffL; for (i=27; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[27 + i]; - d1 <<= 57; - d1 += t1[27 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_27(t1[27 + i], t1[27 + i - 1], dv); -#endif sp_3072_mul_d_27(t2, sd, r1); (void)sp_3072_sub_27(&t1[i], &t1[i], t2); @@ -11914,14 +11538,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, t1[27 + i] -= t2[27]; t1[27 + i] += t1[27 + i - 1] >> 57; t1[27 + i - 1] &= 
0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[27 + i]; - d1 <<= 57; - d1 -= t1[27 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_27(-t1[27 + i], -t1[27 + i - 1], dv); -#endif r1 -= t1[27 + i]; sp_3072_mul_d_27(t2, sd, r1); (void)sp_3072_add_27(&t1[i], &t1[i], t2); @@ -11930,7 +11547,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, } t1[27 - 1] += t1[27 - 2] >> 57; t1[27 - 2] &= 0x1ffffffffffffffL; - r1 = t1[27 - 1] / dv; + r1 = sp_3072_word_div_word_27(t1[27 - 1], dv); sp_3072_mul_d_27(t2, sd, r1); sp_3072_sub_27(t1, t1, t2); @@ -11939,14 +11556,13 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_3072_cond_add_27(r, r, sd, 0 - ((r[26] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_27(r, r, sd, r[26] >> 63); sp_3072_norm_27(r); sp_3072_rshift_27(r, r, 3); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -11973,13 +11589,15 @@ static int sp_3072_mod_27(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 54]; @@ -11993,11 +11611,17 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 27 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 27 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12052,20 +11676,19 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(t[0], m, mp); n = sp_3072_cmp_27(t[0], m); - sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 27 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 54]; @@ -12079,11 +11702,17 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 27 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 27 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12138,19 +11767,18 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(t[0], m, mp); n = sp_3072_cmp_27(t[0], m); - sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 27 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 54) + 54]; @@ -12165,11 +11793,17 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 54) + 54), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 54) + 54), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12279,12 +11913,11 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(rt, m, mp); n = sp_3072_cmp_27(rt, m); - sp_3072_cond_sub_27(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 54); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -12343,20 +11976,20 @@ static sp_digit sp_3072_cmp_54(const sp_digit* a, const sp_digit* b) int i; r |= (a[53] - b[53]) & (0 - (sp_digit)1); - r |= (a[52] - b[52]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[51] - b[51]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[50] - b[50]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[49] - b[49]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[48] - b[48]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[52] - b[52]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[51] - b[51]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[50] - b[50]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[49] - b[49]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[48] - b[48]) & ~(((sp_digit)0 - r) >> 56); for (i = 40; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -12485,17 +12118,18 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_54(a + 54); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<53; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; sp_3072_mul_add_54(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x7ffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffffffL; sp_3072_mul_add_54(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; @@ -12513,18 +12147,18 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit 
mp) } #else for (i=0; i<53; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; sp_3072_mul_add_54(a+i, m, mu); a[i+1] += a[i] >> 57; } - mu = (a[i] * mp) & 0x7ffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffffffL; sp_3072_mul_add_54(a+i, m, mu); a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; #endif sp_3072_mont_shift_54(a, a); - sp_3072_cond_sub_54(a, a, m, 0 - (((a[53] - m[53]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[53] - m[53]; + sp_3072_cond_sub_54(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_54(a); } @@ -12535,9 +12169,9 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_3072_mul_54(r, a, b); @@ -12549,9 +12183,9 @@ static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_3072_mont_sqr_54(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mont_sqr_54(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_3072_sqr_54(r, a); @@ -12598,6 +12232,7 @@ SP_NOINLINE static void sp_3072_mul_d_108(sp_digit* r, const sp_digit* a, r[108] = (sp_digit)(t & 0x1ffffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -12628,6 +12263,7 @@ static void sp_3072_cond_add_54(sp_digit* r, const sp_digit* a, r[52] = a[52] + (b[52] & m); r[53] = a[53] + (b[53] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_3072_rshift_54(sp_digit* r, const sp_digit* a, byte n) @@ -12652,95 +12288,96 @@ SP_NOINLINE static void sp_3072_rshift_54(sp_digit* r, const sp_digit* a, r[53] = a[53] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 57); + sp_digit t0 = (sp_digit)(d & 0x1ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 55; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 56) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 57); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 114) - (sp_digit)(d >> 114); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 
57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_3072_word_div_word_54(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -12757,11 +12394,10 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 54 + 3]; @@ -12772,7 +12408,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 54 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -12791,14 +12427,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, t1[54 + 54] += t1[54 + 54 - 1] >> 57; t1[54 + 54 - 1] &= 0x1ffffffffffffffL; for (i=54; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[54 + i]; - d1 <<= 57; - d1 += t1[54 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_54(t1[54 + i], t1[54 + i - 1], dv); -#endif sp_3072_mul_d_54(t2, sd, r1); (void)sp_3072_sub_54(&t1[i], &t1[i], t2); @@ -12806,14 +12435,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, t1[54 + i] -= t2[54]; t1[54 + i] += t1[54 + i - 1] >> 57; t1[54 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[54 + i]; - d1 <<= 57; - d1 -= t1[54 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_54(-t1[54 + i], -t1[54 + i - 1], dv); -#endif r1 -= t1[54 + i]; sp_3072_mul_d_54(t2, sd, r1); (void)sp_3072_add_54(&t1[i], &t1[i], 
t2); @@ -12822,7 +12444,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, } t1[54 - 1] += t1[54 - 2] >> 57; t1[54 - 2] &= 0x1ffffffffffffffL; - r1 = t1[54 - 1] / dv; + r1 = sp_3072_word_div_word_54(t1[54 - 1], dv); sp_3072_mul_d_54(t2, sd, r1); sp_3072_sub_54(t1, t1, t2); @@ -12831,14 +12453,13 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_3072_cond_add_54(r, r, sd, 0 - ((r[53] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_54(r, r, sd, r[53] >> 63); sp_3072_norm_54(r); sp_3072_rshift_54(r, r, 6); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -12868,13 +12489,15 @@ static int sp_3072_mod_54(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 108]; @@ -12888,11 +12511,17 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 54 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 54 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -12947,20 +12576,19 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(t[0], m, mp); n = sp_3072_cmp_54(t[0], m); - sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 54 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 108]; @@ -12974,11 +12602,17 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 54 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 54 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13033,19 +12667,18 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(t[0], m, mp); n = sp_3072_cmp_54(t[0], m); - sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 54 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 108) + 108]; @@ -13060,11 +12693,17 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 108) + 108), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 108) + 108), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -13157,12 +12796,11 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(rt, m, mp); n = sp_3072_cmp_54(rt, m); - sp_3072_cond_sub_54(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 108); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -13191,7 +12829,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[54 * 5]; @@ -13199,8 +12837,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -13209,7 +12847,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 57) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -13223,7 +12861,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -13238,12 +12876,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_3072_from_bin(a, 54, in, inLen); -#if DIGIT_BIT >= 57 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -13262,7 +12900,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_54(a, a, m); } if (err == MP_OKAY) { - for (i=56; i>=0; 
i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -13278,21 +12916,20 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_54(r, m, mp); mp = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_54(r, r, m, ~(mp >> 63)); sp_3072_to_bin_54(r, out); *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[54 * 5]; @@ -13300,14 +12937,14 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 384U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 57) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 384U) { @@ -13321,7 +12958,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -13336,12 +12973,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, m = r + 54 * 2; sp_3072_from_bin(a, 54, in, inLen); -#if DIGIT_BIT >= 57 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -13371,7 +13008,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, err = sp_3072_mod_54(a, a, m); if (err == MP_OKAY) { - 
for (i=56; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -13387,8 +13024,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_54(r, m, mp); mp = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(mp >> 63)); } } } @@ -13398,7 +13034,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -13433,7 +13069,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[54 * 4]; @@ -13467,7 +13103,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -13492,21 +13128,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 54); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[54 * 4]; @@ -13540,7 +13176,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 
inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -13565,14 +13201,14 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 54); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -13581,7 +13217,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[27 * 8]; @@ -13611,9 +13247,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -13644,6 +13286,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_27(tmpa); sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_norm_27(tmpa); sp_3072_from_mp(qi, 27, qim); sp_3072_mul_27(tmpa, tmpa, qi); @@ -13660,19 +13303,19 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 27 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[27 * 13]; @@ -13703,9 +13346,15 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -13742,6 +13391,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_norm_27(tmpa); sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_norm_27(tmpa); sp_3072_mul_27(tmpa, tmpa, qi); err = sp_3072_mod_27(tmpa, tmpa, p); } @@ -13755,12 +13405,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 27 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -13856,7 +13506,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[54 * 4]; @@ -13879,7 +13529,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const 
mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, DYNAMIC_TYPE_DH); @@ -13904,20 +13554,20 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 54U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[54 * 4]; @@ -13941,7 +13591,7 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -13966,14 +13616,14 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 54U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -14108,11 +13758,13 @@ SP_NOINLINE static void sp_3072_lshift_54(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. 
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[163]; @@ -14127,11 +13779,17 @@ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -14197,17 +13855,15 @@ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_54(r, r, tmp); sp_3072_norm_54(r); o = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(o >> 63)); } sp_3072_mont_reduce_54(r, m, mp); n = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -14232,7 +13888,7 @@ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[54 * 4]; @@ -14256,7 +13912,7 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL, DYNAMIC_TYPE_DH); @@ -14297,14 +13953,14 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 54U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -14327,7 +13983,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[27 * 4]; @@ -14350,7 +14006,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 4, NULL, 
DYNAMIC_TYPE_DH); @@ -14376,20 +14032,20 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_3072_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 54U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[27 * 4]; @@ -14413,7 +14069,7 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -14439,14 +14095,14 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 54U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -14506,20 +14162,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 59 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 58); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 58); } #elif DIGIT_BIT > 
59 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x7ffffffffffffffL; s = 59U - s; @@ -14549,12 +14208,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 59) { r[j] &= 0x7ffffffffffffffL; @@ -14599,7 +14258,7 @@ static void sp_4096_to_bin_70(sp_digit* r, byte* a) r[i+1] += r[i] >> 59; r[i] &= 0x7ffffffffffffffL; } - j = 4096 / 8 - 1; + j = 4103 / 8 - 1; a[j] = 0; for (i=0; i<70 && j>=0; i++) { b = 0; @@ -14736,7 +14395,7 @@ SP_NOINLINE static void sp_4096_sqr_70(sp_digit* r, const sp_digit* a) r[0] = (sp_digit)(c >> 59); } -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -14835,7 +14494,7 @@ static sp_digit sp_4096_cmp_35(const sp_digit* a, const sp_digit* b) int i; for (i=34; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 58); } return r; @@ -14928,21 +14587,22 @@ static void sp_4096_mont_reduce_35(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_35(a + 35); for (i=0; i<34; i++) { - mu = (a[i] * mp) & 0x7ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffffffffL; sp_4096_mul_add_35(a+i, m, mu); a[i+1] += a[i] >> 59; } - mu = (a[i] * mp) & 0x3ffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x3ffffffffffL; sp_4096_mul_add_35(a+i, m, mu); a[i+1] += a[i] >> 59; a[i] &= 0x7ffffffffffffffL; sp_4096_mont_shift_35(a, a); - sp_4096_cond_sub_35(a, a, m, 0 - (((a[34] - m[34]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[34] - m[34]; + sp_4096_cond_sub_35(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_35(a); } @@ -14992,9 +14652,9 @@ SP_NOINLINE static void sp_4096_mul_35(sp_digit* r, const sp_digit* a, * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_35(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_35(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_35(r, a, b); @@ -15047,9 +14707,9 @@ SP_NOINLINE static void sp_4096_sqr_35(sp_digit* r, const sp_digit* a) * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_35(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_35(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_35(r, a); @@ -15077,6 +14737,7 @@ SP_NOINLINE static void sp_4096_mul_d_35(sp_digit* r, const sp_digit* a, r[35] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -15094,6 +14755,7 @@ static void sp_4096_cond_add_35(sp_digit* r, const sp_digit* a, r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. 
(r = a + b) * @@ -15124,135 +14786,96 @@ SP_NOINLINE static void sp_4096_rshift_35(sp_digit* r, const sp_digit* a, r[34] = a[34] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 59) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 59); + sp_digit t0 = (sp_digit)(d & 0x7ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 57; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 58) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 59); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 118) - (sp_digit)(d >> 118); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 28) + 1; - /* All 59 bits from d1 and top 4 bits from d0. */ - d = (d1 << 4) + (d0 >> 55); - r = d / dv; - d -= r * dv; - /* Up to 5 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 51) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 56); + t = (t / dv) << 28; r += t; - /* Up to 9 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 47) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 25); + t = t / (dv << 3); r += t; - /* Up to 13 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 43) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 17 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 39) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 35) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 31) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 27) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 23) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 19) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 15) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 11) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 7) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 3) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 59 bits from d1 and top 4 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_35(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -15269,11 +14892,10 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 35 + 3]; @@ -15284,7 +14906,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 35 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -15303,14 +14925,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, t1[35 + 35] += t1[35 + 35 - 1] >> 59; t1[35 + 35 - 1] &= 0x7ffffffffffffffL; for (i=35; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[35 + i]; - d1 <<= 59; - d1 += t1[35 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_35(t1[35 + i], t1[35 + i - 1], dv); -#endif sp_4096_mul_d_35(t2, sd, r1); (void)sp_4096_sub_35(&t1[i], &t1[i], t2); @@ -15318,14 
+14933,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, t1[35 + i] -= t2[35]; t1[35 + i] += t1[35 + i - 1] >> 59; t1[35 + i - 1] &= 0x7ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[35 + i]; - d1 <<= 59; - d1 -= t1[35 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_35(-t1[35 + i], -t1[35 + i - 1], dv); -#endif r1 -= t1[35 + i]; sp_4096_mul_d_35(t2, sd, r1); (void)sp_4096_add_35(&t1[i], &t1[i], t2); @@ -15334,7 +14942,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, } t1[35 - 1] += t1[35 - 2] >> 59; t1[35 - 2] &= 0x7ffffffffffffffL; - r1 = t1[35 - 1] / dv; + r1 = sp_4096_word_div_word_35(t1[35 - 1], dv); sp_4096_mul_d_35(t2, sd, r1); sp_4096_sub_35(t1, t1, t2); @@ -15343,14 +14951,13 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 59; r[i] &= 0x7ffffffffffffffL; } - sp_4096_cond_add_35(r, r, sd, 0 - ((r[34] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_35(r, r, sd, r[34] >> 63); sp_4096_norm_35(r); sp_4096_rshift_35(r, r, 17); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -15377,13 +14984,15 @@ static int sp_4096_mod_35(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 70]; @@ -15397,11 +15006,17 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 35 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 35 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15456,20 +15071,19 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(t[0], m, mp); n = sp_4096_cmp_35(t[0], m); - sp_4096_cond_sub_35(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 35 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 70]; @@ -15483,11 +15097,17 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 35 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 35 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15542,19 +15162,18 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(t[0], m, mp); n = sp_4096_cmp_35(t[0], m); - sp_4096_cond_sub_35(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 35 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 70) + 70]; @@ -15569,11 +15188,17 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 70) + 70), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 70) + 70), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -15683,12 +15308,11 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(rt, m, mp); n = sp_4096_cmp_35(rt, m); - sp_4096_cond_sub_35(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 70); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -15754,7 +15378,7 @@ static sp_digit sp_4096_cmp_70(const sp_digit* a, const sp_digit* b) int i; for (i=69; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 58); } return r; @@ -15844,17 +15468,18 @@ static void sp_4096_mont_reduce_70(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_70(a + 70); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<69; i++) { - mu = (a[i] * mp) & 0x7ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffffffffL; sp_4096_mul_add_70(a+i, m, mu); a[i+1] += a[i] >> 59; } - mu = (a[i] * mp) & 0x1ffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffL; sp_4096_mul_add_70(a+i, m, mu); a[i+1] += a[i] >> 59; a[i] &= 0x7ffffffffffffffL; @@ -15872,18 +15497,18 @@ static void sp_4096_mont_reduce_70(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; i<69; i++) { - mu = (a[i] * mp) & 0x7ffffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7ffffffffffffffL; sp_4096_mul_add_70(a+i, m, mu); a[i+1] += a[i] >> 59; } - mu = (a[i] * mp) & 0x1ffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffL; sp_4096_mul_add_70(a+i, m, mu); a[i+1] += a[i] >> 59; a[i] &= 0x7ffffffffffffffL; #endif sp_4096_mont_shift_70(a, a); - sp_4096_cond_sub_70(a, a, m, 0 - (((a[69] - m[69]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[69] - m[69]; + sp_4096_cond_sub_70(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_70(a); } @@ -15894,9 +15519,9 @@ static void sp_4096_mont_reduce_70(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_70(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_70(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_70(r, a, b); @@ -15908,9 +15533,9 @@ static void sp_4096_mont_mul_70(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. 
* m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_70(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_70(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_70(r, a); @@ -15938,6 +15563,7 @@ SP_NOINLINE static void sp_4096_mul_d_140(sp_digit* r, const sp_digit* a, r[140] = (sp_digit)t; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -15951,10 +15577,11 @@ static void sp_4096_cond_add_70(sp_digit* r, const sp_digit* a, { int i; - for (i = 0; i < 35; i++) { + for (i = 0; i < 70; i++) { r[i] = a[i] + (b[i] & m); } } +#endif /* WOLFSSL_SP_SMALL */ /* Add b to a into r. (r = a + b) * @@ -15985,135 +15612,96 @@ SP_NOINLINE static void sp_4096_rshift_70(sp_digit* r, const sp_digit* a, r[69] = a[69] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 59) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 59); + sp_digit t0 = (sp_digit)(d & 0x7ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 57; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 58) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + 
r += (sp_digit)(m >> 59); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 118) - (sp_digit)(d >> 118); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 28) + 1; - /* All 59 bits from d1 and top 4 bits from d0. */ - d = (d1 << 4) + (d0 >> 55); - r = d / dv; - d -= r * dv; - /* Up to 5 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 51) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 56); + t = (t / dv) << 28; r += t; - /* Up to 9 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 47) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 25); + t = t / (dv << 3); r += t; - /* Up to 13 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 43) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 17 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 39) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 35) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 31) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 27) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 23) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 19) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 15) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 11) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 7) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 3) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 59 bits from d1 and top 4 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_70(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -16130,11 +15718,10 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 70 + 3]; @@ -16145,7 +15732,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 70 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -16164,14 +15751,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, t1[70 + 70] += t1[70 + 70 - 1] >> 59; t1[70 + 70 - 1] &= 0x7ffffffffffffffL; for (i=70; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[70 + i]; - d1 <<= 59; - d1 += t1[70 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_70(t1[70 + i], t1[70 + i - 1], dv); -#endif sp_4096_mul_d_70(t2, sd, r1); (void)sp_4096_sub_70(&t1[i], &t1[i], t2); @@ -16179,14 +15759,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, t1[70 + i] -= t2[70]; t1[70 + i] += t1[70 + i - 1] >> 59; t1[70 + i - 1] &= 0x7ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[70 + i]; - d1 <<= 59; - d1 -= t1[70 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_70(-t1[70 + i], -t1[70 + i - 1], dv); -#endif r1 -= t1[70 + i]; sp_4096_mul_d_70(t2, sd, r1); (void)sp_4096_add_70(&t1[i], &t1[i], t2); @@ -16195,7 +15768,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, } t1[70 - 1] += t1[70 - 2] >> 59; t1[70 - 2] &= 0x7ffffffffffffffL; - r1 = t1[70 - 1] / dv; + r1 = sp_4096_word_div_word_70(t1[70 - 1], dv); sp_4096_mul_d_70(t2, sd, r1); sp_4096_sub_70(t1, t1, t2); @@ -16204,14 +15777,13 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 59; r[i] &= 0x7ffffffffffffffL; } - sp_4096_cond_add_70(r, r, sd, 0 
- ((r[69] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_70(r, r, sd, r[69] >> 63); sp_4096_norm_70(r); sp_4096_rshift_70(r, r, 34); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -16239,13 +15811,15 @@ static int sp_4096_mod_70(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 140]; @@ -16259,11 +15833,17 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 70 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 70 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -16318,20 +15898,19 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(t[0], m, mp); n = sp_4096_cmp_70(t[0], m); - sp_4096_cond_sub_70(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 140]; @@ -16345,11 +15924,17 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 70 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 70 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -16404,19 +15989,18 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(t[0], m, mp); n = sp_4096_cmp_70(t[0], m); - sp_4096_cond_sub_70(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 140) + 140]; @@ -16431,11 +16015,17 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 140) + 140), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 140) + 140), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -16528,12 +16118,11 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(rt, m, mp); n = sp_4096_cmp_70(rt, m); - sp_4096_cond_sub_70(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 140); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -16560,7 +16149,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[70 * 5]; @@ -16568,8 +16157,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -16578,7 +16167,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 59) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -16592,7 +16181,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -16607,12 +16196,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_4096_from_bin(a, 70, in, inLen); -#if DIGIT_BIT >= 59 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -16631,7 +16220,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_70(a, a, m); } if (err == MP_OKAY) { - for (i=58; i>=0; 
i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -16647,21 +16236,20 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_70(r, m, mp); mp = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_70(r, r, m, ~(mp >> 63)); sp_4096_to_bin_70(r, out); *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[70 * 5]; @@ -16669,14 +16257,14 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 512U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 59) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -16690,7 +16278,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -16705,12 +16293,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, m = r + 70 * 2; sp_4096_from_bin(a, 70, in, inLen); -#if DIGIT_BIT >= 59 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -16740,7 +16328,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_70(a, a, m); if (err == MP_OKAY) { - 
for (i=58; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -16756,8 +16344,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_70(r, m, mp); mp = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(mp >> 63)); } } } @@ -16767,7 +16354,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -16802,7 +16389,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[70 * 4]; @@ -16836,7 +16423,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -16861,21 +16448,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 70); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[70 * 4]; @@ -16909,7 +16496,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 
inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -16934,14 +16521,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 70); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -16950,7 +16537,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[35 * 8]; @@ -16980,9 +16567,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 35 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -17013,6 +16606,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_35(tmpa); sp_4096_cond_add_35(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[34] >> 63)); sp_4096_cond_add_35(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[34] >> 63)); + sp_4096_norm_35(tmpa); sp_4096_from_mp(qi, 35, qim); sp_4096_mul_35(tmpa, tmpa, qi); @@ -17029,19 +16623,19 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 35 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[35 * 13]; @@ -17072,9 +16666,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 35 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -17111,6 +16711,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_35(tmpa); sp_4096_cond_add_35(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[34] >> 63)); sp_4096_cond_add_35(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[34] >> 63)); + sp_4096_norm_35(tmpa); sp_4096_mul_35(tmpa, tmpa, qi); err = sp_4096_mod_35(tmpa, tmpa, p); } @@ -17124,12 +16725,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 35 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -17225,7 +16826,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[70 * 4]; @@ -17248,7 +16849,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const 
mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 4, NULL, DYNAMIC_TYPE_DH); @@ -17273,20 +16874,20 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_4096_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 70U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[70 * 4]; @@ -17310,7 +16911,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -17335,14 +16936,14 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 70U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -17372,11 +16973,13 @@ SP_NOINLINE static void sp_4096_lshift_70(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. 
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[211]; @@ -17391,11 +16994,17 @@ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 211, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 211, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -17461,17 +17070,15 @@ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_70(r, r, tmp); sp_4096_norm_70(r); o = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(o >> 63)); } sp_4096_mont_reduce_70(r, m, mp); n = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -17496,7 +17103,7 @@ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[70 * 4]; @@ -17520,7 +17127,7 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 4, NULL, DYNAMIC_TYPE_DH); @@ -17561,14 +17168,14 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 70U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -17624,20 +17231,23 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 53 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 52); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 52); } #elif DIGIT_BIT > 53 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - 
for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1fffffffffffffL; s = 53U - s; @@ -17667,12 +17277,12 @@ static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 53) { r[j] &= 0x1fffffffffffffL; @@ -17717,7 +17327,7 @@ static void sp_4096_to_bin_78(sp_digit* r, byte* a) r[i+1] += r[i] >> 53; r[i] &= 0x1fffffffffffffL; } - j = 4096 / 8 - 1; + j = 4103 / 8 - 1; a[j] = 0; for (i=0; i<78 && j>=0; i++) { b = 0; @@ -17807,329 +17417,206 @@ static void sp_4096_norm_78(sp_digit* a) SP_NOINLINE static void sp_4096_mul_13(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_uint128 t0 = ((sp_uint128)a[ 0]) * b[ 0]; - sp_uint128 t1 = ((sp_uint128)a[ 0]) * b[ 1] - + ((sp_uint128)a[ 1]) * b[ 0]; - sp_uint128 t2 = ((sp_uint128)a[ 0]) * b[ 2] - + ((sp_uint128)a[ 1]) * b[ 1] - + ((sp_uint128)a[ 2]) * b[ 0]; - sp_uint128 t3 = ((sp_uint128)a[ 0]) * b[ 3] - + ((sp_uint128)a[ 1]) * b[ 2] - + ((sp_uint128)a[ 2]) * b[ 1] - + ((sp_uint128)a[ 3]) * b[ 0]; - sp_uint128 t4 = ((sp_uint128)a[ 0]) * b[ 4] - + ((sp_uint128)a[ 1]) * b[ 3] - + ((sp_uint128)a[ 2]) * b[ 2] - + ((sp_uint128)a[ 3]) * b[ 1] - + ((sp_uint128)a[ 4]) * b[ 0]; - sp_uint128 t5 = ((sp_uint128)a[ 0]) * b[ 5] - + ((sp_uint128)a[ 1]) * b[ 4] - + ((sp_uint128)a[ 2]) * b[ 3] - + ((sp_uint128)a[ 3]) * b[ 2] - + ((sp_uint128)a[ 4]) * b[ 1] - + ((sp_uint128)a[ 5]) * b[ 0]; - sp_uint128 t6 = ((sp_uint128)a[ 0]) * b[ 6] - + ((sp_uint128)a[ 1]) * b[ 5] - + ((sp_uint128)a[ 2]) * b[ 4] - + ((sp_uint128)a[ 3]) * b[ 3] - + ((sp_uint128)a[ 4]) * b[ 2] - + ((sp_uint128)a[ 5]) * b[ 1] - + ((sp_uint128)a[ 6]) * b[ 0]; - sp_uint128 t7 = ((sp_uint128)a[ 0]) * b[ 7] - + ((sp_uint128)a[ 1]) * b[ 6] 
- + ((sp_uint128)a[ 2]) * b[ 5] - + ((sp_uint128)a[ 3]) * b[ 4] - + ((sp_uint128)a[ 4]) * b[ 3] - + ((sp_uint128)a[ 5]) * b[ 2] - + ((sp_uint128)a[ 6]) * b[ 1] - + ((sp_uint128)a[ 7]) * b[ 0]; - sp_uint128 t8 = ((sp_uint128)a[ 0]) * b[ 8] - + ((sp_uint128)a[ 1]) * b[ 7] - + ((sp_uint128)a[ 2]) * b[ 6] - + ((sp_uint128)a[ 3]) * b[ 5] - + ((sp_uint128)a[ 4]) * b[ 4] - + ((sp_uint128)a[ 5]) * b[ 3] - + ((sp_uint128)a[ 6]) * b[ 2] - + ((sp_uint128)a[ 7]) * b[ 1] - + ((sp_uint128)a[ 8]) * b[ 0]; - sp_uint128 t9 = ((sp_uint128)a[ 0]) * b[ 9] - + ((sp_uint128)a[ 1]) * b[ 8] - + ((sp_uint128)a[ 2]) * b[ 7] - + ((sp_uint128)a[ 3]) * b[ 6] - + ((sp_uint128)a[ 4]) * b[ 5] - + ((sp_uint128)a[ 5]) * b[ 4] - + ((sp_uint128)a[ 6]) * b[ 3] - + ((sp_uint128)a[ 7]) * b[ 2] - + ((sp_uint128)a[ 8]) * b[ 1] - + ((sp_uint128)a[ 9]) * b[ 0]; - sp_uint128 t10 = ((sp_uint128)a[ 0]) * b[10] - + ((sp_uint128)a[ 1]) * b[ 9] - + ((sp_uint128)a[ 2]) * b[ 8] - + ((sp_uint128)a[ 3]) * b[ 7] - + ((sp_uint128)a[ 4]) * b[ 6] - + ((sp_uint128)a[ 5]) * b[ 5] - + ((sp_uint128)a[ 6]) * b[ 4] - + ((sp_uint128)a[ 7]) * b[ 3] - + ((sp_uint128)a[ 8]) * b[ 2] - + ((sp_uint128)a[ 9]) * b[ 1] - + ((sp_uint128)a[10]) * b[ 0]; - sp_uint128 t11 = ((sp_uint128)a[ 0]) * b[11] - + ((sp_uint128)a[ 1]) * b[10] - + ((sp_uint128)a[ 2]) * b[ 9] - + ((sp_uint128)a[ 3]) * b[ 8] - + ((sp_uint128)a[ 4]) * b[ 7] - + ((sp_uint128)a[ 5]) * b[ 6] - + ((sp_uint128)a[ 6]) * b[ 5] - + ((sp_uint128)a[ 7]) * b[ 4] - + ((sp_uint128)a[ 8]) * b[ 3] - + ((sp_uint128)a[ 9]) * b[ 2] - + ((sp_uint128)a[10]) * b[ 1] - + ((sp_uint128)a[11]) * b[ 0]; - sp_uint128 t12 = ((sp_uint128)a[ 0]) * b[12] - + ((sp_uint128)a[ 1]) * b[11] - + ((sp_uint128)a[ 2]) * b[10] - + ((sp_uint128)a[ 3]) * b[ 9] - + ((sp_uint128)a[ 4]) * b[ 8] - + ((sp_uint128)a[ 5]) * b[ 7] - + ((sp_uint128)a[ 6]) * b[ 6] - + ((sp_uint128)a[ 7]) * b[ 5] - + ((sp_uint128)a[ 8]) * b[ 4] - + ((sp_uint128)a[ 9]) * b[ 3] - + ((sp_uint128)a[10]) * b[ 2] - + ((sp_uint128)a[11]) * b[ 1] - 
+ ((sp_uint128)a[12]) * b[ 0]; - sp_uint128 t13 = ((sp_uint128)a[ 1]) * b[12] - + ((sp_uint128)a[ 2]) * b[11] - + ((sp_uint128)a[ 3]) * b[10] - + ((sp_uint128)a[ 4]) * b[ 9] - + ((sp_uint128)a[ 5]) * b[ 8] - + ((sp_uint128)a[ 6]) * b[ 7] - + ((sp_uint128)a[ 7]) * b[ 6] - + ((sp_uint128)a[ 8]) * b[ 5] - + ((sp_uint128)a[ 9]) * b[ 4] - + ((sp_uint128)a[10]) * b[ 3] - + ((sp_uint128)a[11]) * b[ 2] - + ((sp_uint128)a[12]) * b[ 1]; - sp_uint128 t14 = ((sp_uint128)a[ 2]) * b[12] - + ((sp_uint128)a[ 3]) * b[11] - + ((sp_uint128)a[ 4]) * b[10] - + ((sp_uint128)a[ 5]) * b[ 9] - + ((sp_uint128)a[ 6]) * b[ 8] - + ((sp_uint128)a[ 7]) * b[ 7] - + ((sp_uint128)a[ 8]) * b[ 6] - + ((sp_uint128)a[ 9]) * b[ 5] - + ((sp_uint128)a[10]) * b[ 4] - + ((sp_uint128)a[11]) * b[ 3] - + ((sp_uint128)a[12]) * b[ 2]; - sp_uint128 t15 = ((sp_uint128)a[ 3]) * b[12] - + ((sp_uint128)a[ 4]) * b[11] - + ((sp_uint128)a[ 5]) * b[10] - + ((sp_uint128)a[ 6]) * b[ 9] - + ((sp_uint128)a[ 7]) * b[ 8] - + ((sp_uint128)a[ 8]) * b[ 7] - + ((sp_uint128)a[ 9]) * b[ 6] - + ((sp_uint128)a[10]) * b[ 5] - + ((sp_uint128)a[11]) * b[ 4] - + ((sp_uint128)a[12]) * b[ 3]; - sp_uint128 t16 = ((sp_uint128)a[ 4]) * b[12] - + ((sp_uint128)a[ 5]) * b[11] - + ((sp_uint128)a[ 6]) * b[10] - + ((sp_uint128)a[ 7]) * b[ 9] - + ((sp_uint128)a[ 8]) * b[ 8] - + ((sp_uint128)a[ 9]) * b[ 7] - + ((sp_uint128)a[10]) * b[ 6] - + ((sp_uint128)a[11]) * b[ 5] - + ((sp_uint128)a[12]) * b[ 4]; - sp_uint128 t17 = ((sp_uint128)a[ 5]) * b[12] - + ((sp_uint128)a[ 6]) * b[11] - + ((sp_uint128)a[ 7]) * b[10] - + ((sp_uint128)a[ 8]) * b[ 9] - + ((sp_uint128)a[ 9]) * b[ 8] - + ((sp_uint128)a[10]) * b[ 7] - + ((sp_uint128)a[11]) * b[ 6] - + ((sp_uint128)a[12]) * b[ 5]; - sp_uint128 t18 = ((sp_uint128)a[ 6]) * b[12] - + ((sp_uint128)a[ 7]) * b[11] - + ((sp_uint128)a[ 8]) * b[10] - + ((sp_uint128)a[ 9]) * b[ 9] - + ((sp_uint128)a[10]) * b[ 8] - + ((sp_uint128)a[11]) * b[ 7] - + ((sp_uint128)a[12]) * b[ 6]; - sp_uint128 t19 = ((sp_uint128)a[ 7]) * b[12] - 
+ ((sp_uint128)a[ 8]) * b[11] - + ((sp_uint128)a[ 9]) * b[10] - + ((sp_uint128)a[10]) * b[ 9] - + ((sp_uint128)a[11]) * b[ 8] - + ((sp_uint128)a[12]) * b[ 7]; - sp_uint128 t20 = ((sp_uint128)a[ 8]) * b[12] - + ((sp_uint128)a[ 9]) * b[11] - + ((sp_uint128)a[10]) * b[10] - + ((sp_uint128)a[11]) * b[ 9] - + ((sp_uint128)a[12]) * b[ 8]; - sp_uint128 t21 = ((sp_uint128)a[ 9]) * b[12] - + ((sp_uint128)a[10]) * b[11] - + ((sp_uint128)a[11]) * b[10] - + ((sp_uint128)a[12]) * b[ 9]; - sp_uint128 t22 = ((sp_uint128)a[10]) * b[12] - + ((sp_uint128)a[11]) * b[11] - + ((sp_uint128)a[12]) * b[10]; - sp_uint128 t23 = ((sp_uint128)a[11]) * b[12] - + ((sp_uint128)a[12]) * b[11]; - sp_uint128 t24 = ((sp_uint128)a[12]) * b[12]; + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[13]; - t1 += t0 >> 53; r[ 0] = t0 & 0x1fffffffffffffL; - t2 += t1 >> 53; r[ 1] = t1 & 0x1fffffffffffffL; - t3 += t2 >> 53; r[ 2] = t2 & 0x1fffffffffffffL; - t4 += t3 >> 53; r[ 3] = t3 & 0x1fffffffffffffL; - t5 += t4 >> 53; r[ 4] = t4 & 0x1fffffffffffffL; - t6 += t5 >> 53; r[ 5] = t5 & 0x1fffffffffffffL; - t7 += t6 >> 53; r[ 6] = t6 & 0x1fffffffffffffL; - t8 += t7 >> 53; r[ 7] = t7 & 0x1fffffffffffffL; - t9 += t8 >> 53; r[ 8] = t8 & 0x1fffffffffffffL; - t10 += t9 >> 53; r[ 9] = t9 & 0x1fffffffffffffL; - t11 += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL; - t12 += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL; - t13 += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL; - t14 += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL; - t15 += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL; - t16 += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL; - t17 += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL; - t18 += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL; - t19 += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL; - t20 += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL; - t21 += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL; - t22 += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL; - t23 += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL; - t24 += t23 >> 53; r[23] = t23 & 
0x1fffffffffffffL; - r[25] = (sp_digit)(t24 >> 53); - r[24] = t24 & 0x1fffffffffffffL; -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a) -{ - sp_uint128 t0 = ((sp_uint128)a[ 0]) * a[ 0]; - sp_uint128 t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; - sp_uint128 t2 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 - + ((sp_uint128)a[ 1]) * a[ 1]; - sp_uint128 t3 = (((sp_uint128)a[ 0]) * a[ 3] - + ((sp_uint128)a[ 1]) * a[ 2]) * 2; - sp_uint128 t4 = (((sp_uint128)a[ 0]) * a[ 4] - + ((sp_uint128)a[ 1]) * a[ 3]) * 2 - + ((sp_uint128)a[ 2]) * a[ 2]; - sp_uint128 t5 = (((sp_uint128)a[ 0]) * a[ 5] - + ((sp_uint128)a[ 1]) * a[ 4] - + ((sp_uint128)a[ 2]) * a[ 3]) * 2; - sp_uint128 t6 = (((sp_uint128)a[ 0]) * a[ 6] - + ((sp_uint128)a[ 1]) * a[ 5] - + ((sp_uint128)a[ 2]) * a[ 4]) * 2 - + ((sp_uint128)a[ 3]) * a[ 3]; - sp_uint128 t7 = (((sp_uint128)a[ 0]) * a[ 7] - + ((sp_uint128)a[ 1]) * a[ 6] - + ((sp_uint128)a[ 2]) * a[ 5] - + ((sp_uint128)a[ 3]) * a[ 4]) * 2; - sp_uint128 t8 = (((sp_uint128)a[ 0]) * a[ 8] - + ((sp_uint128)a[ 1]) * a[ 7] - + ((sp_uint128)a[ 2]) * a[ 6] - + ((sp_uint128)a[ 3]) * a[ 5]) * 2 - + ((sp_uint128)a[ 4]) * a[ 4]; - sp_uint128 t9 = (((sp_uint128)a[ 0]) * a[ 9] - + ((sp_uint128)a[ 1]) * a[ 8] - + ((sp_uint128)a[ 2]) * a[ 7] - + ((sp_uint128)a[ 3]) * a[ 6] - + ((sp_uint128)a[ 4]) * a[ 5]) * 2; - sp_uint128 t10 = (((sp_uint128)a[ 0]) * a[10] - + ((sp_uint128)a[ 1]) * a[ 9] - + ((sp_uint128)a[ 2]) * a[ 8] - + ((sp_uint128)a[ 3]) * a[ 7] - + ((sp_uint128)a[ 4]) * a[ 6]) * 2 - + ((sp_uint128)a[ 5]) * a[ 5]; - sp_uint128 t11 = (((sp_uint128)a[ 0]) * a[11] - + ((sp_uint128)a[ 1]) * a[10] - + ((sp_uint128)a[ 2]) * a[ 9] - + ((sp_uint128)a[ 3]) * a[ 8] - + ((sp_uint128)a[ 4]) * a[ 7] - + ((sp_uint128)a[ 5]) * a[ 6]) * 2; - sp_uint128 t12 = (((sp_uint128)a[ 0]) * a[12] - + ((sp_uint128)a[ 1]) * a[11] - + ((sp_uint128)a[ 2]) * a[10] - + 
((sp_uint128)a[ 3]) * a[ 9] - + ((sp_uint128)a[ 4]) * a[ 8] - + ((sp_uint128)a[ 5]) * a[ 7]) * 2 - + ((sp_uint128)a[ 6]) * a[ 6]; - sp_uint128 t13 = (((sp_uint128)a[ 1]) * a[12] - + ((sp_uint128)a[ 2]) * a[11] - + ((sp_uint128)a[ 3]) * a[10] - + ((sp_uint128)a[ 4]) * a[ 9] - + ((sp_uint128)a[ 5]) * a[ 8] - + ((sp_uint128)a[ 6]) * a[ 7]) * 2; - sp_uint128 t14 = (((sp_uint128)a[ 2]) * a[12] - + ((sp_uint128)a[ 3]) * a[11] - + ((sp_uint128)a[ 4]) * a[10] - + ((sp_uint128)a[ 5]) * a[ 9] - + ((sp_uint128)a[ 6]) * a[ 8]) * 2 - + ((sp_uint128)a[ 7]) * a[ 7]; - sp_uint128 t15 = (((sp_uint128)a[ 3]) * a[12] - + ((sp_uint128)a[ 4]) * a[11] - + ((sp_uint128)a[ 5]) * a[10] - + ((sp_uint128)a[ 6]) * a[ 9] - + ((sp_uint128)a[ 7]) * a[ 8]) * 2; - sp_uint128 t16 = (((sp_uint128)a[ 4]) * a[12] - + ((sp_uint128)a[ 5]) * a[11] - + ((sp_uint128)a[ 6]) * a[10] - + ((sp_uint128)a[ 7]) * a[ 9]) * 2 - + ((sp_uint128)a[ 8]) * a[ 8]; - sp_uint128 t17 = (((sp_uint128)a[ 5]) * a[12] - + ((sp_uint128)a[ 6]) * a[11] - + ((sp_uint128)a[ 7]) * a[10] - + ((sp_uint128)a[ 8]) * a[ 9]) * 2; - sp_uint128 t18 = (((sp_uint128)a[ 6]) * a[12] - + ((sp_uint128)a[ 7]) * a[11] - + ((sp_uint128)a[ 8]) * a[10]) * 2 - + ((sp_uint128)a[ 9]) * a[ 9]; - sp_uint128 t19 = (((sp_uint128)a[ 7]) * a[12] - + ((sp_uint128)a[ 8]) * a[11] - + ((sp_uint128)a[ 9]) * a[10]) * 2; - sp_uint128 t20 = (((sp_uint128)a[ 8]) * a[12] - + ((sp_uint128)a[ 9]) * a[11]) * 2 - + ((sp_uint128)a[10]) * a[10]; - sp_uint128 t21 = (((sp_uint128)a[ 9]) * a[12] - + ((sp_uint128)a[10]) * a[11]) * 2; - sp_uint128 t22 = (((sp_uint128)a[10]) * a[12]) * 2 - + ((sp_uint128)a[11]) * a[11]; - sp_uint128 t23 = (((sp_uint128)a[11]) * a[12]) * 2; - sp_uint128 t24 = ((sp_uint128)a[12]) * a[12]; - - t1 += t0 >> 53; r[ 0] = t0 & 0x1fffffffffffffL; - t2 += t1 >> 53; r[ 1] = t1 & 0x1fffffffffffffL; - t3 += t2 >> 53; r[ 2] = t2 & 0x1fffffffffffffL; - t4 += t3 >> 53; r[ 3] = t3 & 0x1fffffffffffffL; - t5 += t4 >> 53; r[ 4] = t4 & 0x1fffffffffffffL; - t6 += t5 >> 
53; r[ 5] = t5 & 0x1fffffffffffffL; - t7 += t6 >> 53; r[ 6] = t6 & 0x1fffffffffffffL; - t8 += t7 >> 53; r[ 7] = t7 & 0x1fffffffffffffL; - t9 += t8 >> 53; r[ 8] = t8 & 0x1fffffffffffffL; - t10 += t9 >> 53; r[ 9] = t9 & 0x1fffffffffffffL; - t11 += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL; - t12 += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL; - t13 += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL; - t14 += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL; - t15 += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL; - t16 += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL; - t17 += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL; - t18 += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL; - t19 += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL; - t20 += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL; - t21 += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL; - t22 += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL; - t23 += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL; - t24 += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL; - r[25] = (sp_digit)(t24 >> 53); - r[24] = t24 & 0x1fffffffffffffL; + t0 = ((sp_uint128)a[ 0]) * b[ 0]; + t1 = ((sp_uint128)a[ 0]) * b[ 1] + + ((sp_uint128)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 0]) * b[ 2] + + ((sp_uint128)a[ 1]) * b[ 1] + + ((sp_uint128)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 0]) * b[ 3] + + ((sp_uint128)a[ 1]) * b[ 2] + + ((sp_uint128)a[ 2]) * b[ 1] + + ((sp_uint128)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 0]) * b[ 4] + + ((sp_uint128)a[ 1]) * b[ 3] + + ((sp_uint128)a[ 2]) * b[ 2] + + ((sp_uint128)a[ 3]) * b[ 1] + + ((sp_uint128)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 0]) * b[ 5] + + ((sp_uint128)a[ 1]) * b[ 4] + + ((sp_uint128)a[ 2]) * b[ 3] + + ((sp_uint128)a[ 3]) * b[ 2] + + ((sp_uint128)a[ 4]) * b[ 1] + + ((sp_uint128)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = 
((sp_uint128)a[ 0]) * b[ 6] + + ((sp_uint128)a[ 1]) * b[ 5] + + ((sp_uint128)a[ 2]) * b[ 4] + + ((sp_uint128)a[ 3]) * b[ 3] + + ((sp_uint128)a[ 4]) * b[ 2] + + ((sp_uint128)a[ 5]) * b[ 1] + + ((sp_uint128)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 0]) * b[ 7] + + ((sp_uint128)a[ 1]) * b[ 6] + + ((sp_uint128)a[ 2]) * b[ 5] + + ((sp_uint128)a[ 3]) * b[ 4] + + ((sp_uint128)a[ 4]) * b[ 3] + + ((sp_uint128)a[ 5]) * b[ 2] + + ((sp_uint128)a[ 6]) * b[ 1] + + ((sp_uint128)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 0]) * b[ 8] + + ((sp_uint128)a[ 1]) * b[ 7] + + ((sp_uint128)a[ 2]) * b[ 6] + + ((sp_uint128)a[ 3]) * b[ 5] + + ((sp_uint128)a[ 4]) * b[ 4] + + ((sp_uint128)a[ 5]) * b[ 3] + + ((sp_uint128)a[ 6]) * b[ 2] + + ((sp_uint128)a[ 7]) * b[ 1] + + ((sp_uint128)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 0]) * b[ 9] + + ((sp_uint128)a[ 1]) * b[ 8] + + ((sp_uint128)a[ 2]) * b[ 7] + + ((sp_uint128)a[ 3]) * b[ 6] + + ((sp_uint128)a[ 4]) * b[ 5] + + ((sp_uint128)a[ 5]) * b[ 4] + + ((sp_uint128)a[ 6]) * b[ 3] + + ((sp_uint128)a[ 7]) * b[ 2] + + ((sp_uint128)a[ 8]) * b[ 1] + + ((sp_uint128)a[ 9]) * b[ 0]; + t[ 8] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 0]) * b[10] + + ((sp_uint128)a[ 1]) * b[ 9] + + ((sp_uint128)a[ 2]) * b[ 8] + + ((sp_uint128)a[ 3]) * b[ 7] + + ((sp_uint128)a[ 4]) * b[ 6] + + ((sp_uint128)a[ 5]) * b[ 5] + + ((sp_uint128)a[ 6]) * b[ 4] + + ((sp_uint128)a[ 7]) * b[ 3] + + ((sp_uint128)a[ 8]) * b[ 2] + + ((sp_uint128)a[ 9]) * b[ 1] + + ((sp_uint128)a[10]) * b[ 0]; + t[ 9] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 0]) * b[11] + + ((sp_uint128)a[ 1]) * b[10] + + ((sp_uint128)a[ 2]) * b[ 9] + + ((sp_uint128)a[ 3]) * b[ 8] + + ((sp_uint128)a[ 4]) * b[ 7] + + ((sp_uint128)a[ 5]) * b[ 6] + + ((sp_uint128)a[ 6]) * b[ 5] + + ((sp_uint128)a[ 7]) * b[ 4] + + ((sp_uint128)a[ 8]) * b[ 3] + + 
((sp_uint128)a[ 9]) * b[ 2] + + ((sp_uint128)a[10]) * b[ 1] + + ((sp_uint128)a[11]) * b[ 0]; + t[10] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 0]) * b[12] + + ((sp_uint128)a[ 1]) * b[11] + + ((sp_uint128)a[ 2]) * b[10] + + ((sp_uint128)a[ 3]) * b[ 9] + + ((sp_uint128)a[ 4]) * b[ 8] + + ((sp_uint128)a[ 5]) * b[ 7] + + ((sp_uint128)a[ 6]) * b[ 6] + + ((sp_uint128)a[ 7]) * b[ 5] + + ((sp_uint128)a[ 8]) * b[ 4] + + ((sp_uint128)a[ 9]) * b[ 3] + + ((sp_uint128)a[10]) * b[ 2] + + ((sp_uint128)a[11]) * b[ 1] + + ((sp_uint128)a[12]) * b[ 0]; + t[11] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 1]) * b[12] + + ((sp_uint128)a[ 2]) * b[11] + + ((sp_uint128)a[ 3]) * b[10] + + ((sp_uint128)a[ 4]) * b[ 9] + + ((sp_uint128)a[ 5]) * b[ 8] + + ((sp_uint128)a[ 6]) * b[ 7] + + ((sp_uint128)a[ 7]) * b[ 6] + + ((sp_uint128)a[ 8]) * b[ 5] + + ((sp_uint128)a[ 9]) * b[ 4] + + ((sp_uint128)a[10]) * b[ 3] + + ((sp_uint128)a[11]) * b[ 2] + + ((sp_uint128)a[12]) * b[ 1]; + t[12] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 2]) * b[12] + + ((sp_uint128)a[ 3]) * b[11] + + ((sp_uint128)a[ 4]) * b[10] + + ((sp_uint128)a[ 5]) * b[ 9] + + ((sp_uint128)a[ 6]) * b[ 8] + + ((sp_uint128)a[ 7]) * b[ 7] + + ((sp_uint128)a[ 8]) * b[ 6] + + ((sp_uint128)a[ 9]) * b[ 5] + + ((sp_uint128)a[10]) * b[ 4] + + ((sp_uint128)a[11]) * b[ 3] + + ((sp_uint128)a[12]) * b[ 2]; + r[13] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 3]) * b[12] + + ((sp_uint128)a[ 4]) * b[11] + + ((sp_uint128)a[ 5]) * b[10] + + ((sp_uint128)a[ 6]) * b[ 9] + + ((sp_uint128)a[ 7]) * b[ 8] + + ((sp_uint128)a[ 8]) * b[ 7] + + ((sp_uint128)a[ 9]) * b[ 6] + + ((sp_uint128)a[10]) * b[ 5] + + ((sp_uint128)a[11]) * b[ 4] + + ((sp_uint128)a[12]) * b[ 3]; + r[14] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 4]) * b[12] + + ((sp_uint128)a[ 5]) * b[11] + + ((sp_uint128)a[ 6]) * b[10] + + ((sp_uint128)a[ 7]) * b[ 9] + + ((sp_uint128)a[ 8]) * b[ 8] + + 
((sp_uint128)a[ 9]) * b[ 7] + + ((sp_uint128)a[10]) * b[ 6] + + ((sp_uint128)a[11]) * b[ 5] + + ((sp_uint128)a[12]) * b[ 4]; + r[15] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 5]) * b[12] + + ((sp_uint128)a[ 6]) * b[11] + + ((sp_uint128)a[ 7]) * b[10] + + ((sp_uint128)a[ 8]) * b[ 9] + + ((sp_uint128)a[ 9]) * b[ 8] + + ((sp_uint128)a[10]) * b[ 7] + + ((sp_uint128)a[11]) * b[ 6] + + ((sp_uint128)a[12]) * b[ 5]; + r[16] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 6]) * b[12] + + ((sp_uint128)a[ 7]) * b[11] + + ((sp_uint128)a[ 8]) * b[10] + + ((sp_uint128)a[ 9]) * b[ 9] + + ((sp_uint128)a[10]) * b[ 8] + + ((sp_uint128)a[11]) * b[ 7] + + ((sp_uint128)a[12]) * b[ 6]; + r[17] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 7]) * b[12] + + ((sp_uint128)a[ 8]) * b[11] + + ((sp_uint128)a[ 9]) * b[10] + + ((sp_uint128)a[10]) * b[ 9] + + ((sp_uint128)a[11]) * b[ 8] + + ((sp_uint128)a[12]) * b[ 7]; + r[18] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[ 8]) * b[12] + + ((sp_uint128)a[ 9]) * b[11] + + ((sp_uint128)a[10]) * b[10] + + ((sp_uint128)a[11]) * b[ 9] + + ((sp_uint128)a[12]) * b[ 8]; + r[19] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[ 9]) * b[12] + + ((sp_uint128)a[10]) * b[11] + + ((sp_uint128)a[11]) * b[10] + + ((sp_uint128)a[12]) * b[ 9]; + r[20] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[10]) * b[12] + + ((sp_uint128)a[11]) * b[11] + + ((sp_uint128)a[12]) * b[10]; + r[21] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = ((sp_uint128)a[11]) * b[12] + + ((sp_uint128)a[12]) * b[11]; + r[22] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[12]) * b[12]; + r[23] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + r[24] = t0 & 0x1fffffffffffffL; + r[25] = (sp_digit)(t0 >> 53); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. 
(r = a + b) @@ -18262,48 +17749,6 @@ SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a, (void)sp_4096_add_26(&r[52], &r[52], p4); } -/* Square a into r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a) -{ - sp_digit p0[26]; - sp_digit p1[26]; - sp_digit p2[26]; - sp_digit p3[26]; - sp_digit p4[26]; - sp_digit p5[26]; - sp_digit t0[26]; - sp_digit t1[26]; - sp_digit t2[26]; - sp_digit a0[13]; - sp_digit a1[13]; - sp_digit a2[13]; - (void)sp_4096_add_13(a0, a, &a[13]); - (void)sp_4096_add_13(a1, &a[13], &a[26]); - (void)sp_4096_add_13(a2, a0, &a[26]); - sp_4096_sqr_13(p0, a); - sp_4096_sqr_13(p2, &a[13]); - sp_4096_sqr_13(p4, &a[26]); - sp_4096_sqr_13(p1, a0); - sp_4096_sqr_13(p3, a1); - sp_4096_sqr_13(p5, a2); - XMEMSET(r, 0, sizeof(*r)*2U*39U); - (void)sp_4096_sub_26(t0, p3, p2); - (void)sp_4096_sub_26(t1, p1, p2); - (void)sp_4096_sub_26(t2, p5, t0); - (void)sp_4096_sub_26(t2, t2, t1); - (void)sp_4096_sub_26(t0, t0, p4); - (void)sp_4096_sub_26(t1, t1, p0); - (void)sp_4096_add_26(r, r, p0); - (void)sp_4096_add_26(&r[13], &r[13], t1); - (void)sp_4096_add_26(&r[26], &r[26], t2); - (void)sp_4096_add_26(&r[39], &r[39], t0); - (void)sp_4096_add_26(&r[52], &r[52], p4); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. @@ -18422,6 +17867,179 @@ SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a, (void)sp_4096_add_78(r + 39, r + 39, z1); } +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a) +{ + sp_uint128 t0; + sp_uint128 t1; + sp_digit t[13]; + + t0 = ((sp_uint128)a[ 0]) * a[ 0]; + t1 = (((sp_uint128)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[ 2]) * 2 + + ((sp_uint128)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 0]) * a[ 3] + + ((sp_uint128)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[ 4] + + ((sp_uint128)a[ 1]) * a[ 3]) * 2 + + ((sp_uint128)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 0]) * a[ 5] + + ((sp_uint128)a[ 1]) * a[ 4] + + ((sp_uint128)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[ 6] + + ((sp_uint128)a[ 1]) * a[ 5] + + ((sp_uint128)a[ 2]) * a[ 4]) * 2 + + ((sp_uint128)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 0]) * a[ 7] + + ((sp_uint128)a[ 1]) * a[ 6] + + ((sp_uint128)a[ 2]) * a[ 5] + + ((sp_uint128)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[ 8] + + ((sp_uint128)a[ 1]) * a[ 7] + + ((sp_uint128)a[ 2]) * a[ 6] + + ((sp_uint128)a[ 3]) * a[ 5]) * 2 + + ((sp_uint128)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 0]) * a[ 9] + + ((sp_uint128)a[ 1]) * a[ 8] + + ((sp_uint128)a[ 2]) * a[ 7] + + ((sp_uint128)a[ 3]) * a[ 6] + + ((sp_uint128)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[10] + + ((sp_uint128)a[ 1]) * a[ 9] + + ((sp_uint128)a[ 2]) * a[ 8] + + ((sp_uint128)a[ 3]) * a[ 7] + + ((sp_uint128)a[ 4]) * a[ 6]) * 2 + + ((sp_uint128)a[ 5]) * a[ 5]; + t[ 9] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 0]) * a[11] + + ((sp_uint128)a[ 1]) * a[10] + + ((sp_uint128)a[ 2]) * a[ 9] + + 
((sp_uint128)a[ 3]) * a[ 8] + + ((sp_uint128)a[ 4]) * a[ 7] + + ((sp_uint128)a[ 5]) * a[ 6]) * 2; + t[10] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 0]) * a[12] + + ((sp_uint128)a[ 1]) * a[11] + + ((sp_uint128)a[ 2]) * a[10] + + ((sp_uint128)a[ 3]) * a[ 9] + + ((sp_uint128)a[ 4]) * a[ 8] + + ((sp_uint128)a[ 5]) * a[ 7]) * 2 + + ((sp_uint128)a[ 6]) * a[ 6]; + t[11] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 1]) * a[12] + + ((sp_uint128)a[ 2]) * a[11] + + ((sp_uint128)a[ 3]) * a[10] + + ((sp_uint128)a[ 4]) * a[ 9] + + ((sp_uint128)a[ 5]) * a[ 8] + + ((sp_uint128)a[ 6]) * a[ 7]) * 2; + t[12] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 2]) * a[12] + + ((sp_uint128)a[ 3]) * a[11] + + ((sp_uint128)a[ 4]) * a[10] + + ((sp_uint128)a[ 5]) * a[ 9] + + ((sp_uint128)a[ 6]) * a[ 8]) * 2 + + ((sp_uint128)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 3]) * a[12] + + ((sp_uint128)a[ 4]) * a[11] + + ((sp_uint128)a[ 5]) * a[10] + + ((sp_uint128)a[ 6]) * a[ 9] + + ((sp_uint128)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 4]) * a[12] + + ((sp_uint128)a[ 5]) * a[11] + + ((sp_uint128)a[ 6]) * a[10] + + ((sp_uint128)a[ 7]) * a[ 9]) * 2 + + ((sp_uint128)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 5]) * a[12] + + ((sp_uint128)a[ 6]) * a[11] + + ((sp_uint128)a[ 7]) * a[10] + + ((sp_uint128)a[ 8]) * a[ 9]) * 2; + r[16] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 6]) * a[12] + + ((sp_uint128)a[ 7]) * a[11] + + ((sp_uint128)a[ 8]) * a[10]) * 2 + + ((sp_uint128)a[ 9]) * a[ 9]; + r[17] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 7]) * a[12] + + ((sp_uint128)a[ 8]) * a[11] + + ((sp_uint128)a[ 9]) * a[10]) * 2; + r[18] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[ 8]) * a[12] + + ((sp_uint128)a[ 9]) * a[11]) * 2 + + 
((sp_uint128)a[10]) * a[10]; + r[19] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[ 9]) * a[12] + + ((sp_uint128)a[10]) * a[11]) * 2; + r[20] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = (((sp_uint128)a[10]) * a[12]) * 2 + + ((sp_uint128)a[11]) * a[11]; + r[21] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + t1 = (((sp_uint128)a[11]) * a[12]) * 2; + r[22] = t0 & 0x1fffffffffffffL; t1 += t0 >> 53; + t0 = ((sp_uint128)a[12]) * a[12]; + r[23] = t1 & 0x1fffffffffffffL; t0 += t1 >> 53; + r[24] = t0 & 0x1fffffffffffffL; + r[25] = (sp_digit)(t0 >> 53); + XMEMCPY(r, t, sizeof(t)); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[26]; + sp_digit p1[26]; + sp_digit p2[26]; + sp_digit p3[26]; + sp_digit p4[26]; + sp_digit p5[26]; + sp_digit t0[26]; + sp_digit t1[26]; + sp_digit t2[26]; + sp_digit a0[13]; + sp_digit a1[13]; + sp_digit a2[13]; + (void)sp_4096_add_13(a0, a, &a[13]); + (void)sp_4096_add_13(a1, &a[13], &a[26]); + (void)sp_4096_add_13(a2, a0, &a[26]); + sp_4096_sqr_13(p0, a); + sp_4096_sqr_13(p2, &a[13]); + sp_4096_sqr_13(p4, &a[26]); + sp_4096_sqr_13(p1, a0); + sp_4096_sqr_13(p3, a1); + sp_4096_sqr_13(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*39U); + (void)sp_4096_sub_26(t0, p3, p2); + (void)sp_4096_sub_26(t1, p1, p2); + (void)sp_4096_sub_26(t2, p5, t0); + (void)sp_4096_sub_26(t2, t2, t1); + (void)sp_4096_sub_26(t0, t0, p4); + (void)sp_4096_sub_26(t1, t1, p0); + (void)sp_4096_add_26(r, r, p0); + (void)sp_4096_add_26(&r[13], &r[13], t1); + (void)sp_4096_add_26(&r[26], &r[26], t2); + (void)sp_4096_add_26(&r[39], &r[39], t0); + (void)sp_4096_add_26(&r[52], &r[52], p4); +} + /* Square a and put result in r. (r = a * a) * * r A single precision integer. 
@@ -18443,7 +18061,7 @@ SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a) } #endif /* !WOLFSSL_SP_SMALL */ -/* Caclulate the bottom digit of -1/a mod 2^n. +/* Calculate the bottom digit of -1/a mod 2^n. * * a A single precision number. * rho Bottom word of inverse. @@ -18594,21 +18212,21 @@ static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b) int i; r |= (a[38] - b[38]) & (0 - (sp_digit)1); - r |= (a[37] - b[37]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[36] - b[36]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[35] - b[35]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[37] - b[37]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[36] - b[36]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[35] - b[35]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 52); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 52); } return r; @@ -18741,21 +18359,22 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_39(a + 39); for (i=0; i<38; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffL; sp_4096_mul_add_39(a+i, m, mu); a[i+1] += a[i] >> 53; } - mu = (a[i] * mp) & 0x3ffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x3ffffffffL; sp_4096_mul_add_39(a+i, m, mu); a[i+1] += a[i] >> 53; a[i] &= 0x1fffffffffffffL; sp_4096_mont_shift_39(a, a); - sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] - m[38]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[38] - m[38]; + sp_4096_cond_sub_39(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_39(a); } @@ -18766,9 +18385,9 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_39(r, a, b); @@ -18780,9 +18399,9 @@ static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_39(r, a); @@ -18838,6 +18457,7 @@ SP_NOINLINE static void sp_4096_mul_d_39(sp_digit* r, const sp_digit* a, r[39] = (sp_digit)(t & 0x1fffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -18869,6 +18489,7 @@ static void sp_4096_cond_add_39(sp_digit* r, const sp_digit* a, r[37] = a[37] + (b[37] & m); r[38] = a[38] + (b[38] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, const sp_digit* a, byte n) @@ -18894,63 +18515,96 @@ SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, const sp_digit* a, r[38] = a[38] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 53) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 53); + sp_digit t0 = (sp_digit)(d & 0x1fffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 51; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 52) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 53); + m = d - ((sp_int128)r * 
div); + r += (sp_digit)(m >> 106) - (sp_digit)(d >> 106); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 22) + 1; - /* All 53 bits from d1 and top 10 bits from d0. */ - d = (d1 << 10) + (d0 >> 43); - r = d / dv; - d -= r * dv; - /* Up to 11 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 33) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 44); + t = (t / dv) << 22; r += t; - /* Up to 21 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 23) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 9); r += t; - /* Up to 31 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 13) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 41 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 3) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 53 bits from d1 and top 10 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_39(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -18967,11 +18621,10 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 39 + 3]; @@ -18982,7 +18635,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 39 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -19001,14 +18654,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, t1[39 + 39] += t1[39 + 39 - 1] >> 53; t1[39 + 39 - 1] &= 0x1fffffffffffffL; for (i=39; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[39 + i]; - d1 <<= 53; - d1 += t1[39 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv); -#endif sp_4096_mul_d_39(t2, sd, r1); (void)sp_4096_sub_39(&t1[i], &t1[i], t2); @@ -19016,14 +18662,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, t1[39 + i] -= t2[39]; t1[39 + i] += t1[39 + i - 1] >> 53; t1[39 + i - 1] &= 0x1fffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[39 + i]; - d1 <<= 53; - d1 -= t1[39 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_39(-t1[39 + i], -t1[39 + i - 1], dv); -#endif r1 -= t1[39 + i]; sp_4096_mul_d_39(t2, sd, r1); (void)sp_4096_add_39(&t1[i], &t1[i], 
t2); @@ -19032,7 +18671,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, } t1[39 - 1] += t1[39 - 2] >> 53; t1[39 - 2] &= 0x1fffffffffffffL; - r1 = t1[39 - 1] / dv; + r1 = sp_4096_word_div_word_39(t1[39 - 1], dv); sp_4096_mul_d_39(t2, sd, r1); sp_4096_sub_39(t1, t1, t2); @@ -19041,14 +18680,13 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 53; r[i] &= 0x1fffffffffffffL; } - sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_39(r, r, sd, r[38] >> 63); sp_4096_norm_39(r); sp_4096_rshift_39(r, r, 19); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19075,13 +18713,15 @@ static int sp_4096_mod_39(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. 
*/ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 78]; @@ -19095,11 +18735,17 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 39 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19154,20 +18800,19 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(t[0], m, mp); n = sp_4096_cmp_39(t[0], m); - sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 78]; @@ -19181,11 +18826,17 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 39 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19240,19 +18891,18 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(t[0], m, mp); n = sp_4096_cmp_39(t[0], m); - sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(32 * 78) + 78]; @@ -19267,11 +18917,17 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 78) + 78), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((32 * 78) + 78), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -19381,12 +19037,11 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(rt, m, mp); n = sp_4096_cmp_39(rt, m); - sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 78); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19446,20 +19101,20 @@ static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b) int i; r |= (a[77] - b[77]) & (0 - (sp_digit)1); - r |= (a[76] - b[76]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[75] - b[75]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[74] - b[74]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[73] - b[73]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[72] - b[72]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[76] - b[76]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[75] - b[75]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[74] - b[74]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[73] - b[73]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[72] - b[72]) & ~(((sp_digit)0 - r) >> 52); for (i = 64; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 52); } return r; @@ -19588,17 +19243,18 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_78(a + 78); #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<77; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffL; sp_4096_mul_add_78(a+i, m, mu); a[i+1] += a[i] >> 53; } - mu = (a[i] * mp) & 0x7fffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7fffL; sp_4096_mul_add_78(a+i, m, mu); a[i+1] += a[i] >> 53; a[i] &= 0x1fffffffffffffL; @@ -19616,18 +19272,18 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) } #else for (i=0; 
i<77; i++) { - mu = (a[i] * mp) & 0x1fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1fffffffffffffL; sp_4096_mul_add_78(a+i, m, mu); a[i+1] += a[i] >> 53; } - mu = (a[i] * mp) & 0x7fffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7fffL; sp_4096_mul_add_78(a+i, m, mu); a[i+1] += a[i] >> 53; a[i] &= 0x1fffffffffffffL; #endif sp_4096_mont_shift_78(a, a); - sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] - m[77]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[77] - m[77]; + sp_4096_cond_sub_78(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_78(a); } @@ -19638,9 +19294,9 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_4096_mul_78(r, a, b); @@ -19652,9 +19308,9 @@ static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_4096_sqr_78(r, a); @@ -19701,6 +19357,7 @@ SP_NOINLINE static void sp_4096_mul_d_156(sp_digit* r, const sp_digit* a, r[156] = (sp_digit)(t & 0x1fffffffffffffL); } +#ifndef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -19731,6 +19388,7 @@ static void sp_4096_cond_add_78(sp_digit* r, const sp_digit* a, r[76] = a[76] + (b[76] & m); r[77] = a[77] + (b[77] & m); } +#endif /* !WOLFSSL_SP_SMALL */ SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, const sp_digit* a, byte n) @@ -19755,63 +19413,96 @@ SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, const sp_digit* a, r[77] = a[77] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 53) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 53); + sp_digit t0 = (sp_digit)(d & 0x1fffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 51; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 52) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 53); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 106) - (sp_digit)(d >> 106); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 
53) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 22) + 1; - /* All 53 bits from d1 and top 10 bits from d0. */ - d = (d1 << 10) + (d0 >> 43); - r = d / dv; - d -= r * dv; - /* Up to 11 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 33) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 44); + t = (t / dv) << 22; r += t; - /* Up to 21 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 23) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 9); r += t; - /* Up to 31 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 13) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 41 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 3) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 53 bits from d1 and top 10 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_4096_word_div_word_78(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -19828,11 +19519,10 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 78 + 3]; @@ -19843,7 +19533,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 78 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -19862,14 +19552,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, t1[78 + 78] += t1[78 + 78 - 1] >> 53; t1[78 + 78 - 1] &= 0x1fffffffffffffL; for (i=78; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[78 + i]; - d1 <<= 53; - d1 += t1[78 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv); -#endif sp_4096_mul_d_78(t2, sd, r1); (void)sp_4096_sub_78(&t1[i], &t1[i], t2); @@ -19877,14 +19560,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, t1[78 + i] -= t2[78]; t1[78 + i] += t1[78 + i - 1] >> 53; t1[78 + i - 1] &= 0x1fffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[78 + i]; - d1 <<= 53; - d1 -= t1[78 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_78(-t1[78 + i], -t1[78 + i - 1], dv); -#endif r1 -= t1[78 + i]; sp_4096_mul_d_78(t2, sd, r1); (void)sp_4096_add_78(&t1[i], &t1[i], t2); @@ -19893,7 +19569,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, } t1[78 - 1] += t1[78 - 2] >> 53; t1[78 - 2] &= 0x1fffffffffffffL; - r1 = t1[78 - 1] / dv; + r1 = sp_4096_word_div_word_78(t1[78 - 1], dv); sp_4096_mul_d_78(t2, sd, r1); sp_4096_sub_78(t1, t1, t2); @@ -19902,14 +19578,13 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 53; r[i] &= 0x1fffffffffffffL; } - sp_4096_cond_add_78(r, r, sd, 0 - 
((r[77] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_78(r, r, sd, r[77] >> 63); sp_4096_norm_78(r); sp_4096_rshift_78(r, r, 38); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -19939,13 +19614,15 @@ static int sp_4096_mod_78(sp_digit* r, const sp_digit* a, const sp_digit* m) * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. - * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even or exponent is 0. */ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, const sp_digit* m, int reduceA) { #if defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 156]; @@ -19959,11 +19636,17 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 78 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -20018,20 +19701,19 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(t[0], m, mp); n = sp_4096_cmp_78(t[0], m); - sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #elif !defined(WC_NO_CACHE_RESISTANT) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[3 * 156]; @@ -20045,11 +19727,17 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 78 * 2, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -20104,19 +19792,18 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(t[0], m, mp); n = sp_4096_cmp_78(t[0], m); - sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[(16 * 156) + 156]; @@ -20131,11 +19818,17 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 156) + 156), NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * ((16 * 156) + 156), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -20228,12 +19921,11 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(rt, m, mp); n = sp_4096_cmp_78(rt, m); - sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 156); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -20262,7 +19954,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, const mp_int* mm, byte* out, word32* outLen) { #ifdef WOLFSSL_SP_SMALL -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[78 * 5]; @@ -20270,8 +19962,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* m = NULL; sp_digit* r = NULL; sp_digit* norm = NULL; - sp_digit e[1] = {0}; - sp_digit mp; + sp_uint64 e[1] = {0}; + sp_digit mp = 0; int i; int err = MP_OKAY; @@ -20280,7 +19972,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } if (err == MP_OKAY) { - if (mp_count_bits(em) > 53) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -20294,7 +19986,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -20309,12 +20001,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, norm = r; sp_4096_from_bin(a, 78, in, inLen); -#if DIGIT_BIT >= 53 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -20333,7 +20025,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_78(a, a, m); } if (err == MP_OKAY) { - for (i=52; i>=0; 
i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -20349,21 +20041,20 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_78(r, m, mp); mp = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_78(r, r, m, ~(mp >> 63)); sp_4096_to_bin_78(r, out); *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[78 * 5]; @@ -20371,14 +20062,14 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_digit* a = NULL; sp_digit* m = NULL; sp_digit* r = NULL; - sp_digit e[1] = {0}; + sp_uint64 e[1] = {0}; int err = MP_OKAY; if (*outLen < 512U) { err = MP_TO_E; } if (err == MP_OKAY) { - if (mp_count_bits(em) > 53) { + if (mp_count_bits(em) > 64) { err = MP_READ_E; } else if (inLen > 512U) { @@ -20392,7 +20083,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL, DYNAMIC_TYPE_RSA); @@ -20407,12 +20098,12 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, m = r + 78 * 2; sp_4096_from_bin(a, 78, in, inLen); -#if DIGIT_BIT >= 53 - e[0] = (sp_digit)em->dp[0]; +#if DIGIT_BIT >= 64 + e[0] = (sp_uint64)em->dp[0]; #else - e[0] = (sp_digit)em->dp[0]; + e[0] = (sp_uint64)em->dp[0]; if (em->used > 1) { - e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + e[0] |= ((sp_uint64)em->dp[1]) << DIGIT_BIT; } #endif if (e[0] == 0) { @@ -20442,7 +20133,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, err = sp_4096_mod_78(a, a, m); if (err == MP_OKAY) { - 
for (i=52; i>=0; i--) { + for (i=63; i>=0; i--) { if ((e[0] >> i) != 0) { break; } @@ -20458,8 +20149,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_78(r, m, mp); mp = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(mp >> 63)); } } } @@ -20469,7 +20159,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif @@ -20504,7 +20194,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, { #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[78 * 4]; @@ -20538,7 +20228,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -20563,21 +20253,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 78); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* d = NULL; #else sp_digit d[78 * 4]; @@ -20611,7 +20301,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 
inLen, const mp_int* dm, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, DYNAMIC_TYPE_RSA); @@ -20636,14 +20326,14 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (d != NULL) #endif { /* only "a" and "r" are sensitive and need zeroized (same pointer) */ if (a != NULL) ForceZero(a, sizeof(sp_digit) * 78); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(d, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -20652,7 +20342,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #endif /* WOLFSSL_SP_SMALL */ #else #if defined(WOLFSSL_SP_SMALL) -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[39 * 8]; @@ -20682,9 +20372,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 8, NULL, DYNAMIC_TYPE_RSA); @@ -20715,6 +20411,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_39(tmpa); sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_norm_39(tmpa); sp_4096_from_mp(qi, 39, qim); sp_4096_mul_39(tmpa, tmpa, qi); @@ -20731,19 +20428,19 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 39 * 8); -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* a = NULL; #else sp_digit a[39 * 13]; @@ -20774,9 +20471,15 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 13, NULL, DYNAMIC_TYPE_RSA); @@ -20813,6 +20516,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_4096_norm_39(tmpa); sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_norm_39(tmpa); sp_4096_mul_39(tmpa, tmpa, qi); err = sp_4096_mod_39(tmpa, tmpa, p); } @@ -20826,12 +20530,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (a != NULL) #endif { ForceZero(a, sizeof(sp_digit) * 39 * 13); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(a, NULL, DYNAMIC_TYPE_RSA); #endif } @@ -20927,7 +20631,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, { #ifdef WOLFSSL_SP_SMALL int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[78 * 4]; @@ -20950,7 +20654,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const 
mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, DYNAMIC_TYPE_DH); @@ -20975,20 +20679,20 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_4096_to_mp(r, res); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 78U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } return err; #else -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[78 * 4]; @@ -21012,7 +20716,7 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, DYNAMIC_TYPE_DH); if (b == NULL) @@ -21037,14 +20741,14 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 78U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -21227,11 +20931,13 @@ SP_NOINLINE static void sp_4096_lshift_78(sp_digit* r, const sp_digit* a, * e A single precision number that is the exponent. * bits The number of bits in the exponent. * m A single precision number that is the modulus. 
- * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + * returns 0 on success. + * returns MEMORY_E on dynamic memory allocation failure. + * returns MP_VAL when base is even. */ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* td = NULL; #else sp_digit td[235]; @@ -21246,11 +20952,17 @@ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL, - DYNAMIC_TYPE_TMP_BUFFER); - if (td == NULL) - err = MEMORY_E; + if (bits == 0) { + err = MP_VAL; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) + err = MEMORY_E; + } #endif if (err == MP_OKAY) { @@ -21316,17 +21028,15 @@ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_78(r, r, tmp); sp_4096_norm_78(r); o = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(o >> 63)); } sp_4096_mont_reduce_78(r, m, mp); n = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(n >> 63)); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (td != NULL) XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -21351,7 +21061,7 @@ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* b = NULL; #else sp_digit b[78 * 4]; @@ -21375,7 +21085,7 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { b = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, DYNAMIC_TYPE_DH); @@ -21416,14 +21126,14 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (b != NULL) #endif { /* only "e" is sensitive and needs zeroized */ if (e != NULL) ForceZero(e, sizeof(sp_digit) * 78U); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + #ifdef WOLFSSL_SP_SMALL_STACK XFREE(b, NULL, DYNAMIC_TYPE_DH); #endif } @@ -21773,20 +21483,23 @@ SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a, static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 52 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 51); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 51); } #elif DIGIT_BIT > 52 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i 
= 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0xfffffffffffffL; s = 52U - s; @@ -21816,12 +21529,12 @@ static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 52) { r[j] &= 0xfffffffffffffL; @@ -21956,8 +21669,6 @@ static int sp_256_point_to_ecc_point_5(const sp_point_256* p, ecc_point* pm) return err; } -#define sp_256_mont_reduce_order_5 sp_256_mont_reduce_5 - /* Compare a with b in constant time. * * a A single precision integer. @@ -21972,14 +21683,14 @@ static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b) int i; for (i=4; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 51); } #else r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)1); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 51); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -22119,40 +21830,95 @@ static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp) +static void sp_256_mont_reduce_order_5(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; - if (mp != 1) { - for (i=0; i<4; i++) { - mu = (a[i] * mp) & 0xfffffffffffffL; - sp_256_mul_add_5(a+i, m, mu); - a[i+1] += a[i] >> 52; - } - mu = (a[i] * mp) & 0xffffffffffffL; + sp_256_norm_5(a + 5); + + for (i=0; i<4; i++) { + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xfffffffffffffL; sp_256_mul_add_5(a+i, m, mu); a[i+1] += a[i] >> 52; - a[i] &= 0xfffffffffffffL; } - else { - for (i=0; i<4; i++) { - mu = a[i] & 0xfffffffffffffL; - sp_256_mul_add_5(a+i, p256_mod, mu); - a[i+1] += a[i] >> 52; - } - mu = a[i] & 0xffffffffffffL; - sp_256_mul_add_5(a+i, p256_mod, mu); - a[i+1] += a[i] >> 52; - a[i] &= 0xfffffffffffffL; - } - + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0xffffffffffffL; + sp_256_mul_add_5(a+i, m, mu); + a[i+1] += a[i] >> 52; + a[i] &= 0xfffffffffffffL; sp_256_mont_shift_5(a, a); - sp_256_cond_sub_5(a, a, m, 0 - (((a[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[4] >> 48; + sp_256_cond_sub_5(a, a, m, ~((over - 1) >> 63)); sp_256_norm_5(a); } +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_int128 t; + sp_digit am; + + (void)m; + (void)mp; + + for (i = 0; i < 4; i++) { + am = a[i] & 0xfffffffffffffL; + /* Fifth word of modulus word */ + t = am; t *= 0x0ffffffff0000L; + + a[i + 1] += (am << 44) & 0xfffffffffffffL; + a[i + 2] += am >> 8; + a[i + 3] += (am << 36) & 0xfffffffffffffL; + a[i + 4] += (am >> 16) + (t & 0xfffffffffffffL); + a[i + 5] += t >> 52; + + a[i + 1] += a[i] >> 52; + } + am = a[4] & 0xffffffffffff; + /* Fifth word of modulus word */ + t = am; t *= 0x0ffffffff0000L; + + a[4 + 1] += (am << 44) & 0xfffffffffffffL; + a[4 + 2] += am >> 8; + a[4 + 3] += (am << 36) & 0xfffffffffffffL; + a[4 + 4] += (am >> 16) + (t & 0xfffffffffffffL); + a[4 + 5] += t >> 52; + + a[0] = (a[4] >> 48) + ((a[5] << 4) & 0xfffffffffffffL); + a[1] = (a[5] >> 48) + ((a[6] << 4) & 0xfffffffffffffL); + a[2] = (a[6] >> 48) + ((a[7] << 4) & 0xfffffffffffffL); + a[3] = (a[7] >> 48) + ((a[8] << 4) & 0xfffffffffffffL); + a[4] = (a[8] >> 48) + (a[9] << 4); + + a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffL; + a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffL; + a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffL; + a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffL; + + /* Get the bit over, if any. */ + am = a[4] >> 48; + /* Create mask. */ + am = 0 - am; + + a[0] -= 0x000fffffffffffffL & am; + a[1] -= 0x00000fffffffffffL & am; + /* p256_mod[2] is zero */ + a[3] -= 0x0000001000000000L & am; + a[4] -= 0x0000ffffffff0000L & am; + + a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffL; + a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffL; + a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffL; + a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffL; +} + /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -22160,9 +21926,9 @@ static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. 
* b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_256_mul_5(r, a, b); @@ -22174,9 +21940,9 @@ static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_256_sqr_5(r, a); @@ -22190,10 +21956,10 @@ static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_256_mont_sqr_n_5(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_256_mont_sqr_5(r, a, m, mp); for (; n > 1; n--) { @@ -22201,7 +21967,7 @@ static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P256 curve. 
*/ static const uint64_t p256_mod_minus_2[4] = { @@ -22299,27 +22065,24 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 5, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 5, 0, sizeof(sp_digit) * 5U); sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_5(r->x, p256_mod); - sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_5(r->x, r->x, p256_mod, ~(n >> 51)); sp_256_norm_5(r->x); /* y /= z^3 */ sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 5, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 5, 0, sizeof(sp_digit) * 5U); sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_5(r->y, p256_mod); - sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_5(r->y, r->y, p256_mod, ~(n >> 51)); sp_256_norm_5(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -22332,10 +22095,11 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, b); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } @@ -22347,10 +22111,11 @@ static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } @@ -22362,18 +22127,20 @@ static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); (void)sp_256_add_5(r, r, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -22385,20 +22152,33 @@ static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) static void sp_256_cond_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 5; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_256_cond_add_5(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ r[ 0] = a[ 0] + (b[ 0] & m); r[ 1] = a[ 1] + (b[ 1] & m); r[ 2] = a[ 2] + (b[ 2] & m); r[ 3] = a[ 3] + (b[ 3] & m); r[ 4] = a[ 4] + (b[ 4] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). 
* @@ -22445,7 +22225,8 @@ SP_NOINLINE static void sp_256_rshift1_5(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_5(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1)); sp_256_norm_5(r); @@ -22458,6 +22239,61 @@ static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_5(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_5(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_5(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_5(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_5(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_mont_div2_5(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_5(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_5(y, y, t2, 
p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_5_ctx { int state; @@ -22468,7 +22304,14 @@ typedef struct sp_256_proj_point_dbl_5_ctx { sp_digit* z; } sp_256_proj_point_dbl_5_ctx; -static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_5_ctx* ctx = (sp_256_proj_point_dbl_5_ctx*)sp_ctx->data; @@ -22542,7 +22385,7 @@ static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_5(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_5(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -22592,61 +22435,6 @@ static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*5; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. 
*/ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_5(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_5(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_5(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_5(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_5(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_5(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_5(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_5(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_5(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_5(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -22660,6 +22448,18 @@ static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3]) | (a[4] ^ b[4])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_5(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -22667,6 +22467,84 @@ static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. 
*/ +static void sp_256_proj_point_add_5(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*5; + sp_digit* t2 = t + 4*5; + sp_digit* t3 = t + 6*5; + sp_digit* t4 = t + 8*5; + sp_digit* t5 = t + 10*5; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_5(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(t2, t1) & + sp_256_cmp_equal_5(t4, t3)) { + sp_256_proj_point_dbl_5(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_256_mont_sub_5(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_5(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(x, x, t5, p256_mod); + sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(t3, y, p256_mod); + sp_256_mont_sub_5(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_5(y, y, x, p256_mod); + sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, y, t5, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - 
(p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 5; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_5_ctx { @@ -22679,11 +22557,19 @@ typedef struct sp_256_proj_point_add_5_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_256_proj_point_add_5_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -22702,261 +22588,168 @@ static int sp_256_proj_point_add_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*5; - ctx->t3 = t + 4*5; - ctx->t4 = t + 6*5; - ctx->t5 = t + 8*5; + ctx->t6 = t; + ctx->t1 = t + 2*5; + ctx->t2 = t + 4*5; + ctx->t3 = t + 6*5; + ctx->t4 = t + 8*5; + ctx->t5 = t + 10*5; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_5(ctx->t1, p256_mod, q->y); - sp_256_norm_5(ctx->t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_256_proj_point_dbl_5_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - 
ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<5; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<5; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<5; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_256_mont_sqr_5(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; + break; + case 2: + sp_256_mont_mul_5(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; + break; + case 3: + sp_256_mont_mul_5(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_5(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_5(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_5(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_5(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_5(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if 
((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(ctx->t2, ctx->t1) & + sp_256_cmp_equal_5(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_5(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_5(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_5(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_5(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_5(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_5(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_5(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_5(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_5(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - sp_256_mont_sqr_5(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_5(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_mul_5(ctx->t5, ctx->t5, 
ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_5(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_dbl_5(ctx->t1, ctx->y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_5(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t1, p256_mod); + sp_256_mont_mul_5(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_5(ctx->y, ctx->y, ctx->x, p256_mod); + sp_256_mont_sub_5(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - sp_256_mont_mul_5(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 5; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_5(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_5(sp_point_256* r, - 
const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*5; - sp_digit* t3 = t + 4*5; - sp_digit* t4 = t + 6*5; - sp_digit* t5 = t + 8*5; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. */ - if (q == r) { - const sp_point_256* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_256_sub_5(t1, p256_mod, q->y); - sp_256_norm_5(t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { - sp_256_proj_point_dbl_5(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<5; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<5; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<5; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t1, t1, x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_5(t3, t3, y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_5(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_5(t4, t4, t3, p256_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); - 
sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(x, x, t5, p256_mod); - sp_256_mont_dbl_5(t1, y, p256_mod); - sp_256_mont_sub_5(x, x, t1, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_5(y, y, x, p256_mod); - sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(y, y, t5, p256_mod); - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -22966,7 +22759,7 @@ static void sp_256_proj_point_add_5(sp_point_256* r, */ static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[2 * 8]; @@ -22977,7 +22770,7 @@ static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) return MEMORY_E; @@ -23058,7 +22851,7 @@ static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* r[4] |= t[7] << 16U; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -23082,6 +22875,108 @@ static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ +static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_256 t[3]; + sp_digit tmp[2 * 5 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. */ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_256) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod); + } + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod); + if (err == MP_OKAY) + err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod); + + if (err == MP_OKAY) { + i = 4; + c = 48; + n = k[i--] << (52 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 52; + } + + y = (n >> 51) & 1; + n <<= 1; + + sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_256)); + sp_256_proj_point_dbl_5(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_256)); + } + + if (map != 0) { + sp_256_map_5(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_256)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 5 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { 
+ ForceZero(t, sizeof(sp_point_256) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_ecc_mulmod_5_ctx { @@ -23091,7 +22986,7 @@ typedef struct sp_256_ecc_mulmod_5_ctx { sp_256_proj_point_add_5_ctx add_ctx; }; sp_point_256 t[3]; - sp_digit tmp[2 * 5 * 5]; + sp_digit tmp[2 * 5 * 6]; sp_digit n; int i; int c; @@ -23197,109 +23092,6 @@ static int sp_256_ecc_mulmod_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_256 t[3]; - sp_digit tmp[2 * 5 * 5]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_256) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod); - } - if (err == MP_OKAY) - err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod); - if (err == MP_OKAY) - err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod); - - if (err == MP_OKAY) { - i = 4; - c = 48; - n = k[i--] << (52 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 52; - } - - y = (n >> 51) & 1; - n <<= 1; - - sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & 
addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_256)); - sp_256_proj_point_dbl_5(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_256)); - } - - if (map != 0) { - sp_256_map_5(r, &t[0], tmp); - } - else { - XMEMCPY(r, &t[0], sizeof(sp_point_256)); - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 5 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_256) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_256 { @@ -23347,7 +23139,7 @@ static void sp_256_cond_copy_5(sp_digit* r, const sp_digit* a, const sp_digit m) * n Number of times to double * t Temporary ordinate data. 
*/ -static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, +static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, sp_digit* t) { sp_digit* w = t; @@ -23358,6 +23150,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -23368,7 +23161,6 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, /* W = Z^4 */ sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -23386,9 +23178,12 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_5(t2, b, p256_mod); sp_256_mont_sub_5(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_5(t2, b, x, p256_mod); + sp_256_mont_dbl_5(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -23398,9 +23193,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_256_mont_mul_5(w, w, t1, p256_mod, p256_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_5(y, b, x, p256_mod); - sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(y, y, p256_mod); + sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_5(y, y, t1, p256_mod); } #ifndef WOLFSSL_SP_SMALL @@ -23415,18 +23208,19 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_5(t2, b, p256_mod); sp_256_mont_sub_5(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_5(t2, b, x, p256_mod); + sp_256_mont_dbl_5(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_5(y, 
b, x, p256_mod); - sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(y, y, p256_mod); + sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_5(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_5(y, y, p256_mod); + sp_256_mont_div2_5(y, y, p256_mod); } /* Double the Montgomery form projective point p a number of times. @@ -23474,30 +23268,30 @@ static void sp_256_proj_point_dbl_n_store_5(sp_point_256* r, sp_256_mont_sub_5(t1, t1, w, p256_mod); sp_256_mont_tpl_5(a, t1, p256_mod); /* B = X*Y^2 */ - sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(b, t2, x, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(t1, b, p256_mod); - sp_256_mont_sub_5(x, x, t1, p256_mod); + sp_256_mont_dbl_5(t2, b, p256_mod); + sp_256_mont_sub_5(x, x, t2, p256_mod); + /* B = 2.(B - X) */ + sp_256_mont_sub_5(t2, b, x, p256_mod); + sp_256_mont_dbl_5(b, t2, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_5(r[j].z, z, y, p256_mod, p256_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_256_mont_sqr_5(t2, t2, p256_mod, p256_mp_mod); + /* t1 = Y^4 */ + sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_256_mont_mul_5(w, w, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(w, w, t1, p256_mod, p256_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_256_mont_sub_5(y, b, x, p256_mod); - sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(y, y, p256_mod); - sp_256_mont_sub_5(y, y, t2, p256_mod); - + sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_5(r[j].y, y, p256_mod); + sp_256_mont_div2_5(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -23520,30 +23314,30 @@ static void sp_256_proj_point_add_sub_5(sp_point_256* ra, sp_digit* t4 = t + 
6*5; sp_digit* t5 = t + 8*5; sp_digit* t6 = t + 10*5; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t1, t1, x, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t1, xa, p256_mod, p256_mp_mod); /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(t2, za, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, za, p256_mod, p256_mp_mod); sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_5(t3, t3, y, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, t3, ya, p256_mod, p256_mp_mod); /* S2 = Y2*Z1^3 */ sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - U1 */ @@ -23554,30 +23348,30 @@ static void sp_256_proj_point_add_sub_5(sp_point_256* ra, sp_256_mont_sub_5(t4, t4, t3, p256_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_256_mont_mul_5(za, za, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(za, za, t2, p256_mod, p256_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(xa, t4, p256_mod, p256_mp_mod); sp_256_mont_sqr_5(xs, t6, p256_mod, p256_mp_mod); sp_256_mont_sqr_5(t5, t2, p256_mod, 
p256_mp_mod); - sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ya, t1, t5, p256_mod, p256_mp_mod); sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(x, x, t5, p256_mod); + sp_256_mont_sub_5(xa, xa, t5, p256_mod); sp_256_mont_sub_5(xs, xs, t5, p256_mod); - sp_256_mont_dbl_5(t1, y, p256_mod); - sp_256_mont_sub_5(x, x, t1, p256_mod); + sp_256_mont_dbl_5(t1, ya, p256_mod); + sp_256_mont_sub_5(xa, xa, t1, p256_mod); sp_256_mont_sub_5(xs, xs, t1, p256_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_256_mont_sub_5(ys, y, xs, p256_mod); - sp_256_mont_sub_5(y, y, x, p256_mod); - sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(ys, ya, xs, p256_mod); + sp_256_mont_sub_5(ya, ya, xa, p256_mod); + sp_256_mont_mul_5(ya, ya, t4, p256_mod, p256_mp_mod); sp_256_sub_5(t6, p256_mod, t6); sp_256_mont_mul_5(ys, ys, t6, p256_mod, p256_mp_mod); sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(y, y, t5, p256_mod); + sp_256_mont_sub_5(ya, ya, t5, p256_mod); sp_256_mont_sub_5(ys, ys, t5, p256_mod); } @@ -23656,7 +23450,7 @@ static void sp_256_ecc_recode_6_5(const sp_digit* k, ecc_recode_256* v) /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_256_get_point_33_5(sp_point_256* r, const sp_point_256* table, @@ -23721,7 +23515,7 @@ static void sp_256_get_point_33_5(sp_point_256* r, const sp_point_256* table, static int sp_256_ecc_mulmod_win_add_sub_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; sp_digit* tmp = NULL; #else @@ -23739,8 +23533,8 @@ static int sp_256_ecc_mulmod_win_add_sub_5(sp_point_256* r, const sp_point_256* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -23835,7 +23629,7 @@ static int sp_256_ecc_mulmod_win_add_sub_5(sp_point_256* r, const sp_point_256* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -23856,76 +23650,75 @@ static int sp_256_ecc_mulmod_win_add_sub_5(sp_point_256* r, const sp_point_256* * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_5(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { - const sp_point_256* ap[2]; - sp_point_256* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*5; - sp_digit* t3 = t + 4*5; - sp_digit* t4 = t + 6*5; - sp_digit* t5 = t + 8*5; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*5; + sp_digit* t6 = t + 4*5; + sp_digit* t1 = t + 6*5; + sp_digit* t4 = t + 8*5; + sp_digit* t5 = t + 10*5; - /* Check double */ - (void)sp_256_sub_5(t1, p256_mod, q->y); - sp_256_norm_5(t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(p->x, t2) & + sp_256_cmp_equal_5(p->y, t4)) { sp_256_proj_point_dbl_5(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_256)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<5; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<5; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<5; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(t2, z, p256_mod, 
p256_mp_mod); - sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ - sp_256_mont_sub_5(t2, t2, x, p256_mod); + sp_256_mont_sub_5(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ - sp_256_mont_sub_5(t4, t4, y, p256_mod); + sp_256_mont_sub_5(t4, t4, p->y, p256_mod); /* Z3 = H*Z1 */ - sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_256_mont_sqr_5(t1, t4, p256_mod, p256_mp_mod); - sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t3, x, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(x, t1, t5, p256_mod); - sp_256_mont_dbl_5(t1, t3, p256_mod); - sp_256_mont_sub_5(x, x, t1, p256_mod); + sp_256_mont_sqr_5(t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, p->x, t1, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(t2, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(t2, t2, t1, p256_mod); + sp_256_mont_dbl_5(t5, t3, p256_mod); + sp_256_mont_sub_5(x, t2, t5, p256_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_256_mont_sub_5(t3, t3, x, p256_mod); sp_256_mont_mul_5(t3, t3, t4, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, y, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(y, t3, t5, p256_mod); + sp_256_mont_mul_5(t1, t1, p->y, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, t3, t1, p256_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 5; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); 
+ } + for (i = 0; i < 5; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -23966,7 +23759,7 @@ static void sp_256_proj_to_affine_5(sp_point_256* a, sp_digit* t) static int sp_256_gen_stripe_table_5(const sp_point_256* a, sp_table_entry_256* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* t = NULL; #else sp_point_256 t[3]; @@ -23979,7 +23772,7 @@ static int sp_256_gen_stripe_table_5(const sp_point_256* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -24034,7 +23827,7 @@ static int sp_256_gen_stripe_table_5(const sp_point_256* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -24047,7 +23840,7 @@ static int sp_256_gen_stripe_table_5(const sp_point_256* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_256_get_entry_256_5(sp_point_256* r, @@ -24101,12 +23894,12 @@ static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g, const sp_table_entry_256* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* rt = NULL; sp_digit* t = NULL; #else sp_point_256 rt[2]; - sp_digit t[2 * 5 * 5]; + sp_digit t[2 * 5 * 6]; #endif sp_point_256* p = NULL; int i; @@ -24121,13 +23914,13 @@ static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 6, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -24187,7 +23980,7 @@ static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -24230,7 +24023,7 @@ static THREAD_LS_T int sp_cache_256_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) @@ -24301,23 +24094,36 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_5(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 5 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 5 * 6]; +#endif sp_cache_256_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_256 == 0) { - wc_InitMutex(&sp_cache_256_lock); - initCacheMutex_256 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_256 == 0) { + wc_InitMutex(&sp_cache_256_lock); + initCacheMutex_256 = 1; + } + if (wc_LockMutex(&sp_cache_256_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_256_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -24338,6 +24144,9 @@ static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -24356,7 +24165,7 @@ static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -24365,7 +24174,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ 
-24388,7 +24197,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_256_point_to_ecc_point_5(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -24403,7 +24212,7 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -24413,25 +24222,25 @@ int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_256* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[5 + 5 * 2 * 5]; + sp_digit k[5 + 5 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (5 + 5 * 2 * 5), heap, + sizeof(sp_digit) * (5 + 5 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -24468,7 +24277,7 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, err = sp_256_point_to_ecc_point_5(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -24495,6 +24304,16 @@ static int 
sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k, return sp_256_ecc_mulmod_5(r, &p256_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_256_ecc_mulmod_base_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_256_ecc_mulmod_5_nb(sp_ctx, r, &p256_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -25816,7 +25635,7 @@ static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k, */ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -25825,7 +25644,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -25847,7 +25666,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_256_point_to_ecc_point_5(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -25861,7 +25680,7 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -25871,25 +25690,25 @@ int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else sp_point_256 point[2]; - sp_digit k[5 + 5 * 2 * 5]; + sp_digit k[5 + 5 * 2 * 6]; #endif sp_point_256* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (5 + 5 * 2 * 5), + sizeof(sp_digit) * (5 + 5 * 2 * 6), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -25925,7 +25744,7 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, err = sp_256_point_to_ecc_point_5(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -25937,17 +25756,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_5(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -26011,7 +25819,7 @@ static int sp_256_ecc_gen_k_5(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_256_from_bin(k, 5, buf, (int)sizeof(buf)); - if (sp_256_cmp_5(k, p256_order2) < 0) { + if (sp_256_cmp_5(k, p256_order2) <= 0) { sp_256_add_one_5(k); break; } @@ -26033,7 +25841,7 @@ static int sp_256_ecc_gen_k_5(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -26048,15 +25856,15 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_256* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -26097,7 +25905,7 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_256_point_to_ecc_point_5(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -26109,6 +25917,84 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_256_ctx { + int state; + sp_256_ecc_mulmod_5_ctx mulmod_ctx; + sp_digit k[5]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256 point[2]; +#else + sp_point_256 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_256_ctx; + +int 
sp_ecc_make_key_256_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_256_ctx* ctx = (sp_ecc_key_gen_256_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_256* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_256_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_256_ecc_gen_k_5(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_256_ecc_mulmod_base_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_256_ecc_mulmod_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p256_order, 1, 1); + if (err == MP_OKAY) { + if (sp_256_iszero_5(ctx->point->x) || + sp_256_iszero_5(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_256_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_256_point_to_ecc_point_5(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 32 @@ -26127,7 +26013,7 @@ static void sp_256_to_bin_5(sp_digit* r, byte* a) r[i+1] += r[i] >> 52; r[i] &= 0xfffffffffffffL; } - j = 256 / 8 - 1; + j = 263 / 8 - 1; a[j] = 0; for (i=0; i<5 && j>=0; i++) { b = 0; @@ -26169,7 +26055,7 @@ static void sp_256_to_bin_5(sp_digit* r, byte* a) int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* point = NULL; sp_digit* k = NULL; #else @@ -26182,7 +26068,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC); @@ -26207,7 +26093,7 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 32; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -26216,6 +26102,56 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_256_ctx { + int state; + union { + sp_256_ecc_mulmod_5_ctx mulmod_ctx; + }; + sp_digit k[5]; + sp_point_256 point; +} sp_ecc_sec_gen_256_ctx; + +int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_256_ctx* ctx = (sp_ecc_sec_gen_256_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_256_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_256_from_mp(ctx->k, 5, priv); + sp_256_point_from_ecc_point_5(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_256_ecc_mulmod_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_256_to_bin_5(ctx->point.x, out); + *outLen = 32; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_256_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -26342,7 +26278,7 @@ static int sp_256_div_5(const sp_digit* a, const sp_digit* d, int i; sp_digit r1; sp_digit mask; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 5 + 3]; @@ -26353,7 +26289,7 @@ static int sp_256_div_5(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 5 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -26383,8 +26319,7 @@ static int sp_256_div_5(const sp_digit* a, const sp_digit* d, t1[5 + i] -= t2[5]; sp_256_norm_5(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[5 + i] > 0) ? 
- (sp_digit)1 : (sp_digit)0); + mask = ~((t1[5 + i] - 1) >> 63); sp_256_cond_sub_5(t1 + i, t1 + i, sd, mask); sp_256_norm_5(&t1[i + 1]); } @@ -26392,7 +26327,7 @@ static int sp_256_div_5(const sp_digit* a, const sp_digit* d, sp_256_rshift_5(r, t1, 4); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -26414,6 +26349,19 @@ static int sp_256_mod_5(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P256 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_256_mont_mul_order_5(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_256_mul_5(r, a, b); + sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P256 curve. */ static const uint64_t p256_order_minus_2[4] = { @@ -26427,18 +26375,6 @@ static const sp_int_digit p256_order_low[2] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P256 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_256_mont_mul_order_5(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_256_mul_5(r, a, b); - sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order); -} - /* Square number mod the order of P256 curve. (r = a * a mod order) * * r Result of the squaring. 
@@ -26549,7 +26485,7 @@ static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a, sp_256_mont_sqr_n_order_5(t2, t3, 4); /* t = a^ff = t2 * t3 */ sp_256_mont_mul_order_5(t, t2, t3); - /* t3= a^ff00 = t ^ 2 ^ 8 */ + /* t2= a^ff00 = t ^ 2 ^ 8 */ sp_256_mont_sqr_n_order_5(t2, t, 8); /* t = a^ffff = t2 * t */ sp_256_mont_mul_order_5(t, t2, t); @@ -26566,7 +26502,11 @@ static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a, /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */ sp_256_mont_mul_order_5(t2, t2, t); /* t2= a^ffffffff00000000ffffffffffffffffbce6 */ - for (i=127; i>=112; i--) { + sp_256_mont_sqr_order_5(t2, t2); + sp_256_mont_mul_order_5(t2, t2, a); + sp_256_mont_sqr_n_order_5(t2, t2, 5); + sp_256_mont_mul_order_5(t2, t2, t3); + for (i=121; i>=112; i--) { sp_256_mont_sqr_order_5(t2, t2); if ((p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { sp_256_mont_mul_order_5(t2, t2, a); @@ -26609,6 +26549,7 @@ static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -26683,6 +26624,128 @@ static int sp_256_calc_s_5(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. 
*/ +int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_256* point = NULL; +#else + sp_digit e[7 * 2 * 5]; + sp_point_256 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 5; + k = e + 4 * 5; + r = e + 6 * 5; + tmp = e + 8 * 5; + s = e; + + if (hashLen > 32U) { + hashLen = 32U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_256_ecc_gen_k_5(rng, k); + } + else { + sp_256_from_mp(k, 5, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_256_ecc_mulmod_base_5(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 5U); + sp_256_norm_5(r); + c = sp_256_cmp_5(r, p256_order); + sp_256_cond_sub_5(r, r, p256_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_256_norm_5(r); + + if (!sp_256_iszero_5(r)) { + /* x is modified in calculation of s. */ + sp_256_from_mp(x, 5, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_256_from_bin(e, 5, hash, (int)hashLen); + + err = sp_256_calc_s_5(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_256_iszero_5(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_256_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_256_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 5); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_256)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_256_ctx { int state; @@ -26710,15 +26773,10 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 32U) { - hashLen = 32U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -26753,6 +26811,9 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_5(ctx->r); + if (hashLen > 32U) { + hashLen = 32U; + } sp_256_from_mp(ctx->x, 5, priv); sp_256_from_bin(ctx->e, 5, hash, (int)hashLen); ctx->state = 4; @@ -26847,124 +26908,6 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_256* point = NULL; -#else - sp_digit e[7 * 2 * 5]; - sp_point_256 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = 
NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int64 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 5; - k = e + 4 * 5; - r = e + 6 * 5; - tmp = e + 8 * 5; - s = e; - - if (hashLen > 32U) { - hashLen = 32U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. */ - if (km == NULL || mp_iszero(km)) { - err = sp_256_ecc_gen_k_5(rng, k); - } - else { - sp_256_from_mp(k, 5, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_256_ecc_mulmod_base_5(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 5U); - sp_256_norm_5(r); - c = sp_256_cmp_5(r, p256_order); - sp_256_cond_sub_5(r, r, p256_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_256_norm_5(r); - - sp_256_from_mp(x, 5, priv); - sp_256_from_bin(e, 5, hash, (int)hashLen); - - err = sp_256_calc_s_5(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. 
*/ - if ((err == MP_OKAY) && (sp_256_iszero_5(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_256_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_256_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_256)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -27016,7 +26959,7 @@ static int sp_256_num_bits_5(const sp_digit* a) static int sp_256_mod_inv_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* u = NULL; #else sp_digit u[5 * 4]; @@ -27027,7 +26970,7 @@ static int sp_256_mod_inv_5(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut; int vt; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 4, NULL, DYNAMIC_TYPE_ECC); if (u == NULL) @@ -27067,8 +27010,8 @@ static int sp_256_mod_inv_5(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_256_cmp_5(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_256_cmp_5(u, v) >= 0))) { sp_256_sub_5(u, u, v); sp_256_norm_5(u); @@ -27115,7 +27058,7 @@ static int sp_256_mod_inv_5(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(sp_digit) * 5); } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (u != NULL) XFREE(u, NULL, DYNAMIC_TYPE_ECC); #endif @@ -27158,7 +27101,7 @@ static void sp_256_add_points_5(sp_point_256* p1, const sp_point_256* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -27230,6 +27173,106 @@ static int sp_256_calc_vfy_point_5(sp_point_256* p1, sp_point_256* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_256* p1 = NULL; +#else + sp_digit u1[18 * 5]; + sp_point_256 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_256* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 5, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 5; + s = u1 + 4 * 5; + tmp = u1 + 6 * 5; + p2 = p1 + 1; + + if (hashLen > 32U) { + hashLen = 32U; + } + + sp_256_from_bin(u1, 5, hash, (int)hashLen); + sp_256_from_mp(u2, 5, rm); + sp_256_from_mp(s, 5, sm); + sp_256_from_mp(p2->x, 5, pX); + sp_256_from_mp(p2->y, 5, pY); + sp_256_from_mp(p2->z, 5, pZ); + + err = sp_256_calc_vfy_point_5(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and 
convert to Montgomery form. */ + sp_256_from_mp(u2, 5, rm); + err = sp_256_mod_mul_norm_5(u2, u2, p256_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod); + *res = (int)(sp_256_cmp_5(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_256_from_mp(u2, 5, rm); + carry = sp_256_add_5(u2, u2, p256_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_256_norm_5(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_256_cmp_5(u2, p256_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_256_mod_mul_norm_5(u2, u2, p256_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod); + } + *res = (sp_256_cmp_5(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_256_ctx { int state; @@ -27242,7 +27285,7 @@ typedef struct sp_ecc_verify_256_ctx { sp_digit u1[2*5]; sp_digit u2[2*5]; sp_digit s[2*5]; - sp_digit tmp[2*5 * 5]; + sp_digit tmp[2*5 * 6]; sp_point_256 p1; sp_point_256 p2; } sp_ecc_verify_256_ctx; @@ -27379,109 +27422,10 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_256* p1 = NULL; -#else - sp_digit u1[16 * 5]; - sp_point_256 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - 
sp_digit* tmp = NULL; - sp_point_256* p2 = NULL; - sp_digit carry; - sp_int64 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 5, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 5; - s = u1 + 4 * 5; - tmp = u1 + 6 * 5; - p2 = p1 + 1; - - if (hashLen > 32U) { - hashLen = 32U; - } - - sp_256_from_bin(u1, 5, hash, (int)hashLen); - sp_256_from_mp(u2, 5, rm); - sp_256_from_mp(s, 5, sm); - sp_256_from_mp(p2->x, 5, pX); - sp_256_from_mp(p2->y, 5, pY); - sp_256_from_mp(p2->z, 5, pZ); - - err = sp_256_calc_vfy_point_5(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_256_from_mp(u2, 5, rm); - err = sp_256_mod_mul_norm_5(u2, u2, p256_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod); - *res = (int)(sp_256_cmp_5(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_256_from_mp(u2, 5, rm); - carry = sp_256_add_5(u2, u2, p256_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_256_norm_5(u2); - - /* Compare with mod and if greater or equal then not valid. 
*/ - c = sp_256_cmp_5(u2, p256_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_256_mod_mul_norm_5(u2, u2, p256_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, - p256_mp_mod); - *res = (sp_256_cmp_5(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -27491,7 +27435,7 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_256_ecc_is_point_5(const sp_point_256* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[5 * 4]; @@ -27499,7 +27443,7 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -27509,25 +27453,27 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 5; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_5(t1, point->y); (void)sp_256_mod_5(t1, t1, p256_mod); sp_256_sqr_5(t2, point->x); (void)sp_256_mod_5(t2, t2, p256_mod); sp_256_mul_5(t2, t2, point->x); (void)sp_256_mod_5(t2, t2, p256_mod); - (void)sp_256_sub_5(t2, p256_mod, t2); - sp_256_mont_add_5(t1, t1, t2, p256_mod); + sp_256_mont_sub_5(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ 
sp_256_mont_add_5(t1, t1, point->x, p256_mod); sp_256_mont_add_5(t1, t1, point->x, p256_mod); sp_256_mont_add_5(t1, t1, point->x, p256_mod); + if (sp_256_cmp_5(t1, p256_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -27535,7 +27481,7 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -27544,7 +27490,7 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, */ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_256* pub = NULL; #else sp_point_256 pub[1]; @@ -27552,7 +27498,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -27567,7 +27513,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) err = sp_256_ecc_is_point_5(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -27589,7 +27535,7 @@ int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_256* pub = NULL; #else @@ -27610,7 +27556,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, err = 
ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC); @@ -27676,7 +27622,7 @@ int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -27705,17 +27651,17 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else - sp_digit tmp[2 * 5 * 5]; + sp_digit tmp[2 * 5 * 6]; sp_point_256 p[2]; #endif sp_point_256* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -27723,7 +27669,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -27758,7 +27704,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -27782,7 +27728,7 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef 
WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -27791,7 +27737,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -27826,7 +27772,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_256_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -27846,7 +27792,7 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_256* p = NULL; #else @@ -27856,7 +27802,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL, DYNAMIC_TYPE_ECC); @@ -27890,7 +27836,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_256_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -27908,7 +27854,7 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_256_mont_sqrt_5(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 5]; @@ -27916,7 +27862,7 @@ static int sp_256_mont_sqrt_5(sp_digit* y) sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) 
+#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) { err = MEMORY_E; @@ -27959,7 +27905,7 @@ static int sp_256_mont_sqrt_5(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -27977,7 +27923,7 @@ static int sp_256_mont_sqrt_5(sp_digit* y) */ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 5]; @@ -27985,7 +27931,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -28025,7 +27971,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) err = sp_256_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -28420,20 +28366,23 @@ SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a, static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 55 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 54); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 54); } #elif DIGIT_BIT > 55 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x7fffffffffffffL; s = 55U - s; @@ 
-28463,12 +28412,12 @@ static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 55) { r[j] &= 0x7fffffffffffffL; @@ -28603,8 +28552,6 @@ static int sp_384_point_to_ecc_point_7(const sp_point_384* p, ecc_point* pm) return err; } -#define sp_384_mont_reduce_order_7 sp_384_mont_reduce_7 - /* Compare a with b in constant time. * * a A single precision integer. @@ -28619,16 +28566,16 @@ static sp_digit sp_384_cmp_7(const sp_digit* a, const sp_digit* b) int i; for (i=6; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 54); } #else r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)1); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 54); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -28784,28 +28731,98 @@ static void sp_384_mont_shift_7(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp) +static void sp_384_mont_reduce_order_7(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_384_norm_7(a + 7); for (i=0; i<6; i++) { - mu = (a[i] * mp) & 0x7fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x7fffffffffffffL; sp_384_mul_add_7(a+i, m, mu); a[i+1] += a[i] >> 55; } - mu = (a[i] * mp) & 0x3fffffffffffffL; + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x3fffffffffffffL; sp_384_mul_add_7(a+i, m, mu); a[i+1] += a[i] >> 55; a[i] &= 0x7fffffffffffffL; sp_384_mont_shift_7(a, a); - sp_384_cond_sub_7(a, a, m, 0 - (((a[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[6] >> 54; + sp_384_cond_sub_7(a, a, m, ~((over - 1) >> 63)); sp_384_norm_7(a); } +/* Reduce the number back to 384 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit am; + + (void)m; + (void)mp; + + for (i = 0; i < 6; i++) { + am = (a[i] * 0x100000001) & 0x7fffffffffffffL; + a[i + 0] += (am << 32) & 0x7fffffffffffffL; + a[i + 1] += (am >> 23) - ((am << 41) & 0x7fffffffffffffL); + a[i + 2] += -(am >> 14) - ((am << 18) & 0x7fffffffffffffL); + a[i + 3] += -(am >> 37); + a[i + 6] += (am << 54) & 0x7fffffffffffffL; + a[i + 7] += am >> 1; + + a[i + 1] += a[i] >> 55; + } + am = (a[6] * 0x100000001) & 0x3fffffffffffff; + a[6 + 0] += (am << 32) & 0x7fffffffffffffL; + a[6 + 1] += (am >> 23) - ((am << 41) & 0x7fffffffffffffL); + a[6 + 2] += -(am >> 14) - ((am << 18) & 0x7fffffffffffffL); + a[6 + 3] += -(am >> 37); + a[6 + 6] += (am << 54) & 0x7fffffffffffffL; + a[6 + 7] += am >> 1; + + a[0] = (a[6] >> 54) + ((a[7] << 1) & 0x7fffffffffffffL); + a[1] = (a[7] >> 54) + ((a[8] << 1) & 0x7fffffffffffffL); + a[2] = (a[8] >> 54) + ((a[9] << 1) & 0x7fffffffffffffL); + a[3] = (a[9] >> 54) + ((a[10] << 1) & 0x7fffffffffffffL); + a[4] = (a[10] >> 54) + ((a[11] << 1) & 0x7fffffffffffffL); + a[5] = (a[11] >> 54) + ((a[12] << 1) & 0x7fffffffffffffL); + a[6] = (a[12] >> 54) + (a[13] << 1); + + a[1] += a[0] >> 55; a[0] &= 0x7fffffffffffffL; + a[2] += a[1] >> 55; a[1] &= 0x7fffffffffffffL; + a[3] += a[2] >> 55; a[2] &= 0x7fffffffffffffL; + a[4] += a[3] >> 55; a[3] &= 0x7fffffffffffffL; + a[5] += a[4] >> 55; a[4] &= 0x7fffffffffffffL; + a[6] += a[5] >> 55; a[5] &= 0x7fffffffffffffL; + + /* Get the bit over, if any. */ + am = a[6] >> 54; + /* Create mask. 
*/ + am = 0 - am; + + a[0] -= 0x00000000ffffffffL & am; + a[1] -= 0x007ffe0000000000L & am; + a[2] -= 0x007ffffffffbffffL & am; + a[3] -= 0x007fffffffffffffL & am; + a[4] -= 0x007fffffffffffffL & am; + a[5] -= 0x007fffffffffffffL & am; + a[6] -= 0x003fffffffffffffL & am; + + a[1] += a[0] >> 55; a[0] &= 0x7fffffffffffffL; + a[2] += a[1] >> 55; a[1] &= 0x7fffffffffffffL; + a[3] += a[2] >> 55; a[2] &= 0x7fffffffffffffL; + a[4] += a[3] >> 55; a[3] &= 0x7fffffffffffffL; + a[5] += a[4] >> 55; a[4] &= 0x7fffffffffffffL; + a[6] += a[5] >> 55; a[5] &= 0x7fffffffffffffL; +} + /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -28813,9 +28830,9 @@ static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_384_mul_7(r, a, b); @@ -28827,9 +28844,9 @@ static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_384_sqr_7(r, a); @@ -28843,10 +28860,10 @@ static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, * a Number to square in Montgomery form. * n Number of times to square. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. 
*/ -static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n, - const sp_digit* m, sp_digit mp) +SP_NOINLINE static void sp_384_mont_sqr_n_7(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) { sp_384_mont_sqr_7(r, a, m, mp); for (; n > 1; n--) { @@ -28854,7 +28871,7 @@ static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n, } } -#endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */ +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ #ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P384 curve. */ static const uint64_t p384_mod_minus_2[6] = { @@ -28968,27 +28985,24 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_7(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 7, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 7, 0, sizeof(sp_digit) * 7U); sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_7(r->x, p384_mod); - sp_384_cond_sub_7(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_7(r->x, r->x, p384_mod, ~(n >> 54)); sp_384_norm_7(r->x); /* y /= z^3 */ sp_384_mont_mul_7(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 7, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 7, 0, sizeof(sp_digit) * 7U); sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_7(r->y, p384_mod); - sp_384_cond_sub_7(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_7(r->y, r->y, p384_mod, ~(n >> 54)); sp_384_norm_7(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). 
@@ -29001,10 +29015,11 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, b); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } @@ -29016,10 +29031,11 @@ static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } @@ -29031,18 +29047,20 @@ static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); (void)sp_384_add_7(r, r, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -29054,13 +29072,26 @@ static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 7; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ r[ 0] = a[ 0] + (b[ 0] & m); r[ 1] = a[ 1] + (b[ 1] & m); r[ 2] = a[ 2] + (b[ 2] & m); @@ -29068,8 +29099,8 @@ static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a, r[ 4] = a[ 4] + (b[ 4] & m); r[ 5] = a[ 5] + (b[ 5] & m); r[ 6] = a[ 6] + (b[ 6] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * @@ -29118,7 +29149,8 @@ SP_NOINLINE static void sp_384_rshift1_7(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_7(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_384_cond_add_7(r, a, m, 0 - (a[0] & 1)); sp_384_norm_7(r); @@ -29131,6 +29163,61 @@ static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_7(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_7(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_7(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_7(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_7(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_mont_div2_7(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_7(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_7(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_7_ctx { int state; @@ -29141,7 +29228,14 @@ typedef struct sp_384_proj_point_dbl_7_ctx { sp_digit* z; } sp_384_proj_point_dbl_7_ctx; -static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_7_ctx* ctx = (sp_384_proj_point_dbl_7_ctx*)sp_ctx->data; @@ -29215,7 +29309,7 @@ static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con break; case 11: /* T2 = T2/2 */ - sp_384_div2_7(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_7(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -29265,61 +29359,6 @@ static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*7; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_7(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_7(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_7(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_7(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_7(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_7(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_7(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_7(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_7(y, y, x, p384_mod); - /* Y = Y * T1 
*/ - sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_7(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -29334,6 +29373,18 @@ static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_7(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -29341,6 +29392,84 @@ static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_7(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*7; + sp_digit* t2 = t + 4*7; + sp_digit* t3 = t + 6*7; + sp_digit* t4 = t + 8*7; + sp_digit* t5 = t + 10*7; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_7(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(t2, t1) & + sp_384_cmp_equal_7(t4, t3)) { + sp_384_proj_point_dbl_7(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_384_mont_sub_7(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_7(t4, t4, t3, 
p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(x, x, t5, p384_mod); + sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(t3, y, p384_mod); + sp_384_mont_sub_7(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_7(y, y, x, p384_mod); + sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, y, t5, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 7; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_7_ctx { @@ -29353,11 +29482,19 @@ typedef struct sp_384_proj_point_add_7_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_384_proj_point_add_7_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_384_proj_point_add_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -29376,261 +29513,168 @@ static int sp_384_proj_point_add_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*7; - ctx->t3 = t + 4*7; - ctx->t4 = t + 6*7; - ctx->t5 = t + 8*7; + ctx->t6 = t; + ctx->t1 = t + 2*7; + ctx->t2 = t + 4*7; + ctx->t3 = t + 6*7; + ctx->t4 = t + 8*7; + ctx->t5 = t + 10*7; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_7(ctx->t1, p384_mod, q->y); - sp_384_norm_7(ctx->t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_384_proj_point_dbl_7_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_384)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<7; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<7; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<7; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_384_mont_sqr_7(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; + break; + case 2: + sp_384_mont_mul_7(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; + break; + 
case 3: + sp_384_mont_mul_7(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_7(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_7(ctx->t1, ctx->t1, ctx->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_7(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_7(ctx->t4, ctx->t2, ctx->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_7(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(ctx->t2, ctx->t1) & + sp_384_cmp_equal_7(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_7(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_7(ctx->t3, ctx->t3, ctx->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_7(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_7(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_7(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_7(ctx->t4, 
ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_7(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_7(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_7(ctx->z, ctx->z, ctx->t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - sp_384_mont_sqr_7(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_7(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_7(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_dbl_7(ctx->t1, ctx->y, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_7(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t1, p384_mod); + sp_384_mont_mul_7(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_7(ctx->y, ctx->y, ctx->x, p384_mod); + sp_384_mont_sub_7(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - sp_384_mont_mul_7(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); + { + { + int i; + 
sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 7; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_7(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_7(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*7; - sp_digit* t3 = t + 4*7; - sp_digit* t4 = t + 6*7; - sp_digit* t5 = t + 8*7; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_384* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_384_sub_7(t1, p384_mod, q->y); - sp_384_norm_7(t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { - sp_384_proj_point_dbl_7(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<7; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<7; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<7; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t1, t1, x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_7(t3, t3, y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_7(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_7(t4, t4, t3, p384_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(x, x, t5, p384_mod); - sp_384_mont_dbl_7(t1, y, 
p384_mod); - sp_384_mont_sub_7(x, x, t1, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_7(y, y, x, p384_mod); - sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(y, y, t5, p384_mod); - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -29640,7 +29684,7 @@ static void sp_384_proj_point_add_7(sp_point_384* r, */ static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK int64_t* t = NULL; #else int64_t t[2 * 12]; @@ -29651,7 +29695,7 @@ static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -29764,7 +29808,7 @@ static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* r[6] |= t[11] << 22U; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, NULL, DYNAMIC_TYPE_ECC); #endif @@ -29788,6 +29832,108 @@ static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_384 t[3]; + sp_digit tmp[2 * 7 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_384) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod); + } + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod); + if (err == MP_OKAY) + err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod); + + if (err == MP_OKAY) { + i = 6; + c = 54; + n = k[i--] << (55 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 55; + } + + y = (n >> 54) & 1; + n <<= 1; + + sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_384)); + sp_384_proj_point_dbl_7(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_384)); + } + + if (map != 0) { + sp_384_map_7(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_384)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 7 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_384) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_ecc_mulmod_7_ctx { @@ -29903,109 +30049,6 @@ static int sp_384_ecc_mulmod_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int 
sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_384 t[3]; - sp_digit tmp[2 * 7 * 6]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_384) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod); - } - if (err == MP_OKAY) - err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod); - if (err == MP_OKAY) - err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod); - - if (err == MP_OKAY) { - i = 6; - c = 54; - n = k[i--] << (55 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 55; - } - - y = (n >> 54) & 1; - n <<= 1; - - sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_384)); - sp_384_proj_point_dbl_7(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_384)); - } - - if (map != 0) { - sp_384_map_7(r, &t[0], tmp); - } - else { - XMEMCPY(r, &t[0], sizeof(sp_point_384)); - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 7 * 6); - #if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_384) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_384 { @@ -30057,7 +30100,7 @@ static void sp_384_cond_copy_7(sp_digit* r, const sp_digit* a, const sp_digit m) * n Number of times to double * t Temporary ordinate data. */ -static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, +static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, sp_digit* t) { sp_digit* w = t; @@ -30068,6 +30111,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -30078,7 +30122,6 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, /* W = Z^4 */ sp_384_mont_sqr_7(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_7(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -30096,9 +30139,12 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_7(t2, b, p384_mod); sp_384_mont_sub_7(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_7(t2, b, x, p384_mod); + sp_384_mont_dbl_7(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -30108,9 +30154,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_384_mont_mul_7(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_7(y, b, x, p384_mod); - sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(y, y, p384_mod); + 
sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_7(y, y, t1, p384_mod); } #ifndef WOLFSSL_SP_SMALL @@ -30125,18 +30169,19 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_7(t2, b, p384_mod); sp_384_mont_sub_7(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_7(t2, b, x, p384_mod); + sp_384_mont_dbl_7(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_7(y, b, x, p384_mod); - sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(y, y, p384_mod); + sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_7(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_7(y, y, p384_mod); + sp_384_mont_div2_7(y, y, p384_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -30184,30 +30229,30 @@ static void sp_384_proj_point_dbl_n_store_7(sp_point_384* r, sp_384_mont_sub_7(t1, t1, w, p384_mod); sp_384_mont_tpl_7(a, t1, p384_mod); /* B = X*Y^2 */ - sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(b, t2, x, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(t1, b, p384_mod); - sp_384_mont_sub_7(x, x, t1, p384_mod); + sp_384_mont_dbl_7(t2, b, p384_mod); + sp_384_mont_sub_7(x, x, t2, p384_mod); + /* B = 2.(B - X) */ + sp_384_mont_sub_7(t2, b, x, p384_mod); + sp_384_mont_dbl_7(b, t2, p384_mod); /* Z = Z*Y */ sp_384_mont_mul_7(r[j].z, z, y, p384_mod, p384_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_384_mont_sqr_7(t2, t2, p384_mod, p384_mp_mod); + /* t1 = Y^4 */ + sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_384_mont_mul_7(w, w, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(w, w, t1, p384_mod, p384_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_384_mont_sub_7(y, b, x, p384_mod); - sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(y, y, p384_mod); - sp_384_mont_sub_7(y, y, t2, p384_mod); - + sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_7(r[j].y, y, p384_mod); + sp_384_mont_div2_7(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -30230,30 +30275,30 @@ static void sp_384_proj_point_add_sub_7(sp_point_384* ra, sp_digit* t4 = t + 6*7; sp_digit* t5 = t + 8*7; sp_digit* t6 = t + 10*7; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, 
sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t1, t1, x, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t1, xa, p384_mod, p384_mp_mod); /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(t2, za, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, za, p384_mod, p384_mp_mod); sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_7(t3, t3, y, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, t3, ya, p384_mod, p384_mp_mod); /* S2 = Y2*Z1^3 */ sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - U1 */ @@ -30264,30 +30309,30 @@ static void sp_384_proj_point_add_sub_7(sp_point_384* ra, sp_384_mont_sub_7(t4, t4, t3, p384_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_384_mont_mul_7(za, za, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(za, za, t2, p384_mod, p384_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(xa, t4, p384_mod, p384_mp_mod); sp_384_mont_sqr_7(xs, t6, p384_mod, p384_mp_mod); sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ya, t1, t5, p384_mod, p384_mp_mod); sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(x, x, t5, p384_mod); + sp_384_mont_sub_7(xa, xa, t5, p384_mod); sp_384_mont_sub_7(xs, xs, t5, p384_mod); - sp_384_mont_dbl_7(t1, y, p384_mod); - sp_384_mont_sub_7(x, x, t1, p384_mod); + sp_384_mont_dbl_7(t1, 
ya, p384_mod); + sp_384_mont_sub_7(xa, xa, t1, p384_mod); sp_384_mont_sub_7(xs, xs, t1, p384_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_384_mont_sub_7(ys, y, xs, p384_mod); - sp_384_mont_sub_7(y, y, x, p384_mod); - sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(ys, ya, xs, p384_mod); + sp_384_mont_sub_7(ya, ya, xa, p384_mod); + sp_384_mont_mul_7(ya, ya, t4, p384_mod, p384_mp_mod); sp_384_sub_7(t6, p384_mod, t6); sp_384_mont_mul_7(ys, ys, t6, p384_mod, p384_mp_mod); sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(y, y, t5, p384_mod); + sp_384_mont_sub_7(ya, ya, t5, p384_mod); sp_384_mont_sub_7(ys, ys, t5, p384_mod); } @@ -30366,7 +30411,7 @@ static void sp_384_ecc_recode_6_7(const sp_digit* k, ecc_recode_384* v) /* Touch each possible point that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_point_33_7(sp_point_384* r, const sp_point_384* table, @@ -30443,7 +30488,7 @@ static void sp_384_get_point_33_7(sp_point_384* r, const sp_point_384* table, static int sp_384_ecc_mulmod_win_add_sub_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; sp_digit* tmp = NULL; #else @@ -30461,8 +30506,8 @@ static int sp_384_ecc_mulmod_win_add_sub_7(sp_point_384* r, const sp_point_384* (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * (33+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -30557,7 +30602,7 @@ static int sp_384_ecc_mulmod_win_add_sub_7(sp_point_384* r, const sp_point_384* } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -30578,76 +30623,75 @@ static int sp_384_ecc_mulmod_win_add_sub_7(sp_point_384* r, const sp_point_384* * q Second point to add. * t Temporary ordinate data. 
*/ -static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_7(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { - const sp_point_384* ap[2]; - sp_point_384* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*7; - sp_digit* t3 = t + 4*7; - sp_digit* t4 = t + 6*7; - sp_digit* t5 = t + 8*7; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*7; + sp_digit* t6 = t + 4*7; + sp_digit* t1 = t + 6*7; + sp_digit* t4 = t + 8*7; + sp_digit* t5 = t + 10*7; - /* Check double */ - (void)sp_384_sub_7(t1, p384_mod, q->y); - sp_384_norm_7(t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(p->x, t2) & + sp_384_cmp_equal_7(p->y, t4)) { sp_384_proj_point_dbl_7(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_384)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<7; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<7; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<7; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(t2, z, p384_mod, 
p384_mp_mod); - sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ - sp_384_mont_sub_7(t2, t2, x, p384_mod); + sp_384_mont_sub_7(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ - sp_384_mont_sub_7(t4, t4, y, p384_mod); + sp_384_mont_sub_7(t4, t4, p->y, p384_mod); /* Z3 = H*Z1 */ - sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_384_mont_sqr_7(t1, t4, p384_mod, p384_mp_mod); - sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t3, x, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(x, t1, t5, p384_mod); - sp_384_mont_dbl_7(t1, t3, p384_mod); - sp_384_mont_sub_7(x, x, t1, p384_mod); + sp_384_mont_sqr_7(t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, p->x, t1, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(t2, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(t2, t2, t1, p384_mod); + sp_384_mont_dbl_7(t5, t3, p384_mod); + sp_384_mont_sub_7(x, t2, t5, p384_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_384_mont_sub_7(t3, t3, x, p384_mod); sp_384_mont_mul_7(t3, t3, t4, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, y, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(y, t3, t5, p384_mod); + sp_384_mont_mul_7(t1, t1, p->y, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, t3, t1, p384_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 7; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); 
+ } + for (i = 0; i < 7; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -30688,7 +30732,7 @@ static void sp_384_proj_to_affine_7(sp_point_384* a, sp_digit* t) static int sp_384_gen_stripe_table_7(const sp_point_384* a, sp_table_entry_384* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* t = NULL; #else sp_point_384 t[3]; @@ -30701,7 +30745,7 @@ static int sp_384_gen_stripe_table_7(const sp_point_384* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -30756,7 +30800,7 @@ static int sp_384_gen_stripe_table_7(const sp_point_384* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -30769,7 +30813,7 @@ static int sp_384_gen_stripe_table_7(const sp_point_384* a, /* Touch each possible entry that could be being copied. * * r Point to copy into. - * table Table - start of the entires to access + * table Table - start of the entries to access * idx Index of entry to retrieve. 
*/ static void sp_384_get_entry_256_7(sp_point_384* r, @@ -30831,7 +30875,7 @@ static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g, const sp_table_entry_384* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* rt = NULL; sp_digit* t = NULL; #else @@ -30851,7 +30895,7 @@ static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) @@ -30917,7 +30961,7 @@ static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -30960,7 +31004,7 @@ static THREAD_LS_T int sp_cache_384_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) @@ -31031,23 +31075,36 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_7(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; #else sp_digit tmp[2 * 7 * 7]; +#endif sp_cache_384_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_384 == 0) { - wc_InitMutex(&sp_cache_384_lock); - initCacheMutex_384 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 7, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_384 == 0) { + wc_InitMutex(&sp_cache_384_lock); + initCacheMutex_384 = 1; + } + if (wc_LockMutex(&sp_cache_384_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_384_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -31068,6 +31125,9 @@ static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_ } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -31086,7 +31146,7 @@ static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -31095,7 +31155,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -31118,7 +31178,7 @@ int 
sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_384_point_to_ecc_point_7(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -31133,7 +31193,7 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, * * km Scalar to multiply by. * p Point to multiply. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -31143,8 +31203,8 @@ int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r, int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_384* point = NULL; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_384* point = NULL; sp_digit* k = NULL; #else sp_point_384 point[2]; @@ -31154,7 +31214,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -31198,7 +31258,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, err = sp_384_point_to_ecc_point_7(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -31225,6 +31285,16 @@ static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k, return sp_384_ecc_mulmod_7(r, &p384_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_384_ecc_mulmod_base_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const 
sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_384_ecc_mulmod_7_nb(sp_ctx, r, &p384_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -33056,7 +33126,7 @@ static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k, */ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -33065,7 +33135,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -33087,7 +33157,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_384_point_to_ecc_point_7(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -33101,7 +33171,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. 
@@ -33111,7 +33181,7 @@ int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -33122,8 +33192,8 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; @@ -33165,7 +33235,7 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, err = sp_384_point_to_ecc_point_7(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -33177,17 +33247,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_7(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -33251,7 +33310,7 @@ static int sp_384_ecc_gen_k_7(WC_RNG* rng, sp_digit* k) err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); if (err == 0) { sp_384_from_bin(k, 7, buf, (int)sizeof(buf)); - if (sp_384_cmp_7(k, p384_order2) < 0) { + if (sp_384_cmp_7(k, p384_order2) <= 0) { sp_384_add_one_7(k); break; } @@ -33273,7 +33332,7 @@ static int sp_384_ecc_gen_k_7(WC_RNG* rng, sp_digit* k) */ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -33288,15 +33347,15 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) sp_point_384* infinity = NULL; #endif int err = MP_OKAY; - + (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); #else - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); #endif if (point == NULL) err = MEMORY_E; @@ -33337,7 +33396,7 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) err = sp_384_point_to_ecc_point_7(point, pub); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) { @@ -33349,6 +33408,84 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) return err; } +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_key_gen_384_ctx { + int state; + sp_384_ecc_mulmod_7_ctx mulmod_ctx; + sp_digit k[7]; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384 point[2]; +#else + sp_point_384 point[1]; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ +} sp_ecc_key_gen_384_ctx; + +int 
sp_ecc_make_key_384_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_key_gen_384_ctx* ctx = (sp_ecc_key_gen_384_ctx*)sp_ctx->data; +#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + sp_point_384* infinity = ctx->point + 1; +#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + + typedef char ctx_size_test[sizeof(sp_ecc_key_gen_384_ctx) + >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + err = sp_384_ecc_gen_k_7(rng, ctx->k); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + ctx->state = 1; + } + break; + case 1: + err = sp_384_ecc_mulmod_base_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + err = FP_WOULDBLOCK; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + #else + ctx->state = 3; + #endif + } + break; + #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN + case 2: + err = sp_384_ecc_mulmod_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + infinity, ctx->point, p384_order, 1, 1); + if (err == MP_OKAY) { + if (sp_384_iszero_7(ctx->point->x) || + sp_384_iszero_7(ctx->point->y)) { + err = ECC_INF_E; + } + else { + err = FP_WOULDBLOCK; + ctx->state = 3; + } + } + break; + #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */ + case 3: + err = sp_384_to_mp(ctx->k, priv); + if (err == MP_OKAY) { + err = sp_384_point_to_ecc_point_7(ctx->point, pub); + } + break; + } + + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + #ifdef HAVE_ECC_DHE /* Write r as big endian to byte array. 
* Fixed length number of bytes written: 48 @@ -33367,7 +33504,7 @@ static void sp_384_to_bin_7(sp_digit* r, byte* a) r[i+1] += r[i] >> 55; r[i] &= 0x7fffffffffffffL; } - j = 384 / 8 - 1; + j = 391 / 8 - 1; a[j] = 0; for (i=0; i<7 && j>=0; i++) { b = 0; @@ -33409,7 +33546,7 @@ static void sp_384_to_bin_7(sp_digit* r, byte* a) int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, word32* outLen, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* point = NULL; sp_digit* k = NULL; #else @@ -33422,7 +33559,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC); @@ -33447,7 +33584,7 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, *outLen = 48; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -33456,6 +33593,56 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, return err; } + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_384_ctx { + int state; + union { + sp_384_ecc_mulmod_7_ctx mulmod_ctx; + }; + sp_digit k[7]; + sp_point_384 point; +} sp_ecc_sec_gen_384_ctx; + +int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_384_ctx* ctx = (sp_ecc_sec_gen_384_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_384_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_384_from_mp(ctx->k, 7, priv); + sp_384_point_from_ecc_point_7(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_384_ecc_mulmod_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_384_to_bin_7(ctx->point.x, out); + *outLen = 48; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_384_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) @@ -33596,7 +33783,7 @@ static int sp_384_div_7(const sp_digit* a, const sp_digit* d, int i; sp_digit r1; sp_digit mask; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 7 + 3]; @@ -33607,7 +33794,7 @@ static int sp_384_div_7(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 7 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -33631,8 +33818,7 @@ static int sp_384_div_7(const sp_digit* a, const sp_digit* d, t1[7 + i] -= t2[7]; sp_384_norm_7(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[7 + i] > 0) ? 
- (sp_digit)1 : (sp_digit)0); + mask = ~((t1[7 + i] - 1) >> 63); sp_384_cond_sub_7(t1 + i, t1 + i, sd, mask); sp_384_norm_7(&t1[i + 1]); } @@ -33640,7 +33826,7 @@ static int sp_384_div_7(const sp_digit* a, const sp_digit* d, sp_384_rshift_7(r, t1, 1); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -33662,6 +33848,19 @@ static int sp_384_mod_7(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P384 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. + */ +static void sp_384_mont_mul_order_7(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_384_mul_7(r, a, b); + sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) #ifdef WOLFSSL_SP_SMALL /* Order-2 for the P384 curve. */ static const uint64_t p384_order_minus_2[6] = { @@ -33675,18 +33874,6 @@ static const uint64_t p384_order_low[3] = { }; #endif /* WOLFSSL_SP_SMALL */ -/* Multiply two number mod the order of P384 curve. (r = a * b mod order) - * - * r Result of the multiplication. - * a First operand of the multiplication. - * b Second operand of the multiplication. - */ -static void sp_384_mont_mul_order_7(sp_digit* r, const sp_digit* a, const sp_digit* b) -{ - sp_384_mul_7(r, a, b); - sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order); -} - /* Square number mod the order of P384 curve. (r = a * a mod order) * * r Result of the squaring. 
@@ -33828,6 +34015,7 @@ static void sp_384_mont_inv_order_7(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_SIGN #ifndef SP_ECC_MAX_SIG_GEN @@ -33902,6 +34090,128 @@ static int sp_384_calc_s_7(sp_digit* s, const sp_digit* r, sp_digit* k, * returns RNG failures, MEMORY_E when memory allocation fails and * MP_OKAY on success. */ +int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_384* point = NULL; +#else + sp_digit e[7 * 2 * 7]; + sp_point_384 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 7, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 7; + k = e + 4 * 7; + r = e + 6 * 7; + tmp = e + 8 * 7; + s = e; + + if (hashLen > 48U) { + hashLen = 48U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_384_ecc_gen_k_7(rng, k); + } + else { + sp_384_from_mp(k, 7, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_384_ecc_mulmod_base_7(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 7U); + sp_384_norm_7(r); + c = sp_384_cmp_7(r, p384_order); + sp_384_cond_sub_7(r, r, p384_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_384_norm_7(r); + + if (!sp_384_iszero_7(r)) { + /* x is modified in calculation of s. */ + sp_384_from_mp(x, 7, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_384_from_bin(e, 7, hash, (int)hashLen); + + err = sp_384_calc_s_7(s, r, k, x, e, tmp); + + /* Check that signature is usable. */ + if ((err == MP_OKAY) && (!sp_384_iszero_7(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_384_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_384_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 7); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_384)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_sign_384_ctx { int state; @@ -33929,15 +34239,10 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; (void)sizeof(ctx_size_test); - (void)heap; - switch (ctx->state) { case 0: /* INIT */ ctx->s = ctx->e; ctx->kInv = ctx->k; - if (hashLen > 48U) { - hashLen = 48U; - } ctx->i = SP_ECC_MAX_SIG_GEN; ctx->state = 1; @@ -33972,6 +34277,9 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_7(ctx->r); + if (hashLen > 48U) { + hashLen = 48U; + } sp_384_from_mp(ctx->x, 7, priv); sp_384_from_bin(ctx->e, 7, hash, (int)hashLen); ctx->state = 4; @@ -34066,124 +34374,6 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, - const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* e = NULL; - sp_point_384* point = NULL; -#else - sp_digit e[7 * 2 * 7]; - sp_point_384 point[1]; -#endif - sp_digit* x = NULL; - sp_digit* k = NULL; - sp_digit* r = NULL; - sp_digit* tmp = NULL; - sp_digit* s = NULL; - sp_int64 c; - int err = MP_OKAY; - int i; - - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, - DYNAMIC_TYPE_ECC); - if (point == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 7, heap, - DYNAMIC_TYPE_ECC); - if (e == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - x = e + 2 * 7; - k = e + 4 * 7; - r = e + 6 * 7; - tmp = e + 8 * 7; - s = e; - - if (hashLen > 48U) { - hashLen = 48U; - } - } - - for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { - /* New random point. 
*/ - if (km == NULL || mp_iszero(km)) { - err = sp_384_ecc_gen_k_7(rng, k); - } - else { - sp_384_from_mp(k, 7, km); - mp_zero(km); - } - if (err == MP_OKAY) { - err = sp_384_ecc_mulmod_base_7(point, k, 1, 1, heap); - } - - if (err == MP_OKAY) { - /* r = point->x mod order */ - XMEMCPY(r, point->x, sizeof(sp_digit) * 7U); - sp_384_norm_7(r); - c = sp_384_cmp_7(r, p384_order); - sp_384_cond_sub_7(r, r, p384_order, - (sp_digit)0 - (sp_digit)(c >= 0)); - sp_384_norm_7(r); - - sp_384_from_mp(x, 7, priv); - sp_384_from_bin(e, 7, hash, (int)hashLen); - - err = sp_384_calc_s_7(s, r, k, x, e, tmp); - } - - /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_7(s) == 0)) { - break; - } -#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP - i = 1; -#endif - } - - if (i == 0) { - err = RNG_FAILURE_E; - } - - if (err == MP_OKAY) { - err = sp_384_to_mp(r, rm); - } - if (err == MP_OKAY) { - err = sp_384_to_mp(s, sm); - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (e != NULL) -#endif - { - ForceZero(e, sizeof(sp_digit) * 7 * 2 * 7); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(e, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (point != NULL) -#endif - { - ForceZero(point, sizeof(sp_point_384)); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(point, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL @@ -34235,7 +34425,7 @@ static int sp_384_num_bits_7(const sp_digit* a) static int sp_384_mod_inv_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* u = NULL; #else sp_digit u[7 * 4]; @@ -34246,7 +34436,7 @@ static int sp_384_mod_inv_7(sp_digit* r, const sp_digit* a, const sp_digit* m) int ut; int vt; -#if defined(WOLFSSL_SMALL_STACK) 
&& !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 4, NULL, DYNAMIC_TYPE_ECC); if (u == NULL) @@ -34286,8 +34476,8 @@ static int sp_384_mod_inv_7(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_384_cmp_7(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_384_cmp_7(u, v) >= 0))) { sp_384_sub_7(u, u, v); sp_384_norm_7(u); @@ -34334,7 +34524,7 @@ static int sp_384_mod_inv_7(sp_digit* r, const sp_digit* a, const sp_digit* m) else XMEMCPY(r, d, sizeof(sp_digit) * 7); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (u != NULL) XFREE(u, NULL, DYNAMIC_TYPE_ECC); #endif @@ -34379,7 +34569,7 @@ static void sp_384_add_points_7(sp_point_384* p1, const sp_point_384* p2, * p2 Public point and temporary. * s Second part of signature as a number. * u1 Temporary number. - * u2 Temproray number. + * u2 Temporary number. * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ @@ -34451,6 +34641,106 @@ static int sp_384_calc_vfy_point_7(sp_point_384* p1, sp_point_384* p2, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ +int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_384* p1 = NULL; +#else + sp_digit u1[18 * 7]; + sp_point_384 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_384* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 7, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 7; + s = u1 + 4 * 7; + tmp = u1 + 6 * 7; + p2 = p1 + 1; + + if (hashLen > 48U) { + hashLen = 48U; + } + + sp_384_from_bin(u1, 7, hash, (int)hashLen); + sp_384_from_mp(u2, 7, rm); + sp_384_from_mp(s, 7, sm); + sp_384_from_mp(p2->x, 7, pX); + sp_384_from_mp(p2->y, 7, pY); + sp_384_from_mp(p2->z, 7, pZ); + + err = sp_384_calc_vfy_point_7(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_384_from_mp(u2, 7, rm); + err = sp_384_mod_mul_norm_7(u2, u2, p384_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_384_mont_sqr_7(p1->z, p1->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod); + *res = (int)(sp_384_cmp_7(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_384_from_mp(u2, 7, rm); + carry = sp_384_add_7(u2, u2, p384_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_384_norm_7(u2); + + /* Compare with mod and if greater or equal then not valid. 
*/ + c = sp_384_cmp_7(u2, p384_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_384_mod_mul_norm_7(u2, u2, p384_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod); + } + *res = (sp_384_cmp_7(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_ecc_verify_384_ctx { int state; @@ -34463,7 +34753,7 @@ typedef struct sp_ecc_verify_384_ctx { sp_digit u1[2*7]; sp_digit u2[2*7]; sp_digit s[2*7]; - sp_digit tmp[2*7 * 5]; + sp_digit tmp[2*7 * 6]; sp_point_384 p1; sp_point_384 p2; } sp_ecc_verify_384_ctx; @@ -34600,109 +34890,10 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, - const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, - int* res, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_digit* u1 = NULL; - sp_point_384* p1 = NULL; -#else - sp_digit u1[16 * 7]; - sp_point_384 p1[2]; -#endif - sp_digit* u2 = NULL; - sp_digit* s = NULL; - sp_digit* tmp = NULL; - sp_point_384* p2 = NULL; - sp_digit carry; - sp_int64 c = 0; - int err = MP_OKAY; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (err == MP_OKAY) { - p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, - DYNAMIC_TYPE_ECC); - if (p1 == NULL) - err = MEMORY_E; - } - if (err == MP_OKAY) { - u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 7, heap, - DYNAMIC_TYPE_ECC); - if (u1 == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - u2 = u1 + 2 * 7; - s = u1 + 4 * 7; - tmp = u1 + 6 * 7; - p2 = p1 + 1; - - if (hashLen > 48U) { - hashLen = 48U; - } - - 
sp_384_from_bin(u1, 7, hash, (int)hashLen); - sp_384_from_mp(u2, 7, rm); - sp_384_from_mp(s, 7, sm); - sp_384_from_mp(p2->x, 7, pX); - sp_384_from_mp(p2->y, 7, pY); - sp_384_from_mp(p2->z, 7, pZ); - - err = sp_384_calc_vfy_point_7(p1, p2, s, u1, u2, tmp, heap); - } - if (err == MP_OKAY) { - /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ - /* Reload r and convert to Montgomery form. */ - sp_384_from_mp(u2, 7, rm); - err = sp_384_mod_mul_norm_7(u2, u2, p384_mod); - } - - if (err == MP_OKAY) { - /* u1 = r.z'.z' mod prime */ - sp_384_mont_sqr_7(p1->z, p1->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod); - *res = (int)(sp_384_cmp_7(p1->x, u1) == 0); - if (*res == 0) { - /* Reload r and add order. */ - sp_384_from_mp(u2, 7, rm); - carry = sp_384_add_7(u2, u2, p384_order); - /* Carry means result is greater than mod and is not valid. */ - if (carry == 0) { - sp_384_norm_7(u2); - - /* Compare with mod and if greater or equal then not valid. */ - c = sp_384_cmp_7(u2, p384_mod); - } - } - if ((*res == 0) && (c < 0)) { - /* Convert to Montogomery form */ - err = sp_384_mod_mul_norm_7(u2, u2, p384_mod); - if (err == MP_OKAY) { - /* u1 = (r + 1*order).z'.z' mod prime */ - sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, - p384_mp_mod); - *res = (sp_384_cmp_7(p1->x, u1) == 0); - } - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (u1 != NULL) - XFREE(u1, heap, DYNAMIC_TYPE_ECC); - if (p1 != NULL) - XFREE(p1, heap, DYNAMIC_TYPE_ECC); -#endif - - return err; -} #endif /* HAVE_ECC_VERIFY */ #ifdef HAVE_ECC_CHECK_KEY -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. 
@@ -34712,7 +34903,7 @@ int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX, static int sp_384_ecc_is_point_7(const sp_point_384* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[7 * 4]; @@ -34720,7 +34911,7 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, sp_digit* t2 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -34730,25 +34921,27 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 7; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_7(t1, point->y); (void)sp_384_mod_7(t1, t1, p384_mod); sp_384_sqr_7(t2, point->x); (void)sp_384_mod_7(t2, t2, p384_mod); sp_384_mul_7(t2, t2, point->x); (void)sp_384_mod_7(t2, t2, p384_mod); - (void)sp_384_sub_7(t2, p384_mod, t2); - sp_384_mont_add_7(t1, t1, t2, p384_mod); + sp_384_mont_sub_7(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_7(t1, t1, point->x, p384_mod); sp_384_mont_add_7(t1, t1, point->x, p384_mod); sp_384_mont_add_7(t1, t1, point->x, p384_mod); + if (sp_384_cmp_7(t1, p384_b) != 0) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -34756,7 +34949,7 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. 
@@ -34765,7 +34958,7 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, */ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_384* pub = NULL; #else sp_point_384 pub[1]; @@ -34773,7 +34966,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -34788,7 +34981,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) err = sp_384_ecc_is_point_7(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -34810,7 +35003,7 @@ int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_384* pub = NULL; #else @@ -34831,7 +35024,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC); @@ -34897,7 +35090,7 @@ int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -34926,17 +35119,17 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* qX, mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else - sp_digit tmp[2 * 7 * 5]; + sp_digit tmp[2 * 7 * 6]; sp_point_384 p[2]; #endif sp_point_384* q = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, NULL, DYNAMIC_TYPE_ECC); @@ -34944,7 +35137,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = MEMORY_E; } if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 5, NULL, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, NULL, DYNAMIC_TYPE_ECC); if (tmp == NULL) { err = MEMORY_E; @@ -34979,7 +35172,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -35003,7 +35196,7 @@ int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -35012,7 +35205,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -35047,7 +35240,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_384_to_mp(p->z, rZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if 
(p != NULL) @@ -35067,7 +35260,7 @@ int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ, */ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* tmp = NULL; sp_point_384* p = NULL; #else @@ -35077,7 +35270,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL, DYNAMIC_TYPE_ECC); @@ -35111,7 +35304,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_384_to_mp(p->z, pZ); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (tmp != NULL) XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); if (p != NULL) @@ -35129,7 +35322,7 @@ int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ) */ static int sp_384_mont_sqrt_7(sp_digit* y) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[5 * 2 * 7]; @@ -35140,7 +35333,7 @@ static int sp_384_mont_sqrt_7(sp_digit* y) sp_digit* t5 = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 7, NULL, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -35210,7 +35403,7 @@ static int sp_384_mont_sqrt_7(sp_digit* y) } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_ECC); #endif @@ -35228,7 +35421,7 @@ static int sp_384_mont_sqrt_7(sp_digit* y) */ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* x = NULL; #else sp_digit x[4 * 7]; @@ -35236,7 
+35429,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) sp_digit* y = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 7, NULL, DYNAMIC_TYPE_ECC); if (x == NULL) err = MEMORY_E; @@ -35276,7 +35469,7 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) err = sp_384_to_mp(y, ym); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (x != NULL) XFREE(x, NULL, DYNAMIC_TYPE_ECC); #endif @@ -35285,6 +35478,7445 @@ int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym) } #endif #endif /* WOLFSSL_SP_384 */ +#ifdef WOLFSSL_SP_521 + +/* Point structure to use. */ +typedef struct sp_point_521 { + /* X ordinate of point. */ + sp_digit x[2 * 9]; + /* Y ordinate of point. */ + sp_digit y[2 * 9]; + /* Z ordinate of point. */ + sp_digit z[2 * 9]; + /* Indicates point is at infinity. */ + int infinity; +} sp_point_521; + +/* The modulus (prime) of the curve P521. */ +static const sp_digit p521_mod[9] = { + 0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL, + 0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL, + 0x1ffffffffffffffL +}; +/* The Montgomery normalizer for modulus of the curve P521. */ +static const sp_digit p521_norm_mod[9] = { + 0x000000000000001L,0x000000000000000L,0x000000000000000L,0x000000000000000L, + 0x000000000000000L,0x000000000000000L,0x000000000000000L,0x000000000000000L, + 0x000000000000000L +}; +/* The Montgomery multiplier for modulus of the curve P521. */ +static sp_digit p521_mp_mod = 0x00000000000001; +#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ + defined(HAVE_ECC_VERIFY) +/* The order of the curve P521. 
*/ +static const sp_digit p521_order[9] = { + 0x36fb71e91386409L,0x1726e226711ebaeL,0x0148f709a5d03bbL,0x20efcbe59adff30L, + 0x3fffffffa518687L,0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL, + 0x1ffffffffffffffL +}; +#endif +/* The order of the curve P521 minus 2. */ +static const sp_digit p521_order2[9] = { + 0x36fb71e91386407L,0x1726e226711ebaeL,0x0148f709a5d03bbL,0x20efcbe59adff30L, + 0x3fffffffa518687L,0x3ffffffffffffffL,0x3ffffffffffffffL,0x3ffffffffffffffL, + 0x1ffffffffffffffL +}; +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery normalizer for order of the curve P521. */ +static const sp_digit p521_norm_order[9] = { + 0x09048e16ec79bf7L,0x28d91dd98ee1451L,0x3eb708f65a2fc44L,0x1f10341a65200cfL, + 0x000000005ae7978L,0x000000000000000L,0x000000000000000L,0x000000000000000L, + 0x000000000000000L +}; +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* The Montgomery multiplier for order of the curve P521. */ +static sp_digit p521_mp_order = 0x12f5ccd79a995c7L; +#endif +/* The base point of curve P521. 
*/ +static const sp_point_521 p521_base = { + /* X ordinate */ + { + 0x17e7e31c2e5bd66L,0x22cf0615a90a6feL,0x0127a2ffa8de334L, + 0x1dfbf9d64a3f877L,0x06b4d3dbaa14b5eL,0x14fed487e0a2bd8L, + 0x15b4429c6481390L,0x3a73678fb2d988eL,0x0c6858e06b70404L, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* Y ordinate */ + { + 0x0be94769fd16650L,0x31c21a89cb09022L,0x39013fad0761353L, + 0x2657bd099031542L,0x3273e662c97ee72L,0x1e6d11a05ebef45L, + 0x3d1bd998f544495L,0x3001172297ed0b1L,0x11839296a789a3bL, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* Z ordinate */ + { + 0x000000000000001L,0x000000000000000L,0x000000000000000L, + 0x000000000000000L,0x000000000000000L,0x000000000000000L, + 0x000000000000000L,0x000000000000000L,0x000000000000000L, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, + (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0 + }, + /* infinity */ + 0 +}; +#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY) +static const sp_digit p521_b[9] = { + 0x3451fd46b503f00L,0x0f7e20f4b0d3c7bL,0x00bd3bb1bf07357L,0x147b1fa4dec594bL, + 0x18ef109e1561939L,0x26cc57cee2d2264L,0x0540eea2da725b9L,0x2687e4a688682daL, + 0x051953eb9618e1cL +}; +#endif + +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
 */
SP_NOINLINE static void sp_521_mul_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i;
    int imax;
    int k;
    sp_uint128 c;   /* Carry folded into the next column up. */
    sp_uint128 lo;  /* Sum of partial products for the current column. */

    /* Digits are 58 bits wide (mask 0x3ffffffffffffffL). The 18-digit
     * product is built column by column, from column 16 (a[8]*b[8])
     * downwards. */
    c = ((sp_uint128)a[8]) * b[8];
    r[17] = (sp_digit)(c >> 58);
    c &= 0x3ffffffffffffffL;
    for (k = 15; k >= 0; k--) {
        /* Restrict partial-product indices to valid digits of a and b. */
        if (k >= 9) {
            i = k - 8;
            imax = 8;
        }
        else {
            i = 0;
            imax = k;
        }
        lo = 0;
        for (; i <= imax; i++) {
            lo += ((sp_uint128)a[i]) * b[k - i];
        }
        c += lo >> 58;
        r[k + 2] += (sp_digit)(c >> 58);
        r[k + 1] = (sp_digit)(c & 0x3ffffffffffffffL);
        c = lo & 0x3ffffffffffffffL;
    }
    r[0] = (sp_digit)c;
}

#else
/* Multiply a and b into r. (r = a * b)
 *
 * Fully unrolled schoolbook multiply over 58-bit digits. The low nine
 * digits are staged in t and copied into r last.
 * NOTE(review): the staging appears intended to keep r usable when it
 * aliases a or b -- confirm against callers before relying on it.
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
SP_NOINLINE static void sp_521_mul_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    sp_int128 t0;
    sp_int128 t1;
    sp_digit t[9];

    t0 = ((sp_int128)a[ 0]) * b[ 0];
    t1 = ((sp_int128)a[ 0]) * b[ 1]
       + ((sp_int128)a[ 1]) * b[ 0];
    t[ 0] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 0]) * b[ 2]
       + ((sp_int128)a[ 1]) * b[ 1]
       + ((sp_int128)a[ 2]) * b[ 0];
    t[ 1] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 0]) * b[ 3]
       + ((sp_int128)a[ 1]) * b[ 2]
       + ((sp_int128)a[ 2]) * b[ 1]
       + ((sp_int128)a[ 3]) * b[ 0];
    t[ 2] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 0]) * b[ 4]
       + ((sp_int128)a[ 1]) * b[ 3]
       + ((sp_int128)a[ 2]) * b[ 2]
       + ((sp_int128)a[ 3]) * b[ 1]
       + ((sp_int128)a[ 4]) * b[ 0];
    t[ 3] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 0]) * b[ 5]
       + ((sp_int128)a[ 1]) * b[ 4]
       + ((sp_int128)a[ 2]) * b[ 3]
       + ((sp_int128)a[ 3]) * b[ 2]
       + ((sp_int128)a[ 4]) * b[ 1]
       + ((sp_int128)a[ 5]) * b[ 0];
    t[ 4] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 0]) * b[ 6]
       + ((sp_int128)a[ 1]) * b[ 5]
       + ((sp_int128)a[ 2]) * b[ 4]
       + ((sp_int128)a[ 3]) * b[ 3]
       + ((sp_int128)a[ 4]) * b[ 2]
       + ((sp_int128)a[ 5]) * b[ 1]
       + ((sp_int128)a[ 6]) * b[ 0];
    t[ 5] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 0]) * b[ 7]
       + ((sp_int128)a[ 1]) * b[ 6]
       + ((sp_int128)a[ 2]) * b[ 5]
       + ((sp_int128)a[ 3]) * b[ 4]
       + ((sp_int128)a[ 4]) * b[ 3]
       + ((sp_int128)a[ 5]) * b[ 2]
       + ((sp_int128)a[ 6]) * b[ 1]
       + ((sp_int128)a[ 7]) * b[ 0];
    t[ 6] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 0]) * b[ 8]
       + ((sp_int128)a[ 1]) * b[ 7]
       + ((sp_int128)a[ 2]) * b[ 6]
       + ((sp_int128)a[ 3]) * b[ 5]
       + ((sp_int128)a[ 4]) * b[ 4]
       + ((sp_int128)a[ 5]) * b[ 3]
       + ((sp_int128)a[ 6]) * b[ 2]
       + ((sp_int128)a[ 7]) * b[ 1]
       + ((sp_int128)a[ 8]) * b[ 0];
    t[ 7] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 1]) * b[ 8]
       + ((sp_int128)a[ 2]) * b[ 7]
       + ((sp_int128)a[ 3]) * b[ 6]
       + ((sp_int128)a[ 4]) * b[ 5]
       + ((sp_int128)a[ 5]) * b[ 4]
       + ((sp_int128)a[ 6]) * b[ 3]
       + ((sp_int128)a[ 7]) * b[ 2]
       + ((sp_int128)a[ 8]) * b[ 1];
    t[ 8] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 2]) * b[ 8]
       + ((sp_int128)a[ 3]) * b[ 7]
       + ((sp_int128)a[ 4]) * b[ 6]
       + ((sp_int128)a[ 5]) * b[ 5]
       + ((sp_int128)a[ 6]) * b[ 4]
       + ((sp_int128)a[ 7]) * b[ 3]
       + ((sp_int128)a[ 8]) * b[ 2];
    r[ 9] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 3]) * b[ 8]
       + ((sp_int128)a[ 4]) * b[ 7]
       + ((sp_int128)a[ 5]) * b[ 6]
       + ((sp_int128)a[ 6]) * b[ 5]
       + ((sp_int128)a[ 7]) * b[ 4]
       + ((sp_int128)a[ 8]) * b[ 3];
    r[10] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 4]) * b[ 8]
       + ((sp_int128)a[ 5]) * b[ 7]
       + ((sp_int128)a[ 6]) * b[ 6]
       + ((sp_int128)a[ 7]) * b[ 5]
       + ((sp_int128)a[ 8]) * b[ 4];
    r[11] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 5]) * b[ 8]
       + ((sp_int128)a[ 6]) * b[ 7]
       + ((sp_int128)a[ 7]) * b[ 6]
       + ((sp_int128)a[ 8]) * b[ 5];
    r[12] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 6]) * b[ 8]
       + ((sp_int128)a[ 7]) * b[ 7]
       + ((sp_int128)a[ 8]) * b[ 6];
    r[13] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = ((sp_int128)a[ 7]) * b[ 8]
       + ((sp_int128)a[ 8]) * b[ 7];
    r[14] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 8]) * b[ 8];
    r[15] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    r[16] = t0 & 0x3ffffffffffffffL;
    r[17] = (sp_digit)(t0 >> 58);
    XMEMCPY(r, t, sizeof(t));
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Square a and put result in r. (r = a * a)
 *
 * Column-wise squaring: off-diagonal products are accumulated once and
 * doubled (c += t * 2); the diagonal term a[i]*a[i] is added for even
 * columns only.
 *
 * r A single precision integer.
 * a A single precision integer.
 */
SP_NOINLINE static void sp_521_sqr_9(sp_digit* r, const sp_digit* a)
{
    int i;
    int imax;
    int k;
    sp_uint128 c;
    sp_uint128 t;

    c = ((sp_uint128)a[8]) * a[8];
    r[17] = (sp_digit)(c >> 58);
    c = (c & 0x3ffffffffffffffL) << 58;
    for (k = 15; k >= 0; k--) {
        i = (k + 1) / 2;
        if ((k & 1) == 0) {
            /* Even column: include the diagonal square exactly once. */
            c += ((sp_uint128)a[i]) * a[i];
            i++;
        }
        if (k < 8) {
            imax = k;
        }
        else {
            imax = 8;
        }
        t = 0;
        for (; i <= imax; i++) {
            t += ((sp_uint128)a[i]) * a[k - i];
        }
        c += t * 2;

        r[k + 2] += (sp_digit) (c >> 116);
        r[k + 1] = (sp_digit)((c >> 58) & 0x3ffffffffffffffL);
        c = (c & 0x3ffffffffffffffL) << 58;
    }
    r[0] = (sp_digit)(c >> 58);
}

#else
/* Square a and put result in r. (r = a * a)
 *
 * r A single precision integer.
 * a A single precision integer.
 */
SP_NOINLINE static void sp_521_sqr_9(sp_digit* r, const sp_digit* a)
{
    sp_int128 t0;
    sp_int128 t1;
    sp_digit t[9];

    /* Fully unrolled squaring over 58-bit digits; cross products are
     * computed once and doubled. Low nine digits are staged in t and
     * copied into r last. */
    t0 = ((sp_int128)a[ 0]) * a[ 0];
    t1 = (((sp_int128)a[ 0]) * a[ 1]) * 2;
    t[ 0] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 0]) * a[ 2]) * 2
       + ((sp_int128)a[ 1]) * a[ 1];
    t[ 1] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 0]) * a[ 3]
       +  ((sp_int128)a[ 1]) * a[ 2]) * 2;
    t[ 2] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 0]) * a[ 4]
       +  ((sp_int128)a[ 1]) * a[ 3]) * 2
       +  ((sp_int128)a[ 2]) * a[ 2];
    t[ 3] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 0]) * a[ 5]
       +  ((sp_int128)a[ 1]) * a[ 4]
       +  ((sp_int128)a[ 2]) * a[ 3]) * 2;
    t[ 4] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 0]) * a[ 6]
       +  ((sp_int128)a[ 1]) * a[ 5]
       +  ((sp_int128)a[ 2]) * a[ 4]) * 2
       +  ((sp_int128)a[ 3]) * a[ 3];
    t[ 5] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 0]) * a[ 7]
       +  ((sp_int128)a[ 1]) * a[ 6]
       +  ((sp_int128)a[ 2]) * a[ 5]
       +  ((sp_int128)a[ 3]) * a[ 4]) * 2;
    t[ 6] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 0]) * a[ 8]
       +  ((sp_int128)a[ 1]) * a[ 7]
       +  ((sp_int128)a[ 2]) * a[ 6]
       +  ((sp_int128)a[ 3]) * a[ 5]) * 2
       +  ((sp_int128)a[ 4]) * a[ 4];
    t[ 7] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 1]) * a[ 8]
       +  ((sp_int128)a[ 2]) * a[ 7]
       +  ((sp_int128)a[ 3]) * a[ 6]
       +  ((sp_int128)a[ 4]) * a[ 5]) * 2;
    t[ 8] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 2]) * a[ 8]
       +  ((sp_int128)a[ 3]) * a[ 7]
       +  ((sp_int128)a[ 4]) * a[ 6]) * 2
       +  ((sp_int128)a[ 5]) * a[ 5];
    r[ 9] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 3]) * a[ 8]
       +  ((sp_int128)a[ 4]) * a[ 7]
       +  ((sp_int128)a[ 5]) * a[ 6]) * 2;
    r[10] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 4]) * a[ 8]
       +  ((sp_int128)a[ 5]) * a[ 7]) * 2
       +  ((sp_int128)a[ 6]) * a[ 6];
    r[11] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 5]) * a[ 8]
       +  ((sp_int128)a[ 6]) * a[ 7]) * 2;
    r[12] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = (((sp_int128)a[ 6]) * a[ 8]) * 2
       +  ((sp_int128)a[ 7]) * a[ 7];
    r[13] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    t1 = (((sp_int128)a[ 7]) * a[ 8]) * 2;
    r[14] = t0 & 0x3ffffffffffffffL; t1 += t0 >> 58;
    t0 = ((sp_int128)a[ 8]) * a[ 8];
    r[15] = t1 & 0x3ffffffffffffffL; t0 += t1 >> 58;
    r[16] = t0 & 0x3ffffffffffffffL;
    r[17] = (sp_digit)(t0 >> 58);
    XMEMCPY(r, t, sizeof(t));
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Digit-wise add with no carry propagation; callers normalize later.
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
SP_NOINLINE static int sp_521_add_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i;

    for (i = 0; i < 9; i++) {
        r[i] = a[i] + b[i];
    }

    return 0;
}
#else
/* Add b to a into r. (r = a + b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
SP_NOINLINE static int sp_521_add_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    r[ 0] = a[ 0] + b[ 0];
    r[ 1] = a[ 1] + b[ 1];
    r[ 2] = a[ 2] + b[ 2];
    r[ 3] = a[ 3] + b[ 3];
    r[ 4] = a[ 4] + b[ 4];
    r[ 5] = a[ 5] + b[ 5];
    r[ 6] = a[ 6] + b[ 6];
    r[ 7] = a[ 7] + b[ 7];
    r[ 8] = a[ 8] + b[ 8];

    return 0;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into r. (r = a - b)
 *
 * Digit-wise subtract; digits may go negative until normalized.
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
SP_NOINLINE static int sp_521_sub_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i;

    for (i = 0; i < 9; i++) {
        r[i] = a[i] - b[i];
    }

    return 0;
}

#else
/* Sub b from a into r. (r = a - b)
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
SP_NOINLINE static int sp_521_sub_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    r[ 0] = a[ 0] - b[ 0];
    r[ 1] = a[ 1] - b[ 1];
    r[ 2] = a[ 2] - b[ 2];
    r[ 3] = a[ 3] - b[ 3];
    r[ 4] = a[ 4] - b[ 4];
    r[ 5] = a[ 5] - b[ 5];
    r[ 6] = a[ 6] - b[ 6];
    r[ 7] = a[ 7] - b[ 7];
    r[ 8] = a[ 8] - b[ 8];

    return 0;
}

#endif /* WOLFSSL_SP_SMALL */
/* Convert an mp_int to an array of sp_digit.
 *
 * Three layouts are handled depending on how mp_int's DIGIT_BIT compares
 * to the 58-bit sp_digit size: equal (masked copy), larger (split each
 * mp digit across several sp digits) and smaller (pack several mp digits
 * into each sp digit).
 *
 * r A single precision integer.
 * size Maximum number of bytes to convert
 * a A multi-precision integer.
 */
static void sp_521_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 58
    int i;
    sp_digit j = (sp_digit)0 - (sp_digit)a->used;
    int o = 0;

    for (i = 0; i < size; i++) {
        /* mask is all-ones while digits of a remain; avoids branching on
         * a->used. */
        sp_digit mask = (sp_digit)0 - (j >> 57);
        r[i] = a->dp[o] & mask;
        j++;
        o += (int)(j >> 57);
    }
#elif DIGIT_BIT > 58
    unsigned int i;
    int j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0x3ffffffffffffffL;
        s = 58U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 58U) <= (word32)DIGIT_BIT) {
            s += 58U;
            r[j] &= 0x3ffffffffffffffL;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = (sp_digit)0;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    unsigned int i;
    int j = 0;
    int s = 0;

    r[0] = 0;
    for (i = 0; i < (unsigned int)a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 58) {
            r[j] &= 0x3ffffffffffffffL;
            if (j + 1 >= size) {
                break;
            }
            s = 58 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Convert a point of type ecc_point to type sp_point_521.
 *
 * p Point of type sp_point_521 (result).
 * pm Point of type ecc_point.
 */
static void sp_521_point_from_ecc_point_9(sp_point_521* p,
    const ecc_point* pm)
{
    XMEMSET(p->x, 0, sizeof(p->x));
    XMEMSET(p->y, 0, sizeof(p->y));
    XMEMSET(p->z, 0, sizeof(p->z));
    sp_521_from_mp(p->x, 9, pm->x);
    sp_521_from_mp(p->y, 9, pm->y);
    sp_521_from_mp(p->z, 9, pm->z);
    p->infinity = 0;
}

/* Convert an array of sp_digit to an mp_int.
 *
 * Inverse of sp_521_from_mp: repacks nine 58-bit digits into the
 * mp_int's DIGIT_BIT-wide digits.
 *
 * a A single precision integer.
 * r A multi-precision integer.
 */
static int sp_521_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (521 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 58
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 9);
        r->used = 9;
        mp_clamp(r);
#elif DIGIT_BIT < 58
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 9; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 58) {
                s += DIGIT_BIT;
                r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 58 - s;
        }
        r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 9; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 58 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 58 - s;
            }
            else {
                s += 58;
            }
        }
        r->used = (521 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}

/* Convert a point of type sp_point_521 to
type ecc_point. + * + * p Point of type sp_point_521. + * pm Point of type ecc_point (result). + * returns MEMORY_E when allocation of memory in ecc_point fails otherwise + * MP_OKAY. + */ +static int sp_521_point_to_ecc_point_9(const sp_point_521* p, ecc_point* pm) +{ + int err; + + err = sp_521_to_mp(p->x, pm->x); + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pm->y); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pm->z); + } + + return err; +} + +/* Normalize the values in each word to 58 bits. + * + * a Array of sp_digit to normalize. + */ +static void sp_521_norm_9(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 8; i++) { + a[i+1] += a[i] >> 58; + a[i] &= 0x3ffffffffffffffL; + } +#else + a[1] += a[0] >> 58; a[0] &= 0x3ffffffffffffffL; + a[2] += a[1] >> 58; a[1] &= 0x3ffffffffffffffL; + a[3] += a[2] >> 58; a[2] &= 0x3ffffffffffffffL; + a[4] += a[3] >> 58; a[3] &= 0x3ffffffffffffffL; + a[5] += a[4] >> 58; a[4] &= 0x3ffffffffffffffL; + a[6] += a[5] >> 58; a[5] &= 0x3ffffffffffffffL; + a[7] += a[6] >> 58; a[6] &= 0x3ffffffffffffffL; + a[8] += a[7] >> 58; a[7] &= 0x3ffffffffffffffL; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Reduce the number back to 521 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_521_mont_reduce_9(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + + (void)m; + (void)mp; + + for (i = 0; i < 8; i++) { + a[i] += ((a[8 + i] >> 57) + (a[8 + i + 1] << 1)) & 0x3ffffffffffffffL; + } + a[8] &= 0x1ffffffffffffff; + a[8] += ((a[16] >> 57) + (a[17] << 1)) & 0x3ffffffffffffffL; + + sp_521_norm_9(a); + + a[0] += a[8] >> 57; + a[8] &= 0x1ffffffffffffff; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. 
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_521_cmp_9(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=8; i>=0; i--) { + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 57); + } +#else + r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)1); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 57); +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_521_cond_sub_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 9; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + r[ 0] = a[ 0] - (b[ 0] & m); + r[ 1] = a[ 1] - (b[ 1] & m); + r[ 2] = a[ 2] - (b[ 2] & m); + r[ 3] = a[ 3] - (b[ 3] & m); + r[ 4] = a[ 4] - (b[ 4] & m); + r[ 5] = a[ 5] - (b[ 5] & m); + r[ 6] = a[ 6] - (b[ 6] & m); + r[ 7] = a[ 7] - (b[ 7] & m); + r[ 8] = a[ 8] - (b[ 8] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_521_mul_add_9(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + sp_int128 tb = b; + sp_int128 t[4]; + int i; + + t[0] = 0; + for (i = 0; i < 8; i += 4) { + t[0] += (tb * a[i+0]) + r[i+0]; + t[1] = (tb * a[i+1]) + r[i+1]; + t[2] = (tb * a[i+2]) + r[i+2]; + t[3] = (tb * a[i+3]) + r[i+3]; + r[i+0] = t[0] & 0x3ffffffffffffffL; + t[1] += t[0] >> 58; + r[i+1] = t[1] & 0x3ffffffffffffffL; + t[2] += t[1] >> 58; + r[i+2] = t[2] & 0x3ffffffffffffffL; + t[3] += t[2] >> 58; + r[i+3] = t[3] & 0x3ffffffffffffffL; + t[0] = t[3] >> 58; + } + t[0] += (tb * a[8]) + r[8]; + r[8] = t[0] & 0x3ffffffffffffffL; + r[9] += (sp_digit)(t[0] >> 58); +#else + sp_int128 tb = b; + sp_int128 t[9]; + + t[ 0] = tb * a[ 0]; + t[ 1] = tb * a[ 1]; + t[ 2] = tb * a[ 2]; + t[ 3] = tb * a[ 3]; + t[ 4] = tb * a[ 4]; + t[ 5] = tb * a[ 5]; + t[ 6] = tb * a[ 6]; + t[ 7] = tb * a[ 7]; + t[ 8] = tb * a[ 8]; + r[ 0] += (sp_digit) (t[ 0] & 0x3ffffffffffffffL); + r[ 1] += (sp_digit)((t[ 0] >> 58) + (t[ 1] & 0x3ffffffffffffffL)); + r[ 2] += (sp_digit)((t[ 1] >> 58) + (t[ 2] & 0x3ffffffffffffffL)); + r[ 3] += (sp_digit)((t[ 2] >> 58) + (t[ 3] & 0x3ffffffffffffffL)); + r[ 4] += (sp_digit)((t[ 3] >> 58) + (t[ 4] & 0x3ffffffffffffffL)); + r[ 5] += (sp_digit)((t[ 4] >> 58) + (t[ 5] & 0x3ffffffffffffffL)); + r[ 6] += (sp_digit)((t[ 5] >> 58) + (t[ 6] & 0x3ffffffffffffffL)); + r[ 7] += (sp_digit)((t[ 6] >> 58) + (t[ 7] & 0x3ffffffffffffffL)); + r[ 8] += (sp_digit)((t[ 7] >> 58) + (t[ 8] & 0x3ffffffffffffffL)); + r[ 9] += (sp_digit) (t[ 8] >> 58); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Shift the result in the high 521 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
+ */ +static void sp_521_mont_shift_9(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + sp_uint64 n; + + n = a[8] >> 57; + for (i = 0; i < 8; i++) { + n += (sp_uint64)a[9 + i] << 1; + r[i] = n & 0x3ffffffffffffffL; + n >>= 58; + } + n += (sp_uint64)a[17] << 1; + r[8] = n; +#else + sp_uint64 n; + + n = a[8] >> 57; + n += (sp_uint64)a[ 9] << 1U; r[ 0] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[10] << 1U; r[ 1] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[11] << 1U; r[ 2] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[12] << 1U; r[ 3] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[13] << 1U; r[ 4] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[14] << 1U; r[ 5] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[15] << 1U; r[ 6] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[16] << 1U; r[ 7] = n & 0x3ffffffffffffffUL; n >>= 58U; + n += (sp_uint64)a[17] << 1U; r[ 8] = n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[9], 0, sizeof(*r) * 9U); +} + +/* Reduce the number back to 521 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_521_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + sp_digit over; + + sp_521_norm_9(a + 9); + + for (i=0; i<8; i++) { + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x3ffffffffffffffL; + sp_521_mul_add_9(a+i, m, mu); + a[i+1] += a[i] >> 58; + } + mu = ((sp_uint64)a[i] * (sp_uint64)mp) & 0x1ffffffffffffffL; + sp_521_mul_add_9(a+i, m, mu); + a[i+1] += a[i] >> 58; + a[i] &= 0x3ffffffffffffffL; + sp_521_mont_shift_9(a, a); + over = a[8] >> 57; + sp_521_cond_sub_9(a, a, m, ~((over - 1) >> 63)); + sp_521_norm_9(a); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). 
+ * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_mul_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m, sp_digit mp) +{ + sp_521_mul_9(r, a, b); + sp_521_mont_reduce_9(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_sqr_9(sp_digit* r, const sp_digit* a, + const sp_digit* m, sp_digit mp) +{ + sp_521_sqr_9(r, a); + sp_521_mont_reduce_9(r, m, mp); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square the Montgomery form number a number of times. (r = a ^ n mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * n Number of times to square. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +SP_NOINLINE static void sp_521_mont_sqr_n_9(sp_digit* r, + const sp_digit* a, int n, const sp_digit* m, sp_digit mp) +{ + sp_521_mont_sqr_9(r, a, m, mp); + for (; n > 1; n--) { + sp_521_mont_sqr_9(r, r, m, mp); + } +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Mod-2 for the P521 curve. */ +static const uint64_t p521_mod_minus_2[9] = { + 0xfffffffffffffffdU,0xffffffffffffffffU,0xffffffffffffffffU, + 0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU, + 0xffffffffffffffffU,0xffffffffffffffffU,0x00000000000001ffU +}; +#endif /* !WOLFSSL_SP_SMALL */ + +/* Invert the number, in Montgomery form, modulo the modulus (prime) of the + * P521 curve. (r = 1 / a mod m) + * + * r Inverse result. + * a Number to invert. + * td Temporary data. 
 */
static void sp_521_mont_inv_9(sp_digit* r, const sp_digit* a, sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Fermat inverse: a^(p-2) mod p via left-to-right square-and-multiply
     * over the fixed public exponent p521_mod_minus_2. */
    XMEMCPY(t, a, sizeof(sp_digit) * 9);
    for (i=519; i>=0; i--) {
        sp_521_mont_sqr_9(t, t, p521_mod, p521_mp_mod);
        if (p521_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_521_mont_mul_9(t, t, a, p521_mod, p521_mp_mod);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 9);
#else
    /* Fixed addition chain for a^(p-2); comments track the exponent
     * accumulated so far. */
    sp_digit* t1 = td;
    sp_digit* t2 = td + 2 * 9;
    sp_digit* t3 = td + 4 * 9;

    /* 0x2 */
    sp_521_mont_sqr_9(t1, a, p521_mod, p521_mp_mod);
    /* 0x3 */
    sp_521_mont_mul_9(t2, t1, a, p521_mod, p521_mp_mod);
    /* 0x6 */
    sp_521_mont_sqr_9(t1, t2, p521_mod, p521_mp_mod);
    /* 0x7 */
    sp_521_mont_mul_9(t3, t1, a, p521_mod, p521_mp_mod);
    /* 0xc */
    sp_521_mont_sqr_n_9(t1, t2, 2, p521_mod, p521_mp_mod);
    /* 0xf */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0x78 */
    sp_521_mont_sqr_n_9(t1, t2, 3, p521_mod, p521_mp_mod);
    /* 0x7f */
    sp_521_mont_mul_9(t3, t3, t1, p521_mod, p521_mp_mod);
    /* 0xf0 */
    sp_521_mont_sqr_n_9(t1, t2, 4, p521_mod, p521_mp_mod);
    /* 0xff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xff00 */
    sp_521_mont_sqr_n_9(t1, t2, 8, p521_mod, p521_mp_mod);
    /* 0xffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffff0000 */
    sp_521_mont_sqr_n_9(t1, t2, 16, p521_mod, p521_mp_mod);
    /* 0xffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffff00000000 */
    sp_521_mont_sqr_n_9(t1, t2, 32, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffff0000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 64, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffff00000000000000000000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 128, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000 */
    sp_521_mont_sqr_n_9(t1, t2, 256, p521_mod, p521_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t2, t1, p521_mod, p521_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80 */
    sp_521_mont_sqr_n_9(t1, t2, 7, p521_mod, p521_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_521_mont_mul_9(t2, t3, t1, p521_mod, p521_mp_mod);
    /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc */
    sp_521_mont_sqr_n_9(t1, t2, 2, p521_mod, p521_mp_mod);
    /* 0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
    sp_521_mont_mul_9(r, t1, a, p521_mod, p521_mp_mod);

#endif /* WOLFSSL_SP_SMALL */
}

/* Map the Montgomery form projective coordinate point to an affine point.
 *
 * r Resulting affine coordinate point.
 * p Montgomery form projective coordinate point.
 * t Temporary ordinate data.
+ */ +static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_int64 n; + + sp_521_mont_inv_9(t1, p->z, t + 2*9); + + sp_521_mont_sqr_9(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t2, t1, p521_mod, p521_mp_mod); + + /* x /= z^2 */ + sp_521_mont_mul_9(r->x, p->x, t2, p521_mod, p521_mp_mod); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); + sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); + /* Reduce x to less than modulus */ + n = sp_521_cmp_9(r->x, p521_mod); + sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 57)); + sp_521_norm_9(r->x); + + /* y /= z^3 */ + sp_521_mont_mul_9(r->y, p->y, t1, p521_mod, p521_mp_mod); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); + sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); + /* Reduce y to less than modulus */ + n = sp_521_cmp_9(r->y, p521_mod); + sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 57)); + sp_521_norm_9(r->y); + + XMEMSET(r->z, 0, sizeof(r->z) / 2); + r->z[0] = 1; +} + +/* Add two Montgomery form numbers (r = a + b % m). + * + * r Result of addition. + * a First number to add in Montgomery form. + * b Second number to add in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_9(r, a, b); + sp_521_norm_9(r); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); + sp_521_norm_9(r); +} + +/* Double a Montgomery form number (r = a + a % m). + * + * r Result of doubling. + * a Number to double in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_9(r, a, a); + sp_521_norm_9(r); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); + sp_521_norm_9(r); +} + +/* Triple a Montgomery form number (r = a + a + a % m). + * + * r Result of Tripling. 
+ * a Number to triple in Montgomery form. + * m Modulus (prime). + */ +static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + sp_digit over; + (void)sp_521_add_9(r, a, a); + sp_521_norm_9(r); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); + sp_521_norm_9(r); + (void)sp_521_add_9(r, r, a); + sp_521_norm_9(r); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); + sp_521_norm_9(r); +} + +#ifdef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_521_cond_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ + int i; + + for (i = 0; i < 9; i++) { + r[i] = a[i] + (b[i] & m); + } +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_521_cond_add_9(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ + r[ 0] = a[ 0] + (b[ 0] & m); + r[ 1] = a[ 1] + (b[ 1] & m); + r[ 2] = a[ 2] + (b[ 2] & m); + r[ 3] = a[ 3] + (b[ 3] & m); + r[ 4] = a[ 4] + (b[ 4] & m); + r[ 5] = a[ 5] + (b[ 5] & m); + r[ 6] = a[ 6] + (b[ 6] & m); + r[ 7] = a[ 7] + (b[ 7] & m); + r[ 8] = a[ 8] + (b[ 8] & m); +} +#endif /* !WOLFSSL_SP_SMALL */ + +/* Subtract two Montgomery form numbers (r = a - b % m). + * + * r Result of subtration. + * a Number to subtract from in Montgomery form. + * b Number to subtract with in Montgomery form. + * m Modulus (prime). 
 */
static void sp_521_mont_sub_9(sp_digit* r, const sp_digit* a, const sp_digit* b,
    const sp_digit* m)
{
    (void)sp_521_sub_9(r, a, b);
    sp_521_norm_9(r);
    /* If the subtract went negative (sign in bit 57 of r[8]), add m back
     * in constant time. */
    sp_521_cond_add_9(r, r, m, r[8] >> 57);
    sp_521_norm_9(r);
}

/* Shift number right one bit.
 * Bottom bit is lost.
 *
 * (Original comment said "left"; the code shifts right -- each digit is
 * shifted down with the next digit's low bit moved into bit 57.)
 *
 * r Result of shift.
 * a Number to shift.
 */
SP_NOINLINE static void sp_521_rshift1_9(sp_digit* r, const sp_digit* a)
{
#ifdef WOLFSSL_SP_SMALL
    int i;

    for (i=0; i<8; i++) {
        r[i] = (a[i] >> 1) + ((a[i + 1] << 57) & 0x3ffffffffffffffL);
    }
#else
    r[0] = (a[0] >> 1) + ((a[1] << 57) & 0x3ffffffffffffffL);
    r[1] = (a[1] >> 1) + ((a[2] << 57) & 0x3ffffffffffffffL);
    r[2] = (a[2] >> 1) + ((a[3] << 57) & 0x3ffffffffffffffL);
    r[3] = (a[3] >> 1) + ((a[4] << 57) & 0x3ffffffffffffffL);
    r[4] = (a[4] >> 1) + ((a[5] << 57) & 0x3ffffffffffffffL);
    r[5] = (a[5] >> 1) + ((a[6] << 57) & 0x3ffffffffffffffL);
    r[6] = (a[6] >> 1) + ((a[7] << 57) & 0x3ffffffffffffffL);
    r[7] = (a[7] >> 1) + ((a[8] << 57) & 0x3ffffffffffffffL);
#endif
    r[8] = a[8] >> 1;
}

/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
 *
 * Make the value even by conditionally adding m (constant time), then
 * shift right one bit.
 *
 * r Result of division by 2.
 * a Number to divide.
 * m Modulus (prime).
 */
static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    sp_521_cond_add_9(r, a, m, 0 - (a[0] & 1));
    sp_521_norm_9(r);
    sp_521_rshift1_9(r, r);
}

/* Double the Montgomery form projective point p.
 *
 * Jacobian-coordinate doubling using field ops over p521.
 *
 * r Result of doubling point.
 * p Point to double.
 * t Temporary ordinate data.
 */
static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p,
    sp_digit* t)
{
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*9;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = r->x;
    y = r->y;
    z = r->z;
    /* Put infinity into result. */
    if (r != p) {
        r->infinity = p->infinity;
    }

    /* T1 = Z * Z */
    sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod);
    /* Z = Y * Z */
    sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod);
    /* Z = 2Z */
    sp_521_mont_dbl_9(z, z, p521_mod);
    /* T2 = X - T1 */
    sp_521_mont_sub_9(t2, p->x, t1, p521_mod);
    /* T1 = X + T1 */
    sp_521_mont_add_9(t1, p->x, t1, p521_mod);
    /* T2 = T1 * T2 */
    sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod);
    /* T1 = 3T2 */
    sp_521_mont_tpl_9(t1, t2, p521_mod);
    /* Y = 2Y */
    sp_521_mont_dbl_9(y, p->y, p521_mod);
    /* Y = Y * Y */
    sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod);
    /* T2 = Y * Y */
    sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod);
    /* T2 = T2/2 */
    sp_521_mont_div2_9(t2, t2, p521_mod);
    /* Y = Y * X */
    sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod);
    /* X = T1 * T1 */
    sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod);
    /* X = X - Y */
    sp_521_mont_sub_9(x, x, y, p521_mod);
    /* X = X - Y */
    sp_521_mont_sub_9(x, x, y, p521_mod);
    /* Y = Y - X */
    sp_521_mont_sub_9(y, y, x, p521_mod);
    /* Y = Y * T1 */
    sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod);
    /* Y = Y - T2 */
    sp_521_mont_sub_9(y, y, t2, p521_mod);
}

#ifdef WOLFSSL_SP_NONBLOCK
/* Resumable state for the non-blocking point double: one field operation
 * is performed per call, tracked by 'state'. */
typedef struct sp_521_proj_point_dbl_9_ctx {
    int state;
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_521_proj_point_dbl_9_ctx;

/* Double the Montgomery form projective point p.
 *
 * Non-blocking variant: performs one step per invocation and returns
 * FP_WOULDBLOCK until the final state (19) returns MP_OKAY.
 *
 * r Result of doubling point.
 * p Point to double.
 * t Temporary ordinate data.
 */
static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r,
    const sp_point_521* p, sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_521_proj_point_dbl_9_ctx* ctx = (sp_521_proj_point_dbl_9_ctx*)sp_ctx->data;

    /* Compile-time check that the ctx structure fits in sp_ctx->data. */
    typedef char ctx_size_test[sizeof(sp_521_proj_point_dbl_9_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        ctx->t1 = t;
        ctx->t2 = t + 2*9;
        ctx->x = r->x;
        ctx->y = r->y;
        ctx->z = r->z;

        /* Put infinity into result. */
        if (r != p) {
            r->infinity = p->infinity;
        }
        ctx->state = 1;
        break;
    case 1:
        /* T1 = Z * Z */
        sp_521_mont_sqr_9(ctx->t1, p->z, p521_mod, p521_mp_mod);
        ctx->state = 2;
        break;
    case 2:
        /* Z = Y * Z */
        sp_521_mont_mul_9(ctx->z, p->y, p->z, p521_mod, p521_mp_mod);
        ctx->state = 3;
        break;
    case 3:
        /* Z = 2Z */
        sp_521_mont_dbl_9(ctx->z, ctx->z, p521_mod);
        ctx->state = 4;
        break;
    case 4:
        /* T2 = X - T1 */
        sp_521_mont_sub_9(ctx->t2, p->x, ctx->t1, p521_mod);
        ctx->state = 5;
        break;
    case 5:
        /* T1 = X + T1 */
        sp_521_mont_add_9(ctx->t1, p->x, ctx->t1, p521_mod);
        ctx->state = 6;
        break;
    case 6:
        /* T2 = T1 * T2 */
        sp_521_mont_mul_9(ctx->t2, ctx->t1, ctx->t2, p521_mod, p521_mp_mod);
        ctx->state = 7;
        break;
    case 7:
        /* T1 = 3T2 */
        sp_521_mont_tpl_9(ctx->t1, ctx->t2, p521_mod);
        ctx->state = 8;
        break;
    case 8:
        /* Y = 2Y */
        sp_521_mont_dbl_9(ctx->y, p->y, p521_mod);
        ctx->state = 9;
        break;
    case 9:
        /* Y = Y * Y */
        sp_521_mont_sqr_9(ctx->y, ctx->y, p521_mod, p521_mp_mod);
        ctx->state = 10;
        break;
    case 10:
        /* T2 = Y * Y */
        sp_521_mont_sqr_9(ctx->t2, ctx->y, p521_mod, p521_mp_mod);
        ctx->state = 11;
        break;
    case 11:
        /* T2 = T2/2 */
        sp_521_mont_div2_9(ctx->t2, ctx->t2, p521_mod);
        ctx->state = 12;
        break;
    case 12:
        /* Y = Y * X */
        sp_521_mont_mul_9(ctx->y, ctx->y, p->x, p521_mod, p521_mp_mod);
        ctx->state = 13;
        break;
    case 13:
        /* X = T1 * T1 */
        sp_521_mont_sqr_9(ctx->x, ctx->t1, p521_mod, p521_mp_mod);
        ctx->state = 14;
        break;
    case 14:
        /* X = X - Y */
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->y, p521_mod);
        ctx->state = 15;
        break;
    case 15:
        /* X = X - Y */
        sp_521_mont_sub_9(ctx->x, ctx->x, ctx->y, p521_mod);
        ctx->state = 16;
        break;
    case 16:
        /* Y = Y - X */
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->x, p521_mod);
        ctx->state = 17;
        break;
    case 17:
        /* Y = Y * T1 */
        sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t1, p521_mod, p521_mp_mod);
        ctx->state = 18;
        break;
    case 18:
        /* Y = Y - T2 */
        sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t2, p521_mod);
        ctx->state = 19;
        /* fall-through */
    case 19:
        err = MP_OKAY;
        break;
    }

    if (err == MP_OKAY && ctx->state != 19) {
        err = FP_WOULDBLOCK;
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */
/* Compare two numbers to determine if they are equal.
 * Constant time implementation.
 *
 * a First number to compare.
 * b Second number to compare.
 * returns 1 when equal and 0 otherwise.
 */
static int sp_521_cmp_equal_9(const sp_digit* a, const sp_digit* b)
{
    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
            (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
            (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0;
}

/* Returns 1 if the number of zero.
 * Implementation is constant time.
 *
 * a Number to check.
 * returns 1 if the number is zero and 0 otherwise.
 */
static int sp_521_iszero_9(const sp_digit* a)
{
    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
            a[8]) == 0;
}


/* Add two Montgomery form projective points.
 *
 * r Result of addition.
 * p First point to add.
 * q Second point to add.
 * t Temporary ordinate data.
+ */ +static void sp_521_proj_point_add_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*9; + sp_digit* t2 = t + 4*9; + sp_digit* t3 = t + 6*9; + sp_digit* t4 = t + 8*9; + sp_digit* t5 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(t2, t1) & + sp_521_cmp_equal_9(t4, t3)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 
- (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_proj_point_add_9_ctx { + int state; + sp_521_proj_point_dbl_9_ctx dbl_ctx; + const sp_point_521* ap[2]; + sp_point_521* rp[2]; + sp_digit* t1; + sp_digit* t2; + sp_digit* t3; + sp_digit* t4; + sp_digit* t5; + sp_digit* t6; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_521_proj_point_add_9_ctx; + +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_521_proj_point_add_9_ctx* ctx = (sp_521_proj_point_add_9_ctx*)sp_ctx->data; + + /* Ensure only the first point is the same as the result. */ + if (q == r) { + const sp_point_521* a = p; + p = q; + q = a; + } + + typedef char ctx_size_test[sizeof(sp_521_proj_point_add_9_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->t6 = t; + ctx->t1 = t + 2*9; + ctx->t2 = t + 4*9; + ctx->t3 = t + 6*9; + ctx->t4 = t + 8*9; + ctx->t5 = t + 10*9; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; + + ctx->state = 1; + break; + case 1: + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; + break; + case 2: + sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; + break; + case 3: + sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 5; + break; + case 5: + sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + ctx->state = 6; + break; + case 6: + sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); + ctx->state = 7; + break; + case 7: + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + ctx->state = 8; + break; + case 8: + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + ctx->state = 9; + break; + case 9: + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(ctx->t2, ctx->t1) & + sp_521_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } + break; + case 10: + /* H = U2 - U1 */ + sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); + ctx->state = 11; + break; + case 11: + /* R = S2 - S1 */ + sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); + ctx->state = 12; + break; + case 12: + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 13; + break; + case 13: + sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + ctx->state = 14; + break; + case 14: + 
sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 15; + break; + case 15: + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + ctx->state = 16; + break; + case 16: + sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + ctx->state = 17; + break; + case 17: + sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 18; + break; + case 18: + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); + ctx->state = 19; + break; + case 19: + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + ctx->state = 20; + break; + case 20: + sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); + ctx->state = 21; + break; + case 21: + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_9(ctx->y, ctx->y, ctx->x, p521_mod); + ctx->state = 22; + break; + case 22: + sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); + ctx->state = 23; + break; + case 23: + sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); + ctx->state = 24; + break; + case 24: + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + ctx->state = 25; + break; + } + case 25: + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 25) { + err = FP_WOULDBLOCK; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +/* Multiply a number by Montgomery normalizer mod modulus (prime). 
+ * + * r The resulting Montgomery form number. + * a The number to convert. + * m The modulus (prime). + * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise. + */ +static int sp_521_mod_mul_norm_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + (void)m; + + if (r != a) { + XMEMCPY(r, a, 9 * sizeof(sp_digit)); + } + + return MP_OKAY; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Small implementation using add and double that is cache attack resistant but + * allocates memory rather than use large stacks. + * 521 adds and doubles. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 t[3]; + sp_digit tmp[2 * 9 * 6]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_521) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_9(t[1].x, g->x, p521_mod); + } + if (err == MP_OKAY) + err = sp_521_mod_mul_norm_9(t[1].y, g->y, p521_mod); + if (err == MP_OKAY) + err = sp_521_mod_mul_norm_9(t[1].z, g->z, p521_mod); + + if (err == MP_OKAY) { + i = 8; + c = 57; + n = k[i--] << (58 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 58; + } + + y = (n >> 57) & 1; + n <<= 1; + + sp_521_proj_point_add_9(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_521)); + sp_521_proj_point_dbl_9(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_521)); + } + + if (map != 0) { + sp_521_map_9(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 9 * 6); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_521) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_521_ecc_mulmod_9_ctx { + int state; + union { + sp_521_proj_point_dbl_9_ctx dbl_ctx; + sp_521_proj_point_add_9_ctx add_ctx; + }; + sp_point_521 t[3]; + sp_digit tmp[2 * 9 * 6]; + 
sp_digit n; + int i; + int c; + int y; +} sp_521_ecc_mulmod_9_ctx; + +static int sp_521_ecc_mulmod_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* g, const sp_digit* k, int map, int ct, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_521_ecc_mulmod_9_ctx* ctx = (sp_521_ecc_mulmod_9_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_521_ecc_mulmod_9_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + /* Implementation is constant time. */ + (void)ct; + + switch (ctx->state) { + case 0: /* INIT */ + XMEMSET(ctx->t, 0, sizeof(sp_point_521) * 3); + ctx->i = 8; + ctx->c = 57; + ctx->n = k[ctx->i--] << (58 - ctx->c); + + /* t[0] = {0, 0, 1} * norm */ + ctx->t[0].infinity = 1; + ctx->state = 1; + break; + case 1: /* T1X */ + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_9(ctx->t[1].x, g->x, p521_mod); + ctx->state = 2; + break; + case 2: /* T1Y */ + err = sp_521_mod_mul_norm_9(ctx->t[1].y, g->y, p521_mod); + ctx->state = 3; + break; + case 3: /* T1Z */ + err = sp_521_mod_mul_norm_9(ctx->t[1].z, g->z, p521_mod); + ctx->state = 4; + break; + case 4: /* ADDPREP */ + if (ctx->c == 0) { + if (ctx->i == -1) { + ctx->state = 7; + break; + } + + ctx->n = k[ctx->i--]; + ctx->c = 58; + } + ctx->y = (ctx->n >> 57) & 1; + ctx->n <<= 1; + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 5; + break; + case 5: /* ADD */ + err = sp_521_proj_point_add_9_nb((sp_ecc_ctx_t*)&ctx->add_ctx, + &ctx->t[ctx->y^1], &ctx->t[0], &ctx->t[1], ctx->tmp); + if (err == MP_OKAY) { + XMEMCPY(&ctx->t[2], (void*)(((size_t)&ctx->t[0] & addr_mask[ctx->y^1]) + + ((size_t)&ctx->t[1] & addr_mask[ctx->y])), + sizeof(sp_point_521)); + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + ctx->state = 6; + } + break; + case 6: /* DBL */ + err = sp_521_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->t[2], + &ctx->t[2], ctx->tmp); + if (err == MP_OKAY) { + XMEMCPY((void*)(((size_t)&ctx->t[0] & addr_mask[ctx->y^1]) + + 
((size_t)&ctx->t[1] & addr_mask[ctx->y])), &ctx->t[2], + sizeof(sp_point_521)); + ctx->state = 4; + ctx->c--; + } + break; + case 7: /* MAP */ + if (map != 0) { + sp_521_map_9(r, &ctx->t[0], ctx->tmp); + } + else { + XMEMCPY(r, &ctx->t[0], sizeof(sp_point_521)); + } + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 7) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + ForceZero(ctx->tmp, sizeof(ctx->tmp)); + ForceZero(ctx->t, sizeof(ctx->t)); + } + + (void)heap; + + return err; +} + +#endif /* WOLFSSL_SP_NONBLOCK */ + +#else +/* A table entry for pre-computed points. */ +typedef struct sp_table_entry_521 { + sp_digit x[9]; + sp_digit y[9]; +} sp_table_entry_521; + +/* Conditionally copy a into r using the mask m. + * m is -1 to copy and 0 when not. + * + * r A single precision number to copy over. + * a A single precision number to copy. + * m Mask value to apply. + */ +static void sp_521_cond_copy_9(sp_digit* r, const sp_digit* a, const sp_digit m) +{ + sp_digit t[9]; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 9; i++) { + t[i] = r[i] ^ a[i]; + } + for (i = 0; i < 9; i++) { + r[i] ^= t[i] & m; + } +#else + t[ 0] = r[ 0] ^ a[ 0]; + t[ 1] = r[ 1] ^ a[ 1]; + t[ 2] = r[ 2] ^ a[ 2]; + t[ 3] = r[ 3] ^ a[ 3]; + t[ 4] = r[ 4] ^ a[ 4]; + t[ 5] = r[ 5] ^ a[ 5]; + t[ 6] = r[ 6] ^ a[ 6]; + t[ 7] = r[ 7] ^ a[ 7]; + t[ 8] = r[ 8] ^ a[ 8]; + r[ 0] ^= t[ 0] & m; + r[ 1] ^= t[ 1] & m; + r[ 2] ^= t[ 2] & m; + r[ 3] ^= t[ 3] & m; + r[ 4] ^= t[ 4] & m; + r[ 5] ^= t[ 5] & m; + r[ 6] ^= t[ 6] & m; + r[ 7] ^= t[ 7] & m; + r[ 8] ^= t[ 8] & m; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. 
+ */ +static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, + sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*9; + sp_digit* b = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t2 = t + 8*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + volatile int n = i; + + x = p->x; + y = p->y; + z = p->z; + + /* Y = 2*Y */ + sp_521_mont_dbl_9(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); +#ifndef WOLFSSL_SP_SMALL + while (--n > 0) +#else + while (--n >= 0) +#endif + { + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t1, t1, w, p521_mod); + sp_521_mont_tpl_9(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t2, b, p521_mod); + sp_521_mont_sub_9(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_9(t2, b, x, p521_mod); + sp_521_mont_dbl_9(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_9(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod); +#ifdef WOLFSSL_SP_SMALL + if (n != 0) +#endif + { + /* W = W*Y^4 */ + sp_521_mont_mul_9(w, w, t1, p521_mod, p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t1, p521_mod); + } +#ifndef WOLFSSL_SP_SMALL + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t1, t1, w, p521_mod); + sp_521_mont_tpl_9(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod); + /* X = A^2 - 2B */ + sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t2, b, p521_mod); + sp_521_mont_sub_9(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_9(t2, b, x, 
p521_mod); + sp_521_mont_dbl_9(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_9(z, z, y, p521_mod, p521_mp_mod); + /* t1 = Y^4 */ + sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod); + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t1, p521_mod); +#endif /* WOLFSSL_SP_SMALL */ + /* Y = Y/2 */ + sp_521_mont_div2_9(y, y, p521_mod); +} + +/* Double the Montgomery form projective point p a number of times. + * + * r Result of repeated doubling of point. + * p Point to double. + * n Number of times to double + * t Temporary ordinate data. + */ +static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, + const sp_point_521* p, int n, int m, sp_digit* t) +{ + sp_digit* w = t; + sp_digit* a = t + 2*9; + sp_digit* b = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t2 = t + 8*9; + sp_digit* x = r[2*m].x; + sp_digit* y = r[(1<x[i]; + } + for (i=0; i<9; i++) { + y[i] = p->y[i]; + } + for (i=0; i<9; i++) { + z[i] = p->z[i]; + } + + /* Y = 2*Y */ + sp_521_mont_dbl_9(y, y, p521_mod); + /* W = Z^4 */ + sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); + j = m; + for (i=1; i<=n; i++) { + j *= 2; + + /* A = 3*(X^2 - W) */ + sp_521_mont_sqr_9(t1, x, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t1, t1, w, p521_mod); + sp_521_mont_tpl_9(a, t1, p521_mod); + /* B = X*Y^2 */ + sp_521_mont_sqr_9(t1, y, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(b, t1, x, p521_mod, p521_mp_mod); + x = r[j].x; + /* X = A^2 - 2B */ + sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t2, b, p521_mod); + sp_521_mont_sub_9(x, x, t2, p521_mod); + /* B = 2.(B - X) */ + sp_521_mont_sub_9(t2, b, x, p521_mod); + sp_521_mont_dbl_9(b, t2, p521_mod); + /* Z = Z*Y */ + sp_521_mont_mul_9(r[j].z, z, y, p521_mod, p521_mp_mod); + z = r[j].z; + /* t1 = Y^4 */ + sp_521_mont_sqr_9(t1, t1, p521_mod, p521_mp_mod); + if (i != n) { + /* W = W*Y^4 */ + sp_521_mont_mul_9(w, w, t1, p521_mod, 
p521_mp_mod); + } + /* y = 2*A*(B - X) - Y^4 */ + sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t1, p521_mod); + /* Y = Y/2 */ + sp_521_mont_div2_9(r[j].y, y, p521_mod); + r[j].infinity = 0; + } +} + +/* Add two Montgomery form projective points. + * + * ra Result of addition. + * rs Result of subtraction. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_add_sub_9(sp_point_521* ra, + sp_point_521* rs, const sp_point_521* p, const sp_point_521* q, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; + sp_digit* xs = rs->x; + sp_digit* ys = rs->y; + sp_digit* zs = rs->z; + + + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); + ra->infinity = 0; + rs->infinity = 0; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, xa, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, za, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, za, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(t3, t3, ya, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + /* RS = S2 + S1 */ + sp_521_mont_add_9(t6, t4, t3, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(t4, t4, t3, p521_mod); + /* Z3 = H*Z1*Z2 */ + /* ZS = H*Z1*Z2 */ + sp_521_mont_mul_9(za, za, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(za, za, t2, p521_mod, p521_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + /* XS = RS^2 - H^3 - 
2*U1*H^2 */ + sp_521_mont_sqr_9(xa, t4, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(xs, t6, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ya, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(xa, xa, t5, p521_mod); + sp_521_mont_sub_9(xs, xs, t5, p521_mod); + sp_521_mont_dbl_9(t1, ya, p521_mod); + sp_521_mont_sub_9(xa, xa, t1, p521_mod); + sp_521_mont_sub_9(xs, xs, t1, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ + sp_521_mont_sub_9(ys, ya, xs, p521_mod); + sp_521_mont_sub_9(ya, ya, xa, p521_mod); + sp_521_mont_mul_9(ya, ya, t4, p521_mod, p521_mp_mod); + sp_521_sub_9(t6, p521_mod, t6); + sp_521_mont_mul_9(ys, ys, t6, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(ya, ya, t5, p521_mod); + sp_521_mont_sub_9(ys, ys, t5, p521_mod); +} + +/* Structure used to describe recoding of scalar multiplication. */ +typedef struct ecc_recode_521 { + /* Index into pre-computation table. */ + uint8_t i; + /* Use the negative of the point. */ + uint8_t neg; +} ecc_recode_521; + +/* The index into pre-computation table to use. */ +static const uint8_t recode_index_9_6[66] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, +}; + +/* Whether to negate y-ordinate. */ +static const uint8_t recode_neg_9_6[66] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, +}; + +/* Recode the scalar for multiplication using pre-computed values and + * subtraction. + * + * k Scalar to multiply by. + * v Vector of operations to perform. 
+ */ +static void sp_521_ecc_recode_6_9(const sp_digit* k, ecc_recode_521* v) +{ + int i; + int j; + uint8_t y; + int carry = 0; + int o; + sp_digit n; + + j = 0; + n = k[j]; + o = 0; + for (i=0; i<87; i++) { + y = (int8_t)n; + if (o + 6 < 58) { + y &= 0x3f; + n >>= 6; + o += 6; + } + else if (o + 6 == 58) { + n >>= 6; + if (++j < 9) + n = k[j]; + o = 0; + } + else if (++j < 9) { + n = k[j]; + y |= (uint8_t)((n << (58 - o)) & 0x3f); + o -= 52; + n >>= o; + } + + y += (uint8_t)carry; + v[i].i = recode_index_9_6[y]; + v[i].neg = recode_neg_9_6[y]; + carry = (y >> 6) + v[i].neg; + } +} + +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible point that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_point_33_9(sp_point_521* r, const sp_point_521* table, + int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + r->z[0] = 0; + r->z[1] = 0; + r->z[2] = 0; + r->z[3] = 0; + r->z[4] = 0; + r->z[5] = 0; + r->z[6] = 0; + r->z[7] = 0; + r->z[8] = 0; + for (i = 1; i < 33; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & 
table[i].y[8]; + r->z[0] |= mask & table[i].z[0]; + r->z[1] |= mask & table[i].z[1]; + r->z[2] |= mask & table[i].z[2]; + r->z[3] |= mask & table[i].z[3]; + r->z[4] |= mask & table[i].z[4]; + r->z[5] |= mask & table[i].z[5]; + r->z[6] |= mask & table[i].z[6]; + r->z[7] |= mask & table[i].z[7]; + r->z[8] |= mask & table[i].z[8]; + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Window technique of 6 bits. (Add-Sub variation.) + * Calculate 0..32 times the point. Use function that adds and + * subtracts the same two points. + * Recode to add or subtract one of the computed points. + * Double to push up. + * NOT a sliding window. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_win_add_sub_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_521 t[33+2]; + sp_digit tmp[2 * 9 * 6]; +#endif + sp_point_521* rt = NULL; + sp_point_521* p = NULL; + sp_digit* negy; + int i; + ecc_recode_521 v[87]; + int err = MP_OKAY; + + /* Constant time used for cache attack resistance implementation. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * + (33+2), heap, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, + heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + rt = t + 33; + p = t + 33+1; + + /* t[0] = {0, 0, 1} * norm */ + XMEMSET(&t[0], 0, sizeof(t[0])); + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_521_mod_mul_norm_9(t[1].x, g->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t[1].y, g->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t[1].z, g->z, p521_mod); + } + + if (err == MP_OKAY) { + t[1].infinity = 0; + /* t[2] ... t[32] */ + sp_521_proj_point_dbl_n_store_9(t, &t[ 1], 5, 1, tmp); + sp_521_proj_point_add_9(&t[ 3], &t[ 2], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[ 6], &t[ 3], tmp); + sp_521_proj_point_add_sub_9(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[10], &t[ 5], tmp); + sp_521_proj_point_add_sub_9(&t[11], &t[ 9], &t[10], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[12], &t[ 6], tmp); + sp_521_proj_point_dbl_9(&t[14], &t[ 7], tmp); + sp_521_proj_point_add_sub_9(&t[15], &t[13], &t[14], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[18], &t[ 9], tmp); + sp_521_proj_point_add_sub_9(&t[19], &t[17], &t[18], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[20], &t[10], tmp); + sp_521_proj_point_dbl_9(&t[22], &t[11], tmp); + sp_521_proj_point_add_sub_9(&t[23], &t[21], &t[22], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[24], &t[12], tmp); + sp_521_proj_point_dbl_9(&t[26], &t[13], tmp); + sp_521_proj_point_add_sub_9(&t[27], &t[25], &t[26], &t[ 1], tmp); + sp_521_proj_point_dbl_9(&t[28], &t[14], tmp); + sp_521_proj_point_dbl_9(&t[30], &t[15], tmp); + sp_521_proj_point_add_sub_9(&t[31], &t[29], &t[30], &t[ 1], tmp); + + negy = t[0].y; + + sp_521_ecc_recode_6_9(k, v); + + i 
= 86; + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_9(rt, t, v[i].i); + rt->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_521)); + } + for (--i; i>=0; i--) { + sp_521_proj_point_dbl_n_9(rt, 6, tmp); + + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_point_33_9(p, t, v[i].i); + p->infinity = !v[i].i; + } + else + #endif + { + XMEMCPY(p, &t[v[i].i], sizeof(sp_point_521)); + } + sp_521_sub_9(negy, p521_mod, p->y); + sp_521_norm_9(negy); + sp_521_cond_copy_9(p->y, negy, (sp_digit)0 - v[i].neg); + sp_521_proj_point_add_9(rt, rt, p, tmp); + } + + if (map != 0) { + sp_521_map_9(r, rt, tmp); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (tmp != NULL) + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#endif /* FP_ECC */ +/* Add two Montgomery form projective points. The second point has a q value of + * one. + * Only the first point can be the same pointer as the result point. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ +static void sp_521_proj_point_add_qz1_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t2 = t; + sp_digit* t3 = t + 2*9; + sp_digit* t6 = t + 4*9; + sp_digit* t1 = t + 6*9; + sp_digit* t4 = t + 8*9; + sp_digit* t5 = t + 10*9; + + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; + + /* H = U2 - X1 */ + sp_521_mont_sub_9(t2, t2, p->x, p521_mod); + /* R = S2 - Y1 */ + sp_521_mont_sub_9(t4, t4, p->y, p521_mod); + /* Z3 = H*Z1 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + /* X3 = R^2 - H^3 - 2*X1*H^2 */ + sp_521_mont_sqr_9(t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, p->x, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(t2, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + sp_521_mont_dbl_9(t5, t3, p521_mod); + sp_521_mont_sub_9(x, t2, t5, p521_mod); + /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ + sp_521_mont_sub_9(t3, t3, x, p521_mod); + sp_521_mont_mul_9(t3, t3, t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->y, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, t3, t1, p521_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} + +#ifdef FP_ECC +/* Convert the projective point to affine. + * Ordinates are in Montgomery form. 
+ * + * a Point to convert. + * t Temporary data. + */ +static void sp_521_proj_to_affine_9(sp_point_521* a, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2 * 9; + sp_digit* tmp = t + 4 * 9; + + sp_521_mont_inv_9(t1, a->z, tmp); + + sp_521_mont_sqr_9(t2, t1, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t2, t1, p521_mod, p521_mp_mod); + + sp_521_mont_mul_9(a->x, a->x, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(a->y, a->y, t1, p521_mod, p521_mp_mod); + XMEMCPY(a->z, p521_norm_mod, sizeof(p521_norm_mod)); +} + +/* Generate the pre-computed table of points for the base point. + * + * width = 8 + * 256 entries + * 65 bits between + * + * a The base point. + * table Place to store generated point data. + * tmp Temporary data. + * heap Heap to use for allocation. + */ +static int sp_521_gen_stripe_table_9(const sp_point_521* a, + sp_table_entry_521* table, sp_digit* tmp, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* t = NULL; +#else + sp_point_521 t[3]; +#endif + sp_point_521* s1 = NULL; + sp_point_521* s2 = NULL; + int i; + int j; + int err = MP_OKAY; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + s1 = t + 1; + s2 = t + 2; + + err = sp_521_mod_mul_norm_9(t->x, a->x, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->y, a->y, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_mod_mul_norm_9(t->z, a->z, p521_mod); + } + if (err == MP_OKAY) { + t->infinity = 0; + sp_521_proj_to_affine_9(t, tmp); + + XMEMCPY(s1->z, p521_norm_mod, sizeof(p521_norm_mod)); + s1->infinity = 0; + XMEMCPY(s2->z, p521_norm_mod, sizeof(p521_norm_mod)); + s2->infinity = 0; + + /* table[0] = {0, 0, infinity} */ + XMEMSET(&table[0], 0, sizeof(sp_table_entry_521)); + /* table[1] = Affine version of 'a' in Montgomery form */ + XMEMCPY(table[1].x, t->x, sizeof(table->x)); + XMEMCPY(table[1].y, 
t->y, sizeof(table->y)); + + for (i=1; i<8; i++) { + sp_521_proj_point_dbl_n_9(t, 66, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[1<x, sizeof(table->x)); + XMEMCPY(table[1<y, sizeof(table->y)); + } + + for (i=1; i<8; i++) { + XMEMCPY(s1->x, table[1<x)); + XMEMCPY(s1->y, table[1<y)); + for (j=(1<x, table[j-(1<x)); + XMEMCPY(s2->y, table[j-(1<y)); + sp_521_proj_point_add_qz1_9(t, s1, s2, tmp); + sp_521_proj_to_affine_9(t, tmp); + XMEMCPY(table[j].x, t->x, sizeof(table->x)); + XMEMCPY(table[j].y, t->y, sizeof(table->y)); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* FP_ECC */ +#ifndef WC_NO_CACHE_RESISTANT +/* Touch each possible entry that could be being copied. + * + * r Point to copy into. + * table Table - start of the entries to access + * idx Index of entry to retrieve. + */ +static void sp_521_get_entry_256_9(sp_point_521* r, + const sp_table_entry_521* table, int idx) +{ + int i; + sp_digit mask; + + r->x[0] = 0; + r->x[1] = 0; + r->x[2] = 0; + r->x[3] = 0; + r->x[4] = 0; + r->x[5] = 0; + r->x[6] = 0; + r->x[7] = 0; + r->x[8] = 0; + r->y[0] = 0; + r->y[1] = 0; + r->y[2] = 0; + r->y[3] = 0; + r->y[4] = 0; + r->y[5] = 0; + r->y[6] = 0; + r->y[7] = 0; + r->y[8] = 0; + for (i = 1; i < 256; i++) { + mask = 0 - (i == idx); + r->x[0] |= mask & table[i].x[0]; + r->x[1] |= mask & table[i].x[1]; + r->x[2] |= mask & table[i].x[2]; + r->x[3] |= mask & table[i].x[3]; + r->x[4] |= mask & table[i].x[4]; + r->x[5] |= mask & table[i].x[5]; + r->x[6] |= mask & table[i].x[6]; + r->x[7] |= mask & table[i].x[7]; + r->x[8] |= mask & table[i].x[8]; + r->y[0] |= mask & table[i].y[0]; + r->y[1] |= mask & table[i].y[1]; + r->y[2] |= mask & table[i].y[2]; + r->y[3] |= mask & table[i].y[3]; + r->y[4] |= mask & table[i].y[4]; + r->y[5] |= mask & table[i].y[5]; + r->y[6] |= mask & table[i].y[6]; + r->y[7] |= mask & table[i].y[7]; + r->y[8] |= mask & table[i].y[8]; + } +} +#endif /* 
!WC_NO_CACHE_RESISTANT */ +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. + * table Pre-computed table. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_stripe_9(sp_point_521* r, const sp_point_521* g, + const sp_table_entry_521* table, const sp_digit* k, int map, + int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* rt = NULL; + sp_digit* t = NULL; +#else + sp_point_521 rt[2]; + sp_digit t[2 * 9 * 6]; +#endif + sp_point_521* p = NULL; + int i; + int j; + int y; + int x; + int err = MP_OKAY; + + (void)g; + /* Constant time used for cache attack resistance implementation. 
*/ + (void)ct; + (void)heap; + + +#ifdef WOLFSSL_SP_SMALL_STACK + rt = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (rt == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = rt + 1; + + XMEMCPY(p->z, p521_norm_mod, sizeof(p521_norm_mod)); + XMEMCPY(rt->z, p521_norm_mod, sizeof(p521_norm_mod)); + + y = 0; + x = 65; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 58] >> (x % 58)) & 1) << j); + x += 66; + } + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_9(rt, table, y); + } else + #endif + { + XMEMCPY(rt->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(rt->y, table[y].y, sizeof(table[y].y)); + } + rt->infinity = !y; + for (i=64; i>=0; i--) { + y = 0; + x = i; + for (j=0; j<8 && x<521; j++) { + y |= (int)(((k[x / 58] >> (x % 58)) & 1) << j); + x += 66; + } + + sp_521_proj_point_dbl_9(rt, rt, t); + #ifndef WC_NO_CACHE_RESISTANT + if (ct) { + sp_521_get_entry_256_9(p, table, y); + } + else + #endif + { + XMEMCPY(p->x, table[y].x, sizeof(table[y].x)); + XMEMCPY(p->y, table[y].y, sizeof(table[y].y)); + } + p->infinity = !y; + sp_521_proj_point_add_qz1_9(rt, rt, p, t); + } + + if (map != 0) { + sp_521_map_9(r, rt, t); + } + else { + XMEMCPY(r, rt, sizeof(sp_point_521)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, heap, DYNAMIC_TYPE_ECC); + if (rt != NULL) + XFREE(rt, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef FP_ECC +#ifndef FP_ENTRIES + #define FP_ENTRIES 16 +#endif + +/* Cache entry - holds precomputation tables for a point. */ +typedef struct sp_cache_521_t { + /* X ordinate of point that table was generated from. */ + sp_digit x[9]; + /* Y ordinate of point that table was generated from. */ + sp_digit y[9]; + /* Precomputation table for point. */ + sp_table_entry_521 table[256]; + /* Count of entries in table. 
*/ + uint32_t cnt; + /* Point and table set in entry. */ + int set; +} sp_cache_521_t; + +/* Cache of tables. */ +static THREAD_LS_T sp_cache_521_t sp_cache_521[FP_ENTRIES]; +/* Index of last entry in cache. */ +static THREAD_LS_T int sp_cache_521_last = -1; +/* Cache has been initialized. */ +static THREAD_LS_T int sp_cache_521_inited = 0; + +#ifndef HAVE_THREAD_LS + static volatile int initCacheMutex_521 = 0; + static wolfSSL_Mutex sp_cache_521_lock; +#endif + +/* Get the cache entry for the point. + * + * g [in] Point scalar multiplying. + * cache [out] Cache table to use. + */ +static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) +{ + int i; + int j; + uint32_t least; + + if (sp_cache_521_inited == 0) { + for (i=0; ix, sp_cache_521[i].x) & + sp_521_cmp_equal_9(g->y, sp_cache_521[i].y)) { + sp_cache_521[i].cnt++; + break; + } + } + + /* No match. */ + if (i == FP_ENTRIES) { + /* Find empty entry. */ + i = (sp_cache_521_last + 1) % FP_ENTRIES; + for (; i != sp_cache_521_last; i=(i+1)%FP_ENTRIES) { + if (!sp_cache_521[i].set) { + break; + } + } + + /* Evict least used. */ + if (i == sp_cache_521_last) { + least = sp_cache_521[0].cnt; + for (j=1; jx, sizeof(sp_cache_521[i].x)); + XMEMCPY(sp_cache_521[i].y, g->y, sizeof(sp_cache_521[i].y)); + sp_cache_521[i].set = 1; + sp_cache_521[i].cnt = 1; + } + + *cache = &sp_cache_521[i]; + sp_cache_521_last = i; +} +#endif /* FP_ECC */ + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * g Point to multiply. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * ct Constant time required. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifndef FP_ECC + return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); +#else +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 9 * 6]; +#endif + sp_cache_521_t* cache; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_521 == 0) { + wc_InitMutex(&sp_cache_521_lock); + initCacheMutex_521 = 1; + } + if (wc_LockMutex(&sp_cache_521_lock) != 0) { + err = BAD_MUTEX_E; + } + } +#endif /* HAVE_THREAD_LS */ + + if (err == MP_OKAY) { + sp_ecc_get_cache_521(g, &cache); + if (cache->cnt == 2) + sp_521_gen_stripe_table_9(g, cache->table, tmp, heap); + +#ifndef HAVE_THREAD_LS + wc_UnLockMutex(&sp_cache_521_lock); +#endif /* HAVE_THREAD_LS */ + + if (cache->cnt < 2) { + err = sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); + } + else { + err = sp_521_ecc_mulmod_stripe_9(r, g, cache->table, k, + map, ct, heap); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif + return err; +#endif +} + +#endif +/* Multiply the point by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_521(const mp_int* km, const ecc_point* gm, ecc_point* r, + int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[9]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 9, km); + sp_521_point_from_ecc_point_9(point, gm); + + err = sp_521_ecc_mulmod_9(point, point, k, map, 1, heap); + } + if (err == MP_OKAY) { + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Multiply the point by the scalar, add point a and return the result. + * If map is true then convert result to affine coordinates. + * + * km Scalar to multiply by. + * p Point to multiply. + * am Point to add to scalar multiply result. + * inMont Point to add is in montgomery form. + * r Resulting point. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, + const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[2]; + sp_digit k[9 + 9 * 2 * 6]; +#endif + sp_point_521* addP = NULL; + sp_digit* tmp = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC( + sizeof(sp_digit) * (9 + 9 * 2 * 6), heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + addP = point + 1; + tmp = k + 9; + + sp_521_from_mp(k, 9, km); + sp_521_point_from_ecc_point_9(point, gm); + sp_521_point_from_ecc_point_9(addP, am); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->x, addP->x, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->y, addP->y, p521_mod); + } + if ((err == MP_OKAY) && (!inMont)) { + err = sp_521_mod_mul_norm_9(addP->z, addP->z, p521_mod); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_9(point, point, k, 0, 0, heap); + } + if (err == MP_OKAY) { + sp_521_proj_point_add_9(point, point, addP, tmp); + + if (map) { + sp_521_map_9(point, point, tmp); + } + + err = sp_521_point_to_ecc_point_9(point, r); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_SMALL +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * r Resulting point. + * k Scalar to multiply by. + * map Indicates whether to convert result to affine. + * heap Heap to use for allocation. 
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success. + */ +static int sp_521_ecc_mulmod_base_9(sp_point_521* r, const sp_digit* k, + int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_521_ecc_mulmod_9(r, &p521_base, k, map, ct, heap); +} + +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_521_ecc_mulmod_base_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_521_ecc_mulmod_9_nb(sp_ctx, r, &p521_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + +#else +/* Striping precomputation table. + * 8 points combined into a table of 256 points. + * Distance of 66 between points. + */ +static const sp_table_entry_521 p521_table[256] = { + /* 0 */ + { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }, + /* 1 */ + { { 0x17e7e31c2e5bd66L,0x22cf0615a90a6feL,0x0127a2ffa8de334L, + 0x1dfbf9d64a3f877L,0x06b4d3dbaa14b5eL,0x14fed487e0a2bd8L, + 0x15b4429c6481390L,0x3a73678fb2d988eL,0x0c6858e06b70404L }, + { 0x0be94769fd16650L,0x31c21a89cb09022L,0x39013fad0761353L, + 0x2657bd099031542L,0x3273e662c97ee72L,0x1e6d11a05ebef45L, + 0x3d1bd998f544495L,0x3001172297ed0b1L,0x11839296a789a3bL } }, + /* 2 */ + { { 0x03986670f0ccb51L,0x387404d9525d2a0L,0x0f21b2b29ed9b87L, + 0x2aa8eb74cddfd63L,0x0e9d08ffb06c0e9L,0x19d8589fc4ecd74L, + 0x0a3ef4dd8bf44c9L,0x0eb6e92863051d6L,0x13e96a576dda004L }, + { 0x3de24f8632d95a3L,0x057bc5314920a4aL,0x063e9bdaba1979fL, + 0x3d2a58adc1eab76L,0x214258d98dde053L,0x18708d7316628b7L, + 0x3fd32c9fa5a19d0L,0x33ab03b519443a3L,0x1852aea9dd1ef78L } }, + /* 3 */ + { { 0x0a91dd8eaaf1fe3L,0x0e19891002d4af4L,0x06a921abf0d20dbL, + 0x26a9da32503fda8L,0x09a1eec37941287L,0x1ce0d0f3cde46afL, + 0x22abc1c913fbe62L,0x3cc4dca2d0aaf88L,0x157874c0a862b9eL }, + { 0x2c8f184e6f03d49L,0x0d5f907922f80c2L,0x1ef3815cbdefa9cL, + 0x2ad7f6370f00b39L,0x1faeb109d7a41c7L,0x213d34e12fbd9f2L, + 
0x2f0aae2f98cca1aL,0x25a2df80f51f59cL,0x00724b1ab581d58L } }, + /* 4 */ + { { 0x04f2d4bdf9314e0L,0x3a14379e802ab24L,0x1083582efb03daaL, + 0x20fb1ff9b49e48cL,0x2199d74a880f1c2L,0x25401f9cb56ce65L, + 0x33f03e5f120b9b3L,0x2da18c348ddcd1dL,0x121f4c192733b78L }, + { 0x103ff6dfa8b51f0L,0x2bed45038af7c3cL,0x380e83254171ae7L, + 0x2e33684365444c0L,0x24f3a8c01e83501L,0x3201c1a4415ddc7L, + 0x2238218f52196aaL,0x29fc4d826c2aa95L,0x1db8c25790694a0L } }, + /* 5 */ + { { 0x00370ccb2c0958dL,0x3bc599a69ece1ccL,0x33cf480c9b3889aL, + 0x3cbeacf85249e4bL,0x2507489670b2984L,0x34cf6caa5d4790dL, + 0x0a4daa9cab99d5aL,0x1cc95365174cad1L,0x00aa26cca5216c7L }, + { 0x1be1d41f9e66d18L,0x3bbe5aa845f9eb3L,0x14a2ddb0d24b80aL, + 0x09d7262defc14c8L,0x2dfd3c8486dcfb2L,0x329354b184f9d0dL, + 0x151e646e703fa13L,0x149f43238a5dc61L,0x1c6f5e90eacbfa8L } }, + /* 6 */ + { { 0x2c2f1e74ab2d58fL,0x2fe0b0a825e00a8L,0x2b24770bb76ac1bL, + 0x3b5599fdef5960fL,0x2fd96897e8e4ed9L,0x3ef83c576300761L, + 0x1cdcb166395a133L,0x3ac954793ce7766L,0x082de08424a720dL }, + { 0x3aa53b260ea91afL,0x212bdde8c77f765L,0x32395cd09bbea43L, + 0x36bcc016387360bL,0x2e5c78e97997c19L,0x1d6c611510ed831L, + 0x02ce16faae9b5f5L,0x3ea1973a1bccc23L,0x073983ce58f4f63L } }, + /* 7 */ + { { 0x2e931318217609dL,0x2a7750904bf002bL,0x264c286c63297f8L, + 0x359efc7197b845fL,0x38d03eee5cc3782L,0x2ae4de67a305136L, + 0x3784c701acacb29L,0x3361c857ac6d6c1L,0x0f82c409fa81affL }, + { 0x07d3766378139a4L,0x25a7aed56faa4c0L,0x0d6f68c8bc9dc6dL, + 0x1857e4fc90b1f18L,0x2741717d9844e84L,0x02fc483a118728aL, + 0x1699d78e930e79fL,0x2db7b85552809adL,0x07de69c77026a4fL } }, + /* 8 */ + { { 0x1b51bb04bee80d7L,0x3da87dda4b79a58L,0x246ca0ebc3bd0e1L, + 0x29e4c1913c20de7L,0x3390db0771c0bffL,0x2b6873a65f19ee1L, + 0x14b512095c33e1fL,0x21958f1402b76b1L,0x0b0c231d360d311L }, + { 0x228929839bcab2fL,0x019e01937488281L,0x2084763dc2a0c0cL, + 0x1cc64e30f8c18bdL,0x152e46eb988e9daL,0x297783f5a6fa3cbL, + 0x2c0e26e55c8d2d6L,0x3fd5fce8ff58f6cL,0x14a899c6d9f1e4bL } }, + /* 9 */ + { { 
0x3f6e3a1ec05ce88L,0x30925adabf480a7L,0x20776fbeb007f8fL, + 0x2f7baf7b5002e74L,0x2693700f7b50ec0L,0x3dec0c3abbe5dd0L, + 0x101f77806e37a13L,0x2b83d73c5f45c6eL,0x1599036e5dfca95L }, + { 0x0af64b5000e8e0cL,0x0ab8101bed37e40L,0x1a67449f23bad3fL, + 0x108956c96a57d87L,0x28e33c6500ca918L,0x0b009f07e9abcf9L, + 0x2840a514373c00cL,0x1090267cf36865cL,0x0e798c62b79d0e8L } }, + /* 10 */ + { { 0x0c7c4a8ae4d0f28L,0x2957bd59b401bbaL,0x1f65066e40233a8L, + 0x2d574c86dd8de61L,0x2b8351b078deccaL,0x1f5522ace2e59b5L, + 0x31ab0b2e889e535L,0x14dedea7a38bf98L,0x05945c60f95e75cL }, + { 0x0a27d347867d79cL,0x182c5607206602fL,0x19ab976b8c517f4L, + 0x21986e47b65fb0bL,0x1d9c1d15ffcd044L,0x253276e5cc29e89L, + 0x2c5a3b8a2cf259fL,0x0c7ba39e12e1d77L,0x004062526073e51L } }, + /* 11 */ + { { 0x2e04e5cf1631bbaL,0x1b077c55bd14937L,0x3f30e4c3099040eL, + 0x10dadaafb1c1980L,0x0f6b94f6edb649aL,0x1adf82d4d53d427L, + 0x1e6dd27fecf4693L,0x1432a9e9c41fae8L,0x022889edac56894L }, + { 0x012916ed05596f2L,0x0076b2c08f2e2e4L,0x13ece7d4abe1e39L, + 0x102a7240c4c9407L,0x1c6d146d0b28150L,0x13b8625a76f34fcL, + 0x1226fb6fa1d5b17L,0x0261126ba8586a4L,0x154754ceedfb8a8L } }, + /* 12 */ + { { 0x24e27b04270b2f0L,0x0d3922fd35d35edL,0x3e8b0c2722ba84bL, + 0x2767fe6dc72c61aL,0x334fd6bc4f54a58L,0x104bd276621f937L, + 0x389d16b7c669fd7L,0x381d1002366eddfL,0x1cfafb9426bc902L }, + { 0x0a4f2d1662935caL,0x1f1c0e65f7311b3L,0x29e5353c79f8284L, + 0x2254857c3d30227L,0x080911b9d9ed8d9L,0x3789ea8d673c22fL, + 0x1e320d4b03540e6L,0x064ed4bd358fbdaL,0x0e6a0217fd694efL } }, + /* 13 */ + { { 0x37de62774214780L,0x19a05c81d167aadL,0x39b7e9c7fb01ca0L, + 0x3075b52df1fde15L,0x0a66caa39e55548L,0x2141693d15d5864L, + 0x0864ebf8141b039L,0x274fe972835f132L,0x053bf8af9509e12L }, + { 0x09b29d885285092L,0x0c76aa3bb5797efL,0x290ef618aab982fL, + 0x3d34989bb4670cdL,0x307ed8e090eee14L,0x1cdb410108a55c2L, + 0x27d01d1977920e8L,0x2dced1fb897ffb7L,0x1b93c921c3abc7aL } }, + /* 14 */ + { { 0x36a07cca08b2b14L,0x1e37aefc5d31fc2L,0x3828c40cb2a4aa9L, + 
0x1ca42b720e0a472L,0x28c1edde695c782L,0x03ef4880236a2caL, + 0x2db94e741ceb2f9L,0x152397e272794c8L,0x07d18266085b73cL }, + { 0x1ebf82a2defd012L,0x32c2516854dfbdaL,0x35353ef0811d01eL, + 0x29ecaf537a8f155L,0x27bf969c859c882L,0x2c96b46c0287e5cL, + 0x136005063adf5e0L,0x3f861307fcc1bc9L,0x1178e515bec4112L } }, + /* 15 */ + { { 0x314787fefe3d3d5L,0x1dbd967625c89e4L,0x3ed1e0b6acf529eL, + 0x080717a3764571dL,0x15f5667af9c2b7bL,0x0d5dbbd1e200e3cL, + 0x00154af38c766ffL,0x0ed4e7c188f2001L,0x09647d3c44bde88L }, + { 0x2075638de1b21a4L,0x0e67055c420704cL,0x206775c03599bb6L, + 0x1feb79833d4c8b9L,0x0efc190595c7fdeL,0x35ece5806c65510L, + 0x2fa73e7e70ac8cdL,0x01d912a96a0f5a9L,0x04234f8cfac6308L } }, + /* 16 */ + { { 0x231e71a286492adL,0x0f791197e1ab13bL,0x00d4da713cb408fL, + 0x3a6a1adc413a25cL,0x32572c1617ad0f5L,0x173072676698b93L, + 0x162e0c77d223ef2L,0x2c817b7fda584eeL,0x08e818d28f381d8L }, + { 0x21231cf8cdf1f60L,0x103cad9c5dd83dcL,0x2f8ce045a4038b6L, + 0x3700dc1a27ef9c9L,0x372ea0dcb422285L,0x2021988dc65afe3L, + 0x26fe48a16f7855cL,0x2fd1353867f1f0cL,0x13efdbc856e8f68L } }, + /* 17 */ + { { 0x234d04fe6a3ace5L,0x2d80fa258647077L,0x0007f75ed0f40dbL, + 0x2f256c966d6d370L,0x22615f02015e0e6L,0x0c7a8fe37ef2e99L, + 0x3ff824b2ec5433dL,0x0ccb90ac2c39040L,0x11119315060c480L }, + { 0x197ea28045452f1L,0x19e33dc7cfdcee6L,0x3ddc41e9328e80bL, + 0x1bb9abc708d294aL,0x1b44215e7b7f265L,0x02900a2f10e016eL, + 0x2476e23aa734f2fL,0x033df8f1c91e508L,0x1f16dc2e8b068c6L } }, + /* 18 */ + { { 0x0dfae6ffffc0de5L,0x06053ead297c92fL,0x3658ea2aa8dda80L, + 0x3d7693c11046404L,0x334100611f3b1caL,0x1b833e23c92e736L, + 0x055c8248c324ed9L,0x0b8a52dfa8cd08cL,0x1d36e835b648909L }, + { 0x2b77ae707372f27L,0x26d3ea0eeb8669fL,0x1ae165429ebb477L, + 0x19bf00fbcfe85d7L,0x16991c7c4942ec2L,0x1894f4f0397f1aaL, + 0x34e738a0f61e4f5L,0x3a465e847fd6379L,0x00260524cd4624dL } }, + /* 19 */ + { { 0x1b5d0ca01342e08L,0x3b53c2dd27c2bd1L,0x02d96529d804509L, + 0x36db600d673ad54L,0x34c3848005eb087L,0x1d6a1e13aa99aa1L, + 
0x34317ee972c7a0cL,0x3efd2305a7885a1L,0x14f81c556e0e5c9L }, + { 0x2b0b12be120674dL,0x3c26e4867c02b09L,0x332dd658caa6c6bL, + 0x2be0a4b66787879L,0x125fdbf80c771c5L,0x199b0df57604d4aL, + 0x0df680e61bd7983L,0x0260e36b251a874L,0x09f58dcf684c39fL } }, + /* 20 */ + { { 0x01691027b7dc837L,0x065d52d43ac7105L,0x092ad7e6741b2d7L, + 0x076f20928e013d0L,0x2c8e20bcf1d0a7fL,0x286076c15c2c815L, + 0x3b508a6732e3b9dL,0x01249e2018db829L,0x04511af502cc9f7L }, + { 0x3820d94c56f4ffaL,0x08168b13c303e82L,0x3d4ea1a0606a1c6L, + 0x199e6cc5bee67ccL,0x2e4f240fc1bab64L,0x0b5f710c16a8214L, + 0x23c07322539b789L,0x198cc0d95fc481bL,0x05928405280cedbL } }, + /* 21 */ + { { 0x0d087114397760cL,0x082dd8727f341a4L,0x07fa987e24f7b90L, + 0x281488cd6831ffbL,0x1ae21ca100e33b8L,0x2c0c8881cf6fabfL, + 0x145da6458c060a3L,0x18bbe6e71cee3b8L,0x0aa31c661e527ffL }, + { 0x3518eb081430b5eL,0x3e73a943b835a6bL,0x30b5aa6ebe8bb32L, + 0x3ca7f875a243b36L,0x31a59cc9a1f15f7L,0x22aca98f3975a3cL, + 0x07ce54f4d679940L,0x01ddba16c73bd0dL,0x1768ff423c0286dL } }, + /* 22 */ + { { 0x164104c33dcec23L,0x03586f3741d4661L,0x2f514c4f309abafL, + 0x3d779221c5521b6L,0x1d3539ba3f01bc8L,0x28efa3b3775aebcL, + 0x1d865fbb7e665d3L,0x12683e4676b0f2dL,0x173fe203da3f121L }, + { 0x03ae9a178d4a3d1L,0x173d62194c5b601L,0x26c041176463a4dL, + 0x23fe12be913abc0L,0x3ffea422d316c63L,0x188ad84d44bc8e5L, + 0x27068d691eaa046L,0x2ccf12215ba8e5fL,0x1b542d1b2e3f4a1L } }, + /* 23 */ + { { 0x11b2d5e1f487402L,0x005b99eabc7757dL,0x31f56da9c20ae36L, + 0x187b3916ff47acfL,0x3027a9e1825b7d3L,0x210459250b6c18cL, + 0x0773d0bf228777eL,0x297c3d7f3831116L,0x01fb2b3151d2dd7L }, + { 0x02773e8fbaa096aL,0x1c9baf824ea1e04L,0x0d072c7f1781152L, + 0x342ad7729d9714fL,0x187ef2d4a38d3dfL,0x1fac470aed29f61L, + 0x2da22f5c9c2013bL,0x3b2b578d4f0d02dL,0x039846d50a5a325L } }, + /* 24 */ + { { 0x2da77361677df58L,0x2f559c72d435b1dL,0x07d70a080ff2364L, + 0x0a6194c90c0110fL,0x2c35101e7a0a854L,0x231735da0800b74L, + 0x2cf13fbebc61434L,0x23703fc5646bb29L,0x0fb91c7c2e698bfL }, + { 
0x27c5cad12de14d5L,0x12317d95872089aL,0x24307cdbb3dabc4L, + 0x0471da0475e7e37L,0x2754620499c58f0L,0x269d39247a2601bL, + 0x3e37c3e52ad0a2cL,0x31cb480d1a172caL,0x0ec7a8632450a0bL } }, + /* 25 */ + { { 0x3818c218a86786eL,0x0dfdd084df8b20cL,0x10d1a7e6eb20ed5L, + 0x1c17371200d765aL,0x024f7bd759790ecL,0x387c3c511a458b2L, + 0x1915ca09e7ef9d4L,0x089bf4c304a2f3aL,0x02d810145f66c71L }, + { 0x12749f5b71d87e5L,0x0ec505ec0b3b68cL,0x2d2ee0baff1625fL, + 0x2a7f2b9989c0915L,0x337bd985f97f7b3L,0x3e9b430435bafe3L, + 0x32f13720aa81b97L,0x376c6ca7c680487L,0x03de326a2f85cc0L } }, + /* 26 */ + { { 0x2f3398b38c2ee78L,0x0f44069d682fb89L,0x1706565a7f8e40cL, + 0x38c10067974d68cL,0x2b8174b6ed12985L,0x3e0294a8878a990L, + 0x18d80e25a15ee8aL,0x3aa6974783f9a14L,0x0848cbbc13804f6L }, + { 0x2828690dfd45169L,0x1f8261674fa341dL,0x0811cdb8bfc238dL, + 0x1e858b3d9208dd6L,0x3b4d15b8c849954L,0x18126699252eaceL, + 0x21cfed822cbc57cL,0x1662eb10c893aa2L,0x0d94356346957c6L } }, + /* 27 */ + { { 0x306925368271323L,0x2782a12734135caL,0x1fbf2b31cc7d24dL, + 0x13d5e8f8d86ab8dL,0x20294e85644f64bL,0x0f3b52b852411a1L, + 0x2cda47ddc82ee74L,0x3e5a32e4a9a95f8L,0x13f989c42efbfc1L }, + { 0x2d98bdfb8651600L,0x18d0d1e8f3ebbafL,0x254335b1a2268c3L, + 0x3775609541e4e84L,0x3852eb1e9558da7L,0x0a57d516945cec8L, + 0x06d101df5ae5852L,0x3e18b951b8bbd99L,0x1faf7e16a2c5d89L } }, + /* 28 */ + { { 0x1746c8ec7ec136dL,0x07609f3444d46c3L,0x3ad3f187a116f8eL, + 0x23c4dba1195d928L,0x0850983c22f1b96L,0x39c5c967506a8a5L, + 0x3c149c2123ecc4bL,0x2e0b77372ad49d8L,0x16da7f50b181022L }, + { 0x19e532d0ca5e258L,0x22b45e5ed8a9efeL,0x242ec77fddefa14L, + 0x335d3e6db123428L,0x07fd122d458518bL,0x2d42cb5f14ecc2eL, + 0x01aae9bb8cd193fL,0x1b824685a6bbaf0L,0x1c57e49b10a1de2L } }, + /* 29 */ + { { 0x0abe67521210716L,0x0a5a8c1f809000bL,0x011d8c83795b81aL, + 0x0d3767be9aa52bfL,0x3677d686f68f203L,0x3d7389d56f8be7aL, + 0x357c5c6a13f277bL,0x12e33df648906e5L,0x13270c3d2f4c74fL }, + { 0x1c8609c8d209aa4L,0x104e8b6cad50dbeL,0x2d1a2992345d46fL, + 
0x3ae521f0d3e5adcL,0x2b440a375186f2aL,0x3694d6393e9c85dL, + 0x25b3103a4209367L,0x182e3c47ab78ffcL,0x1a99a570153505dL } }, + /* 30 */ + { { 0x21513936e7495bbL,0x0bf4a12421e746bL,0x2b0b29fd76fcebdL, + 0x26f1839c872708cL,0x3517a09e2a1a0d4L,0x362eb7e27d60ae0L, + 0x148bb4ac37809e9L,0x3121d2a937a782bL,0x027fd041312cb6cL }, + { 0x05502eeead4fb6dL,0x3097b42980b2fb0L,0x2841bd7f4a07760L, + 0x0c953b7385162e9L,0x10397614cc28b60L,0x207bb64ee75078eL, + 0x2d4b0b4221b71d1L,0x3906740438f08ccL,0x096dfe58a27dab0L } }, + /* 31 */ + { { 0x0d6fcd67debd24eL,0x3f29826b8ac1d53L,0x022ef217c26cbe3L, + 0x382e58838fe9f63L,0x2c7f9f87dd42d03L,0x25cbffb98d2fc85L, + 0x0d3e7722b1ec538L,0x14dfa0ea55f0758L,0x162edfe5f860f6aL }, + { 0x0a05400f0ea20b8L,0x0ab1f875e5a4010L,0x25c90edb0cac287L, + 0x0c2d8a4e69ddd96L,0x2af2cb7089df5b9L,0x0bfaf04bde299dbL, + 0x190ad3030732bf5L,0x38d04e999037ae8L,0x0d536eae15f93e7L } }, + /* 32 */ + { { 0x06119f6a1c88f3cL,0x397fb0bb1a5129bL,0x2c605742ff2a924L, + 0x07b76c8b1f1322aL,0x0fa5d25bb60addeL,0x3045f7825ca24e3L, + 0x2929c1fa5ac4f7eL,0x257d507cd6add20L,0x180d1c4e8f90afdL }, + { 0x3c4e73da7cd8358L,0x18695fca872480bL,0x3130ad94d288393L, + 0x198ada9e38bdbcbL,0x379c262cde37e24L,0x06d65ee42eaffe2L, + 0x0d4e646cae01ef6L,0x3e1167078cfc298L,0x00e52a42280dd01L } }, + /* 33 */ + { { 0x2d640a40f013755L,0x3739dfee0e03a5cL,0x0e797eb64b310b6L, + 0x02e4f2968d89e27L,0x358bdffc98e704bL,0x08c30dc8630d83fL, + 0x3385d153b1f323bL,0x0efdf5ace422169L,0x04a071130f556b9L }, + { 0x1a2096bfeef3f88L,0x2ea1a6e0ace514aL,0x184a872664a722eL, + 0x286163fe509ff88L,0x17490c9daa0dc0bL,0x056233a0cde67adL, + 0x32cee21d356f628L,0x2bba5f766f1fe9eL,0x0d21e61a4e8a3cfL } }, + /* 34 */ + { { 0x05db629e9068656L,0x2f5c327fb7937fbL,0x15bdfcd45546623L, + 0x3498a469d071e2bL,0x2761e688ef7981dL,0x16e49cbceb14f64L, + 0x146fec6a96892a5L,0x0bd59085f9ee019L,0x15e793c03cbab9eL }, + { 0x0fd95436eff39beL,0x2bc1fb6ffd3da02L,0x3abdb02416165a1L, + 0x3f751e600a60f51L,0x060b2e6fb37c5d2L,0x3a36e662761b65eL, + 
0x28b9bbe3e3284ecL,0x062ce7c127ad761L,0x18e3b3e8a789dadL } }, + /* 35 */ + { { 0x3026c56e51e61f0L,0x2f2a8cc1fc9d5d5L,0x26ebb1aeaf41dddL, + 0x1f38b5fd6ea4a80L,0x2bc5ead91a33e93L,0x391a01f47df3007L, + 0x01951990ab665d2L,0x101270a913d554dL,0x0aa099c1ca67966L }, + { 0x161a9098f97e387L,0x145de1178775a6dL,0x112b7ff1d6abf60L, + 0x293426a72247fe9L,0x1d2282e2b42da55L,0x1d0616b57969f1cL, + 0x0baeffdfa5a203eL,0x0285866c4da77a8L,0x1a3a5eef9141eccL } }, + /* 36 */ + { { 0x2f20d0a7a573b81L,0x3072a1986533bcaL,0x2d8d0b711c347eaL, + 0x1b2e826750bbc34L,0x05067a8ca6aea01L,0x284d47be998274aL, + 0x1c26346a52c6007L,0x00cf36ae16062c4L,0x121f17fa45dbb1cL }, + { 0x3b8b87afc3279d6L,0x39daaf0807c7867L,0x2a83806c21dde30L, + 0x0af18fe093c0abdL,0x246bd1a53eafd7eL,0x084e4591ec1d389L, + 0x32d9bfcd6f4931aL,0x273c6acb3f4e705L,0x10a62f3eb4b4db5L } }, + /* 37 */ + { { 0x002de36e0689a1bL,0x3327f5f72bf9cb9L,0x2d7e255d0bfb9dcL, + 0x3b92b681367937aL,0x2bfd2e774d7ee87L,0x1c2cae6d6a140e7L, + 0x103bba282c66577L,0x141c69eb2a09ae8L,0x11aac7028bac7cdL }, + { 0x261d39c680c8f04L,0x271332d22ced78bL,0x09bd95744f3c2f0L, + 0x2d2ab32d64c4c58L,0x25adfb2096d72e4L,0x3f4fb33f6dc1832L, + 0x352a73c67d9e431L,0x215f0521e89bf85L,0x1e33d95366364d0L } }, + /* 38 */ + { { 0x264506b4cec9e7fL,0x1592d0c2aae63f9L,0x101f173fa284a44L, + 0x1f85725d1c9786dL,0x082dec033e7b7bdL,0x298edd8b5b319eaL, + 0x0e2fcd1fe2e9340L,0x39d48e821386cfeL,0x0fdccce4da89ae6L }, + { 0x397f8eec12fd820L,0x3e24aa5b691ccc1L,0x241d55997bf4325L, + 0x2b00add4f3d65f4L,0x1f677ceba3aef35L,0x06eeb1b229cfe57L, + 0x1278b05b2892b7dL,0x117da41d4560f31L,0x01c2f5ed53fa47fL } }, + /* 39 */ + { { 0x114165eab40b79cL,0x1bbb6096d226a0eL,0x2b7d8a6c107fbfbL, + 0x22e3807ca2f684dL,0x1a4d79907d431dbL,0x11c79a161397437L, + 0x376ff869a91472aL,0x047f56341a5a660L,0x006ce369b74c463L }, + { 0x00773d11add1452L,0x3a7257b63a68a9bL,0x0e32ca15a40c2e4L, + 0x0dabd8bc63fa3feL,0x2eec9484b3fcb7dL,0x2c81016cb28cdbbL, + 0x2d8352a4d6e7a93L,0x00f9db64340c655L,0x0e5dd375603d9caL } }, + /* 40 */ + { { 
0x05f297d8b481bf7L,0x0a8f90a84ce0f33L,0x128cdc40b96c06aL, + 0x17c462768f27851L,0x16cd57fa79a2bf3L,0x0d5f4caee2b6e62L, + 0x176fadc1a4935c9L,0x0f78547ec96030bL,0x1ba98721eb424f2L }, + { 0x002daaf52a4b397L,0x17d330342d39523L,0x0db37b7e79cdc3cL, + 0x3b2cce5c2d8a6f9L,0x092808c7ff34336L,0x08a236c7b4f72dfL, + 0x2ed59aec290eff0L,0x3e97ca91e7547a5L,0x0929d7ed87076d8L } }, + /* 41 */ + { { 0x0edaf0be660043cL,0x28b32c05b81d376L,0x28e7e2cc3b3d84aL, + 0x0c1709a7f12748dL,0x13de33e3647b501L,0x2272941340653b8L, + 0x0db11ddb3361b97L,0x24bc2335460ce61L,0x0c6d5b801ecc8ecL }, + { 0x3f91c1547ab9887L,0x2178a9ad6ac044cL,0x0e5a133fc8182f2L, + 0x1d0e361a4b26dcdL,0x043282e815c435aL,0x31ef36a8f24ad1fL, + 0x158c86191231f59L,0x0f328eb90970d34L,0x0117f568febc5a2L } }, + /* 42 */ + { { 0x0cbd9d5bf5caa87L,0x3f183da37632763L,0x0dbbc7d4dede17bL, + 0x11609c2d6fd8fadL,0x1cc098fe7bf6e59L,0x175ee3d621c4de9L, + 0x25a533ca5eb6870L,0x029b12df7bbb92cL,0x0ef8e045c324a70L }, + { 0x20c1c9270cf52bcL,0x0fd8ea43318a605L,0x021cbf3028fb4bfL, + 0x35d48efbfc57ffdL,0x38b9ce1050a8102L,0x19886c7bfccc268L, + 0x0a78078e9da4d00L,0x2184a5dd7e27f30L,0x0eb590448650017L } }, + /* 43 */ + { { 0x26664fdebbd33ecL,0x269983396b55e62L,0x2c0550fb56ed0cfL, + 0x2b4756aa9bbb341L,0x3948a7f07b4ca5fL,0x3f870468db6bb96L, + 0x12544bd2e37887eL,0x363a907d86b1247L,0x0be49df70712bffL }, + { 0x0e2f1f565acdb56L,0x04f21179796f798L,0x1354e17a0412f2fL, + 0x33f6724efbee5ffL,0x325a957e48a2867L,0x28618d7e72a745aL, + 0x26ae711f55c19b4L,0x150766ce1a3d634L,0x000ac4480414c94L } }, + /* 44 */ + { { 0x01bcf89d4ad38dbL,0x03ce04f5c51445bL,0x2759cb70243a118L, + 0x18c58e9c5b16d30L,0x213648bdb5dd64dL,0x137a65a6ef4bbfaL, + 0x1e8c45a47187f9eL,0x3429d9779a44b8bL,0x048e075f29c4bdaL }, + { 0x03354745e4dd88dL,0x20d8e2015debf00L,0x1c01227288f7182L, + 0x2479a26277b92cdL,0x1cd3f71bad008fdL,0x3936878908508c5L, + 0x262bb15cb023ff3L,0x13f95f9ae70d6d5L,0x072143f41657fb0L } }, + /* 45 */ + { { 0x06b046c26f40f2cL,0x3491b1b35f0c76cL,0x22701953a9b7bd5L, + 
0x2e23c010dbeaa95L,0x021d0660d5ac929L,0x2f5b6f9864dce4bL, + 0x3c43f9d279ed159L,0x34094ddf1356b45L,0x179800eda50b8fcL }, + { 0x08ddc0b36132f31L,0x3d3c04ab79ce8eeL,0x1ec3203de2b96f8L, + 0x0508c6d94cce216L,0x0a14093caedb038L,0x30af8be6b423348L, + 0x2bc09fb9f86d445L,0x11e37f4f654cbdbL,0x13d757b58991aefL } }, + /* 46 */ + { { 0x19ad100580f894bL,0x09222b649791bdfL,0x3e142e5a6865b61L, + 0x14c5fe6a04d1525L,0x2f8a33541c86e10L,0x299b55e362aa082L, + 0x358e23a67906607L,0x2ad711f7d82b97dL,0x107cadd4c90a7f8L }, + { 0x16b044f6764ad0eL,0x3f8384940626ccdL,0x0a625f14db6af69L, + 0x27c6f5df550b7abL,0x25cfa895ce9f277L,0x1bc66b0e5e6447cL, + 0x2f44b1d4e94cedbL,0x09fd70d4cd05c06L,0x03bcac43fff50c7L } }, + /* 47 */ + { { 0x342951c83c1d4cfL,0x1e4742c9170d3c5L,0x0ef69c2dcc71e03L, + 0x0a4a8c41d9faa3eL,0x3b12948bd2ea57aL,0x3fabae0c956d1aeL, + 0x1abf592adc1e090L,0x29a26834b463112L,0x0199e8c9ff5c4a8L }, + { 0x1f7b9cdeb28171aL,0x1e100f55da61ef2L,0x33bf22ff824cefdL, + 0x24efcccf31562d3L,0x2b01ceb72ee09b3L,0x080a6737affe0e8L, + 0x2bf7515bb34c452L,0x173ce8f0fa2819bL,0x1a65dee07bb49d0L } }, + /* 48 */ + { { 0x1a958d6b114257bL,0x2bf507525d78c02L,0x39b53aae7b11729L, + 0x24fb746b20c1ca1L,0x11eb679750791b0L,0x099d6d2b3fbf1f4L, + 0x29517f0e54bd37eL,0x0268e2698b5fa35L,0x06b96f805d82021L }, + { 0x015d51757b5f9f4L,0x2790d9016d13452L,0x1de0e4870160e5cL, + 0x2547bdacfe0d10bL,0x1f7497faf953fefL,0x05bbc2de467933dL, + 0x12eeed24e3cc4d0L,0x05c0ff172aa1c94L,0x1b6f1ba4029a3bdL } }, + /* 49 */ + { { 0x2668435529252acL,0x189b01d39ec360aL,0x0cc1e0be86ab3daL, + 0x3dd3b57714d5420L,0x00cd41fd0534a53L,0x19d22472a7bfc50L, + 0x13b5ad0e7c945c5L,0x026237a92e257b1L,0x1ffefc67bef1515L }, + { 0x08dc109306033fdL,0x21e5e7cda1d7666L,0x2f26e3c335c51b2L, + 0x3f44938a75934e6L,0x0c41dbdfca47259L,0x33036255758315cL, + 0x28ff8606224b610L,0x21c1e81075397baL,0x1fd2920e15cae4dL } }, + /* 50 */ + { { 0x2d15f0ccd67da85L,0x22dbd16b1528559L,0x2021f1ac71c3ae9L, + 0x0991d564890bc17L,0x166e856dc1feb22L,0x3ed2e91ca8bc558L, + 
0x1d920b65eb14566L,0x32e6cd1a22f4a8aL,0x061943ce86ef9d4L }, + { 0x0696218aac91174L,0x1467b1077648d2dL,0x2df29f0763a485bL, + 0x09dc4b22ccedfbeL,0x3b053863098517fL,0x3fcf8f9280b9fb0L, + 0x09648646bc45bb1L,0x2e4fd1aba25bca5L,0x1462aeb1649ebd2L } }, + /* 51 */ + { { 0x334f41fe8e4d3c3L,0x361ffd6edfa76c7L,0x2c0ad910b579c80L, + 0x186e1cd26bbc085L,0x02b0a6cc02a24b7L,0x3cb4655c152f14aL, + 0x3e6cdd3b4c7029aL,0x028d0392e438ab6L,0x0cf8e774f812606L }, + { 0x07f9dbc2e229950L,0x07e11b67e0adc0fL,0x19a3f10c05f3ab1L, + 0x13c3c608328adebL,0x0ccbfb332203eadL,0x199c1bc5476f2f2L, + 0x059d5e3bd9caf00L,0x3993968e6f89418L,0x14c984387c8dcafL } }, + /* 52 */ + { { 0x08a757f8e011531L,0x16c5cb0f7355f1cL,0x09fdc2d99e297f4L, + 0x07ee4ed9056a3abL,0x0a5488e869d4ee8L,0x2edeadc2960ced5L, + 0x3df3a9ddd561c30L,0x0ccaed6f68e12ceL,0x124f909f8e01ddfL }, + { 0x1b8aa84ab41e782L,0x08049a14776e1f1L,0x2a7d99482bd21deL, + 0x3afd2d904efd26eL,0x37cd1e22405963dL,0x2eb583bbb4da7eeL, + 0x2e30eddcf495dd1L,0x084b7ad1d5a4e24L,0x10baaf11bd8af0aL } }, + /* 53 */ + { { 0x146017416ec64e2L,0x052b3df5f1baf9cL,0x04a3668b7176bfdL, + 0x3cdd06c107078d4L,0x22d3b67b072e3f3L,0x15f64a35947e952L, + 0x08f419623edca3eL,0x2ebbca6dd3a2dcbL,0x0383d99cb47327aL }, + { 0x08dd0b3da342a3fL,0x00918b7bd2a5520L,0x242eeab5a860120L, + 0x0141b952db46c71L,0x310c6cf1a5e1e2aL,0x3e40f3426e85c43L, + 0x0166f5334fc3660L,0x10d4e5a7800044dL,0x0fafaa26074155cL } }, + /* 54 */ + { { 0x05cd0e6712de285L,0x3fe2c21a7d77172L,0x2b92df4ed389cd2L, + 0x0c156e67210dca8L,0x2e07a003363524dL,0x1b82524d1bfbd68L, + 0x28952b0a2c82dadL,0x1fadacd899885caL,0x02c9afcb188af21L }, + { 0x3b9d4769a64c5b5L,0x23577913133f874L,0x18ef11c6dbffa0dL, + 0x23d07052bb55821L,0x235efe854ce1d97L,0x11d15d74947e79cL, + 0x289c03f9d0c14c0L,0x2770034b20e3af6L,0x16fa25f040b36ccL } }, + /* 55 */ + { { 0x23d9dea9cad682dL,0x32c6cd18da4e46cL,0x19885c0f24d787aL, + 0x31f50620f3a7d70L,0x353555e46dff62fL,0x2473681746aca77L, + 0x0633ed569b1cb28L,0x150a36c536f114bL,0x1941acbb86c2a34L }, + { 
0x06a70c824db8127L,0x1958fd06df3d6f6L,0x1abeb908d9b484aL, + 0x18e2670982a3613L,0x344436957aaeaaeL,0x02a4b2344fb5acaL, + 0x0bcb973bc94f99dL,0x1597e5e3cb8af41L,0x07456a388ef716aL } }, + /* 56 */ + { { 0x082dfe496fc1f77L,0x310d7c4d1eb5a98L,0x14dc25ebe457b04L, + 0x1a6dbdd92abd09aL,0x104d83da164a170L,0x03208cc380e1cf5L, + 0x239b3eb0b9db52eL,0x0536a621acd3b50L,0x16a76587f2a5988L }, + { 0x118f8e8ebc71a5dL,0x10690a150148cdaL,0x09ccc182cbcc491L, + 0x34f82415e9f58fcL,0x1e239d8eb4afe59L,0x365252cb98cf6c3L, + 0x04fd61bac8582dfL,0x3bf662e4569051cL,0x10ee0866a9dfceaL } }, + /* 57 */ + { { 0x350c47052e07a4dL,0x34e2e3975d1740aL,0x047ce1af12267f6L, + 0x12ce71417ded053L,0x186f739be03e4b4L,0x1f0bc6f167cf5e5L, + 0x23fad4ca19bca7eL,0x22bec7147007b01L,0x080da3937a57f42L }, + { 0x1d8ca9d102369faL,0x26ffedc1b038d7aL,0x19a796b55d80e00L, + 0x37ab0342530b828L,0x1787c187ada0e42L,0x33e812d9b06f8b1L, + 0x1773406d4ae2cc9L,0x18a156c33a981d9L,0x0d82d525245c7c9L } }, + /* 58 */ + { { 0x1cb238cae93de69L,0x0f20cceff6ba6dbL,0x1f4de8b79836496L, + 0x112ba2fe2b8cf20L,0x24c3ebacce13a22L,0x15696b582f1b9e1L, + 0x3e9459a837a53c5L,0x1bf361d7634d6f1L,0x01fb3705534f9f4L }, + { 0x0e9270c7fb974a1L,0x123e83a7b49205eL,0x2c3d64bffbd4234L, + 0x10f5e7d2cf05059L,0x13b9f32a0a05aa4L,0x32408d7b615693cL, + 0x352b484bebcf8daL,0x027459612661e36L,0x183aa4d59f1e48dL } }, + /* 59 */ + { { 0x2585d75dbffad9fL,0x3d85d3d06763f3bL,0x3f59e6c6934564dL, + 0x3460f566c31bdceL,0x3929c8950b80793L,0x2658aeadaebd3f0L, + 0x291273bd445a952L,0x1e16d4ad86517aaL,0x1be4fccdfff3d1cL }, + { 0x1c384d97cb2857fL,0x20c1601adeafd01L,0x1d1743ace6b24cfL, + 0x28af10f5adbd4a3L,0x314e564b92c7b8fL,0x0ae7c06a3c38a2fL, + 0x1383e61b69bc73dL,0x251aeae2fad00f7L,0x0aeaccea0c59791L } }, + /* 60 */ + { { 0x268baee0163c2deL,0x342cafac9da2926L,0x3124ffdae767c42L, + 0x3542ab2a50d5a1bL,0x2e01091cf926da5L,0x0c92fb35a670d33L, + 0x13a0a93d2545405L,0x332746dad63c506L,0x14ff144925ed611L }, + { 0x361a60cc1ed9259L,0x0dea8cbc7569fdfL,0x313d07aef4311beL, + 
0x12539be9ee80e11L,0x28bd3730c99f33dL,0x2e555f710e4a305L, + 0x22bee573cf8ccf5L,0x158402f1b518346L,0x14527cd194383b1L } }, + /* 61 */ + { { 0x3e651353427af4eL,0x302ec4c4364df52L,0x276acaa671c32e6L, + 0x3534ea70ddaf63aL,0x3471709aa9d7b3fL,0x060147004933a18L, + 0x28ee1c225ce41d0L,0x13b215224a13fe7L,0x13d22d829c9535cL }, + { 0x301ed9da1b15e02L,0x24aeb0c07961a1aL,0x21835764135b1d0L, + 0x2ddbdc56692fe9eL,0x118090d0dc0ee59L,0x2014865a45c6814L, + 0x1279045c1531bbbL,0x1da15d024c3f082L,0x008963b48cc7633L } }, + /* 62 */ + { { 0x3e8b620f4aaaed5L,0x2379f7fa1c7ba03L,0x030ffebfcb4b106L, + 0x39f0e88556cac88L,0x02769b805d4dfbeL,0x34e7abc29e89aa3L, + 0x15f032377de7706L,0x2dcc7c6a4911fd8L,0x12aa1b81a8442d9L }, + { 0x19e67d0b1152e8fL,0x1cf65e4ad78530aL,0x1073f1cb57a22e7L, + 0x272fc76928b8360L,0x2c22b449a03af0aL,0x34b5f4745a6c583L, + 0x098ee4b82c1ac8dL,0x3a855d422b29affL,0x15054992440e3cbL } }, + /* 63 */ + { { 0x0004a0aa13a4602L,0x31c68f434b1839cL,0x2463a6d79bc5505L, + 0x0eb553677d293f8L,0x373d3c7b8e878ebL,0x113b3e95fb32a41L, + 0x24d1795b3bb2782L,0x0abc228c3d87ec4L,0x1155b7e50014f63L }, + { 0x2c42ecc9ef0021aL,0x05ff5fe15b27518L,0x03b82e6478bc580L, + 0x1a45416936c4389L,0x04cd7eea5af0746L,0x14abb42b66ec287L, + 0x09f09de8ba39a2dL,0x3e9901d1d126ad5L,0x13fd5c8f7bd9e57L } }, + /* 64 */ + { { 0x3d8ce7b5a53c22bL,0x0cff35f2ad11a86L,0x24e248acb394787L, + 0x07a8e31e43f1132L,0x315c34237a9888bL,0x2dc0818cdabedbaL, + 0x3508fab913b8a8fL,0x1ccacd2ddf31645L,0x050a931d7a7f9e4L }, + { 0x10a429056d21d18L,0x198c1d56d04286aL,0x0a8b894a6b05826L, + 0x18e0a33dd72d1a1L,0x2127702a38a1adeL,0x37dedc253ecbe16L, + 0x0d1db683ff7d05aL,0x3357074fd6a4a9aL,0x0f5243ce1dbc093L } }, + /* 65 */ + { { 0x3c183c3d37d7891L,0x140527f6197b2a3L,0x03d68f21844117bL, + 0x095681fd9603db9L,0x3ad303202af51ecL,0x019dbbd63f969b2L, + 0x0e000c95de68f31L,0x14951d4238c7f29L,0x159783e5a957773L }, + { 0x01db5712e537ad9L,0x1c44b4d6fa73defL,0x2b48d57f9bcb5e8L, + 0x242a2cf2f1eed48L,0x1e5ecdb5c1eff78L,0x0e1f9fb53cc1b84L, + 
0x321e3d30da83923L,0x299f13647f3d1c8L,0x09f8487bb62e412L } }, + /* 66 */ + { { 0x2f5f80f8cb8e08eL,0x34b104925bfb5a1L,0x374360b7dcdf7cfL, + 0x37d5fd3417c0186L,0x2458061f24dbaffL,0x37a65312c664f0aL, + 0x07e0626c6ca8d09L,0x172f3bdc349349dL,0x0ffd4e5d4e3b999L }, + { 0x171e245c6f40077L,0x0b81141c8f9418cL,0x2f7e6a6bfd88159L, + 0x345b6767380d721L,0x03eb5770cba0959L,0x10358f74b9fe3faL, + 0x1e441958eb0881cL,0x07d3558ccef6baeL,0x034fb0397df3afdL } }, + /* 67 */ + { { 0x384e05eb358815cL,0x32cb5390421f65eL,0x188907f05d7a3abL, + 0x355ea7520721e9dL,0x042d64cbd350778L,0x33ca27fa74d33feL, + 0x2b2c6e0859cd5acL,0x02d8a0dcb564774L,0x06bc06d482e18b4L }, + { 0x10695a0da4ed375L,0x2bd620a636abab4L,0x21b4f4b7092c51bL, + 0x2b9e8cd6cd6c0a2L,0x20567efd88ab87dL,0x0c830dd29cd64d8L, + 0x158b307a49fc103L,0x33a6dcdeb2b128dL,0x01ed30696a34c0fL } }, + /* 68 */ + { { 0x1550ab0bd3902feL,0x292d2e1aa74ecf6L,0x20a9975cac379bbL, + 0x0c4ccd81770e967L,0x21afc2c58045e87L,0x3be72fc7cb16630L, + 0x383c4281ff8d6feL,0x0c7560afb57426fL,0x1579d1d9d5b5281L }, + { 0x07da3055519258eL,0x14e7e409f78aa1aL,0x1747d6a230d673fL, + 0x08d7d745a11a7eaL,0x35f7e41f5ab1aebL,0x1a9ffacd6effa51L, + 0x2d5187bd546abb1L,0x14f74abef53a385L,0x1607437be13bcc9L } }, + /* 69 */ + { { 0x1f165a9ee9755a3L,0x35686ae0b26ac55L,0x245aab6b97e60c8L, + 0x2c2ac1789c59687L,0x26db0830f3004cdL,0x16b2f7ae7830ed4L, + 0x1e8498aae1ec1a7L,0x318b904f51211d8L,0x1e9589e09bbb1b9L }, + { 0x35120819c72258dL,0x335cd170564f519L,0x3a7b91c11fdb61dL, + 0x2fe215e4239b189L,0x2530bc68ed1d3e9L,0x2d6d13fe6ab01bfL, + 0x10edd5125c16bb6L,0x36d70e2182edb6eL,0x1aa96fe8b08fbbeL } }, + /* 70 */ + { { 0x23a5dd8f257c0f8L,0x13724b74e84364cL,0x39cebbb8ce03488L, + 0x14e91c98aa40fcdL,0x352e06c6d6217adL,0x0c90a336877c805L, + 0x30c62cf5b723e0cL,0x20b307974e224b0L,0x1fdd9a90f1f477fL }, + { 0x30d27ba1763ab59L,0x1f64f9c8de0fa60L,0x0264945968aacf2L, + 0x0c85c0357560556L,0x303146d9f63251aL,0x196fc3cb3daef9cL, + 0x2323fb6cdcf455eL,0x11d1202a803398cL,0x1496e49e62cd96aL } }, + /* 71 */ + { { 
0x2ff0b7e40574c09L,0x3c990cffa03a5afL,0x1352eb237d91b76L, + 0x2ddfb70c4082cefL,0x3424a36dc3c0c62L,0x31b10d7be624e52L, + 0x08d076e9ea64c27L,0x2792cb7f087138eL,0x139cc3852f6a4e6L }, + { 0x238a3ffbb096b91L,0x0b2795cf6350f94L,0x1b118c577558ee7L, + 0x34b711f52d3045bL,0x142e1955f54ec89L,0x10dd1d70801b74dL, + 0x2e9041004aed6a7L,0x0cb2707770ca8afL,0x1fb597417a2ed93L } }, + /* 72 */ + { { 0x00f1981859bae66L,0x23a6c61175f06cfL,0x1c03452a3c1eab4L, + 0x033fe040ce71b3aL,0x15f98d6fe2384a0L,0x2283756f35fb784L, + 0x3e1c06f7a00e3d3L,0x2987d5b765228f1L,0x0d09d21a7d18e53L }, + { 0x1cfdbaf880eb3fbL,0x3f4a5d7a0fdf27eL,0x3d6fa28a74b464cL, + 0x17f7ec4f80d86e9L,0x3232a6128b8200dL,0x06a361b80ef23d2L, + 0x2d6ea7d1fb92c28L,0x06309a19d7eb9c1L,0x11d9b08608aefabL } }, + /* 73 */ + { { 0x3cf6146bbd2f539L,0x14bf01db89ae885L,0x1d18d4be4a67960L, + 0x08a7cfce6a0da08L,0x1433f873a8f8234L,0x05bd15a1a2e11aeL, + 0x1477507a1d3f367L,0x3889b7d80f8a0bfL,0x00377cb02c56975L }, + { 0x275add38c01dd59L,0x04ea7ae7068debcL,0x11044dfc54039c2L, + 0x0181fb83619a42bL,0x1661fc40e202ee2L,0x02c0bd5a25bb7a5L, + 0x2f1a246b4d7398dL,0x1c49732e5a64796L,0x09fd5c281afc13fL } }, + /* 74 */ + { { 0x058c54bd7073a5aL,0x206972187ab1f72L,0x0a39e720201a87cL, + 0x23903800f3947e1L,0x358f199de952a9fL,0x15b300addaf712aL, + 0x3162f31cf12322dL,0x27846d98d398e0fL,0x16984c017ee8f96L }, + { 0x1f433625c89f1faL,0x0a98c2da5ec1e3cL,0x1e5c4b05b7f44a0L, + 0x1453fb79330ccc4L,0x04b025aa4a7ccaeL,0x2136deb4349ba1dL, + 0x31c1fe7d5b77bbfL,0x33480e7bc6aa3d5L,0x18d65eba928418cL } }, + /* 75 */ + { { 0x37866ab8abb2537L,0x3132ed96cc25be8L,0x27ed2a428ad314aL, + 0x18843a7865a09feL,0x089801b4e95d19fL,0x2ba2e08cc7ae5e8L, + 0x1c9642aae77a62aL,0x22e125a4f58a97dL,0x0adff5bfe973e36L }, + { 0x3efae21492b0deeL,0x0fa7ba580b0b3a8L,0x3c996f3b99e5214L, + 0x2c3a4ee3d6484d9L,0x01064c13edd78b2L,0x15ce39ea355070eL, + 0x33b1a4e6b970dafL,0x0823ebdbb305a0dL,0x180dbfa3f4f74aeL } }, + /* 76 */ + { { 0x024621a907a6aa0L,0x1b2da101e1e7dacL,0x0b688168a934ef5L, + 
0x34e6e6a4121130eL,0x082541f2070d638L,0x3f222d41a5a32a8L, + 0x2357840c5970531L,0x2533d55937b56bdL,0x097e7e898c7c4d4L }, + { 0x1dc98d96b6ebb2fL,0x285ff1eaa7849b8L,0x0fdbfa2a2c68292L, + 0x032cb86146ed83cL,0x181ca4cfe9c6327L,0x046567562636c99L, + 0x0b8d1994082638bL,0x0c253913cc23a95L,0x0d696399eb844e6L } }, + /* 77 */ + { { 0x200f362b83769eeL,0x0102b0fbf132cfeL,0x388957abd68772dL, + 0x0965029c4a30e4cL,0x3ec242a31622644L,0x168695464271323L, + 0x1c2172d1e48f1e6L,0x1ff51a2f5c3c412L,0x041c8692d2b709bL }, + { 0x2388aa1df816784L,0x23229406f9d7393L,0x1ffb02a678124a5L, + 0x383b69c87826d27L,0x1e67a65eca73299L,0x15b1c6da282f47dL, + 0x05aa30d81e91e88L,0x2efc8debb8bd300L,0x073d94007500595L } }, + /* 78 */ + { { 0x112ac4a010c0ef3L,0x152f613a06c682aL,0x23dc4f3535090e6L, + 0x3ced1f4626a3c15L,0x2f238c09c10dc41L,0x106b3d9c48bb741L, + 0x358520224c16afcL,0x2b9bc732e4cd20dL,0x1271a4b5f292275L }, + { 0x12fd4733ce688b5L,0x19b4df72a71a2deL,0x326e541711d0145L, + 0x3b8f30d06a3f3a4L,0x02122c11fe3ba14L,0x174de6d5ae2ad33L, + 0x122f91c0fa763bfL,0x25696578b4abbc5L,0x0acd4e21b3d31cfL } }, + /* 79 */ + { { 0x013a7791d8e061aL,0x01f9c2b32128c10L,0x0266eb2f636a627L, + 0x085dec97275ab02L,0x170ff35cfe917eaL,0x106262fb76de2efL, + 0x0ae4455008db2b0L,0x3439c3d6293f338L,0x043ed0923972257L }, + { 0x0ad77b3e2e129e6L,0x312a1c3c6f935cbL,0x0dff20056333fb8L, + 0x304a9a4550ebb94L,0x2b8fe2640bc2658L,0x259682be5770332L, + 0x11d99e694eb5841L,0x3721df4eea94fb7L,0x0832df13b208a1eL } }, + /* 80 */ + { { 0x2ad2247d181c3f2L,0x34d6fbccdec8fffL,0x3cba74890672915L, + 0x23ff69e8e876d33L,0x179275686e4f70dL,0x3fc7de7889ad906L, + 0x1fa4e8e80408636L,0x27d8263a12ce73dL,0x0da57aa0be9d8a0L }, + { 0x00cecf54efcea66L,0x3cabb2bf1dbebb5L,0x1a48c91585a898dL, + 0x29c4fc02a958fc6L,0x344b5cb9fb111bdL,0x149883459a1ebeaL, + 0x0b35abc6d5fb126L,0x3134abe54fc6eebL,0x0ed99709370ff94L } }, + /* 81 */ + { { 0x09f56e068b54c89L,0x3305f739cdf08abL,0x283fab089b5308eL, + 0x0a550fef46c823bL,0x0844dd706b0f3a1L,0x3b0b90346c8133eL, + 
0x19914a80975c89dL,0x137dc22c046ba4eL,0x0176b4ba1707467L }, + { 0x1216ea98fdfc175L,0x1ff18df83d6c31cL,0x285fceb33a3477bL, + 0x13c088faade2340L,0x351c6d922b67981L,0x304fd47641e1c82L, + 0x2d60b55859d5a49L,0x32acb9a7e142febL,0x05c2499a8446d0cL } }, + /* 82 */ + { { 0x1d581fb73e7bcf1L,0x37987374f05ef90L,0x17ecfa199fd916dL, + 0x1cf05676e5f18a6L,0x2641328301a7588L,0x250aa4613b5de25L, + 0x2ba4bb9672ce892L,0x375ffcfb9161e05L,0x1234fb7a148ce54L }, + { 0x05d80aff009be8cL,0x24e35de37c6e87cL,0x2e84312de62062eL, + 0x1fd81c312e69f88L,0x3a1b5da3748d29eL,0x11c5d14d73670faL, + 0x2b9e671e51bd2faL,0x31a8650262ac15aL,0x049bb584abc49f7L } }, + /* 83 */ + { { 0x1f255301ea470f7L,0x2fe023a49538c2aL,0x29ea71a0038da01L, + 0x385644f2a1c2615L,0x3b8281fdb0d2b2eL,0x063970aab85c012L, + 0x2943abdb5c6eb01L,0x3540695ab19307eL,0x0531aaf64771a92L }, + { 0x279ef4906345730L,0x2aa93a11bcdf0a5L,0x26b01a7c3aab946L, + 0x28a059b7d3be05cL,0x24e04dc3ecb808dL,0x1bb066d3a7ecff0L, + 0x16d13e9e0b61db7L,0x14e11b9fd997bbbL,0x0e570ed8c0786a7L } }, + /* 84 */ + { { 0x2456e58108ce13fL,0x3f163438e5e04d9L,0x284bea3949e9b5bL, + 0x2f1d6bd99f412daL,0x0a891566bea9b66L,0x3d856569f2d35b7L, + 0x2e25201b3cecf0bL,0x297e90c4b1cf400L,0x14b81d768986135L }, + { 0x047bc25841078ecL,0x2a72585e7115350L,0x06094851f8fc75aL, + 0x0fb38d0247da858L,0x088e54102998d4eL,0x36a2b17a6a7d9c1L, + 0x2c230cbf280f885L,0x2ddd71932b2823fL,0x02b0ac864b05094L } }, + /* 85 */ + { { 0x3606e398f5daf7fL,0x2152244249d419aL,0x1c5c08c58a72483L, + 0x343243cfb8e8895L,0x008795f022f362fL,0x1097d6ab258cebdL, + 0x06dbfb71710bd10L,0x2ef370805f817b0L,0x1c8d9c7dc82c1b8L }, + { 0x1b41fdf18b8bed9L,0x20cc238e88c495fL,0x1de77291c4bbe94L, + 0x0ad05122abef3e4L,0x3c44da4629b0b97L,0x06fd428a577f18cL, + 0x1e313190b9c4630L,0x2ab6462d9bdde1aL,0x0f5a8a4e2fa121bL } }, + /* 86 */ + { { 0x0a55109ca0251eaL,0x3bb62c9e9b26c23L,0x0beb5620f528f2aL, + 0x3a2b84ff15a406aL,0x085993c079a8421L,0x346ac35c4d27c71L, + 0x35d90929e083590L,0x299be5b8a4a6ebaL,0x0ce96c2f1f8f599L }, + { 
0x0bc4b5112be8bd7L,0x11a83cf19fa66f9L,0x07d34d3a3864f48L, + 0x049cfd0e6076273L,0x026dce5671f6471L,0x00ac25af0caf0c9L, + 0x0682b7f7134ebffL,0x22d655813c02c34L,0x11cfd23d7eae3ceL } }, + /* 87 */ + { { 0x09646cca27689a6L,0x1f710d55905cafeL,0x248eb57cbfccd6aL, + 0x3ed6c6b7f94c2f6L,0x3711d8bf49b11ffL,0x1c39696e7cb6036L, + 0x118a1de879fdf0bL,0x354125d4d060dafL,0x114c8c526bd8cbfL }, + { 0x1fe725bef7388bdL,0x0f6f7f9ffeba9f5L,0x1b897e6de2acf1cL, + 0x26a7afc6fede0e5L,0x36978514681a72cL,0x1499c2bd94995c1L, + 0x157d483925ecd9fL,0x32c090def374a0fL,0x1ceb5d732a7c80eL } }, + /* 88 */ + { { 0x3f9fccecfd376d7L,0x3aacfa99ac21369L,0x0d08d5b91bd86b4L, + 0x1fa2a8c1361ab24L,0x37f866a4faa3d5bL,0x2e04eb849fcf50aL, + 0x0a920695d19fa8bL,0x073774e1e635f8dL,0x073df7c0a69a32cL }, + { 0x22c01bb38315b16L,0x29f226786323e6fL,0x3fb408b6b8531daL, + 0x231a024aa068f50L,0x2836faad4b159e4L,0x11a65cc1dfa4f67L, + 0x17e476d4ed6361aL,0x07e995a72cfd98aL,0x185b69d8183e781L } }, + /* 89 */ + { { 0x0f27eb3ab9cb764L,0x3bf0863af075b46L,0x0ddb0479aa79bbbL, + 0x09027950bd51dd8L,0x1bc699b96b4d16dL,0x3236322b8d70e34L, + 0x23a45d13b2ae258L,0x1301215e705499eL,0x0d9773b73576c55L }, + { 0x220a4730218c299L,0x38a6ce67de28ce5L,0x2009484f414f69bL, + 0x0de68b293511a12L,0x268db7ab3b2c749L,0x0d70d5fc2701dcfL, + 0x3de3f26181f0599L,0x1b82024c4c0f62dL,0x060f3effcd0e0fbL } }, + /* 90 */ + { { 0x23c14beb25d6530L,0x056ce66a5f503dcL,0x3c4bfbf7f6225e0L, + 0x27052d3c3c48270L,0x23f7e8ecf83d8c5L,0x3ac7bc3f3c00bf7L, + 0x1f0c6035d353c91L,0x3b8d0e5310a9480L,0x1b5787128ab7be8L }, + { 0x0937d3ab70110cdL,0x293bf11de446d68L,0x2f5bc53a4c19e0fL, + 0x3cce35427cb1ab2L,0x3e54ac1c6bd3010L,0x13ca8efcfb8aa0aL, + 0x09c7b931ea67c3eL,0x0d8bde93299bbc2L,0x0b05bda2c4f34a2L } }, + /* 91 */ + { { 0x024a071d1f575cdL,0x24ec06948dc60adL,0x36029a2c9d40156L, + 0x22e72452980504cL,0x1095b31c150c434L,0x0bf5258a40915cfL, + 0x10b2776f975fd22L,0x24dee85c1221b88L,0x1f6ac29b8136dbaL }, + { 0x1edef55775da491L,0x14fe78adaab6082L,0x21061bb40d5b259L, + 
0x04535449f619a5aL,0x181ead062cfc453L,0x3cedc48cbc8772aL, + 0x06f20d3f3e4f07aL,0x3d6ec4b341ae259L,0x15e241363696910L } }, + /* 92 */ + { { 0x0844fd03ecfc44eL,0x17cb21410ecf543L,0x27dbc9bd059a409L, + 0x3ebd96fb37e697fL,0x1a67961cd239328L,0x2ed77f778c4091cL, + 0x3dc5baea9e39bfbL,0x30de6008adb404cL,0x141bed7aa9b5f12L }, + { 0x16f0059fd94d941L,0x3a7c01f53fc0602L,0x3598779f05e3fc6L, + 0x2cc0120f26798ebL,0x372a198704c40f0L,0x192929c4134bfbbL, + 0x367f1edb773b5b4L,0x2f4a802d9dc3d24L,0x1694f7e03012a9fL } }, + /* 93 */ + { { 0x1f5dd738a9095fdL,0x1e80874f3a15e83L,0x396be5edc767c4bL, + 0x3fc6028202242a9L,0x366f10aab56497eL,0x261e5d9ae615b87L, + 0x280601312988243L,0x2a4a585d233dceeL,0x01207d9076c555dL }, + { 0x3049a011c44394dL,0x097bdc339279142L,0x09f0b1694265f5fL, + 0x3f8426ccfe078e8L,0x3a30932e42c5bd9L,0x1b3e2bc81fca90fL, + 0x366722736abfcacL,0x09ac2b7dfe813ccL,0x0e02f1e92fbfa9dL } }, + /* 94 */ + { { 0x124e4a663be4d4aL,0x15efb59bcf32465L,0x13fa7e7a7ccd1faL, + 0x1aa2317474f75f2L,0x23f251f1e70e8cfL,0x0d5533d6c95e65eL, + 0x1a71090a5ec58eeL,0x227a9a349a35c19L,0x04c7c23d4d20850L }, + { 0x3ae575bbd52d132L,0x236a9ce32073158L,0x2e51e4e63b990fbL, + 0x19ac8e74e1c25a9L,0x0a5d49fed51d6b3L,0x0ea301ebb57e21dL, + 0x286ae2025091d94L,0x3bd68403e116b91L,0x1c21af59d747eb4L } }, + /* 95 */ + { { 0x37bc01edd441308L,0x0d251218c222417L,0x0a74759611cd0dcL, + 0x185308f3998abceL,0x1f8bafed211a712L,0x324f81e4dfcc5edL, + 0x0c52cf4efbb9ff4L,0x360aa203c3b763bL,0x028480cdd2cddc9L }, + { 0x0f1ca0dc3f807acL,0x393f0af41c1527aL,0x0a1491f8bb6c6a3L, + 0x3f4f5b7eb36b4f4L,0x15fb46ffbe3ee1cL,0x37573ef3b91ac6eL, + 0x38e8b75207b3ac7L,0x3446b56030366c6L,0x08452c669f4c7bdL } }, + /* 96 */ + { { 0x02b4747c0ace6d5L,0x32d92ef9ca1eb69L,0x089989bc2614d5aL, + 0x0dbfc171c7bccc1L,0x2d35ac450817fe8L,0x1d6a70f1dcbac91L, + 0x00d6fd7f5fc2163L,0x25ccfedbe786b2fL,0x09a7643c315720eL }, + { 0x32216b4f3845ccfL,0x1d3a0242f016f52L,0x0c74d60490379c1L, + 0x2858d632019e954L,0x1aa677b6dbd7220L,0x1b8b823a0e3e710L, + 
0x2f6da537332c196L,0x18c36c0ca1d7925L,0x00c52b274cf9c30L } }, + /* 97 */ + { { 0x2c2e7828ea58bebL,0x013074d997e921bL,0x1fad20b40ff02b4L, + 0x2d8a74f9a9551b5L,0x166c81991fb5df7L,0x38b3f8fbc61a11bL, + 0x10d16bbe690bde6L,0x23a4a5ebae68050L,0x0cb59d81548baccL }, + { 0x105d3adbaf66a23L,0x0dce1d037ec2076L,0x35de4b00f432c33L, + 0x3a01f4e80f9b554L,0x3066bca80e17fe8L,0x2b7fe954a5513fdL, + 0x226ea460c2b96cbL,0x13ff27c06365116L,0x11ed543816724a3L } }, + /* 98 */ + { { 0x2a873fbbd7f8a61L,0x2335c6ef9602ed8L,0x1eb3667f69805e1L, + 0x1855c74f703f572L,0x1783f9bc8ab8d4fL,0x10e62c538b91485L, + 0x1811b536c3774b2L,0x38f0cb6d28d8dd3L,0x1389f7f12972debL }, + { 0x397f21c798fefb2L,0x1bf2d441eea9caeL,0x3760fadbb5689c7L, + 0x39f4cfa9b144befL,0x3236134a51a648bL,0x261624ed04a8a64L, + 0x26ada44a3d81698L,0x2d15d8512563cf9L,0x140b4dfc79b7687L } }, + /* 99 */ + { { 0x3b145707abe5bb9L,0x32ff63947606fa0L,0x1f49c9827affae0L, + 0x1229a1ed550836bL,0x3eeb41733c3a725L,0x0e09f18c20098feL, + 0x23b70e7014fdc3dL,0x1c5a1f4063e12d7L,0x0151d483e00fbcfL }, + { 0x14e3c7c6b578aa3L,0x33a6d74c10f6b85L,0x1e9bb6008101511L, + 0x04bd016b1bd57e2L,0x02008ac7b4ec311L,0x1714be99f99a936L, + 0x0ac2eb73c00d392L,0x1d14fb86e66622bL,0x08fdfa31d9560b5L } }, + /* 100 */ + { { 0x074a0e0251cf8d8L,0x225274107caf4b3L,0x0a4933ebce52d4dL, + 0x145716f36b82dcdL,0x016200b93e1ac5fL,0x1e4dcdbb4fb37f3L, + 0x2e69402506a266aL,0x3e4d56168722fa9L,0x00e081cdd539190L }, + { 0x15f995653e28412L,0x149bcb6c9c592c1L,0x25eb1df3adc70d1L, + 0x32b74d77b773558L,0x1a838ffe2d2c453L,0x30339627b510a12L, + 0x19b609ad20c1375L,0x3ec1cb57eea06f6L,0x1ad5be41dcc622eL } }, + /* 101 */ + { { 0x23af6678f850756L,0x0deab94bced65d5L,0x0a53796842f586dL, + 0x27fdd0fe65c434eL,0x193f1a8bacdaaf9L,0x027df364be9d579L, + 0x10650b1af04e154L,0x3f6698efe682b5bL,0x00e67b1cead55abL }, + { 0x260a8e0b5f43178L,0x3504b6730d6cccdL,0x3a63880f680856bL, + 0x198b988b1c4f5efL,0x36ff824457f372dL,0x36c13946b5edef9L, + 0x115c8d0f2bde808L,0x00bcb879e07f92fL,0x1941f475bfbb8e5L } }, + /* 102 */ + { { 
0x1482bf9d63543ecL,0x32d9f2845fbcf9eL,0x0638160ccc63985L, + 0x355ca6f707a2b14L,0x1a22686df556cbeL,0x207addf358bb65fL, + 0x3a2ed9b124cb5fcL,0x16e5935ed3d99cbL,0x17260b29aa77833L }, + { 0x1bfc7b6a43df7c6L,0x32b08ef081c1b08L,0x37bc345d958085aL, + 0x34a8ca822f3adbcL,0x2d1953c5e9d8f20L,0x13da0343c22493dL, + 0x29912c7d25d7c6cL,0x19131939a88dcb7L,0x0ebda1c06c452ceL } }, + /* 103 */ + { { 0x2677c5c411dd110L,0x1e1ea8b26471289L,0x2a41a45666d60d6L, + 0x2ab057e7c554ef9L,0x30e0cc7b273e716L,0x29892ac2a4ee18fL, + 0x39c260a40571172L,0x3c4c3979d95b868L,0x046af8d78b52ef6L }, + { 0x16214b170f38dffL,0x1760a048e84415eL,0x04d4957ed8123e3L, + 0x2e83698058411a9L,0x154f84413618fa9L,0x27aa56af9f374a9L, + 0x2a30b4f1c2563e1L,0x26aa7111678532cL,0x183c748add661ffL } }, + /* 104 */ + { { 0x2981f399de58cafL,0x2e03f61d4fa990cL,0x1f242d11948605bL, + 0x0180fbac02b20feL,0x17c73d79cf490cfL,0x0935186d00dfc94L, + 0x2420cf844209fd7L,0x23e89ac0fdb489cL,0x1526f4bd29eb343L }, + { 0x24d034ac389e51cL,0x2957a5b6df663a5L,0x17dee913c583acdL, + 0x1effac0d102cabaL,0x09d461e29079307L,0x10efe2faa85b8deL, + 0x3d8c3fb0a675330L,0x0977275d2690ae9L,0x0ec7c41e6d66bb9L } }, + /* 105 */ + { { 0x29b345dc5da8398L,0x1a107eece310c0bL,0x05627c3bb47abc6L, + 0x0adce34b37738ebL,0x3687311858fbeb1L,0x2f53d3d352f0ab5L, + 0x0e1b0e9521db1cbL,0x2f8f8a9a432bbf9L,0x194375215eb7bfeL }, + { 0x0b234f12edfd661L,0x26613bb54b07d13L,0x3260d8f8f98c014L, + 0x391ef8e1640cb49L,0x195e8b672fe76e4L,0x0ac03a0950d61cfL, + 0x161eb8916c397ffL,0x06ef8ee6fdc16ebL,0x0007ee90182ae13L } }, + /* 106 */ + { { 0x36fea9e93fbcb5cL,0x2f960e7ea14a6f4L,0x3125fd611ba0382L, + 0x1ff362898dc2c90L,0x23d8d4704a59ae3L,0x13106de6ade3183L, + 0x249cc51bac243d4L,0x1fa7f10007fabb6L,0x0f6988ea44a83dcL }, + { 0x190caa4f077f79eL,0x05d807678964353L,0x3bb3d21b4b77f4dL, + 0x18240df86d8477aL,0x2135becf0031b3fL,0x0a40f76bc44fb60L, + 0x319296f6c01379fL,0x2b614daf79f2a9bL,0x06c57d3b6849dbbL } }, + /* 107 */ + { { 0x23fee389abfccb0L,0x38a892e59db98e5L,0x0f0284ba6d276c6L, + 
0x2e919614f47e1daL,0x11b8ab9b6c38ba3L,0x1e81ccc5b8eacdbL, + 0x233f3201fc97424L,0x379ebf7505c6094L,0x0214dacfa81ac61L }, + { 0x25a9f37eaa3198cL,0x228d17f22e6754dL,0x312ad4f5ecbccbeL, + 0x180308dd452909fL,0x228a27b05e841ffL,0x0a167fcd767a316L, + 0x0bde372d3774446L,0x16fe0701183ffaaL,0x1810a0e49a129cfL } }, + /* 108 */ + { { 0x08203af45843c3eL,0x078c0eaafaeb9daL,0x08f3624df62b460L, + 0x22b48796aa0e5ecL,0x39a242b0e568734L,0x0a9db1b4b3c4b1cL, + 0x2751a2c848ed013L,0x0b416dcaa870bd4L,0x0f3b63296c392c0L }, + { 0x24b42adc6f3d1f0L,0x37314cbd4cae533L,0x333583443d9c2f0L, + 0x3bb7237672d5e04L,0x1ee87192fb50118L,0x15d06708c0e7869L, + 0x396b0c9977267d5L,0x30d6918bbe930c3L,0x1f7454fb7963cd3L } }, + /* 109 */ + { { 0x0f281949d153926L,0x0a32460ad5d5204L,0x3b30509e94c942eL, + 0x0ab7a75ad5d2d08L,0x18b3ca314c5acc5L,0x18f56f16a9d1b0eL, + 0x0cc9890f4ea307cL,0x2465109554e8b87L,0x08e271198bff76dL }, + { 0x3900e463c8e672bL,0x19d734fcb7f09f1L,0x11f7af2163c9703L, + 0x021eb3aaac1c125L,0x17e8d236974d699L,0x04f7045520bc86aL, + 0x36cd13dcfbc1dc8L,0x2bfc8338af20013L,0x03f2a54662c82bfL } }, + /* 110 */ + { { 0x1cf41e61588a8bcL,0x23343884314b2c3L,0x22bd758e7a456f4L, + 0x12d22e6e55cce15L,0x3a6b89b9e1600d5L,0x263320bd1877e02L, + 0x177147f7fd4f170L,0x317e459fc073452L,0x048b13385116116L }, + { 0x2b763335d2617f0L,0x295dc9bb2e181b7L,0x032d1b91fce93f9L, + 0x22db212e65ea4f0L,0x1823ca5bef7a438L,0x168cbdaeffa0089L, + 0x0b5c586f19c0283L,0x07767c9b356b78fL,0x1e77f5ddc776d0cL } }, + /* 111 */ + { { 0x09feec86ee764c9L,0x3b20dac1f20b30fL,0x32e6a005b142d1bL, + 0x28ca7a297a9afc6L,0x23ffe241c70ef51L,0x0a59b0a145f4a63L, + 0x3acc76bb389e287L,0x086d4e8b6a2a4b1L,0x04a902c9126732aL }, + { 0x2c51b9c8f7ce110L,0x0cea1ebac0dbc65L,0x10980a6a59e2dccL, + 0x29f9e36d40209a5L,0x0c95bb030ceaf26L,0x1310bd0a0bcf0e1L, + 0x2c4a0a6dd6e9f72L,0x0bbf1da3778a5c2L,0x16f4aedce4b03d2L } }, + /* 112 */ + { { 0x37f032aeded03c0L,0x128149623775341L,0x3c4f9a85be0f268L, + 0x1ff82e6daedb426L,0x2f2fb5887bdda0cL,0x30f339f865a271fL, + 
0x0d2ae5f8a96960eL,0x0866ac10f6755daL,0x06829c8081bdb21L }, + { 0x3f872fade59f006L,0x27ff1b2e5fbd69aL,0x15db58ae7ef8c2bL, + 0x287d332a87cdc64L,0x289c27cc4c2e23cL,0x21af73186be3183L, + 0x18de43eee5d7e7cL,0x3c22e4896d1fe6fL,0x0b453e7f4634b24L } }, + /* 113 */ + { { 0x0c496d0e3048bdaL,0x19d2650f0f79395L,0x09f74c2d509ee2bL, + 0x07950f14226b081L,0x3105a365bb01f69L,0x22c5c1273665828L, + 0x2c946734d93ffe7L,0x29d540a7e66cfe0L,0x091785c5ea20161L }, + { 0x055f978953dbdb6L,0x3a13665fb2867edL,0x102936d4d75aea9L, + 0x2a30549dbe91cefL,0x347c76356a9c17cL,0x0e5ce34a73d984cL, + 0x3336094a68360b0L,0x1fc874f90c2a1a5L,0x1b40ae532dee2b2L } }, + /* 114 */ + { { 0x0110e825164cb8bL,0x26bd3c954a99f5aL,0x2d0e8d185527697L, + 0x21fed93ab138435L,0x3ac424592cf6c57L,0x33836042102058eL, + 0x04c15c5d8fff37fL,0x0fb262ca139276aL,0x010ed8055673266L }, + { 0x06f403051f3ee9eL,0x38fba6ce2b7c784L,0x3a6ea13d64492e8L, + 0x1160386aec74f21L,0x10bfd729827b49fL,0x3f1e8d7f0a0f45eL, + 0x23ad4f8fe50fa5aL,0x077c9dcf69516b7L,0x1f878bfaae4d9a2L } }, + /* 115 */ + { { 0x260d8e8abad5678L,0x29cb3b9803096ebL,0x20b44c288e210afL, + 0x1db49533e7ee753L,0x0959e2ba564447fL,0x25844cb07ecdaf1L, + 0x140f19393c44d72L,0x199235ea2207ff0L,0x09127a861288d09L }, + { 0x136c0218a9e690cL,0x331487aad3e856dL,0x0423b00ee54c85dL, + 0x096bcea392026bdL,0x0b7731d85b37935L,0x1073ed5787cd8c2L, + 0x3c4529b5361d781L,0x098d3a907ca7bbfL,0x0e8cf5755b19f7dL } }, + /* 116 */ + { { 0x1edb80dd212b398L,0x25860754f74dcc0L,0x20478a52fa95d03L, + 0x0ca9e0979b43821L,0x1330ece4fad1e64L,0x01e24dbf80616f1L, + 0x3f6ea3508f7313bL,0x1ad8077260bf679L,0x0e8dbf3a602d555L }, + { 0x3763234279e05bcL,0x3d03b3d1114f4f0L,0x1f4d7fa307937f5L, + 0x0d84235f888c431L,0x3c2a98bbc5cffadL,0x1f51fe03cbc07bcL, + 0x322e1c30ab1719dL,0x37e51ef27e462a6L,0x1f9f53dc52ae834L } }, + /* 117 */ + { { 0x266b49ec183f89bL,0x2d7c097d601b53cL,0x02b594ec3080d3fL, + 0x100dc73645f4c29L,0x3b7f7e26d4b6b19L,0x356ded93dd506aaL, + 0x0036c5e55269eb2L,0x099d4386a1705feL,0x1cea0ff0f22da5fL }, + { 
0x02bd56a3a8e11f8L,0x190087d7e6ad518L,0x2c5a0ccc92d7298L, + 0x39948fd942f19d0L,0x3f7fabfb4d64569L,0x0f279b2f2391a06L, + 0x35ff20b4275947cL,0x2ba88ace54b54e3L,0x1b0818f8e381f04L } }, + /* 118 */ + { { 0x3e5bffae50d90f0L,0x0ec46fd4047370eL,0x2711a691dfac4cbL, + 0x0753a869dcf8432L,0x3e586eeb662ec21L,0x030bc7f56a5e7aeL, + 0x3bbfea4df16ab1aL,0x09bdbfa78fdfb15L,0x15e1b05960e5ae5L }, + { 0x08e04a58630e62eL,0x00c439911f86dc7L,0x2b6143b4447a3d0L, + 0x145d18b9e8f3c79L,0x00002724d92abb8L,0x114a5b7e0c27a82L, + 0x0ed8121d805d70eL,0x351383ce126ccf5L,0x0962d6bffbc6834L } }, + /* 119 */ + { { 0x13fe58d48e07711L,0x20d92349c28ecb4L,0x092d8cdff04c70fL, + 0x1e145047c50545eL,0x03e4f8a5515bb65L,0x104cd8bdb0c7364L, + 0x206d4d73f871520L,0x0c5fcbf8097bbb2L,0x0ad32a6e417954eL }, + { 0x238c63f69d147dfL,0x2ec1b9c42fcdedfL,0x2bef28d514deb69L, + 0x3ee34470f66e537L,0x10385c6044b2307L,0x1e003a0cecda77eL, + 0x101c1c68ea2f49eL,0x1e063c0a2c961f5L,0x055970782215cefL } }, + /* 120 */ + { { 0x0c351db54c1d751L,0x114c06e83e54484L,0x334fbfdc8bed814L, + 0x0e33c8da02a9dfaL,0x0e04f2860498d81L,0x1a96db6a4a30529L, + 0x1a910396192dba1L,0x10409277aa56d7eL,0x08580dd45780172L }, + { 0x10725000e09221cL,0x016c87c877815baL,0x2fa1e0e6095062eL, + 0x1edbddd44a51232L,0x1f1f34aca657fb9L,0x27fc575974a646fL, + 0x09ec79a66cd5ac4L,0x2baa37075a25f41L,0x067388fca84e72bL } }, + /* 121 */ + { { 0x120b49da6ef1dd3L,0x281178ee9b35d99L,0x180af33d5f48391L, + 0x2cbbc1d1d2a7212L,0x278bfb1eae53cf5L,0x36a41bea8d6cba6L, + 0x1f2cf4eca97fd6eL,0x21627c6a4de246eL,0x10d667533693ab2L }, + { 0x351049673691fafL,0x0f4ea755fb18616L,0x21bb930a8525dc7L, + 0x07902c16da5f8a4L,0x3413bedca094f57L,0x3469ae617a5a805L, + 0x2de8b79e7d4f728L,0x115355450ff68faL,0x0fb859b8444d16eL } }, + /* 122 */ + { { 0x022083e7c667aafL,0x1172e52a4732e9fL,0x19318ca0e94a335L, + 0x08f93aa831f287aL,0x242f56844c3afffL,0x0354b42e886b10dL, + 0x1301d4fcc68a8b6L,0x2f3850069616daaL,0x0a3547f762c907aL }, + { 0x3dd3ed3fbe260ceL,0x1dd4b6037007e98L,0x375d6f1da3e4271L, + 
0x1294987c43b57eaL,0x3d20cd6bb5f1686L,0x086b195af9ec7d8L, + 0x3b918e9d638c102L,0x0bee0c4dee3d99cL,0x17423eb44384adaL } }, + /* 123 */ + { { 0x14e27c42a1fbcf4L,0x34a16d7eb357b86L,0x2bdd915e66074c0L, + 0x043bc29aa69d70bL,0x1067cf4581e6965L,0x2fb87ee84f16be8L, + 0x1279e72be013c17L,0x33d6616901b5b6bL,0x0310042951d5142L }, + { 0x2735ec1a22bbc45L,0x14e469fd5bd361aL,0x39d0236001de4eeL, + 0x146a8be3494c16bL,0x0187db78aa8b218L,0x06a2230c38b0db6L, + 0x3e7d5bcfcc083faL,0x3408ee476adfef4L,0x0f462d85460f4fdL } }, + /* 124 */ + { { 0x168ba024972d703L,0x132874e426280fdL,0x2542ae28c855fc4L, + 0x1816c6d14dba6e3L,0x34c7f7e484fd4f3L,0x08c208f4b822c1eL, + 0x09fd13042f3b982L,0x20d6727ff4c4c62L,0x1bb56af0652c6c6L }, + { 0x1bf05e206e0f16aL,0x2b0beb5d191297bL,0x0a980f92c71afc1L, + 0x35cdb2002879668L,0x2236178dc13ae37L,0x2d1bbc417c83bf1L, + 0x2509e4443a58b82L,0x366c32545f73d10L,0x1667d0bb415640eL } }, + /* 125 */ + { { 0x2a30a613d22842dL,0x3803d6cf13b380eL,0x0f876df82b798c6L, + 0x1b5e34823161d93L,0x1e788854ada92d8L,0x166c2650294b4e4L, + 0x05fc9a499b26fbaL,0x3c4d17704ceb413L,0x1dda5c0926934e3L }, + { 0x30dcac2fad6d673L,0x3f7c1403cecff9bL,0x1941631756e96d8L, + 0x24c2936038fb39cL,0x231d130013990f4L,0x156058e3cab2a4dL, + 0x1d5679ee91966c7L,0x07369b7c3d5d39bL,0x111be124868ccd7L } }, + /* 126 */ + { { 0x244c726475cc1b4L,0x3f0be4adce5e33dL,0x26d10e3d7eb7915L, + 0x06bd030e381969fL,0x1e1ad24fcbb44e2L,0x0d581b9662198aeL, + 0x0f93f7270ba4ddcL,0x2935f0e0d28b069L,0x02193d0c9a23362L }, + { 0x2cb7b8cf769fd7fL,0x176a5e26884ee78L,0x0c566b910fef181L, + 0x0249a4c50e1ed3eL,0x1925b37c02088b3L,0x1a9903951dedc6fL, + 0x21c6efa049a9212L,0x15acb4f77c6f7f4L,0x0649b5f9d7d232aL } }, + /* 127 */ + { { 0x240adf8679a9c35L,0x36638f2dd35e5b5L,0x0ebb5f8e9dafcdaL, + 0x13ab5281cf1192eL,0x22edde557473861L,0x1db382e6f61b03bL, + 0x15fb96773317385L,0x2bab66d74cc9d02L,0x13672f0aeb3ee09L }, + { 0x388c76d64e54ba5L,0x39ebc7711d34868L,0x29d1b2a7708163fL, + 0x27b784902b5fe8fL,0x2c720303a0447b4L,0x1af4084f67d92d9L, + 
0x203ea5b1c78029eL,0x174ac72bc71c02aL,0x103179180eb3bb8L } }, + /* 128 */ + { { 0x1bf4f9faf2ed12fL,0x346793ce03f62abL,0x3db5a39e81aece1L, + 0x08589bbdaf0255eL,0x20cf5b28df98333L,0x00e4b350442b97aL, + 0x067855ab1594502L,0x187199f12621dafL,0x04ace7e5938a3fdL }, + { 0x1c5b9ef28c7dea9L,0x3e56e829a9c6116L,0x02578202769cd02L, + 0x0225375a2580d37L,0x3b5dea95a213b0bL,0x05f2a2240dcc2dfL, + 0x1ba052fe243ed06L,0x25b685b3d345fecL,0x1c0d8691d6b226fL } }, + /* 129 */ + { { 0x22edf3fbf8015c2L,0x208db712540b62aL,0x36e0a6a43157e7fL, + 0x0968b412c33a243L,0x1a809dbab318ef3L,0x299f288673019a3L, + 0x3ebc49dd26937adL,0x261123c9f04b20fL,0x02987b3db2f3c9bL }, + { 0x3e7aed0fd2e3dc7L,0x3a2f6dd057f554dL,0x2c9a58a45f25498L, + 0x2e882721743f035L,0x2d579e1ee83d5baL,0x140affb4c7b2371L, + 0x01bef11f4cad0baL,0x3299710cb9b387dL,0x1913b10afaabbffL } }, + /* 130 */ + { { 0x19f7df053053af7L,0x011d96ca2873d2fL,0x38fc7ce90438603L, + 0x1bab2317775105dL,0x3fb59ec618fbed3L,0x06c6fb3c9ec4c4eL, + 0x1973a99d2656ffaL,0x2d654cd384d1651L,0x18c3261888cc362L }, + { 0x013a414aa7f6ff8L,0x2bae20feadf1ebdL,0x086b7cc307ba092L, + 0x0948d18403be876L,0x302140c93dc81c1L,0x184120d64f5349cL, + 0x1795f3a1ed7e3ceL,0x3505b8ae47b3f7cL,0x191160dc11a369eL } }, + /* 131 */ + { { 0x272f46e8b57d7ccL,0x02c3952fc08e1a6L,0x396e05b3a91d314L, + 0x2a693b09b8221b0L,0x3c50f58e91b9ab3L,0x1789abc1d0bfabaL, + 0x1cd9f71592c6085L,0x0b22650f351daecL,0x17c3ed97fd4c7f0L }, + { 0x3b02503e6d54964L,0x34458b1a8c63014L,0x2cf49cc28c22d9bL, + 0x1000d4d190063fdL,0x2b4cc0668a45c78L,0x10b6f80e3a8ccd7L, + 0x36c3cd7ad727f8fL,0x0b5dac55fa447f7L,0x1b3a7f894c9ec99L } }, + /* 132 */ + { { 0x1e6e397af09ea77L,0x1d82e5d77097164L,0x0c08b94a197b26aL, + 0x2a2da3398663010L,0x15bd23564041bacL,0x25deccfe8668345L, + 0x3bd02986ca5b94dL,0x07e67cc7e1fe397L,0x0b8f76c55a6b190L }, + { 0x35bf8c33846ec9fL,0x08817277ab29185L,0x1ec0a3108df0f46L, + 0x20f3ebb64a24b2dL,0x065049fb2879db2L,0x1bb940c51df7001L, + 0x2dce4548d24bac9L,0x1a13e9f6dac595aL,0x0fc0110cdabab1cL } }, + /* 133 */ + { { 
0x11b66d84d308bf2L,0x04f27f598e00105L,0x1f92fd383bf9990L, + 0x210fff23bf1a24bL,0x0313ea287a10efdL,0x2837dd0149f8c5bL, + 0x2bd2a18ef6e3cd3L,0x3933b2e5b90c3dbL,0x18cc1ebecf2a70eL }, + { 0x0d14ad71a70404cL,0x087743e738a8c20L,0x3cde3aa3e0726adL, + 0x0458d8e9a42e532L,0x1c6b1e2b40ab596L,0x1b3bb16f9c2ffd1L, + 0x3757c01296dd0b6L,0x247a3532ca9d1d1L,0x0aa08988ca63d7dL } }, + /* 134 */ + { { 0x22dcfcaf8db0396L,0x3a3cded08b69daaL,0x034996485724e8aL, + 0x311efc524fd94beL,0x2b0247a4ef647c3L,0x2baf6a3a2d802d1L, + 0x158df0abf3e4397L,0x2eac8b8748c7e9eL,0x0ef38e692b1f881L }, + { 0x33c168926cf3047L,0x053e51654e61607L,0x1d1c293f20b6dadL, + 0x1bbd5eaec5ff7a1L,0x01794de382ea543L,0x2ffb34bc346a3ffL, + 0x3860429ba508e22L,0x0c7e0443c29ff6dL,0x1962ade6f647cdeL } }, + /* 135 */ + { { 0x196a537fec78898L,0x2779cb783e9dff2L,0x36acd34cb08f0b3L, + 0x20b69e34d4fdb41L,0x3a0392cc1acd8bbL,0x160552757fa0134L, + 0x27c6d9ab7adedeeL,0x0fcde20e4068301L,0x1915855ffa24ed9L }, + { 0x1570e36bf9ebef3L,0x011a977d2cc5dcaL,0x1a95a6816b5ce21L, + 0x204a2343847e6e2L,0x13979159aadf392L,0x323eaecb5aeaaf9L, + 0x07af10411afee05L,0x38defc64b0ebf97L,0x0f7aa72e81cd7dcL } }, + /* 136 */ + { { 0x0fa3c0f16c386eeL,0x2c11a7530260e48L,0x1722876a3136b33L, + 0x248f101b019e783L,0x24debe27d343c0aL,0x25bc03abbc8838fL, + 0x29dcff09d7b1e11L,0x34215283d776092L,0x1e253582ec599c1L }, + { 0x08ef2625138c7edL,0x10c651951fe2373L,0x13addd0a9488decL, + 0x3ea095faf70adb9L,0x31f08c989eb9f1eL,0x0058dda3160f1baL, + 0x020e3df17369114L,0x145398a0bfe2f6fL,0x0d526b810059cbdL } }, + /* 137 */ + { { 0x049522fa0025949L,0x36223c2ef625149L,0x2f5fe637216fb26L, + 0x1911ca09fd8cd10L,0x399fc2681d8ec3bL,0x231dc4364762868L, + 0x1b27626d232ead6L,0x27e9e396ff8bf94L,0x0040f9f4fedfd10L }, + { 0x152ea516b4a05e0L,0x3523bbc871e3ac6L,0x26191997dfdbcb0L, + 0x0122d3087f5934dL,0x2be92303a0d11b2L,0x2317a0269bd5a6dL, + 0x005d8e2b8f60967L,0x27289c89ad6acdaL,0x1bdd6cff180db34L } }, + /* 138 */ + { { 0x09f8576943cc612L,0x10c67a0cacc71e9L,0x2297cccadebdc91L, + 
0x10ac18660864897L,0x025b1cc7c4918fbL,0x191b97c2b32cc21L, + 0x0e3e22751d3347aL,0x00023abed2ab964L,0x151821460382c4aL }, + { 0x02481dbbf96a461L,0x048ba6d4a8ee90fL,0x058e464db08b51cL, + 0x1e1b5a82074870aL,0x0f533cef7b1014bL,0x05517df059f4fb5L, + 0x1b7b9f6cfb32948L,0x30a67a91b4c7112L,0x081cfad76139621L } }, + /* 139 */ + { { 0x3796327478a7f0eL,0x060f8b785dc177bL,0x26df572117e8914L, + 0x026df354b3f4928L,0x3ad83c1603cdb1bL,0x027be326790ae7eL, + 0x254ccd6971d2ea7L,0x083f06253f16e3bL,0x0fcf757b4e534a5L }, + { 0x25518cc86b62347L,0x072749ef0aa4a16L,0x2b052966727fec5L, + 0x0e82b90f9bcbba8L,0x205ca066bbc8a8eL,0x20ce61b6014d6d7L, + 0x374cdd91ffcdb18L,0x0890cbd296ee8c8L,0x12408763a490d20L } }, + /* 140 */ + { { 0x098b9724efac14dL,0x12fe369e6a74f39L,0x0dbdd6e07c29b6fL, + 0x3f5c5dc54e03c7aL,0x271b03263fac30cL,0x26d157d53247b48L, + 0x3092bfbf9383351L,0x0ef65da979e2449L,0x128a97674e1b481L }, + { 0x1b63c41583e5924L,0x26bfc63c5c7418aL,0x33cdab227a2861fL, + 0x36a2846adc0ad16L,0x0e8db6971939d5dL,0x3b042014afed1ecL, + 0x0e1801562379df0L,0x12aeabd69920493L,0x1508d98c43434f9L } }, + /* 141 */ + { { 0x2a9fe73cfffc80fL,0x38ba6f50d1cfdb7L,0x3ed3c9d37ba7e23L, + 0x349e8ff0d5c9fecL,0x38e04a03d733766L,0x2ef83d0f436d33cL, + 0x186f4f8ce017522L,0x2c0df61fadc676aL,0x1536d1b50ae2fe6L }, + { 0x31f5defda40bab1L,0x1aa2be6caf698cdL,0x1c890d4aca8707dL, + 0x3fd90ffe2ad7a29L,0x14bf8ec2f4d72f0L,0x3ae4f88a7130436L, + 0x2dfd0136b0eaba0L,0x2820af12c3a3c74L,0x1429f252e5a9d34L } }, + /* 142 */ + { { 0x2ffd4c17d0e7020L,0x1a6aaad52085a12L,0x1708588f348f9b1L, + 0x3fe21661aef6f80L,0x115f9c381daebf6L,0x12a529eecce61fdL, + 0x2d68497e455f2c0L,0x1e630e690510a83L,0x1541c1ad4a61ef7L }, + { 0x247b628072709c4L,0x035a2e204397f9dL,0x0874e92e0f63b33L, + 0x2e7e2faa6eb46f6L,0x08318981a144e4fL,0x1a31a81f056bf06L, + 0x200b66e19c5c82bL,0x1ebb216315e88dbL,0x0119b25511007cbL } }, + /* 143 */ + { { 0x21ced27c887027dL,0x03ccd4afeaca184L,0x3c1c19d511e2605L, + 0x2a5fd31a7d5b8dcL,0x325226bb402d4c3L,0x0f9eb0c39bcd5abL, + 
0x18fdfb3b9011c38L,0x28d8d0ec308f4cfL,0x00ba8c390f7af2eL }, + { 0x030c3d67e851bacL,0x070e2697d513f31L,0x3c6467fba061899L, + 0x13a5f2f6fd001aeL,0x17734adadd49d02L,0x232db4a914e6df7L, + 0x24b3ad90ba8f9f2L,0x1a4a1ea4860c137L,0x06ab28732efa7b9L } }, + /* 144 */ + { { 0x1dab52d22ed5986L,0x3989e9614cf819cL,0x237acf155fe3deeL, + 0x035eba2c4cba3fbL,0x134a08b94cd6149L,0x270570c09c1b861L, + 0x25ad46a85ffd52fL,0x002ef568893cd46L,0x1e644d1b6d554d7L }, + { 0x2830686862e4e9cL,0x335db121d8ff925L,0x1679c0839caafe5L, + 0x3ae360f58b580c2L,0x211bc4ae2c0e4cbL,0x13f2818a4478953L, + 0x22704596a0d7c86L,0x104b3d5e17757a6L,0x1be2f4677d0f3e0L } }, + /* 145 */ + { { 0x00012ddab01a6dcL,0x2f5b06b86b6da53L,0x1aecb9b05079391L, + 0x2798a84187ceb9fL,0x3a96536b7c2714fL,0x385d952dc65e3b9L, + 0x2b3dd4eec11bd05L,0x2fd871c459b83beL,0x1d70f7aa57287edL }, + { 0x2ea6f7d51eb5932L,0x3a82a97e20b2909L,0x20977739f7dc354L, + 0x0aa6f95e4d05d6dL,0x378545eccd33519L,0x2d90f2766007d08L, + 0x23abec32b8e2567L,0x19426e504775c8fL,0x0ee656dea68cf1cL } }, + /* 146 */ + { { 0x138e140a0890debL,0x2f61f6f3ae12f53L,0x3f72ba041decbf7L, + 0x02a9a082fa547c3L,0x38c486298afeec7L,0x1c043b11d546428L, + 0x3879b1ecdba558eL,0x085733b6476e231L,0x14c08de3e4cef5eL }, + { 0x01534ed16266da2L,0x0c8baded3240267L,0x0aef699276889ceL, + 0x1fc170a1134df7bL,0x31ac519ab652509L,0x168f321b48edf84L, + 0x0c4575682ebb726L,0x14dcc314c76e58aL,0x0be2e00e8b87380L } }, + /* 147 */ + { { 0x007c80057ed32e9L,0x39033df009265ceL,0x2abbabb54830427L, + 0x1bf3a082fd16141L,0x3b2c43e81564977L,0x3fbd9922d4d4ca4L, + 0x3bdca5671e8353cL,0x3f5e49c85f4fe40L,0x1dc40a9c109a813L }, + { 0x3eaa6c33db21a38L,0x088b875cfbdf91aL,0x04e7bd1d507fcaeL, + 0x19161e9deac7fdaL,0x20c64a4d6f5bac6L,0x29f0de29631d3d8L, + 0x02e4094ca837d96L,0x3853fd0f7d4c4f9L,0x13f8a9a4347fb49L } }, + /* 148 */ + { { 0x1ab4edf992f8923L,0x2a9781bf4827ce1L,0x1b871b1340eee24L, + 0x07e4782ed009efaL,0x2f3d4c62c2957d1L,0x1ffdeabd096beb4L, + 0x14cbe92d231286cL,0x0d4a65904acac04L,0x19f6706a231c3e2L }, + { 
0x2b3bbd2225c02afL,0x2f0598fe8fa6341L,0x2b75b84f482e53eL, + 0x084aff1577e9b7cL,0x0512a73da912b45L,0x354faa90c2f6f50L, + 0x27fd53ac0f43d93L,0x092d3f0d63f9030L,0x0a32cb183be9194L } }, + /* 149 */ + { { 0x39b0c2d3fa6a746L,0x29e488756892a38L,0x091478cdf2b5e84L, + 0x1f4c199b2cdc296L,0x2f6d71d068a8806L,0x01974612c269c27L, + 0x1c944850007a3e0L,0x24eb1c11abd2ee3L,0x1fd2b6a3129c654L }, + { 0x3d5d5bde45f2771L,0x0ac22bd0cbb6574L,0x00fbf232a6bb854L, + 0x10fa2fb32c8bb35L,0x2bf8e247f0fcb61L,0x368c0e6f3b3144eL, + 0x02a0df955d56f78L,0x3f8aa455f18655bL,0x18ca6d35cbf3031L } }, + /* 150 */ + { { 0x1800b1bbe0c4923L,0x2b9d01a40a41ef7L,0x337f957bd0c7046L, + 0x2765957e2e08e62L,0x2500f4150aa8e1aL,0x00b9ebbb34a49feL, + 0x29692e826a9c6d2L,0x15df2d33d62ce7cL,0x11f3093868cbf41L }, + { 0x1cb5e7a333ed442L,0x3238be41bfbdeebL,0x01233d98f228ae5L, + 0x369fff84970b66cL,0x1ba2318354632f2L,0x0b4b14496521dccL, + 0x17d9c4a0caae5b1L,0x003dafc03996261L,0x172c5d1008654f2L } }, + /* 151 */ + { { 0x09540462fc283e0L,0x0ce611fb8220396L,0x340eb7fd1622f76L, + 0x07bd66317b7ebc6L,0x37e00d9bbecf515L,0x2310ff51ad364bdL, + 0x11d1d27543e3b3aL,0x2db4ce65384b194L,0x0c6dd841a1daf05L }, + { 0x3da17e023b991adL,0x0ac84dc7ee94508L,0x2c5a0ddc1879aabL, + 0x2b57d8eb372d05fL,0x01e2a7d50173bc8L,0x041b4020bf3d484L, + 0x3012cf63373fd06L,0x117bc7a084779f6L,0x18ca07766d95765L } }, + /* 152 */ + { { 0x24347b9af80dfafL,0x2d8c7e71199fce3L,0x1b266ddbc238a80L, + 0x196aa1c6281bfc7L,0x0af31c35f6161e3L,0x31a11ba39fdeb24L, + 0x0175b4c03831d1fL,0x1cc68799a7441a1L,0x0c76da9d620934bL }, + { 0x01f597ba3e4e78bL,0x137b7154267e6a6L,0x399593088c612c1L, + 0x01e6c81d162fcdcL,0x3a22769007c5683L,0x1f9b6bcf1110311L, + 0x129103b6df23c8fL,0x1e58d3d98b0950aL,0x0f9f4ea6db18b3bL } }, + /* 153 */ + { { 0x269eb88ced36049L,0x13ff87d06e67e31L,0x35636a72e10887aL, + 0x2319682ee29a42dL,0x096e4295567dd6aL,0x2aaffeb50b3e316L, + 0x2f26a45286b5f31L,0x3940c7df7ebca3dL,0x120c5d9e0ac0e1aL }, + { 0x3bee3ffacc10da7L,0x0b57e651251b96bL,0x3e863c4220ff67eL, + 
0x052f5bd8cba3b05L,0x3c3fc9ef4fe6f74L,0x0efee1c12a78f03L, + 0x03342d25ff3cba0L,0x334b863f4d802ecL,0x1ac1e63e7530050L } }, + /* 154 */ + { { 0x183d07c8f3d6a02L,0x3050f1fbd343477L,0x0bf0d4c7af6171fL, + 0x26209f173c32a65L,0x32b697882c8a93eL,0x2957a2e92840b1eL, + 0x2d64f0633c87d58L,0x007f06ba208bf30L,0x1c12ce9b53f986dL }, + { 0x19639fd95dc1b79L,0x23dd50fd3985aa1L,0x3c4cede2fb9f272L, + 0x203543eba79b9c0L,0x3c2d530ed042f76L,0x375662b0151af0eL, + 0x29491000a4006bcL,0x258a4fcca1b2784L,0x14677782255b6bfL } }, + /* 155 */ + { { 0x381421ee30c98feL,0x03fac3f0b35c16bL,0x0ca614df6ad2debL, + 0x37a6e8c53a26cb1L,0x04f04b16dd38134L,0x01fe32a2910f7aeL, + 0x0f3917fc556ee0fL,0x33504f0720eece9L,0x1998397dd24b1adL }, + { 0x201e17edf4781e6L,0x1f0c651bc7e4072L,0x2613b53090da32dL, + 0x3729f23181e889eL,0x2ddc697092495b1L,0x1582026073cbefbL, + 0x1134d71d3d82bb4L,0x231073f37768c21L,0x0d23dd171b59679L } }, + /* 156 */ + { { 0x3a40f84d4dd7e96L,0x1323aa1027f0325L,0x29e6a9d11393711L, + 0x0863f631b5b15bcL,0x200269e7c3b6066L,0x164a757eb4eeaa1L, + 0x2e365b1413c6b00L,0x2abb306b5f90088L,0x1d36a82621a4798L }, + { 0x2ac45c4c1003c81L,0x27bd6bd0f6180abL,0x1f5e60f774699efL, + 0x2aefd74a160da99L,0x1c84acef1f312e7L,0x34922d48bd4fb20L, + 0x265c6063e32ca29L,0x065cffa6a9f1607L,0x017e3686c9a5284L } }, + /* 157 */ + { { 0x32efe659e90de99L,0x1216f2b416ad8c2L,0x2a52e14e4892be4L, + 0x0c0898a1a1f1229L,0x15eb3db542ad854L,0x11796104987c3a5L, + 0x17573948e81863dL,0x2b7933f87383e3bL,0x03fbd6f1ff57d84L }, + { 0x03711ddd1bf968cL,0x235f35237e91cb5L,0x1223e425a566d55L, + 0x0e1709b410527c2L,0x17c2c17430cf833L,0x050f6766f9ee07cL, + 0x3d3faee3bdc33e5L,0x2046bce16b0d653L,0x1137551cf429fd1L } }, + /* 158 */ + { { 0x128f55b20193bb2L,0x15e741cc42e1c92L,0x2309d345d27696eL, + 0x0caa1c61a297b81L,0x1110386839a43e4L,0x0ccbc420a3044f8L, + 0x05cbb48286ecf3aL,0x236bccd22a8dc0eL,0x0c6698ffcaaef15L }, + { 0x044c54af6908745L,0x0cdb91a8cd4fee8L,0x2852d561e821a6bL, + 0x1c0d8d245fda530L,0x181f613151b2979L,0x3d1a97bdb8408eeL, + 
0x114f7f6817dc2beL,0x316fe4f7a82be38L,0x136c3cf3cd5ed72L } }, + /* 159 */ + { { 0x38799ab7b080de4L,0x3de0775a760e5aeL,0x2aaa986f8f633b8L, + 0x0e2952f1729dad0L,0x1a9c2fbb95d74c0L,0x005e24c1dbf2d81L, + 0x286f0d8451b4408L,0x0c98d03c030e274L,0x14c3038e9520c54L }, + { 0x14bc3816977aad9L,0x3f420b5c21ef8f2L,0x020c875fed08adbL, + 0x350d1595bf01b42L,0x00fd6dd4ee1ce84L,0x297ead01c713638L, + 0x2eeb6f23338b226L,0x309b351dfab042eL,0x078e4db08bb5f80L } }, + /* 160 */ + { { 0x111d12a1078342aL,0x11c979566841900L,0x1d590fd3ffdd053L, + 0x27c1bc2b07fa916L,0x33e19bc69cf694aL,0x27773403db492b6L, + 0x32dd4e3ce38f5ebL,0x07154e1003d9ad8L,0x085cab8fdfbe15eL }, + { 0x2943f6b8d09422fL,0x0a5d583e6230ec2L,0x01fa2ef2e4d917dL, + 0x0ecd7df04fd5691L,0x3edaad3ff674352L,0x0d1c90b49d34d01L, + 0x38615d594114359L,0x2533472c9cc04eeL,0x07da0437004bd77L } }, + /* 161 */ + { { 0x24b99a62d712c44L,0x0da3e29a5895de0L,0x0432d65e2287148L, + 0x019bd6f17e23b5aL,0x14ec3479d140283L,0x0c9b6dc39b3cc48L, + 0x32936b96db6f449L,0x086bf296b026328L,0x04d69e248c72feaL }, + { 0x2a89092a71269fbL,0x2f6ea061942d802L,0x02a39fb55db22f6L, + 0x37d8c47a7407673L,0x090ac2c1d0fceb0L,0x2c7cdca9bebade7L, + 0x0c41932393b222cL,0x399d18a9bcf7ef2L,0x0019dea30b22fe8L } }, + /* 162 */ + { { 0x1f689ac12b3118bL,0x3b8e75b2dba959fL,0x22c2187cd978d06L, + 0x206354df61f3f30L,0x2e9f56db2b985b6L,0x38263055d611454L, + 0x212cd20f8398715L,0x0711efa5a9720ecL,0x1fb3dda0338d9acL }, + { 0x06b7fe0cfa0a9b8L,0x22eb1f88b73dd7cL,0x1e04136887c8947L, + 0x37a453152f3ce05L,0x00f51ea64ed811dL,0x321c15df2309058L, + 0x2bbcb463914d834L,0x3d4bbb493954aa2L,0x0019e5eb9e82644L } }, + /* 163 */ + { { 0x365a04e66d52313L,0x25151534fdcaf47L,0x1dafa6b7ae11fd6L, + 0x3615c6ac91caf03L,0x2ae5a8d68921f79L,0x3b17384f5317e59L, + 0x24bd39fde17716aL,0x19e0dc39bb692ddL,0x1efffe94085990dL }, + { 0x3fa0e27d88f92e8L,0x3bc3f671dc48f3cL,0x174c89274dbaa21L, + 0x296e6e89d898966L,0x246ebcaf6d4cfa4L,0x3e38a1c04324274L, + 0x3aeea20317a10d8L,0x2c28ec1dc778514L,0x0eadf0c479168c6L } }, + /* 164 */ + { { 
0x1bc1e484c854477L,0x3096d218e391f04L,0x202b869c54d92beL, + 0x0caf879fb490f53L,0x06b460c4ae318deL,0x2909abfbd51c7acL, + 0x052dc138ae7bf3aL,0x37a748eb89b7761L,0x1649d3fc1d55782L }, + { 0x07cae310ade1979L,0x1c1074ed2f1ca36L,0x3c4056c3c9bea84L, + 0x0ab5d2b919ce248L,0x0ecbe49ae36fe18L,0x3107e7d64affdbdL, + 0x2307156680db80dL,0x1cc1cd6eb01bf91L,0x0c06d68b4c7d6d0L } }, + /* 165 */ + { { 0x3e22be7854dfcf2L,0x069f7e9ab8ef436L,0x3ad1a521ec46ee2L, + 0x1e906a52133d18cL,0x32aa123f3ee9452L,0x2b8f2a484517ae6L, + 0x05d9255634a82acL,0x0b63385dab283f2L,0x078504cf7fc1908L }, + { 0x34ce7c43799793cL,0x375862d5467ed75L,0x1f9395ff980874dL, + 0x346e2fd8798b3dbL,0x3dcfcf54f00ea45L,0x0c00d6c09a18d84L, + 0x28a9cb67423b760L,0x01dfa7ef1d4d100L,0x0f47b52ce37051aL } }, + /* 166 */ + { { 0x3f7d8ad96bec962L,0x3207d85f8041ebaL,0x0509214e1058d1cL, + 0x10d08e5327d9311L,0x11a6605136c298cL,0x037e090f644014bL, + 0x1cdea4c36437549L,0x2dec48c4ef87bf9L,0x076249a60f7d27fL }, + { 0x09758381cf593a0L,0x33bbee0d931679dL,0x1333e05c99910c9L, + 0x07d0860238cbd68L,0x34f5e8f4f30ea5eL,0x1b032d1d5bece93L, + 0x3dcc6a2cae6e2ebL,0x3045d82cc1ff422L,0x01aee17901c0ff8L } }, + /* 167 */ + { { 0x048336b89aa9e14L,0x0d09c7d9d9c03f0L,0x0433906b6980666L, + 0x387aedeac8d36a8L,0x3eb59a05330247eL,0x0003d3565a6d2a9L, + 0x026b5bd78ef8258L,0x15b13976ce3ad18L,0x03b06a43e5d7d68L }, + { 0x20ae838ed2a0ee7L,0x2f94a3c5ba204eaL,0x1f5c4ea6413704bL, + 0x2d81b8a619e2adbL,0x2f459ed2b5be80cL,0x1d85486bc66c6dcL, + 0x116f3b7a9cce4d1L,0x1a494e6bfe652a9L,0x00797d92e86b341L } }, + /* 168 */ + { { 0x1aeede15af3a8caL,0x091e0a970d47b09L,0x23fbf93ec080339L, + 0x3139bd096d1079eL,0x081e76009b04f93L,0x0603ff1b93b04bbL, + 0x0aef3a797366d45L,0x076474a4f2ed438L,0x061a149694468d7L }, + { 0x12c541c675a67a1L,0x0e34c23d7fa41bdL,0x3cccf6be988e67dL, + 0x2f861626218a9c2L,0x27067045bae03ecL,0x032a365bb340985L, + 0x00735d1facdd991L,0x3c871ea842a08c3L,0x0152a27e5543328L } }, + /* 169 */ + { { 0x1d609e0e6057e27L,0x22da9f1e915368fL,0x11451f32dd5b87eL, + 
0x22343bd478bfd66L,0x125567546ea397aL,0x08a2d20312619a8L, + 0x01997aea45c8b13L,0x19f48f6f839df74L,0x1f80e2ea28fc518L }, + { 0x295412d69d0820bL,0x1cc49c7a9968618L,0x0221eb06380d031L, + 0x3f1d7fa5c1b09f2L,0x35a71d2507ffd4eL,0x1f2dd50dece5a95L, + 0x0dbee361c80051cL,0x0b51781f6d35eb5L,0x1431c7481f49b19L } }, + /* 170 */ + { { 0x2ab2d0408e1cc4dL,0x1d634eb4b707b97L,0x3dfe5c9c7393e93L, + 0x2a74cde5a0c33adL,0x2e24f86d7530d86L,0x02c6ec2fbd4a0f2L, + 0x1b4e3cab5d1a64fL,0x031665aaaf07d53L,0x1443e3d87cc3bc0L }, + { 0x10a82131d60e7b0L,0x2d8a6d74cf40639L,0x2e42fd05338dfc9L, + 0x303a0871bab152bL,0x306ac09cb0678f2L,0x0c0637db97275d7L, + 0x38c667833575135L,0x38b760729beb02fL,0x0e17fc8020e9d0aL } }, + /* 171 */ + { { 0x2dd47411baaa5ebL,0x2edd65e6f600da2L,0x0c40cdffed2202cL, + 0x3c13824450761a0L,0x120748b871c23a8L,0x167a4a25974507bL, + 0x06dbfe586a15756L,0x269d1f1a35f3540L,0x148da0ad0df2256L }, + { 0x0fcc5db7f9069d7L,0x1f49157014c6932L,0x0899e9a2db3a248L, + 0x0e2d3fa5c8316adL,0x0d27f35e452bfd5L,0x38b6b24dce81329L, + 0x3ee7e27cbbc549eL,0x24d800a1c8a77fcL,0x0d03179878d28daL } }, + /* 172 */ + { { 0x1b7e9bb3b66c047L,0x1961a580a8f8762L,0x2297c8db9c0022eL, + 0x28f4229d28d13e0L,0x1fcd398de0e76acL,0x0c8399abefc69c7L, + 0x1c9fc52fbb6eaa8L,0x2cad2a0b43af05eL,0x00f4e00cf6f4e7aL }, + { 0x24c0e9a4890c439L,0x1928aef0d69ac90L,0x079dd9b7497d375L, + 0x03584b7a50a5691L,0x0e60d0033a1ff3fL,0x08905f68d6189ffL, + 0x2b8385815da8c05L,0x25aa941841353bdL,0x120800728d2f16eL } }, + /* 173 */ + { { 0x36f2372ab039042L,0x1a5e327e8213b65L,0x1d2f58bec14310eL, + 0x007f881170f40ffL,0x2b0a5a9283200c1L,0x187ebfe39a1a3deL, + 0x31226526c95d1deL,0x3b45e8788049edeL,0x0898e63dd78c2a5L }, + { 0x36533da22bba4eeL,0x3d8e5fd25a95d2eL,0x29f714f2a6b93efL, + 0x2f477f75cfd024cL,0x269bca1b1a08248L,0x28b80c9d8bccfcbL, + 0x1df7419a177e64bL,0x2f472f143a64dd7L,0x095b87a979f4a56L } }, + /* 174 */ + { { 0x03736a967c1f177L,0x34d4218004cf27aL,0x3b926eac9a5b1b6L, + 0x29b09fbcc725092L,0x1122b48707a9c01L,0x346b2616b64eee9L, + 
0x3f175b9eb94e2a9L,0x364514470081b54L,0x0b1d13eb2525102L }, + { 0x3e7dbeb675a1171L,0x20a5705b034ac73L,0x1b5a057c88cab22L, + 0x25b4c03a73e36c9L,0x3269552eb73ea9eL,0x383e637ec3800dfL, + 0x10480fea9d035c9L,0x2cc66183926e34aL,0x037a35e9512c036L } }, + /* 175 */ + { { 0x16729ee8f00df48L,0x329ed846b20c131L,0x17f98b3a8123b89L, + 0x06708728fa925e9L,0x3e2bb3ce7e0431bL,0x371de065169cf7aL, + 0x2b3df12f86cc2baL,0x373c17fc0179397L,0x05ef955dd7add27L }, + { 0x0c22ffa00ee402fL,0x0d78a8ecc2ed338L,0x11d0643cb1015b3L, + 0x114f3465a215095L,0x2f0be54b4c6183fL,0x3083379319993c8L, + 0x24c475a5f4cfee4L,0x07b6772aa5cbe02L,0x19cde4af2005911L } }, + /* 176 */ + { { 0x29d0bc8d771f428L,0x07b36790f28e0a7L,0x2480eb93acf03acL, + 0x2041968a8fe357bL,0x22f0b8a7316232fL,0x0951d2887f013eaL, + 0x315f6f4a8df7e70L,0x0394946b13fc8eeL,0x06b66e21b73e095L }, + { 0x1c9848067a41deeL,0x2a56b9ecf8acfd6L,0x0386891454e12cfL, + 0x37fbbf29a915366L,0x011e9cb75f0dddbL,0x3bc8230d7da46c9L, + 0x333cf6a9b9e766fL,0x1d2a7a37c400062L,0x1c4b8a55ac9d1c1L } }, + /* 177 */ + { { 0x19f58625c4cccb8L,0x3d4824bbd34fbeaL,0x257689efc87870bL, + 0x25b685081b8a3d3L,0x07c52107da2366fL,0x1283c3c240cc680L, + 0x2a47b0478d4ceadL,0x1d526ca267b891cL,0x110ae96534e6420L }, + { 0x0c1d655cced05b0L,0x30fc2405d6550cbL,0x30a48e577cd7abaL, + 0x24d03a635b6ebadL,0x3603d24f184b008L,0x15c85cf49a60d94L, + 0x1141de6e1458832L,0x1fcd074d22c9984L,0x06be52257dcefa2L } }, + /* 178 */ + { { 0x2678f33c947e655L,0x3edda82248de564L,0x2745790239d1ff0L, + 0x248f36edf3acb7fL,0x105f6d41cea0874L,0x2771562084c9b6eL, + 0x0317025b1ae9ae7L,0x22a738514d033a7L,0x0c307502c29a2c3L }, + { 0x0124f11c156ace2L,0x1c3f9de7fc94a43L,0x1a816e1171b22c1L, + 0x20d57789e5d837eL,0x27c6cc79da19bcaL,0x3587ddc06b649faL, + 0x1c06bb285901121L,0x10aeffa03209898L,0x15e4050d338aa26L } }, + /* 179 */ + { { 0x1397829eaad87bcL,0x324d9e07a132f72L,0x024d6ade4fdee0aL, + 0x295a435fd5ad5e7L,0x3d14fb0b950b9abL,0x16839edbc26ca74L, + 0x2f4ff3d0684f232L,0x1ccec1453a74d81L,0x077e63bdd26e8adL }, + { 
0x2fd06ece0d25c6dL,0x00086802e8b73c2L,0x17708c5bb398dd9L, + 0x360663fe3f06c09L,0x1b7e2cd68077f06L,0x18e8d5ca1f543fcL, + 0x125a9aef75e0572L,0x03a56fc95e24beaL,0x111847d3df0739dL } }, + /* 180 */ + { { 0x2ab9cc7fec82924L,0x1b75a69c8835a54L,0x27dea06ef0e21c7L, + 0x3089c60e41298d4L,0x2716807c8ab3e51L,0x123c491bd36cd7aL, + 0x1560958f3ede0a7L,0x0e37bc524d91104L,0x0f75f6583d1874bL }, + { 0x39189e10b927eb7L,0x318d670b8bc49e8L,0x02337fe966f4a87L, + 0x208417956142dcbL,0x2e58c39f9102b83L,0x246d4ca58ffb801L, + 0x2ff97b3f052ee39L,0x14181fd6e15332eL,0x16a935e5f6c5f80L } }, + /* 181 */ + { { 0x19a0355dfd88d38L,0x33638f15277d03cL,0x29e304d006e1555L, + 0x1b3f42c3398c89cL,0x135f2ad31f16b70L,0x1e8f7e7fc55b702L, + 0x1e5fb5b30c5213fL,0x2368a7ca7324a95L,0x144a0ecfdd42b85L }, + { 0x1c115df52658a92L,0x0fb45f10a0585adL,0x1f707fd92a91bceL, + 0x3f67357625a9565L,0x35a9472b1663c8bL,0x00cf86f41dd8d0fL, + 0x1c02fb14e44ca8bL,0x3ecc89e87261879L,0x1b5ece0f2c4cc4fL } }, + /* 182 */ + { { 0x3127bab31211943L,0x232b195a10c9705L,0x0b88d855fc3e44aL, + 0x0333a47ba974bf8L,0x078ec7d1247ababL,0x3367fbe9748f771L, + 0x255766a3986de70L,0x31fe8cb1ee19e09L,0x0873e54018beeaeL }, + { 0x16e86f2b38d17c1L,0x3ef431c7e810372L,0x2b79f88499cb9cbL, + 0x33bdc7b202f8446L,0x146c896921d47c5L,0x34c58cc6b2a8ef0L, + 0x28765b5f921c0e3L,0x3c9c0c7e8207b9dL,0x0fed5dafd5f41efL } }, + /* 183 */ + { { 0x2f10b9d4cda1348L,0x1a7f48970c04ea2L,0x25b18957c22bb07L, + 0x31fd6b3c711142aL,0x09fef80295cafd6L,0x38227d773dc6850L, + 0x3d2ba8e12029f5eL,0x32d625d4aa3ec3eL,0x09061e2275f6f70L }, + { 0x30a4ac51fbda16aL,0x0439e7c77e8a8adL,0x2132d9945f6f799L, + 0x2bbad2e93bee8b3L,0x34bf2d53d450d59L,0x18831ea1aa3826cL, + 0x13c6f476010204eL,0x3d5a98fe250f429L,0x13214c91d1987eaL } }, + /* 184 */ + { { 0x14fb120490d66c3L,0x35cca2837208139L,0x0c3804b4294deaeL, + 0x2acc777119ee805L,0x28342ed113f2fa2L,0x0c0d3839c3fd57aL, + 0x0ae3c1b18da72f2L,0x1680ab70c36faf6L,0x09c179bdf6f3e94L }, + { 0x2c928ef7484c26fL,0x2df6c7bcab6ec51L,0x35483f58dda7206L, + 
0x0312f1fb6d8221fL,0x1975cafdcfde4e2L,0x1afbb0812134487L, + 0x16db67c5b596708L,0x1d222d5e6aa229bL,0x01522c6d87e4118L } }, + /* 185 */ + { { 0x2890757c471d4aeL,0x12c6950e8769d82L,0x31826aa701a1fefL, + 0x14967197e4ee24aL,0x1d789df35bf4d4eL,0x2de70fca48ebe4aL, + 0x0cf1303ccb46c60L,0x03b125560b39f3dL,0x11c7da081b4257fL }, + { 0x12c6ae59aeef274L,0x16fd3c50df020feL,0x3023e13c86afe6cL, + 0x398a8894d82a9d2L,0x022589fa5d21dacL,0x3e9d2c3ecf55caeL, + 0x2891a93d4a3916dL,0x33ef79db36372c4L,0x19aa0391a3f59f4L } }, + /* 186 */ + { { 0x14ba69e203fc3f1L,0x1a332d8841a8a41L,0x0540aad5fa9f091L, + 0x03affdfb5bec206L,0x0bef94afdecb8f2L,0x02af476cb202986L, + 0x0e0a7ce25d8ca0bL,0x16e69d799e9040aL,0x1b2dd7662ddd6e9L }, + { 0x3dff279f289d7eeL,0x157567ba8881721L,0x3d54c18adac64d7L, + 0x33dfb004066bac3L,0x2b48d70a43a8c46L,0x02ce7be1bf2439fL, + 0x145a20965c53c11L,0x008f9155ddf30e1L,0x16ea33430f757ddL } }, + /* 187 */ + { { 0x29f39490ff53d2cL,0x24565ac00d26e7eL,0x1014d59979678dcL, + 0x2aea29ade2bc429L,0x08b517b104dd72dL,0x1b4e6f83bd77950L, + 0x217f70142b90bcaL,0x044632baa8fa7b6L,0x16da01689d606b3L }, + { 0x26ca563f46afff7L,0x171ee8d29797cfaL,0x24c8aa998fd8394L, + 0x11ad8fd4d7b07ffL,0x0d1f509e542a601L,0x3e33436d4205a22L, + 0x236772d1918daa9L,0x3719994179aede2L,0x1ef4ab03a819cc6L } }, + /* 188 */ + { { 0x2089d14d376d986L,0x1381de8b70d6c01L,0x309a53ff2c86d0fL, + 0x11448f0ff207045L,0x31b656fc2fef4baL,0x3fbea2ee14b3569L, + 0x110b77b57c74891L,0x284a63c14e0f920L,0x04c4b55d3ad52c5L }, + { 0x110cff3f3827633L,0x1e1357802bfa594L,0x38823ead32fa086L, + 0x058ae47361b2ce1L,0x0e6f3638a3dcf4dL,0x22dff5081e2da96L, + 0x1683e733792112eL,0x210cda5901137b9L,0x1223b84210f28e2L } }, + /* 189 */ + { { 0x028a9a9c3ebeb27L,0x3372d4fbd643e1bL,0x2e114dae7f37d7bL, + 0x391c9ba9f27a228L,0x28c141388033522L,0x058855d667540e1L, + 0x0564d859b1aeca6L,0x238d9c67f3faff3L,0x0433a577af11aebL }, + { 0x3f26ce06feba922L,0x320fb91d695a4f0L,0x274028bf378e5f6L, + 0x1a2f70fdbc5fde5L,0x2a6ed90aed2a5e3L,0x291f2f54f40d282L, + 
0x0e2bc83b1c3a4c4L,0x003ae93c2a9b937L,0x1c097c7af4374caL } }, + /* 190 */ + { { 0x037717879c28de7L,0x2a8aaaae70cc300L,0x182666bc61eb617L, + 0x33d35e2d4110c20L,0x19870fc72e0b5b5L,0x102def175da9d4bL, + 0x32d03a3b4689f5dL,0x182a6a5ff619e1fL,0x1c06ab7b5eefd60L }, + { 0x19eadb1ffb71704L,0x3962ece43f8ec7aL,0x382cab4f19aa436L, + 0x3eb83cf6773bb2aL,0x16e20ad12da492dL,0x36ef4988a83d52fL, + 0x12eb54af89fa0f7L,0x01d637314286ba3L,0x0b79799f816ef7dL } }, + /* 191 */ + { { 0x2c46462104f98ccL,0x056489cabb7aba7L,0x3dd07e62186f451L, + 0x09a35b5a6d9eba4L,0x0fd43a8f3d17ce0L,0x302ade5ed4d1d82L, + 0x1f991de87f1c137L,0x38358efd65ea04eL,0x08de293a85be547L }, + { 0x182add38ef668b1L,0x39acb584725d902L,0x2b121c1d4263c54L, + 0x23bbfd939ccf39dL,0x02871612a3134b2L,0x2824d652bdc6a6bL, + 0x1108e831c88af2bL,0x0df682d92444aeaL,0x1138febc5c55cf4L } }, + /* 192 */ + { { 0x29ca589c4a2daa2L,0x29c0f1003d8231fL,0x1058d517510318eL, + 0x1c92aedbca5be33L,0x194296ab4264934L,0x314595f42f954f8L, + 0x080ea89af9398faL,0x386c788cb7bb13eL,0x1372f81761e67b1L }, + { 0x1014bc73a20f662L,0x1f9df127b654094L,0x096fb62b96521fbL, + 0x19e8ba34dfa27d4L,0x25804170e3a659cL,0x3b5428d03caca89L, + 0x03c00f1674fce69L,0x2764eaa914dfbf7L,0x198f3c3bfda4ce9L } }, + /* 193 */ + { { 0x2b1f5cd81614189L,0x15b11492c967deeL,0x24b245fb415ec7dL, + 0x371ebdafbe71eeaL,0x074b48e82302bc8L,0x2db46c7e46ddc38L, + 0x280c974a1336e09L,0x2d894a1704d5f99L,0x12d59bcb813c7ccL }, + { 0x1ad83b47c019927L,0x3c999d8c37f56f7L,0x2c5a31e05d23e10L, + 0x3e915ab1180576fL,0x1243cac822aa6e5L,0x372327a51a5594aL, + 0x0a4065c69c9c7f4L,0x0c06eb6c9f82789L,0x1ccdfa7a34eae41L } }, + /* 194 */ + { { 0x36a864d59cb1a7dL,0x19328dabbee3b85L,0x3acb1c22b0d84d8L, + 0x3af66037c743ba0L,0x07f94ced97e80a6L,0x29cb0457d60ab31L, + 0x107bb7a29cd1233L,0x028c3384a8aa31cL,0x1500229ca564ed8L }, + { 0x374bad52f1c180bL,0x2fa6635d26a8425L,0x08ab56dbd1bad08L, + 0x3902befaa6a5e31L,0x3153dc5fc6ed3e3L,0x2fa4fb422a2fa5eL, + 0x2e23bdadc7f0959L,0x0a77a3490a420b3L,0x016417523c6dc27L } }, + /* 195 */ + { { 
0x0eeccf16c14a31eL,0x3894d2cb78f0b5dL,0x35997cec43c3488L, + 0x27645ab24dbe6ecL,0x29f7e4400421045L,0x1154d60dc745700L, + 0x14a4678c9c7c124L,0x2eb67325d5237b2L,0x14e4ca678183167L }, + { 0x33af0558d0312bfL,0x2fd3d5505879980L,0x05a7fa41781dbd1L, + 0x2a003bbc7549665L,0x079c3b8d033494dL,0x327db9a5b1417b0L, + 0x030aaa70ae1ade1L,0x018300a23c305daL,0x00c7f4cfe3ba62aL } }, + /* 196 */ + { { 0x18b447d057d6006L,0x25db9bf5c722c03L,0x2029abcf40f538bL, + 0x21bc40e9e0d79dfL,0x05e472c4b13bee3L,0x07f2c650829ab08L, + 0x0abf4943b045f63L,0x1ade79770767f00L,0x1b528c0bc70a555L }, + { 0x29d07ee8a8640b8L,0x04408f438d004aeL,0x255bbe24ae89256L, + 0x093e95e77371f39L,0x1377bbfe5e358e5L,0x30251f915f389c5L, + 0x29782664651c6c3L,0x305697ef63543d2L,0x08d6fcdd28fe2e1L } }, + /* 197 */ + { { 0x164a2f65c7202c8L,0x0d01496952c362dL,0x16721434fbf57d6L, + 0x1787660c28e1053L,0x15ef0fbe1811421L,0x1bd5fe7f1e9d635L, + 0x2269d35705dcf8eL,0x27e5d7752695b64L,0x0f18f015d7abdb4L }, + { 0x3131110b4799ce6L,0x2fee64b2f2df6c1L,0x0c9ff7ba21e235bL, + 0x04ec63d27fb07c0L,0x1abcf959b009d69L,0x350851ba3698654L, + 0x1f23f10e6872130L,0x0e1ad560ca05eb9L,0x143c9b5bb689ae7L } }, + /* 198 */ + { { 0x23328db48c74424L,0x05b8474672cbad0L,0x192a40d6e217326L, + 0x13032f71d4b94d0L,0x0d733bb01dd83a9L,0x2de914817188c14L, + 0x0011c8cd0d631a5L,0x1f3573370289669L,0x1e622f112cc646eL }, + { 0x3d6e29a3e1e4c4bL,0x2094e27ec552291L,0x05b54fd3e319d5fL, + 0x2682822e599f8dcL,0x3d8cbe8db8c4ce5L,0x3bb0f5d6f29d279L, + 0x1a313dcc4496eaaL,0x24d805f71c8ea28L,0x1a5250ff77a8cebL } }, + /* 199 */ + { { 0x15a0726fe29bd79L,0x12a0413e642cd29L,0x146daad56983657L, + 0x2e543507fbda41aL,0x06e6f7f450e580aL,0x03cdc62af1d6d45L, + 0x234087508cc97bfL,0x2244146e8b29295L,0x17275c39077e64dL }, + { 0x37cccaff77ca6bdL,0x037d06f6c637d7cL,0x0ff8019e01f7e0aL, + 0x112a9975cae7d1bL,0x06e3663e9be4f3dL,0x3be76db5e08b62bL, + 0x24a9aa5f37f9223L,0x322e9fc2b4e76afL,0x098a0a57c70f69cL } }, + /* 200 */ + { { 0x1c50cf400fd5286L,0x16e755ca92c0f36L,0x0f9e051ae73e1eaL, + 
0x10a546ce093d798L,0x09fb4d667fe9b51L,0x3714215ac0d2cb4L, + 0x30022e4b537a80eL,0x22bb9a7b8404a32L,0x0ed7c8b9e5c6a54L }, + { 0x06007bcd933619bL,0x1d9a38ae77f865dL,0x15d3cc6e2a2e0ceL, + 0x17dfbafccbea7bbL,0x167cc4f6435a14fL,0x214305b1d72e263L, + 0x379c96cb2185fc7L,0x11d10261d29d917L,0x1397468f8ae27dbL } }, + /* 201 */ + { { 0x1de68adc88684f6L,0x3b6aad8669f6ff1L,0x1735b27a18f57c1L, + 0x1963b3627ac9634L,0x2d879f7eab27e7bL,0x1f56fbecf622271L, + 0x3ad73ca8fdc96d6L,0x15b5f21361ab8deL,0x1a4c7e91976ce8eL }, + { 0x001a5406319ffa6L,0x3993b04d3b01314L,0x296cd541242c0caL, + 0x3bafcb2bbb87da6L,0x028bee8059da259L,0x23a24392239e5e3L, + 0x227fd9e9484bebbL,0x18c6039491b43ecL,0x1b78be2a54a625dL } }, + /* 202 */ + { { 0x223554af472f13aL,0x264edd5ccfa4728L,0x29f096c168a2facL, + 0x0752c49d4d49abfL,0x3e77070ca7cfe76L,0x1f9f37da10c061cL, + 0x162ed466b6aaadcL,0x3e36368b757aa85L,0x016a81a2e0039faL }, + { 0x080759c4e3de3bfL,0x38b8454bcc222aaL,0x2d9aaa7eba5b0c1L, + 0x14e7e70472b2cb7L,0x3b0dc5c194c65d5L,0x28fd2d842ae6f61L, + 0x0b5f9fd32f8c96cL,0x0877d2610bf30a3L,0x0f431ae27ccb90eL } }, + /* 203 */ + { { 0x32a0a0d6a0ccd0aL,0x3bb209664ed554eL,0x06fd9de672a6a3eL, + 0x1203681773ec4d2L,0x16739874d8d9c51L,0x0a68d72712a9113L, + 0x177eadd9cf35b2eL,0x1c2875af66d7e24L,0x1d69af0f59d2a04L }, + { 0x2f844c7ba7535fdL,0x3530f6a10bfce6cL,0x09ede951974b45bL, + 0x25ff5114fb17f85L,0x1e6c37c0e6982e9L,0x0b0fbdaa98fdc17L, + 0x36a8d609b0f6a9dL,0x06de2fb74d6185dL,0x1764048a46aede1L } }, + /* 204 */ + { { 0x07c6ee551a251d1L,0x12fc48349e77f69L,0x138cec518a28befL, + 0x21ce202f9b930b5L,0x21be9b20b1b2b78L,0x1e5a867b1a733e3L, + 0x10bdeae41dfeae3L,0x20300959dbf27edL,0x16a8b815a0503e1L }, + { 0x0a085f653f5ef65L,0x3eefe5dec94414bL,0x07e3a3346fe661dL, + 0x3b86e57dfbe23aaL,0x15b65eaec25ddfdL,0x30b808ec881d39aL, + 0x283bb511869a154L,0x1f9f61806d5dd0bL,0x0151464652cfa87L } }, + /* 205 */ + { { 0x10853c857fa58f1L,0x2939a1329319c09L,0x2d0a1b81f40db58L, + 0x041563a32f41ee5L,0x242e388cfa4651eL,0x110d8220699011bL, + 
0x2b8fd051b0d5394L,0x33f3b0afcd6cf89L,0x0fd4ae787095702L }, + { 0x079bc29df53d498L,0x0c713844dfd890fL,0x056c17a3cedf4cfL, + 0x071b36445764edaL,0x39228cddb246113L,0x3480afc6acc2914L, + 0x108612e97757b68L,0x09ad2999b79f398L,0x051c200fe654f60L } }, + /* 206 */ + { { 0x296103cb1a2b4b5L,0x332ffa10f025a3aL,0x072d986ffb5b98dL, + 0x3c85a74eb09a8dcL,0x2771371f12fa07aL,0x1f0a67be2ee16e6L, + 0x372efceae10d34eL,0x15bc4f52f71a788L,0x039378df75d8dd8L }, + { 0x1e902ffde7ff5d9L,0x2a1748c9682728cL,0x13a6f4192fcd0e9L, + 0x0dc56c1dacf5c6eL,0x26e711d1cf52a57L,0x30a4a0675c9aaa1L, + 0x015de60b61b1df2L,0x2791c89395d7320L,0x1dc68e893e118b7L } }, + /* 207 */ + { { 0x3924ff96ffeda73L,0x27d01a83688062dL,0x20eaf89584dfe70L, + 0x0ba0d568100da38L,0x0fd777d7c009511L,0x2fe3cb20967514aL, + 0x05311bb0c495652L,0x36755fd8c64a113L,0x0d5698d0e4f8466L }, + { 0x10d64fa015d204dL,0x09afe9b744314f8L,0x0e63a7698c947b6L, + 0x11c14cde95821feL,0x0df5c782f525a65L,0x157eebfd5638891L, + 0x2e383048aa1e418L,0x18f4d23c886391fL,0x04df25239591384L } }, + /* 208 */ + { { 0x2f4fd69d8695310L,0x3ac27dfa1da3a9dL,0x1812e0d532a8e28L, + 0x11315cab1e40e70L,0x0785d6293dda677L,0x369daec87e60038L, + 0x3c72172bfe2a5a3L,0x22a39bb456e428aL,0x04cd80e61bfd178L }, + { 0x1f4037016730056L,0x117fbf73b4f50eeL,0x363c1aa5074246fL, + 0x14bfe4ab9cc2bf5L,0x11bb2063f21e5c6L,0x0b489501bbc20c3L, + 0x15001c18306ecc1L,0x150913b766ce87cL,0x1f4e4eb25b8c0ccL } }, + /* 209 */ + { { 0x161a714a1db5c18L,0x139879d9dc1d33bL,0x3be57bf685de945L, + 0x14f48516f97a5c3L,0x3ee49a5f2221b0cL,0x12c4740ee4c6206L, + 0x02213700b91afa1L,0x002bf1abbf924fbL,0x13c50554e945262L }, + { 0x02c45e77364c92eL,0x000995cd4863a35L,0x1a0284d3f3c5e05L, + 0x0936fdd91af4a07L,0x2485f304f312f84L,0x049e944f86a23caL, + 0x20e0bc583f56311L,0x1c293b5e5431c69L,0x0c692855e104b7bL } }, + /* 210 */ + { { 0x106185c644614e7L,0x01b2b91d2690923L,0x12ea2587e5282e9L, + 0x02b44a15f356150L,0x0ba5593b5376399L,0x3574a919dc31fdcL, + 0x29a1bac2cf6dc4dL,0x2576959369158edL,0x1c8639f7e141878L }, + { 
0x1c96b02f8589620L,0x11a28d079101501L,0x1a11096ad09c2feL, + 0x2627194abbafc8bL,0x3547e1b8fbc73c2L,0x1df6fdcf37be7e2L, + 0x13552d7073785a9L,0x0f4fc2a4a86a9f8L,0x15b227611403a39L } }, + /* 211 */ + { { 0x1a5a7b01fbfaf32L,0x298b42f99874862L,0x0f5ef5e3b44d5c5L, + 0x3b7d0eefd891e5cL,0x1260ae5a03ea001L,0x1a5f18b2a39d0a1L, + 0x1a7643eb899ebd2L,0x09698da800f99d4L,0x0eaef178c51ba07L }, + { 0x2cf8e9f9bd51f28L,0x3aef6ea1c48112aL,0x2d3a5bfc836539fL, + 0x334439bc23e1e02L,0x08241ab0e408a34L,0x22998a860413284L, + 0x2048d6843e71ce9L,0x3461e773a14508cL,0x1fa5cba23be1cf3L } }, + /* 212 */ + { { 0x3e8c9d22973a15cL,0x3b237750a5e7ccbL,0x0a390b6afb3e66fL, + 0x0daad97bc88e6bdL,0x266c5fcdb0bb1e4L,0x2bd21c2e3c98807L, + 0x344e243cffe8a35L,0x05c8996b8a1bcaeL,0x114da2e283a51ddL }, + { 0x29c9a56c1e3d708L,0x18b4fc72c3be992L,0x298497e875404feL, + 0x1acf3a91bebc1c0L,0x283886263138b7dL,0x070c24241e018d3L, + 0x03864727e842807L,0x2899fc2bde75f96L,0x104c1b86582b236L } }, + /* 213 */ + { { 0x2ff09eda526c894L,0x2fc48052b1f48ccL,0x0dcd3cd9293495aL, + 0x04a4b9ad55adbe5L,0x21036c31bffaaebL,0x01ffccb864de5baL, + 0x1d67b8a9d237e77L,0x0922f59696c360aL,0x1b348edb556db29L }, + { 0x2e9f9b2ded46575L,0x32822bfe9a6b3dbL,0x33a1f16d37d1496L, + 0x2c5e279740756baL,0x1c827cc454a507dL,0x259399dc178b38aL, + 0x0e46f229b6e4a52L,0x19214158ec2e930L,0x0a3e75c24484bc4L } }, + /* 214 */ + { { 0x3cb476fd2f6615dL,0x3e6de36636a6a43L,0x1f1cd2bdf1074b7L, + 0x21a6e55bcc78bf7L,0x3b596eadf2bda30L,0x156c94e3cf328bdL, + 0x0846db91c09f8b3L,0x190b91bcfdbcf1bL,0x1ff9bb9398e2a14L }, + { 0x118d4f5a17bd645L,0x0cfaaf6f5b55494L,0x06fc734d0957570L, + 0x17d7d4f10d401faL,0x3fd27dd1998ca06L,0x254b472a652766fL, + 0x2c101cddc4e3046L,0x2c01e132ad3ee06L,0x00346d079f94a56L } }, + /* 215 */ + { { 0x1eb8e4fa6bfdeddL,0x28a179e9d31be65L,0x14d13d09a252993L, + 0x3986697dd9e2f57L,0x20cebb340eaa10bL,0x36fdea0f4f6c20fL, + 0x0f23e1c633a78b1L,0x20de49992f0fb0cL,0x1c96630f8f107a0L }, + { 0x3f4cb4bdef86a80L,0x13b1e0fe0966aeeL,0x3604609532c81faL, + 
0x3322e427a4a92fdL,0x31788416071bb7aL,0x286ae4a32875cc5L, + 0x0455a57f7f14becL,0x3a266ffa805b97eL,0x02d7b8c76b9bf21L } }, + /* 216 */ + { { 0x28605634b9f8e7dL,0x05dadd8ff162a11L,0x1a7e2feed68a201L, + 0x0f99460c6439e97L,0x2e9377ad6cc6776L,0x1c0c8c85f5f4040L, + 0x0bb505ccfc47207L,0x09da55cfb80c54dL,0x0f31bf1ef8c0f1aL }, + { 0x35f5c4b0c935667L,0x14b0e41834ae2d8L,0x2c2e37c3a574741L, + 0x1302dcb8337bfeaL,0x1f4f60247fd5fccL,0x2785bccedd0fe6eL, + 0x34ef9c05c2e3547L,0x2b38e888d311cc1L,0x1244092f279495aL } }, + /* 217 */ + { { 0x3fd7851b30f9170L,0x2a87a4dff396c56L,0x15e0928437b9715L, + 0x1670cbc49cf3ff5L,0x248be1e3488acd2L,0x296f18ad685173cL, + 0x156f463a3408607L,0x3870d8a5bac5460L,0x1e7397fad192774L }, + { 0x22f99f49c8225b5L,0x3f39251addf134dL,0x35308541e91b33eL, + 0x0d0e3cf5a4d1477L,0x2e727b54e0bd2d9L,0x188b65002d778b5L, + 0x36a94b42d929c27L,0x3c814dab39c8d5bL,0x04464a18cd5fccaL } }, + /* 218 */ + { { 0x1be0aababa95d63L,0x203185ed2cd1b63L,0x38630e0d8142927L, + 0x0aad5bbc13190c3L,0x1785e3633875be0L,0x04b24f930a3fae5L, + 0x2f82a3d5401795cL,0x2bf5a27fd47078dL,0x16b3c48c89510eeL }, + { 0x1287ebad4f064beL,0x1f555553af6a65eL,0x1ef2623727ea1a7L, + 0x24627cd9b1919d1L,0x1c59d6ebda911f5L,0x1493484df950d73L, + 0x15b38d3a84daea7L,0x0f1271ec774710eL,0x01cca13e7041a82L } }, + /* 219 */ + { { 0x399860c874d64b0L,0x16c248594f38318L,0x0000eaa11986337L, + 0x0258873457459c0L,0x277d70dcd62c679L,0x016f5336f875f75L, + 0x2f8f30eff0f2703L,0x16de01dbb1884d8L,0x1d8812048167e44L }, + { 0x1749a0e161f9758L,0x2457fa8f13f38a0L,0x0e41911dd8afe60L, + 0x2b1e6946827d4dfL,0x02ca5cf8efe36a8L,0x12415fd59fed52dL, + 0x244b641bdcae07eL,0x1960edc7fc31690L,0x1064815a5364b60L } }, + /* 220 */ + { { 0x0c69c3eef39cc39L,0x011593e98d5b45eL,0x3542412fb990983L, + 0x34de76eca96f4f0L,0x0e7e75e3da1d531L,0x2c051ec52197c62L, + 0x129ab02dac4e220L,0x1d3bfd6794728cfL,0x0f1c964f7fe37b0L }, + { 0x080c0a60e301262L,0x1601814e4288b5cL,0x3f9acc8a90299a4L, + 0x15c5303c70b699dL,0x26e66d9f7dfae90L,0x1e11a490d997fc5L, + 
0x0c307cc866dd8c4L,0x1439316bfa63f13L,0x03960e3ba63e0bfL } }, + /* 221 */ + { { 0x2785136959ecdb3L,0x2bd85fe7a566f86L,0x32b8cde0dc88289L, + 0x2c1f01e78554516L,0x350e22415fe9070L,0x1635b50bddfc134L, + 0x3b629ab3ab73723L,0x3f49453f506e6e9L,0x1937b32d80e7400L }, + { 0x1d80d4d7147886fL,0x33b5855db2072b9L,0x0692642717bbe08L, + 0x262aed2f487853aL,0x26530308b9dcdf1L,0x2674671d962f991L, + 0x0ab126fbf192dadL,0x378c5568f46ccc1L,0x00e943f4be5fa24L } }, + /* 222 */ + { { 0x14240587fe9ea48L,0x13e09586d5d21b1L,0x013c78719740af2L, + 0x1e5c3ae1d3674b1L,0x0b62ba3aa27a9beL,0x306fc2b10ffbe38L, + 0x3130e10a23f2862L,0x33afd4709dbcd2bL,0x185f6cd1e9aae55L }, + { 0x0defa7f40369093L,0x076759616078289L,0x3f33e512ed9e11fL, + 0x167b448225a6402L,0x28b73c399bf8a84L,0x3dbd53fa0c91557L, + 0x25235554a305698L,0x0ecc4aa75b694f0L,0x16ae6a6f9042a09L } }, + /* 223 */ + { { 0x2e123c9152cdd35L,0x390ea21900bbc6cL,0x30dfb9ce5bd5ae6L, + 0x129d601245224afL,0x3f502eec2b4acb8L,0x28cfbd3a31fd57fL, + 0x1d20019c8a7b93aL,0x2f3ac1ac40d5ff6L,0x0273e319ff00ba3L }, + { 0x02c2f77abe360a3L,0x3d7212b7fbf2986L,0x0ca6650b6fcc57eL, + 0x15aabc2c80a693cL,0x0a24ef1563f4f8eL,0x3a917c4d7214228L, + 0x036dbed8f62fd91L,0x040efcb248e80a0L,0x18a4a9ca4c01a4dL } }, + /* 224 */ + { { 0x23fb7985448e339L,0x1dc33c628e65d8aL,0x174d7a69170cde8L, + 0x164ad819eb04581L,0x0848138ab4bb05cL,0x24279e537834b6cL, + 0x0315f7149dab924L,0x289620e8cdad9e4L,0x13ccd9074d9a335L }, + { 0x039c5e0ac1b784dL,0x17231bb949eb87aL,0x2146a1c88ec0ab6L, + 0x2411b06fd634f21L,0x33fda502a2201f7L,0x096e4195c73b189L, + 0x16dfcdff3f88eb2L,0x29731b07c326315L,0x0acaa3222aa484fL } }, + /* 225 */ + { { 0x3e74bc3c9b4dfd6L,0x2a014fe39d8a4c5L,0x1c059d8c352025bL, + 0x332e16882d00c1fL,0x2238713591c9036L,0x2a57ed3bcb18fc2L, + 0x10c6c61a99d9d8cL,0x259a0f5f13ce661L,0x169162969c96829L }, + { 0x113c267cb63ee53L,0x04b985d7ab0d4dfL,0x1a11191abfca67bL, + 0x277b86bda7eccdaL,0x011dc11e75ad064L,0x2e7e5d9535e9bc0L, + 0x2b133280f030b8dL,0x3318a8800068fc2L,0x194e17c98d239d8L } }, + /* 226 */ + { { 
0x20d80b41d8fe898L,0x28a2dcc86114d1cL,0x038504f217408d7L, + 0x35459aa9abfc7cfL,0x0cc560e355d381cL,0x39878b367379821L, + 0x34951acb041f0a5L,0x2b0b188445bd766L,0x0c4509e16d37ee2L }, + { 0x02a20c42c6fd79eL,0x1fb938ebde2c3aeL,0x23c1bad819ca95bL, + 0x37a615495a4f66dL,0x2f9c19d0f10d674L,0x1f179aa45f7992cL, + 0x22db6fa03fabaf4L,0x3463a162f12b4b3L,0x0c976c2380a1fc9L } }, + /* 227 */ + { { 0x1171ef8b064f114L,0x2c55953cbc3d324L,0x185457b262b783cL, + 0x0043cd24db0c149L,0x299a41fed468c67L,0x1fdfdaa7bc9b4bfL, + 0x1bfc1bf6da2267aL,0x3b500958ee36e80L,0x00e14b36c85c340L }, + { 0x257e26425db67e6L,0x3d3a25fcba417d7L,0x2514026c426885dL, + 0x188fa1d424de0cbL,0x03c538691312be2L,0x15cd3e7615ad6f6L, + 0x2a48615b1cae559L,0x2ed61681eff8b56L,0x1d07a4c96f0ce8aL } }, + /* 228 */ + { { 0x3f54d05523aa2e9L,0x107833b4f42181eL,0x36e27f9bfb69c88L, + 0x11058af7e155a0fL,0x107b0dcc9dcb07fL,0x15e94db98b45e0eL, + 0x347d3ca2cbb8ab6L,0x18dc262e68349f3L,0x1f2ff154d685eeaL }, + { 0x28b768a56b232acL,0x35b8d8fca94aad5L,0x3a168837fc604e8L, + 0x20f4429da46eba1L,0x0f9455fbeebc58aL,0x359538bab5792bcL, + 0x3c82551a20d6c37L,0x2e4c63103f2e769L,0x0b26d7b3cd760d9L } }, + /* 229 */ + { { 0x3090c3ebb2eaf45L,0x1364718bfee4bdeL,0x3ea4a736f23ded2L, + 0x2f5bfc3f78efca8L,0x1ca1102f5b5b99eL,0x1f80caa2f28ad57L, + 0x3f17a8f6203cd80L,0x156c55042d122a2L,0x109b86660a7e1dcL }, + { 0x148b1da02a2fbd8L,0x217a2cec8ba296cL,0x20e48712b509fedL, + 0x1231a8f94584de2L,0x01633b503685601L,0x15449c45c402487L, + 0x131047939251432L,0x382eded24c7481fL,0x0ea623e722b8542L } }, + /* 230 */ + { { 0x04823d324972688L,0x20f04800fd5d882L,0x26906d0d452858bL, + 0x210b1bdd1f86535L,0x10146d89a842195L,0x1146ef0b23e28baL, + 0x3284fa29ec1de77L,0x3913fd88adae3dfL,0x06083f1dbe97b71L }, + { 0x1649333999dd56bL,0x2b02ea5e91f7a66L,0x18aebbe8fb202cfL, + 0x363d875ef299983L,0x185adc14d47c29dL,0x3e7f5071bd7ed47L, + 0x113e6ce65ac7884L,0x274f8739a7753fdL,0x0231ace591effe5L } }, + /* 231 */ + { { 0x267a438a9fda771L,0x3d94b2198c4038bL,0x1e48e133f23b626L, + 
0x3c80d74b47f7ec6L,0x28d13e878599f88L,0x2d47381c5c8e844L, + 0x19ba82890aa292fL,0x052d397ce9c3aefL,0x155dde826733745L }, + { 0x0b2b77ed6f59a95L,0x214f8c080810802L,0x2ac1ebac779793fL, + 0x266d5ad99d94894L,0x19722a5006ecdcbL,0x138aeb412af6e7eL, + 0x34dd4d26210f3f0L,0x2e034329683fcc0L,0x041333d8080dac0L } }, + /* 232 */ + { { 0x051070935a85a06L,0x19b9d90bbc6d13aL,0x0b71a07b3a6d4e1L, + 0x000c0ca79aa12a4L,0x13d555259d6dd6cL,0x3e2b41788312e99L, + 0x34cccdee3b26af6L,0x19090838f5504aaL,0x1bd79798934a940L }, + { 0x2a1d1848e0c7ff0L,0x217bf2550ecd03cL,0x31aef51d318bbaeL, + 0x139d61e3e9ba590L,0x3c2895f52e5d3edL,0x3c4419f134a8a76L, + 0x3f4ee53af278771L,0x1d369b337a59279L,0x19235188da1a56dL } }, + /* 233 */ + { { 0x083212003d310edL,0x3ba33261ec0c46cL,0x1d2684c558a8d20L, + 0x33adc59fb227952L,0x04bf55bb55e25f3L,0x1872405eb3c453dL, + 0x3343c0819edc770L,0x2d7b5d669139b7aL,0x07858df9f7e04c9L }, + { 0x3a47ebb3bf13222L,0x147737a81f68453L,0x3ac3c0d8242f1e4L, + 0x134dbae1c786fa7L,0x2bea3190d93257dL,0x3af8accfd279dd6L, + 0x110096406d191f4L,0x2b1e19eab14f030L,0x1f45215cf8bd381L } }, + /* 234 */ + { { 0x07e8a8efa493b79L,0x389c2d3ac70ab0eL,0x3fa09ff22320b20L, + 0x2baa470e4f67ce4L,0x2138a8d965ee1baL,0x1ef543937b6a586L, + 0x23c8e069ab238c9L,0x1305bfda352288dL,0x158af8e00e5ce4cL }, + { 0x0cdcf06cfc509a1L,0x1047bf09b301d5bL,0x1fd64d9c57f060fL, + 0x14ccba672b1b433L,0x18b8e9510a95148L,0x04370ff563e6acfL, + 0x2f3509a7e98709bL,0x04b1e0e4210f5d7L,0x1b628ccc9d05a93L } }, + /* 235 */ + { { 0x1934f00e341463fL,0x229b3854369e807L,0x20fc4109553f14cL, + 0x16aa4fd2a476d21L,0x32cd58067c23bdeL,0x10cf72027d1f1e1L, + 0x232a7d1d3300548L,0x176a4302f9fe5d6L,0x12e08b777d588c7L }, + { 0x3c1281761a10d37L,0x2d86057143d6977L,0x15db79477c60ed7L, + 0x1dccf14c42ca2beL,0x053118267a0aa2bL,0x2d06567e417eaaeL, + 0x337784f40e98166L,0x1ab32732d09485aL,0x0c56835d77c6986L } }, + /* 236 */ + { { 0x1d714cb2b450a66L,0x222171f6ff7053aL,0x0d85b466a0c0131L, + 0x2656f7f0699956aL,0x0e67792d102a21eL,0x15429e5de835f26L, + 
0x34d3372a01bb57bL,0x352550b1188cd75L,0x08b7be4e1c088daL }, + { 0x073b03f95812273L,0x1bb4cbb8fdd5fc6L,0x0eae6da6217a2e2L, + 0x1d098767d3cb1c4L,0x1b7c1da2d9b50b5L,0x12a1779d0e5c7eaL, + 0x22137b22c4fb87cL,0x0649bdcb0d147b0L,0x1731345668c77baL } }, + /* 237 */ + { { 0x23e8c7a8a3ba183L,0x33aeeff8e27e9cfL,0x06870f9ba60f4e8L, + 0x0d72d806a0e3a91L,0x212e52db455176eL,0x3dc4afc7e42f709L, + 0x2054cd95f9e4598L,0x3502e6f4c803efaL,0x17a2cf19bf6dd5fL }, + { 0x1cf6ca266736febL,0x21bd2779f3f8bdcL,0x3ce8fc290563bdeL, + 0x339c9adb93f182aL,0x13f29235baae8a3L,0x143fe97b48e0911L, + 0x3ef744a4b557f56L,0x1b74a8514f95044L,0x1b07c676a533e42L } }, + /* 238 */ + { { 0x1e603f235d96872L,0x288f30fe96e32bdL,0x071be988dc5fab1L, + 0x22750c302f55166L,0x0764d9cc3e32e84L,0x0b031035fb09a78L, + 0x3b83b4f7238212fL,0x29044b651860e21L,0x010281fa6712f18L }, + { 0x028048f64858b37L,0x0526bcd5f797660L,0x0791619ebb18e0eL, + 0x2ce7cac2e82c886L,0x21039cbae210200L,0x255e74756a1fab9L, + 0x08515e4efdcddb3L,0x1e2a86ce23aa89eL,0x02c1a552c3cc818L } }, + /* 239 */ + { { 0x2c7f5000ea723dcL,0x3c13d10ac548c5eL,0x1445be885c860a5L, + 0x0fffc465c098f52L,0x0c4c58cea61f999L,0x273580db0fee917L, + 0x3923bbe6d151e6bL,0x3f519d68eac555eL,0x1474ec07c52ceb2L }, + { 0x06a3d32ed88239dL,0x2e2b9a0d6b9a531L,0x23259feeb2e70d1L, + 0x0710ef02ed7d3f7L,0x38f62a705223bf7L,0x3f9e6694f34882dL, + 0x2b7f932224860e9L,0x2562f61561c0c92L,0x10f8e0f7330b594L } }, + /* 240 */ + { { 0x335c7bb3c67d520L,0x12562c8ff2a7b2bL,0x31948bbaa808d8fL, + 0x33884d7a2b81de3L,0x1c888eff7418c30L,0x1cc512af376366aL, + 0x06a53472075df0fL,0x1ff16d527225514L,0x11c4ef389795fbbL }, + { 0x3e2c9ac43f5e698L,0x1ff2f38e2978e8fL,0x090e3089c2e1ce7L, + 0x3feb0756005b417L,0x0381b9d2a5a74f3L,0x17ce582ebbb6888L, + 0x37abbed958b143fL,0x2dc6197ff414436L,0x0ce8e97e6807a05L } }, + /* 241 */ + { { 0x251e61b8ce86a83L,0x10567efdf9c5808L,0x3dd748f03377860L, + 0x0dd1414890bf049L,0x0934ea09b87cb2cL,0x119e5106f52543dL, + 0x3a416a5146c403cL,0x23ac7a2b51c408eL,0x1b389b81a60af63L }, + { 
0x299934ee8150c69L,0x1d642389f052f39L,0x28916a0194ff74fL, + 0x0c86f546dd97702L,0x21877963038f49dL,0x34ed29a1af0cc17L, + 0x0af189fe2f3fbffL,0x0426c5026cddf5fL,0x1b3029ea13b9b8fL } }, + /* 242 */ + { { 0x37938d225a2fd88L,0x3cbdf33ae8180fbL,0x1c80d7a6dff4890L, + 0x0d8a20fe61930f8L,0x2998e530500c78fL,0x097771cfb64ad64L, + 0x13708a018a8f1b3L,0x0a2edb7ff661f57L,0x059dcd3554f0d1fL }, + { 0x3c6e41d23a74e7dL,0x187af976ccb7d85L,0x3fa79e7ffa0b94bL, + 0x2dcbaede834f0bfL,0x201adf9c3473662L,0x119e4992a19057bL, + 0x209c571502c3265L,0x242185a444d24beL,0x195897f34aa2474L } }, + /* 243 */ + { { 0x045d359abadc253L,0x12e4b31e5f25792L,0x35bd9a218212e05L, + 0x17a94ae209c8aa6L,0x22e61c6769bb80aL,0x22c3e2cfa8e39e3L, + 0x1d854cfb274b1a0L,0x0b5cedaa90b8f6eL,0x1638ba225235601L }, + { 0x0ec0e6f75c8c576L,0x0839f392f1f749fL,0x20c869d80726abbL, + 0x1aa2808fadc2562L,0x276110b15a908c6L,0x21bd869b2a7d43aL, + 0x0a69d8668c99941L,0x2843e777c8bb4a8L,0x1e0bfee1897bbf8L } }, + /* 244 */ + { { 0x2d8681848319e4fL,0x1bdad56961be809L,0x1886267132656beL, + 0x316614a73eafbd7L,0x162b29cfbac252aL,0x0a98d6379f3117cL, + 0x00ac70ee050609aL,0x2c7c3df2e7290a5L,0x1adfb44aaeca885L }, + { 0x2b7a936e798678eL,0x07840e655010e19L,0x1e37816860b7ca0L, + 0x20edd17615fc924L,0x0a4705ed6eeffd4L,0x0a9743dd76ecd8aL, + 0x09fee357d68d49bL,0x35a1b46a14a688eL,0x1addbbc25491a7fL } }, + /* 245 */ + { { 0x10cba20969686a3L,0x2c71578f014fd78L,0x313426f47102308L, + 0x2c5240cc0e05c4aL,0x32d01527b1f9165L,0x2a68d38916dc805L, + 0x3e35c86fcf6647aL,0x38e0947d52e52c2L,0x0e3fccb22a55a15L }, + { 0x271e4ec5b4dc0beL,0x0d89236c735712aL,0x3f43046e1007bb1L, + 0x35f6a72668fcdafL,0x28349bc505a6806L,0x04f8214272ff1bbL, + 0x3448c126871e73eL,0x2ebe579aa889d9fL,0x1b9ba77787c2da7L } }, + /* 246 */ + { { 0x2be58eec5a73375L,0x37da75ea2b10e06L,0x150aceca835a175L, + 0x027d41f4c3cb3ccL,0x3c60b0424b87b06L,0x043e43b26b94e8aL, + 0x1689bb4931e1824L,0x06a3914b1f43eb7L,0x013ab4534914763L }, + { 0x32dd8568c84f3afL,0x3702486eab8cfabL,0x2a858b96b070829L, + 
0x103a2a094591950L,0x05c35322b42260dL,0x27b6834ae797b6bL, + 0x22b90abca795603L,0x14c0a1af41f1ae5L,0x10a2e22dac7b1ecL } }, + /* 247 */ + { { 0x25fc09d239d8f0aL,0x0b80f2ae2840859L,0x17680173477b92bL, + 0x27e38d8581390daL,0x19eb061beab38edL,0x3a1159c1e6c0247L, + 0x21a2e0cd4226543L,0x00c3e83ddfb1cbfL,0x0931d242162760aL }, + { 0x29f834cf8646bc3L,0x25294902ba5be7eL,0x3890379177d17dfL, + 0x113ffad9b364070L,0x077b924659dfd06L,0x3660753e06bb0bbL, + 0x37b0932df3b7f2cL,0x2762f26f0fda7cdL,0x125daef34f3dd85L } }, + /* 248 */ + { { 0x008451ba2c123bcL,0x20e9a02063e952bL,0x170298957b8ad1eL, + 0x0d3c3c4bc595b75L,0x30a9fa14dcc7f2eL,0x0bf9e0b07daa70cL, + 0x1f54ddefc9a2bbbL,0x0294f4c671a5dc2L,0x1dc0b8238cbd646L }, + { 0x249290144dfb6f6L,0x35f2d1b900749bdL,0x240e48537ad8270L, + 0x2d5c3636f6469c2L,0x2f170d58b84d661L,0x0d13874b289c88eL, + 0x1de1faeeb4cf146L,0x17a9c8957f256aeL,0x1f8cd6e110adbdcL } }, + /* 249 */ + { { 0x257c6b978b8a0a7L,0x12badba0cfb7a8aL,0x17c14bd13fe724bL, + 0x223f0ba3b927918L,0x1fb147eefc41868L,0x3998b3ee34e6292L, + 0x0ba2ece9f191f12L,0x35435861c8a2656L,0x02dbd6d0f1b00b8L }, + { 0x15cfdfe24c93cc9L,0x35de02e79c639e2L,0x3a5838baf7eb29eL, + 0x1f93772fda40722L,0x3a180d6bb022538L,0x251f1f0992c942fL, + 0x23f3cd6d68e548cL,0x0f34a0a9ed8ca64L,0x00fb8f036132d10L } }, + /* 250 */ + { { 0x198b3f08cd9d494L,0x0196e653d3e7ce0L,0x22203c738fa99b2L, + 0x0536348a72dd992L,0x0c51c54b3199f4cL,0x084e8ccb76b5d71L, + 0x0c7b2f9a32ce0bdL,0x3c82bce88421622L,0x0d16defa3625b1fL }, + { 0x0e0054819a296ebL,0x13fc5746a44c4d1L,0x2d2bfeaa454f1d9L, + 0x00d3502f5ff5f7aL,0x21801a4afae65a8L,0x178379dd813c51fL, + 0x172ca0983048f9aL,0x3445e8ec67297fdL,0x0e0a237dba71821L } }, + /* 251 */ + { { 0x1babf8491630ee8L,0x16270817ad4c07bL,0x2b2da051f47bde6L, + 0x25884aefa067df4L,0x294292124aeaa9fL,0x110d796f73b4f57L, + 0x11f66f691f5b89fL,0x3c368658130ce50L,0x0e6b7fc09ca4356L }, + { 0x294e413f74f811cL,0x0b60c77e36376c4L,0x3217963418c91a4L, + 0x06223af37b09fd5L,0x2ea305bc95fde52L,0x319a2d87f75781bL, + 
0x011861ed1e6088aL,0x33af0ccebc05baeL,0x1c95ecb192d15ddL } }, + /* 252 */ + { { 0x27b37a3e0bde442L,0x10ffa19bde9cfa4L,0x1d208ed10c2ee05L, + 0x1069985e8cb4c36L,0x0d1d5cf8baf79c3L,0x0eaf3e2f9cd9e1cL, + 0x2b5e7b02d0dce9eL,0x1c317f88f4b75dcL,0x10b29fceea01ffcL }, + { 0x1bcae4d62d803ffL,0x3a44ff6f0c1aa4cL,0x27abd8c1066293eL, + 0x0ab9e9b5962bc77L,0x2102f4e06d48578L,0x0dbebf9a449964bL, + 0x37121391a3127f1L,0x058d11ae4d10220L,0x0ba53bb4380a31eL } }, + /* 253 */ + { { 0x2e517fcca5636b0L,0x1b8085aae8571d8L,0x3d7c212e7b2d429L, + 0x1b55c5eb6116aa3L,0x398b2f3579517ceL,0x3d66c1f39d8ae16L, + 0x3ef6f042f996b5dL,0x2d227cdccaaefcdL,0x15da5d145ea4542L }, + { 0x277c55eaa7f6e3fL,0x36669ea92816f07L,0x3d77458282273f4L, + 0x3eddedd23ee95b5L,0x20629f5d1db0895L,0x16600fec7121333L, + 0x20b8d0f5b1c90a3L,0x04fc90eb13ca45cL,0x0e98c10bfe872acL } }, + /* 254 */ + { { 0x11c4785c06c4fd6L,0x2e40974970ae767L,0x1eb1d4982f24bf4L, + 0x30ae93fbcac104dL,0x398de07ab3ab3edL,0x25bd2df556948e7L, + 0x04c815d5fc49ab0L,0x1acaf1428a580e1L,0x047db1148d01567L }, + { 0x09f9cc510f3bad9L,0x2223f008a407531L,0x15ebc47b44df490L, + 0x31bce7cada245e9L,0x304e9962a20b2ebL,0x1cf756dc31638ebL, + 0x29f76c52ab7c1b5L,0x328ecad52b75a8cL,0x10859dad1eb82f4L } }, + /* 255 */ + { { 0x22c4128a182d1adL,0x05e5b88245b1159L,0x0272ba681647775L, + 0x3eae4b217069dc1L,0x3aefb2e07fac8b0L,0x2186ccb481eacb7L, + 0x2ed145c73530a07L,0x292758f6fb59622L,0x0bd547bcdca0a53L }, + { 0x3c1382f87056b51L,0x247b6c4c3e644a9L,0x1e46d3805b42c3dL, + 0x3aff4c6a657df1fL,0x0cd3fb8aa456101L,0x3ac5ef387bf48adL, + 0x2c0c32fe391df79L,0x3bbd2d353031985L,0x11219f023be711bL } }, +}; + +/* Multiply the base point of P521 by the scalar and return the result. + * If map is true then convert result to affine coordinates. + * + * Stripe implementation. + * Pre-generated: 2^0, 2^65, ... + * Pre-generated: products of all combinations of above. + * 8 doubles and adds (with qz=1) + * + * r Resulting point. + * k Scalar to multiply by. 
 * map Indicates whether to convert result to affine.
 * ct Constant time required.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
static int sp_521_ecc_mulmod_base_9(sp_point_521* r, const sp_digit* k,
        int map, int ct, void* heap)
{
    /* Fixed-base multiplication: delegate to the stripe implementation
     * using the pre-generated p521_table of base-point multiples. */
    return sp_521_ecc_mulmod_stripe_9(r, &p521_base, p521_table,
                                      k, map, ct, heap);
}

#endif

/* Multiply the base point of P521 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * km Scalar to multiply by.
 * r Resulting point.
 * map Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_mulmod_base_521(const mp_int* km, ecc_point* r, int map, void* heap)
{
#ifdef WOLFSSL_SP_SMALL_STACK
    sp_point_521* point = NULL;
    sp_digit* k = NULL;
#else
    sp_point_521 point[1];
    sp_digit k[9];
#endif
    int err = MP_OKAY;

#ifdef WOLFSSL_SP_SMALL_STACK
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap,
        DYNAMIC_TYPE_ECC);
    if (point == NULL)
        err = MEMORY_E;
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* Convert the scalar from mp_int into 9 x 58-bit digits. */
        sp_521_from_mp(k, 9, km);

        /* ct=1: constant-time scalar multiply of the base point. */
        err = sp_521_ecc_mulmod_base_9(point, k, map, 1, heap);
    }
    if (err == MP_OKAY) {
        err = sp_521_point_to_ecc_point_9(point, r);
    }

#ifdef WOLFSSL_SP_SMALL_STACK
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point != NULL)
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
#endif

    return err;
}

/* Multiply the base point of P521 by the scalar, add point a and return
 * the result. If map is true then convert result to affine coordinates.
 *
 * km Scalar to multiply by.
 * am Point to add to scalar multiply result.
 * inMont Point to add is in montgomery form.
 * r Resulting point.
 * map Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am,
        int inMont, ecc_point* r, int map, void* heap)
{
#ifdef WOLFSSL_SP_SMALL_STACK
    sp_point_521* point = NULL;
    sp_digit* k = NULL;
#else
    /* point[0] = k.G result, point[1] = point to add (addP). */
    sp_point_521 point[2];
    /* k[0..8] = scalar, remainder = temporaries for add/map. */
    sp_digit k[9 + 9 * 2 * 6];
#endif
    sp_point_521* addP = NULL;
    sp_digit* tmp = NULL;
    int err = MP_OKAY;

#ifdef WOLFSSL_SP_SMALL_STACK
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap,
        DYNAMIC_TYPE_ECC);
    if (point == NULL)
        err = MEMORY_E;
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(
            sizeof(sp_digit) * (9 + 9 * 2 * 6),
            heap, DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        addP = point + 1;
        tmp = k + 9;

        sp_521_from_mp(k, 9, km);
        sp_521_point_from_ecc_point_9(addP, am);
    }
    /* If the point to add is not already in Montgomery form, convert
     * each ordinate. */
    if ((err == MP_OKAY) && (!inMont)) {
        err = sp_521_mod_mul_norm_9(addP->x, addP->x, p521_mod);
    }
    if ((err == MP_OKAY) && (!inMont)) {
        err = sp_521_mod_mul_norm_9(addP->y, addP->y, p521_mod);
    }
    if ((err == MP_OKAY) && (!inMont)) {
        err = sp_521_mod_mul_norm_9(addP->z, addP->z, p521_mod);
    }
    if (err == MP_OKAY) {
        /* map=0, ct=0: keep projective form for the following add. */
        err = sp_521_ecc_mulmod_base_9(point, k, 0, 0, heap);
    }
    if (err == MP_OKAY) {
        sp_521_proj_point_add_9(point, point, addP, tmp);

        if (map) {
            sp_521_map_9(point, point, tmp);
        }

        err = sp_521_point_to_ecc_point_9(point, r);
    }

#ifdef WOLFSSL_SP_SMALL_STACK
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point)
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
#endif

    return err;
}

#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
    defined(HAVE_ECC_VERIFY)
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */
/* Add 1 to a. (a = a + 1)
 *
 * r A single precision integer.
 * a A single precision integer.
+ */ +SP_NOINLINE static void sp_521_add_one_9(sp_digit* a) +{ + a[0]++; + sp_521_norm_9(a); +} + +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i; + int j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 50U) { + r[j] &= 0x3ffffffffffffffL; + s = 58U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Generates a scalar that is in the range 1..order-1. + * + * rng Random number generator. + * k Scalar value. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. + */ +static int sp_521_ecc_gen_k_9(WC_RNG* rng, sp_digit* k) +{ + int err; + byte buf[66]; + + do { + err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf)); + if (err == 0) { + buf[0] &= 0x1; + sp_521_from_bin(k, 9, buf, (int)sizeof(buf)); + if (sp_521_cmp_9(k, p521_order2) <= 0) { + sp_521_add_one_9(k); + break; + } + } + } + while (err == 0); + + return err; +} + +/* Makes a random EC key pair. + * + * rng Random number generator. + * priv Generated private value. + * pub Generated public point. + * heap Heap to use for allocation. + * returns ECC_INF_E when the point does not have the correct order, RNG + * failures, MEMORY_E when memory allocation fails and MP_OKAY on success. 
 */
int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
{
#ifdef WOLFSSL_SP_SMALL_STACK
    sp_point_521* point = NULL;
    sp_digit* k = NULL;
#else
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    /* point[1] holds the order-validation result (should be infinity). */
    sp_point_521 point[2];
    #else
    sp_point_521 point[1];
    #endif
    sp_digit k[9];
#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521* infinity = NULL;
#endif
    int err = MP_OKAY;


    (void)heap;

#ifdef WOLFSSL_SP_SMALL_STACK
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, DYNAMIC_TYPE_ECC);
    #else
    point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, DYNAMIC_TYPE_ECC);
    #endif
    if (point == NULL)
        err = MEMORY_E;
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
        infinity = point + 1;
    #endif

        /* Draw private scalar k in [1, order-1]. */
        err = sp_521_ecc_gen_k_9(rng, k);
    }
    if (err == MP_OKAY) {
        /* Public key: pub = k.G (constant time, affine output). */
        err = sp_521_ecc_mulmod_base_9(point, k, 1, 1, NULL);
    }

#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    if (err == MP_OKAY) {
        /* order.pub must be the point at infinity for a valid key. */
        err = sp_521_ecc_mulmod_9(infinity, point, p521_order, 1, 1, NULL);
    }
    if (err == MP_OKAY) {
        if (sp_521_iszero_9(point->x) || sp_521_iszero_9(point->y)) {
            err = ECC_INF_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        err = sp_521_to_mp(k, priv);
    }
    if (err == MP_OKAY) {
        err = sp_521_point_to_ecc_point_9(point, pub);
    }

#ifdef WOLFSSL_SP_SMALL_STACK
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point != NULL) {
        /* point is not sensitive, so no need to zeroize */
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}

#ifdef WOLFSSL_SP_NONBLOCK
/* State for the non-blocking key generation state machine. */
typedef struct sp_ecc_key_gen_521_ctx {
    int state;
    sp_521_ecc_mulmod_9_ctx mulmod_ctx;
    sp_digit k[9];
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521 point[2];
#else
    sp_point_521 point[1];
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */
} sp_ecc_key_gen_521_ctx;

/* Non-blocking version of sp_ecc_make_key_521: returns FP_WOULDBLOCK
 * until the multi-step computation completes. */
int sp_ecc_make_key_521_nb(sp_ecc_ctx_t* sp_ctx, WC_RNG* rng, mp_int* priv,
    ecc_point* pub, void* heap)
{
    int err = FP_WOULDBLOCK;
    sp_ecc_key_gen_521_ctx* ctx = (sp_ecc_key_gen_521_ctx*)sp_ctx->data;
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_521* infinity = ctx->point + 1;
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */

    /* Compile-time check that the private context fits in sp_ctx->data. */
    typedef char ctx_size_test[sizeof(sp_ecc_key_gen_521_ctx)
        >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        /* Generate private scalar. */
        err = sp_521_ecc_gen_k_9(rng, ctx->k);
        if (err == MP_OKAY) {
            err = FP_WOULDBLOCK;
            ctx->state = 1;
        }
        break;
    case 1:
        /* Compute pub = k.G incrementally. */
        err = sp_521_ecc_mulmod_base_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
            ctx->point, ctx->k, 1, 1, heap);
        if (err == MP_OKAY) {
            err = FP_WOULDBLOCK;
        #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
            /* Reset shared mulmod context before the validation step. */
            XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
            ctx->state = 2;
        #else
            ctx->state = 3;
        #endif
        }
        break;
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    case 2:
        /* Validate: order.pub must be infinity. */
        err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
            infinity, ctx->point, p521_order, 1, 1);
        if (err == MP_OKAY) {
            if (sp_521_iszero_9(ctx->point->x) ||
                sp_521_iszero_9(ctx->point->y)) {
                err = ECC_INF_E;
            }
            else {
                err = FP_WOULDBLOCK;
                ctx->state = 3;
            }
        }
        break;
    #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN */
    case 3:
        /* Export results. */
        err = sp_521_to_mp(ctx->k, priv);
        if (err == MP_OKAY) {
            err = sp_521_point_to_ecc_point_9(ctx->point, pub);
        }
        break;
    }

    if (err != FP_WOULDBLOCK) {
        /* Done (or failed): wipe the private scalar and state. */
        XMEMSET(ctx, 0, sizeof(sp_ecc_key_gen_521_ctx));
    }

    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */

#ifdef HAVE_ECC_DHE
/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 66
 *
 * r A single precision integer.
 * a Byte array.
+ */ +static void sp_521_to_bin_9(sp_digit* r, byte* a) +{ + int i; + int j; + int s = 0; + int b; + + for (i=0; i<8; i++) { + r[i+1] += r[i] >> 58; + r[i] &= 0x3ffffffffffffffL; + } + j = 528 / 8 - 1; + a[j] = 0; + for (i=0; i<9 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); /*lint !e9033*/ + b += 8 - s; + if (j < 0) { + break; + } + while (b < 58) { + a[j--] = (byte)(r[i] >> b); + b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 58); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +/* Multiply the point by the scalar and serialize the X ordinate. + * The number is 0 padded to maximum size on output. + * + * priv Scalar to multiply the point by. + * pub Point to multiply. + * out Buffer to hold X ordinate. + * outLen On entry, size of the buffer in bytes. + * On exit, length of data in buffer in bytes. + * heap Heap to use for allocation. + * returns BUFFER_E if the buffer is to small for output size, + * MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_secret_gen_521(const mp_int* priv, const ecc_point* pub, byte* out, + word32* outLen, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* point = NULL; + sp_digit* k = NULL; +#else + sp_point_521 point[1]; + sp_digit k[9]; +#endif + int err = MP_OKAY; + + if (*outLen < 65U) { + err = BUFFER_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (k == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(k, 9, priv); + sp_521_point_from_ecc_point_9(point, pub); + err = sp_521_ecc_mulmod_9(point, point, k, 1, 1, heap); + } + if (err == MP_OKAY) { + sp_521_to_bin_9(point->x, out); + *outLen = 66; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (k != NULL) + XFREE(k, heap, DYNAMIC_TYPE_ECC); + if (point != NULL) + XFREE(point, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sec_gen_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + }; + sp_digit k[9]; + sp_point_521 point; +} sp_ecc_sec_gen_521_ctx; + +int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, + const ecc_point* pub, byte* out, word32* outLen, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sec_gen_521_ctx* ctx = (sp_ecc_sec_gen_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sec_gen_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + if (*outLen < 32U) { + err = BUFFER_E; + } + + switch (ctx->state) { + case 0: + sp_521_from_mp(ctx->k, 9, priv); + sp_521_point_from_ecc_point_9(&ctx->point, pub); + ctx->state = 1; + break; + case 1: + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &ctx->point, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + sp_521_to_bin_9(ctx->point.x, out); + *outLen = 66; + } + break; + } + + if (err == MP_OKAY && ctx->state != 1) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx, 0, sizeof(sp_ecc_sec_gen_521_ctx)); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_DHE */ + +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +SP_NOINLINE static void sp_521_rshift_9(sp_digit* r, const sp_digit* a, + byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<8; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (58 - n))) & 0x3ffffffffffffffL; + } +#else + for (i=0; i<8; i += 8) { + r[i+0] = (a[i+0] >> n) | ((a[i+1] << (58 - n)) & 0x3ffffffffffffffL); + r[i+1] = (a[i+1] >> n) | ((a[i+2] << (58 - n)) & 0x3ffffffffffffffL); + r[i+2] = (a[i+2] >> n) | ((a[i+3] << (58 - n)) & 0x3ffffffffffffffL); + r[i+3] = (a[i+3] >> n) | ((a[i+4] << (58 - n)) & 0x3ffffffffffffffL); + r[i+4] = (a[i+4] >> n) | ((a[i+5] << (58 - n)) & 0x3ffffffffffffffL); + r[i+5] = (a[i+5] >> n) | ((a[i+6] << (58 - n)) & 0x3ffffffffffffffL); + r[i+6] = (a[i+6] >> n) | ((a[i+7] << (58 - n)) & 0x3ffffffffffffffL); + r[i+7] = (a[i+7] >> n) | ((a[i+8] << (58 - n)) & 0x3ffffffffffffffL); + } +#endif /* WOLFSSL_SP_SMALL */ + r[8] = a[8] >> n; +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
 */
SP_NOINLINE static void sp_521_mul_d_9(sp_digit* r, const sp_digit* a,
    sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    sp_int128 tb = b;
    sp_int128 t = 0;
    int i;

    /* Schoolbook multiply by a single digit, carrying 58 bits at a time.
     * Result occupies 10 digits (r[0..9]). */
    for (i = 0; i < 9; i++) {
        t += tb * a[i];
        r[i] = (sp_digit)(t & 0x3ffffffffffffffL);
        t >>= 58;
    }
    r[9] = (sp_digit)t;
#else
    sp_int128 tb = b;
    sp_int128 t[9];

    /* Unrolled: compute all partial products, then resolve one level of
     * carries (each t[i] >> 58 folds into the next digit). */
    t[ 0] = tb * a[ 0];
    t[ 1] = tb * a[ 1];
    t[ 2] = tb * a[ 2];
    t[ 3] = tb * a[ 3];
    t[ 4] = tb * a[ 4];
    t[ 5] = tb * a[ 5];
    t[ 6] = tb * a[ 6];
    t[ 7] = tb * a[ 7];
    t[ 8] = tb * a[ 8];
    r[ 0] = (sp_digit) (t[ 0] & 0x3ffffffffffffffL);
    r[ 1] = (sp_digit)((t[ 0] >> 58) + (t[ 1] & 0x3ffffffffffffffL));
    r[ 2] = (sp_digit)((t[ 1] >> 58) + (t[ 2] & 0x3ffffffffffffffL));
    r[ 3] = (sp_digit)((t[ 2] >> 58) + (t[ 3] & 0x3ffffffffffffffL));
    r[ 4] = (sp_digit)((t[ 3] >> 58) + (t[ 4] & 0x3ffffffffffffffL));
    r[ 5] = (sp_digit)((t[ 4] >> 58) + (t[ 5] & 0x3ffffffffffffffL));
    r[ 6] = (sp_digit)((t[ 5] >> 58) + (t[ 6] & 0x3ffffffffffffffL));
    r[ 7] = (sp_digit)((t[ 6] >> 58) + (t[ 7] & 0x3ffffffffffffffL));
    r[ 8] = (sp_digit)((t[ 7] >> 58) + (t[ 8] & 0x3ffffffffffffffL));
    r[ 9] = (sp_digit) (t[ 8] >> 58);
#endif /* WOLFSSL_SP_SMALL */
}

/* Left shift an 18-digit number a by n bits into r (19 digits).
 * (r = a << n, 0 < n < 58)
 */
SP_NOINLINE static void sp_521_lshift_18(sp_digit* r, const sp_digit* a,
    byte n)
{
#ifdef WOLFSSL_SP_SMALL
    int i;

    r[18] = a[17] >> (58 - n);
    for (i=17; i>0; i--) {
        r[i] = ((a[i] << n) | (a[i-1] >> (58 - n))) & 0x3ffffffffffffffL;
    }
#else
    sp_int_digit s;
    sp_int_digit t;

    s = (sp_int_digit)a[17];
    r[18] = s >> (58U - n);
    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
    r[17] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
    r[16] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
    r[15] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
    r[14] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
    r[13] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
    r[12] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
    r[11] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
    r[10] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
    r[9] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
    r[8] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
    r[7] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
    r[6] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
    r[5] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
    r[4] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
    r[3] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
    r[2] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
    r[1] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
#endif /* WOLFSSL_SP_SMALL */
    r[0] = (a[0] << n) & 0x3ffffffffffffffL;
}

/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * Simplified based on top word of divisor being (1 << 58) - 1
 *
 * a Number to be divided.
 * d Number to divide with.
 * m Multiplier result.
 * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_521_div_9(const sp_digit* a, const sp_digit* d, + const sp_digit* m, sp_digit* r) +{ + int i; + sp_digit r1; + sp_digit mask; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[4 * 9 + 3]; +#endif + sp_digit* t2 = NULL; + sp_digit* sd = NULL; + int err = MP_OKAY; + + (void)m; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 9 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (t1 == NULL) + err = MEMORY_E; +#endif + + (void)m; + + if (err == MP_OKAY) { + t2 = t1 + 18 + 1; + sd = t2 + 9 + 1; + + sp_521_mul_d_9(sd, d, (sp_digit)1 << 1); + sp_521_lshift_18(t1, a, 1); + t1[9 + 9] += t1[9 + 9 - 1] >> 58; + t1[9 + 9 - 1] &= 0x3ffffffffffffffL; + for (i=8; i>=0; i--) { + r1 = t1[9 + i]; + sp_521_mul_d_9(t2, sd, r1); + (void)sp_521_sub_9(&t1[i], &t1[i], t2); + t1[9 + i] -= t2[9]; + sp_521_norm_9(&t1[i + 1]); + + mask = ~((t1[9 + i] - 1) >> 63); + sp_521_cond_sub_9(t1 + i, t1 + i, sd, mask); + sp_521_norm_9(&t1[i + 1]); + } + sp_521_norm_9(t1); + sp_521_rshift_9(r, t1, 1); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_521_mod_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_521_div_9(a, m, NULL, r); +} + +#endif +#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +/* Multiply two number mod the order of P521 curve. (r = a * b mod order) + * + * r Result of the multiplication. + * a First operand of the multiplication. + * b Second operand of the multiplication. 
+ */ +static void sp_521_mont_mul_order_9(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_521_mul_9(r, a, b); + sp_521_mont_reduce_order_9(r, p521_order, p521_mp_order); +} + +#if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL)) +#ifdef WOLFSSL_SP_SMALL +/* Order-2 for the P521 curve. */ +static const uint64_t p521_order_minus_2[9] = { + 0xbb6fb71e91386407U,0x3bb5c9b8899c47aeU,0x7fcc0148f709a5d0U, + 0x51868783bf2f966bU,0xfffffffffffffffaU,0xffffffffffffffffU, + 0xffffffffffffffffU,0xffffffffffffffffU,0x00000000000001ffU +}; +#else +/* The low half of the order-2 of the P521 curve. */ +static const uint64_t p521_order_low[5] = { + 0xbb6fb71e91386407U,0x3bb5c9b8899c47aeU,0x7fcc0148f709a5d0U, + 0x51868783bf2f966bU,0xfffffffffffffffaU +}; +#endif /* WOLFSSL_SP_SMALL */ + +/* Square number mod the order of P521 curve. (r = a * a mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_order_9(sp_digit* r, const sp_digit* a) +{ + sp_521_sqr_9(r, a); + sp_521_mont_reduce_order_9(r, p521_order, p521_mp_order); +} + +#ifndef WOLFSSL_SP_SMALL +/* Square number mod the order of P521 curve a number of times. + * (r = a ^ n mod order) + * + * r Result of the squaring. + * a Number to square. + */ +static void sp_521_mont_sqr_n_order_9(sp_digit* r, const sp_digit* a, int n) +{ + int i; + + sp_521_mont_sqr_order_9(r, a); + for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + XMEMCPY(t, a, sizeof(sp_digit) * 9); + ctx->i = 519; + ctx->state = 1; + break; + case 1: + sp_521_mont_sqr_order_9(t, t); + ctx->state = 2; + break; + case 2: + if ((p521_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) { + sp_521_mont_mul_order_9(t, t, a); + } + ctx->i--; + ctx->state = (ctx->i == 0) ? 
3 : 1; + break; + case 3: + XMEMCPY(r, t, sizeof(sp_digit) * 9U); + err = MP_OKAY; + break; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ + +static void sp_521_mont_inv_order_9(sp_digit* r, const sp_digit* a, + sp_digit* td) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* t = td; + int i; + + XMEMCPY(t, a, sizeof(sp_digit) * 9); + for (i=519; i>=0; i--) { + sp_521_mont_sqr_order_9(t, t); + if ((p521_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { + sp_521_mont_mul_order_9(t, t, a); + } + } + XMEMCPY(r, t, sizeof(sp_digit) * 9U); +#else + sp_digit* t = td; + sp_digit* t2 = td + 2 * 9; + sp_digit* t3 = td + 4 * 9; + int i; + + /* t = a^2 */ + sp_521_mont_sqr_order_9(t, a); + /* t = a^3 = t * a */ + sp_521_mont_mul_order_9(t, t, a); + /* t= a^c = t ^ 2 ^ 2 */ + sp_521_mont_sqr_n_order_9(t2, t, 2); + /* t = a^f = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + + /* t3 = a^1e */ + sp_521_mont_sqr_order_9(t3, t); + /* t3 = a^1f = t3 * a */ + sp_521_mont_mul_order_9(t3, t3, a); + + /* t2= a^f0 = t ^ 2 ^ 4 */ + sp_521_mont_sqr_n_order_9(t2, t, 4); + /* t = a^ff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ff00 = t ^ 2 ^ 8 */ + sp_521_mont_sqr_n_order_9(t2, t, 8); + /* t3= a^ffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffff0000 = t ^ 2 ^ 16 */ + sp_521_mont_sqr_n_order_9(t2, t, 16); + /* t = a^ffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + + /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */ + sp_521_mont_sqr_n_order_9(t2, t, 32); + /* t = a^ffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffffffffffffffff0000000000000000 = t ^ 2 ^ 64 */ + sp_521_mont_sqr_n_order_9(t2, t, 64); + /* t = a^ffffffffffffffffffffffffffffffff = t2 * t */ + sp_521_mont_mul_order_9(t, t2, t); + /* t2= a^ffffffffffffffffffffffffffffffff00000000000000000000000000000000 = t ^ 2 ^ 128 */ + sp_521_mont_sqr_n_order_9(t2, t, 128); + /* t = a^ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */ + 
sp_521_mont_mul_order_9(t, t2, t); + + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 */ + sp_521_mont_sqr_n_order_9(t2, t, 5); + /* t2 = a^1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff = t * t3 */ + sp_521_mont_mul_order_9(t2, t2, t3); + + for (i=259; i>=1; i--) { + sp_521_mont_sqr_order_9(t2, t2); + if ((p521_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) { + sp_521_mont_mul_order_9(t2, t2, a); + } + } + sp_521_mont_sqr_order_9(t2, t2); + sp_521_mont_mul_order_9(r, t2, a); +#endif /* WOLFSSL_SP_SMALL */ +} + +#endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */ +#endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ +#ifdef HAVE_ECC_SIGN +#ifndef SP_ECC_MAX_SIG_GEN +#define SP_ECC_MAX_SIG_GEN 64 +#endif + +/* Calculate second signature value S from R, k and private value. + * + * s = (r * x + e) / k + * + * s Signature value. + * r First signature value. + * k Ephemeral private key. + * x Private key as a number. + * e Hash of message as a number. + * tmp Temporary storage for intermediate numbers. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, + sp_digit* x, const sp_digit* e, sp_digit* tmp) +{ + int err; + sp_digit carry; + sp_int64 c; + sp_digit* kInv = k; + + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_9(k, k, p521_norm_order); + err = sp_521_mod_9(k, k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_9(k); + + /* kInv = 1/k mod order */ + sp_521_mont_inv_order_9(kInv, k, tmp); + sp_521_norm_9(kInv); + + /* s = r * x + e */ + sp_521_mul_9(x, x, r); + err = sp_521_mod_9(x, x, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_9(x); + carry = sp_521_add_9(s, e, x); + sp_521_cond_sub_9(s, s, p521_order, 0 - carry); + sp_521_norm_9(s); + c = sp_521_cmp_9(s, p521_order); + sp_521_cond_sub_9(s, s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_9(s, s, kInv); + sp_521_norm_9(s); + } + + return err; +} + +/* Sign the hash using the private key. + * e = [hash, 521 bits] from binary + * r = (k.G)->x mod order + * s = (r * x + e) / k mod order + * The hash is truncated to the first 521 bits. + * + * hash Hash to sign. + * hashLen Length of the hash data. + * rng Random number generator. + * priv Private part of key - scalar. + * rm First part of result as an mp_int. + * sm Sirst part of result as an mp_int. + * heap Heap to use for allocation. + * returns RNG failures, MEMORY_E when memory allocation fails and + * MP_OKAY on success. 
+ */ +int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, + const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* e = NULL; + sp_point_521* point = NULL; +#else + sp_digit e[7 * 2 * 9]; + sp_point_521 point[1]; +#endif + sp_digit* x = NULL; + sp_digit* k = NULL; + sp_digit* r = NULL; + sp_digit* tmp = NULL; + sp_digit* s = NULL; + sp_int64 c; + int err = MP_OKAY; + int i; + + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + point = (sp_point_521*)XMALLOC(sizeof(sp_point_521), heap, + DYNAMIC_TYPE_ECC); + if (point == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 9, heap, + DYNAMIC_TYPE_ECC); + if (e == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + x = e + 2 * 9; + k = e + 4 * 9; + r = e + 6 * 9; + tmp = e + 8 * 9; + s = e; + + if (hashLen > 66U) { + hashLen = 66U; + } + } + + for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) { + /* New random point. */ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_9(rng, k); + } + else { + sp_521_from_mp(k, 9, km); + mp_zero(km); + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_base_9(point, k, 1, 1, heap); + } + + if (err == MP_OKAY) { + /* r = point->x mod order */ + XMEMCPY(r, point->x, sizeof(sp_digit) * 9U); + sp_521_norm_9(r); + c = sp_521_cmp_9(r, p521_order); + sp_521_cond_sub_9(r, r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(r); + + if (!sp_521_iszero_9(r)) { + /* x is modified in calculation of s. */ + sp_521_from_mp(x, 9, priv); + /* s ptr == e ptr, e is modified in calculation of s. */ + sp_521_from_bin(e, 9, hash, (int)hashLen); + + /* Take 521 leftmost bits of hash. */ + if (hashLen == 66U) { + sp_521_rshift_9(e, e, 7); + e[8] |= ((sp_digit)hash[0]) << 49; + } + + err = sp_521_calc_s_9(s, r, k, x, e, tmp); + + /* Check that signature is usable. 
*/ + if ((err == MP_OKAY) && (!sp_521_iszero_9(s))) { + break; + } + } + } +#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + i = 1; +#endif + } + + if (i == 0) { + err = RNG_FAILURE_E; + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(r, rm); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(s, sm); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (e != NULL) +#endif + { + ForceZero(e, sizeof(sp_digit) * 7 * 2 * 9); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(e, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (point != NULL) +#endif + { + ForceZero(point, sizeof(sp_point_521)); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(point, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_sign_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + sp_521_mont_inv_order_9_ctx mont_inv_order_ctx; + }; + sp_digit e[2*9]; + sp_digit x[2*9]; + sp_digit k[2*9]; + sp_digit r[2*9]; + sp_digit tmp[3 * 2*9]; + sp_point_521 point; + sp_digit* s; + sp_digit* kInv; + int i; +} sp_ecc_sign_521_ctx; + +int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, + mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_sign_521_ctx* ctx = (sp_ecc_sign_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_sign_521_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + ctx->s = ctx->e; + ctx->kInv = ctx->k; + + ctx->i = SP_ECC_MAX_SIG_GEN; + ctx->state = 1; + break; + case 1: /* GEN */ + /* New random point. 
*/ + if (km == NULL || mp_iszero(km)) { + err = sp_521_ecc_gen_k_9(rng, ctx->k); + } + else { + sp_521_from_mp(ctx->k, 9, km); + mp_zero(km); + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 2; + break; + case 2: /* MULMOD */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, + &ctx->point, &p521_base, ctx->k, 1, 1, heap); + if (err == MP_OKAY) { + ctx->state = 3; + } + break; + case 3: /* MODORDER */ + { + sp_int64 c; + /* r = point->x mod order */ + XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 9U); + sp_521_norm_9(ctx->r); + c = sp_521_cmp_9(ctx->r, p521_order); + sp_521_cond_sub_9(ctx->r, ctx->r, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(ctx->r); + + if (hashLen > 66U) { + hashLen = 66U; + } + sp_521_from_mp(ctx->x, 9, priv); + sp_521_from_bin(ctx->e, 9, hash, (int)hashLen); + if (hashLen == 66U) { + sp_521_rshift_9(ctx->e, ctx->e, 7); + ctx->e[8] |= ((sp_digit)hash[0]) << 49; + } + ctx->state = 4; + break; + } + case 4: /* KMODORDER */ + /* Conv k to Montgomery form (mod order) */ + sp_521_mul_9(ctx->k, ctx->k, p521_norm_order); + err = sp_521_mod_9(ctx->k, ctx->k, p521_order); + if (err == MP_OKAY) { + sp_521_norm_9(ctx->k); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 5; + } + break; + case 5: /* KINV */ + /* kInv = 1/k mod order */ + err = sp_521_mont_inv_order_9_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp); + if (err == MP_OKAY) { + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 6; + } + break; + case 6: /* KINVNORM */ + sp_521_norm_9(ctx->kInv); + ctx->state = 7; + break; + case 7: /* R */ + /* s = r * x + e */ + sp_521_mul_9(ctx->x, ctx->x, ctx->r); + ctx->state = 8; + break; + case 8: /* S1 */ + err = sp_521_mod_9(ctx->x, ctx->x, p521_order); + if (err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* S2 */ + { + sp_digit carry; + sp_int64 c; + sp_521_norm_9(ctx->x); + carry = 
sp_521_add_9(ctx->s, ctx->e, ctx->x); + sp_521_cond_sub_9(ctx->s, ctx->s, + p521_order, 0 - carry); + sp_521_norm_9(ctx->s); + c = sp_521_cmp_9(ctx->s, p521_order); + sp_521_cond_sub_9(ctx->s, ctx->s, p521_order, + (sp_digit)0 - (sp_digit)(c >= 0)); + sp_521_norm_9(ctx->s); + + /* s = s * k^-1 mod order */ + sp_521_mont_mul_order_9(ctx->s, ctx->s, ctx->kInv); + sp_521_norm_9(ctx->s); + + /* Check that signature is usable. */ + if (sp_521_iszero_9(ctx->s) == 0) { + ctx->state = 10; + break; + } + #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP + ctx->i = 1; + #endif + + /* not usable gen, try again */ + ctx->i--; + if (ctx->i == 0) { + err = RNG_FAILURE_E; + } + ctx->state = 1; + break; + } + case 10: /* RES */ + err = sp_521_to_mp(ctx->r, rm); + if (err == MP_OKAY) { + err = sp_521_to_mp(ctx->s, sm); + } + break; + } + + if (err == MP_OKAY && ctx->state != 10) { + err = FP_WOULDBLOCK; + } + if (err != FP_WOULDBLOCK) { + XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 9U); + XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 9U); + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_SIGN */ + +#ifndef WOLFSSL_SP_SMALL +static const char sp_521_tab64_9[64] = { + 64, 1, 59, 2, 60, 48, 54, 3, + 61, 40, 49, 28, 55, 34, 43, 4, + 62, 52, 38, 41, 50, 19, 29, 21, + 56, 31, 35, 12, 44, 15, 23, 5, + 63, 58, 47, 53, 39, 27, 33, 42, + 51, 37, 18, 20, 30, 11, 14, 22, + 57, 46, 26, 32, 36, 17, 10, 13, + 45, 25, 16, 9, 24, 8, 7, 6}; + +static int sp_521_num_bits_58_9(sp_digit v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return sp_521_tab64_9[((uint64_t)((v - (v >> 1))*0x07EDD5E59A4E28C2)) >> 58]; +} + +static int sp_521_num_bits_9(const sp_digit* a) +{ + int i; + int r = 0; + + for (i = 8; i >= 0; i--) { + if (a[i] != 0) { + r = sp_521_num_bits_58_9(a[i]); + r += i * 58; + 
break; + } + } + + return r; +} + +/* Non-constant time modular inversion. + * + * @param [out] r Resulting number. + * @param [in] a Number to invert. + * @param [in] m Modulus. + * @return MP_OKAY on success. + * @return MEMEORY_E when dynamic memory allocation fails. + */ +static int sp_521_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + int err = MP_OKAY; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u = NULL; +#else + sp_digit u[9 * 4]; +#endif + sp_digit* v = NULL; + sp_digit* b = NULL; + sp_digit* d = NULL; + int ut; + int vt; + +#ifdef WOLFSSL_SP_SMALL_STACK + u = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 4, NULL, + DYNAMIC_TYPE_ECC); + if (u == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + v = u + 9; + b = u + 2 * 9; + d = u + 3 * 9; + + XMEMCPY(u, m, sizeof(sp_digit) * 9); + XMEMCPY(v, a, sizeof(sp_digit) * 9); + + ut = sp_521_num_bits_9(u); + vt = sp_521_num_bits_9(v); + + XMEMSET(b, 0, sizeof(sp_digit) * 9); + if ((v[0] & 1) == 0) { + sp_521_rshift1_9(v, v); + XMEMCPY(d, m, sizeof(sp_digit) * 9); + d[0]++; + sp_521_rshift1_9(d, d); + vt--; + + while ((v[0] & 1) == 0) { + sp_521_rshift1_9(v, v); + if (d[0] & 1) + sp_521_add_9(d, d, m); + sp_521_rshift1_9(d, d); + vt--; + } + } + else { + XMEMSET(d+1, 0, sizeof(sp_digit) * (9 - 1)); + d[0] = 1; + } + + while (ut > 1 && vt > 1) { + if ((ut > vt) || ((ut == vt) && + (sp_521_cmp_9(u, v) >= 0))) { + sp_521_sub_9(u, u, v); + sp_521_norm_9(u); + + sp_521_sub_9(b, b, d); + sp_521_norm_9(b); + if (b[8] < 0) + sp_521_add_9(b, b, m); + sp_521_norm_9(b); + ut = sp_521_num_bits_9(u); + + do { + sp_521_rshift1_9(u, u); + if (b[0] & 1) + sp_521_add_9(b, b, m); + sp_521_rshift1_9(b, b); + ut--; + } + while (ut > 0 && (u[0] & 1) == 0); + } + else { + sp_521_sub_9(v, v, u); + sp_521_norm_9(v); + + sp_521_sub_9(d, d, b); + sp_521_norm_9(d); + if (d[8] < 0) + sp_521_add_9(d, d, m); + sp_521_norm_9(d); + vt = sp_521_num_bits_9(v); + + do { + sp_521_rshift1_9(v, v); + if (d[0] & 1) + 
sp_521_add_9(d, d, m); + sp_521_rshift1_9(d, d); + vt--; + } + while (vt > 0 && (v[0] & 1) == 0); + } + } + + if (ut == 1) + XMEMCPY(r, b, sizeof(sp_digit) * 9); + else + XMEMCPY(r, d, sizeof(sp_digit) * 9); + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (u != NULL) + XFREE(u, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#endif /* WOLFSSL_SP_SMALL */ + +/* Add point p1 into point p2. Handles p1 == p2 and result at infinity. + * + * p1 First point to add and holds result. + * p2 Second point to add. + * tmp Temporary storage for intermediate numbers. + */ +static void sp_521_add_points_9(sp_point_521* p1, const sp_point_521* p2, + sp_digit* tmp) +{ + + sp_521_proj_point_add_9(p1, p1, p2, tmp); + if (sp_521_iszero_9(p1->z)) { + if (sp_521_iszero_9(p1->x) && sp_521_iszero_9(p1->y)) { + sp_521_proj_point_dbl_9(p1, p2, tmp); + } + else { + /* Y ordinate is not used from here - don't set. */ + p1->x[0] = 0; + p1->x[1] = 0; + p1->x[2] = 0; + p1->x[3] = 0; + p1->x[4] = 0; + p1->x[5] = 0; + p1->x[6] = 0; + p1->x[7] = 0; + p1->x[8] = 0; + XMEMCPY(p1->z, p521_norm_mod, sizeof(p521_norm_mod)); + } + } +} + +/* Calculate the verification point: [e/s]G + [r/s]Q + * + * p1 Calculated point. + * p2 Public point and temporary. + * s Second part of signature as a number. + * u1 Temporary number. + * u2 Temporary number. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +static int sp_521_calc_vfy_point_9(sp_point_521* p1, sp_point_521* p2, + sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap) +{ + int err; + +#ifndef WOLFSSL_SP_SMALL + err = sp_521_mod_inv_9(s, s, p521_order); + if (err == MP_OKAY) +#endif /* !WOLFSSL_SP_SMALL */ + { + sp_521_mul_9(s, s, p521_norm_order); + err = sp_521_mod_9(s, s, p521_order); + } + if (err == MP_OKAY) { + sp_521_norm_9(s); +#ifdef WOLFSSL_SP_SMALL + { + sp_521_mont_inv_order_9(s, s, tmp); + sp_521_mont_mul_order_9(u1, u1, s); + sp_521_mont_mul_order_9(u2, u2, s); + } +#else + { + sp_521_mont_mul_order_9(u1, u1, s); + sp_521_mont_mul_order_9(u2, u2, s); + } +#endif /* WOLFSSL_SP_SMALL */ + { + err = sp_521_ecc_mulmod_base_9(p1, u1, 0, 0, heap); + } + } + if ((err == MP_OKAY) && sp_521_iszero_9(p1->z)) { + p1->infinity = 1; + } + if (err == MP_OKAY) { + err = sp_521_ecc_mulmod_9(p2, p2, u2, 0, 0, heap); + } + if ((err == MP_OKAY) && sp_521_iszero_9(p2->z)) { + p2->infinity = 1; + } + + if (err == MP_OKAY) { + sp_521_add_points_9(p1, p2, tmp); + } + + return err; +} + +#ifdef HAVE_ECC_VERIFY +/* Verify the signature values with the hash and public key. + * e = Truncate(hash, 521) + * u1 = e/s mod order + * u2 = r/s mod order + * r == (u1.G + u2.Q)->x mod order + * Optimization: Leave point in projective form. + * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z') + * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' + * The hash is truncated to the first 521 bits. + * + * hash Hash to sign. + * hashLen Length of the hash data. + * rng Random number generator. + * priv Private part of key - scalar. + * rm First part of result as an mp_int. + * sm Sirst part of result as an mp_int. + * heap Heap to use for allocation. + * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
+ */ +int sp_ecc_verify_521(const byte* hash, word32 hashLen, const mp_int* pX, + const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm, + int* res, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* u1 = NULL; + sp_point_521* p1 = NULL; +#else + sp_digit u1[18 * 9]; + sp_point_521 p1[2]; +#endif + sp_digit* u2 = NULL; + sp_digit* s = NULL; + sp_digit* tmp = NULL; + sp_point_521* p2 = NULL; + sp_digit carry; + sp_int64 c = 0; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p1 = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (p1 == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 9, heap, + DYNAMIC_TYPE_ECC); + if (u1 == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + u2 = u1 + 2 * 9; + s = u1 + 4 * 9; + tmp = u1 + 6 * 9; + p2 = p1 + 1; + + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(u1, 9, hash, (int)hashLen); + sp_521_from_mp(u2, 9, rm); + sp_521_from_mp(s, 9, sm); + sp_521_from_mp(p2->x, 9, pX); + sp_521_from_mp(p2->y, 9, pY); + sp_521_from_mp(p2->z, 9, pZ); + + if (hashLen == 66U) { + sp_521_rshift_9(u1, u1, 7); + u1[8] |= ((sp_digit)hash[0]) << 49; + } + + err = sp_521_calc_vfy_point_9(p1, p2, s, u1, u2, tmp, heap); + } + if (err == MP_OKAY) { + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(u2, 9, rm); + err = sp_521_mod_mul_norm_9(u2, u2, p521_mod); + } + + if (err == MP_OKAY) { + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_9(p1->z, p1->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(u1, u2, p1->z, p521_mod, p521_mp_mod); + *res = (int)(sp_521_cmp_9(p1->x, u1) == 0); + if (*res == 0) { + /* Reload r and add order. */ + sp_521_from_mp(u2, 9, rm); + carry = sp_521_add_9(u2, u2, p521_order); + /* Carry means result is greater than mod and is not valid. 
*/ + if (carry == 0) { + sp_521_norm_9(u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_9(u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_9(u2, u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + { + sp_521_mont_mul_9(u1, u2, p1->z, p521_mod, p521_mp_mod); + } + *res = (sp_521_cmp_9(p1->x, u1) == 0); + } + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (u1 != NULL) + XFREE(u1, heap, DYNAMIC_TYPE_ECC); + if (p1 != NULL) + XFREE(p1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_ecc_verify_521_ctx { + int state; + union { + sp_521_ecc_mulmod_9_ctx mulmod_ctx; + sp_521_mont_inv_order_9_ctx mont_inv_order_ctx; + sp_521_proj_point_dbl_9_ctx dbl_ctx; + sp_521_proj_point_add_9_ctx add_ctx; + }; + sp_digit u1[2*9]; + sp_digit u2[2*9]; + sp_digit s[2*9]; + sp_digit tmp[2*9 * 6]; + sp_point_521 p1; + sp_point_521 p2; +} sp_ecc_verify_521_ctx; + +int sp_ecc_verify_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, + word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ, + const mp_int* rm, const mp_int* sm, int* res, void* heap) +{ + int err = FP_WOULDBLOCK; + sp_ecc_verify_521_ctx* ctx = (sp_ecc_verify_521_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_ecc_verify_521_ctx) >= sizeof(*sp_ctx) ? 
-1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: /* INIT */ + if (hashLen > 66U) { + hashLen = 66U; + } + + sp_521_from_bin(ctx->u1, 9, hash, (int)hashLen); + sp_521_from_mp(ctx->u2, 9, rm); + sp_521_from_mp(ctx->s, 9, sm); + sp_521_from_mp(ctx->p2.x, 9, pX); + sp_521_from_mp(ctx->p2.y, 9, pY); + sp_521_from_mp(ctx->p2.z, 9, pZ); + if (hashLen == 66U) { + sp_521_rshift_9(ctx->u1, ctx->u1, 7); + ctx->u1[8] |= ((sp_digit)hash[0]) << 49; + } + ctx->state = 1; + break; + case 1: /* NORMS0 */ + sp_521_mul_9(ctx->s, ctx->s, p521_norm_order); + err = sp_521_mod_9(ctx->s, ctx->s, p521_order); + if (err == MP_OKAY) + ctx->state = 2; + break; + case 2: /* NORMS1 */ + sp_521_norm_9(ctx->s); + XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx)); + ctx->state = 3; + break; + case 3: /* NORMS2 */ + err = sp_521_mont_inv_order_9_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp); + if (err == MP_OKAY) { + ctx->state = 4; + } + break; + case 4: /* NORMS3 */ + sp_521_mont_mul_order_9(ctx->u1, ctx->u1, ctx->s); + ctx->state = 5; + break; + case 5: /* NORMS4 */ + sp_521_mont_mul_order_9(ctx->u2, ctx->u2, ctx->s); + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 6; + break; + case 6: /* MULBASE */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p521_base, ctx->u1, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_9(ctx->p1.z)) { + ctx->p1.infinity = 1; + } + XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx)); + ctx->state = 7; + } + break; + case 7: /* MULMOD */ + err = sp_521_ecc_mulmod_9_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap); + if (err == MP_OKAY) { + if (sp_521_iszero_9(ctx->p2.z)) { + ctx->p2.infinity = 1; + } + XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx)); + ctx->state = 8; + } + break; + case 8: /* ADD */ + err = sp_521_proj_point_add_9_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp); + if 
(err == MP_OKAY) + ctx->state = 9; + break; + case 9: /* MONT */ + /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */ + /* Reload r and convert to Montgomery form. */ + sp_521_from_mp(ctx->u2, 9, rm); + err = sp_521_mod_mul_norm_9(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) + ctx->state = 10; + break; + case 10: /* SQR */ + /* u1 = r.z'.z' mod prime */ + sp_521_mont_sqr_9(ctx->p1.z, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 11; + break; + case 11: /* MUL */ + sp_521_mont_mul_9(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, p521_mp_mod); + ctx->state = 12; + break; + case 12: /* RES */ + { + sp_int64 c = 0; + err = MP_OKAY; /* math okay, now check result */ + *res = (int)(sp_521_cmp_9(ctx->p1.x, ctx->u1) == 0); + if (*res == 0) { + sp_digit carry; + + /* Reload r and add order. */ + sp_521_from_mp(ctx->u2, 9, rm); + carry = sp_521_add_9(ctx->u2, ctx->u2, p521_order); + /* Carry means result is greater than mod and is not valid. */ + if (carry == 0) { + sp_521_norm_9(ctx->u2); + + /* Compare with mod and if greater or equal then not valid. */ + c = sp_521_cmp_9(ctx->u2, p521_mod); + } + } + if ((*res == 0) && (c < 0)) { + /* Convert to Montogomery form */ + err = sp_521_mod_mul_norm_9(ctx->u2, ctx->u2, p521_mod); + if (err == MP_OKAY) { + /* u1 = (r + 1*order).z'.z' mod prime */ + sp_521_mont_mul_9(ctx->u1, ctx->u2, ctx->p1.z, p521_mod, + p521_mp_mod); + *res = (int)(sp_521_cmp_9(ctx->p1.x, ctx->u1) == 0); + } + } + break; + } + } /* switch */ + + if (err == MP_OKAY && ctx->state != 12) { + err = FP_WOULDBLOCK; + } + + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ +#endif /* HAVE_ECC_VERIFY */ + +#ifdef HAVE_ECC_CHECK_KEY +/* Check that the x and y ordinates are a valid point on the curve. + * + * point EC point. + * heap Heap to use if dynamically allocating. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +static int sp_521_ecc_is_point_9(const sp_point_521* point, + void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t1 = NULL; +#else + sp_digit t1[9 * 4]; +#endif + sp_digit* t2 = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 4, heap, DYNAMIC_TYPE_ECC); + if (t1 == NULL) + err = MEMORY_E; +#endif + (void)heap; + + if (err == MP_OKAY) { + t2 = t1 + 2 * 9; + + /* y^2 - x^3 - a.x = b */ + sp_521_sqr_9(t1, point->y); + (void)sp_521_mod_9(t1, t1, p521_mod); + sp_521_sqr_9(t2, point->x); + (void)sp_521_mod_9(t2, t2, p521_mod); + sp_521_mul_9(t2, t2, point->x); + (void)sp_521_mod_9(t2, t2, p521_mod); + sp_521_mont_sub_9(t1, t1, t2, p521_mod); + + /* y^2 - x^3 + 3.x = b, when a = -3 */ + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + sp_521_mont_add_9(t1, t1, point->x, p521_mod); + + + if (sp_521_cmp_9(t1, p521_b) != 0) { + err = MP_VAL; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t1 != NULL) + XFREE(t1, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the x and y ordinates are a valid point on the curve. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve and MP_OKAY otherwise. 
+ */ +int sp_ecc_is_point_521(const mp_int* pX, const mp_int* pY) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_521* pub = NULL; +#else + sp_point_521 pub[1]; +#endif + const byte one[1] = { 1 }; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(pub->x, 9, pX); + sp_521_from_mp(pub->y, 9, pY); + sp_521_from_bin(pub->z, 9, one, (int)sizeof(one)); + + err = sp_521_ecc_is_point_9(pub, NULL); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Check that the private scalar generates the EC point (px, py), the point is + * on the curve and the point has the correct order. + * + * pX X ordinate of EC point. + * pY Y ordinate of EC point. + * privm Private scalar that generates EC point. + * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is + * not on the curve, ECC_INF_E if the point does not have the correct order, + * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and + * MP_OKAY otherwise. + */ +int sp_ecc_check_key_521(const mp_int* pX, const mp_int* pY, + const mp_int* privm, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* priv = NULL; + sp_point_521* pub = NULL; +#else + sp_digit priv[9]; + sp_point_521 pub[2]; +#endif + sp_point_521* p = NULL; + const byte one[1] = { 1 }; + int err = MP_OKAY; + + + /* Quick check the lengs of public key ordinates and private key are in + * range. Proper check later. 
+ */ + if (((mp_count_bits(pX) > 521) || + (mp_count_bits(pY) > 521) || + ((privm != NULL) && (mp_count_bits(privm) > 521)))) { + err = ECC_OUT_OF_RANGE_E; + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + pub = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, heap, + DYNAMIC_TYPE_ECC); + if (pub == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY && privm) { + priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9, heap, + DYNAMIC_TYPE_ECC); + if (priv == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + p = pub + 1; + + sp_521_from_mp(pub->x, 9, pX); + sp_521_from_mp(pub->y, 9, pY); + sp_521_from_bin(pub->z, 9, one, (int)sizeof(one)); + if (privm) + sp_521_from_mp(priv, 9, privm); + + /* Check point at infinitiy. */ + if ((sp_521_iszero_9(pub->x) != 0) && + (sp_521_iszero_9(pub->y) != 0)) { + err = ECC_INF_E; + } + } + + /* Check range of X and Y */ + if ((err == MP_OKAY) && + ((sp_521_cmp_9(pub->x, p521_mod) >= 0) || + (sp_521_cmp_9(pub->y, p521_mod) >= 0))) { + err = ECC_OUT_OF_RANGE_E; + } + + if (err == MP_OKAY) { + /* Check point is on curve */ + err = sp_521_ecc_is_point_9(pub, heap); + } + + if (err == MP_OKAY) { + /* Point * order = infinity */ + err = sp_521_ecc_mulmod_9(p, pub, p521_order, 1, 1, heap); + } + /* Check result is infinity */ + if ((err == MP_OKAY) && ((sp_521_iszero_9(p->x) == 0) || + (sp_521_iszero_9(p->y) == 0))) { + err = ECC_INF_E; + } + + if (privm) { + if (err == MP_OKAY) { + /* Base * private = point */ + err = sp_521_ecc_mulmod_base_9(p, priv, 1, 1, heap); + } + /* Check result is public key */ + if ((err == MP_OKAY) && + ((sp_521_cmp_9(p->x, pub->x) != 0) || + (sp_521_cmp_9(p->y, pub->y) != 0))) { + err = ECC_PRIV_KEY_E; + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (pub != NULL) + XFREE(pub, heap, DYNAMIC_TYPE_ECC); + if (priv != NULL) + XFREE(priv, heap, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL +/* Add two projective EC points together. 
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ) + * + * pX First EC point's X ordinate. + * pY First EC point's Y ordinate. + * pZ First EC point's Z ordinate. + * qX Second EC point's X ordinate. + * qY Second EC point's Y ordinate. + * qZ Second EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_add_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* qX, mp_int* qY, mp_int* qZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 6]; + sp_point_521 p[2]; +#endif + sp_point_521* q = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521) * 2, NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 6, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } + } +#endif + + if (err == MP_OKAY) { + q = p + 1; + + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + sp_521_from_mp(q->x, 9, qX); + sp_521_from_mp(q->y, 9, qY); + sp_521_from_mp(q->z, 9, qZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + q->infinity = sp_521_iszero_9(q->x) & + sp_521_iszero_9(q->y); + + sp_521_proj_point_add_9(p, p, q, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Double a projective EC point. 
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ) + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * rX Resultant EC point's X ordinate. + * rY Resultant EC point's Y ordinate. + * rZ Resultant EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_proj_dbl_point_521(mp_int* pX, mp_int* pY, mp_int* pZ, + mp_int* rX, mp_int* rY, mp_int* rZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 2]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 2, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + + sp_521_proj_point_dbl_9(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, rX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, rY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, rZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + +/* Map a projective EC point to affine in place. + * pZ will be one. + * + * pX EC point's X ordinate. + * pY EC point's Y ordinate. + * pZ EC point's Z ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +int sp_ecc_map_521(mp_int* pX, mp_int* pY, mp_int* pZ) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp = NULL; + sp_point_521* p = NULL; +#else + sp_digit tmp[2 * 9 * 5]; + sp_point_521 p[1]; +#endif + int err = MP_OKAY; + + +#ifdef WOLFSSL_SP_SMALL_STACK + if (err == MP_OKAY) { + p = (sp_point_521*)XMALLOC(sizeof(sp_point_521), NULL, + DYNAMIC_TYPE_ECC); + if (p == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9 * 5, NULL, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + if (err == MP_OKAY) { + sp_521_from_mp(p->x, 9, pX); + sp_521_from_mp(p->y, 9, pY); + sp_521_from_mp(p->z, 9, pZ); + p->infinity = sp_521_iszero_9(p->x) & + sp_521_iszero_9(p->y); + + sp_521_map_9(p, p, tmp); + } + + if (err == MP_OKAY) { + err = sp_521_to_mp(p->x, pX); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->y, pY); + } + if (err == MP_OKAY) { + err = sp_521_to_mp(p->z, pZ); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) + XFREE(tmp, NULL, DYNAMIC_TYPE_ECC); + if (p != NULL) + XFREE(p, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */ +#ifdef HAVE_COMP_KEY +/* Square root power for the P521 curve. */ +static const uint64_t p521_sqrt_power[9] = { + 0x0000000000000000,0x0000000000000000,0x0000000000000000, + 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000, + 0x0000000000000080 +}; + +/* Find the square root of a number mod the prime of the curve. + * + * y The number to operate on and the result. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. 
+ */ +static int sp_521_mont_sqrt_9(sp_digit* y) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* t = NULL; +#else + sp_digit t[2 * 9]; +#endif + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 9, NULL, DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + + { + int i; + + XMEMCPY(t, y, sizeof(sp_digit) * 9); + for (i=518; i>=0; i--) { + sp_521_mont_sqr_9(t, t, p521_mod, p521_mp_mod); + if (p521_sqrt_power[i / 64] & ((sp_digit)1 << (i % 64))) + sp_521_mont_mul_9(t, t, y, p521_mod, p521_mp_mod); + } + XMEMCPY(y, t, sizeof(sp_digit) * 9); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} + + +/* Uncompress the point given the X ordinate. + * + * xm X ordinate. + * odd Whether the Y ordinate is odd. + * ym Calculated Y ordinate. + * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise. + */ +int sp_ecc_uncompress_521(mp_int* xm, int odd, mp_int* ym) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* x = NULL; +#else + sp_digit x[4 * 9]; +#endif + sp_digit* y = NULL; + int err = MP_OKAY; + +#ifdef WOLFSSL_SP_SMALL_STACK + x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 9, NULL, DYNAMIC_TYPE_ECC); + if (x == NULL) + err = MEMORY_E; +#endif + + if (err == MP_OKAY) { + y = x + 2 * 9; + + sp_521_from_mp(x, 9, xm); + err = sp_521_mod_mul_norm_9(x, x, p521_mod); + } + if (err == MP_OKAY) { + /* y = x^3 */ + { + sp_521_mont_sqr_9(y, x, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, y, x, p521_mod, p521_mp_mod); + } + /* y = x^3 - 3x */ + sp_521_mont_sub_9(y, y, x, p521_mod); + sp_521_mont_sub_9(y, y, x, p521_mod); + sp_521_mont_sub_9(y, y, x, p521_mod); + /* y = x^3 - 3x + b */ + err = sp_521_mod_mul_norm_9(x, p521_b, p521_mod); + } + if (err == MP_OKAY) { + sp_521_mont_add_9(y, y, x, p521_mod); + /* y = sqrt(x^3 - 3x + b) */ + err = sp_521_mont_sqrt_9(y); + } + if (err == MP_OKAY) { + XMEMSET(y + 9, 0, 9U * 
sizeof(sp_digit)); + sp_521_mont_reduce_9(y, p521_mod, p521_mp_mod); + if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) { + sp_521_mont_sub_9(y, p521_mod, y, p521_mod); + } + + err = sp_521_to_mp(y, ym); + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (x != NULL) + XFREE(x, NULL, DYNAMIC_TYPE_ECC); +#endif + + return err; +} +#endif +#endif /* WOLFSSL_SP_521 */ +#ifdef WOLFCRYPT_HAVE_SAKKE #ifdef WOLFSSL_SP_1024 /* Point structure to use. */ @@ -35309,106 +42941,110 @@ typedef struct sp_point_1024 { SP_NOINLINE static void sp_1024_mul_9(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_int128 t0 = ((sp_int128)a[ 0]) * b[ 0]; - sp_int128 t1 = ((sp_int128)a[ 0]) * b[ 1] - + ((sp_int128)a[ 1]) * b[ 0]; - sp_int128 t2 = ((sp_int128)a[ 0]) * b[ 2] - + ((sp_int128)a[ 1]) * b[ 1] - + ((sp_int128)a[ 2]) * b[ 0]; - sp_int128 t3 = ((sp_int128)a[ 0]) * b[ 3] - + ((sp_int128)a[ 1]) * b[ 2] - + ((sp_int128)a[ 2]) * b[ 1] - + ((sp_int128)a[ 3]) * b[ 0]; - sp_int128 t4 = ((sp_int128)a[ 0]) * b[ 4] - + ((sp_int128)a[ 1]) * b[ 3] - + ((sp_int128)a[ 2]) * b[ 2] - + ((sp_int128)a[ 3]) * b[ 1] - + ((sp_int128)a[ 4]) * b[ 0]; - sp_int128 t5 = ((sp_int128)a[ 0]) * b[ 5] - + ((sp_int128)a[ 1]) * b[ 4] - + ((sp_int128)a[ 2]) * b[ 3] - + ((sp_int128)a[ 3]) * b[ 2] - + ((sp_int128)a[ 4]) * b[ 1] - + ((sp_int128)a[ 5]) * b[ 0]; - sp_int128 t6 = ((sp_int128)a[ 0]) * b[ 6] - + ((sp_int128)a[ 1]) * b[ 5] - + ((sp_int128)a[ 2]) * b[ 4] - + ((sp_int128)a[ 3]) * b[ 3] - + ((sp_int128)a[ 4]) * b[ 2] - + ((sp_int128)a[ 5]) * b[ 1] - + ((sp_int128)a[ 6]) * b[ 0]; - sp_int128 t7 = ((sp_int128)a[ 0]) * b[ 7] - + ((sp_int128)a[ 1]) * b[ 6] - + ((sp_int128)a[ 2]) * b[ 5] - + ((sp_int128)a[ 3]) * b[ 4] - + ((sp_int128)a[ 4]) * b[ 3] - + ((sp_int128)a[ 5]) * b[ 2] - + ((sp_int128)a[ 6]) * b[ 1] - + ((sp_int128)a[ 7]) * b[ 0]; - sp_int128 t8 = ((sp_int128)a[ 0]) * b[ 8] - + ((sp_int128)a[ 1]) * b[ 7] - + ((sp_int128)a[ 2]) * b[ 6] - + ((sp_int128)a[ 3]) * b[ 5] - + ((sp_int128)a[ 4]) * b[ 4] - + 
((sp_int128)a[ 5]) * b[ 3] - + ((sp_int128)a[ 6]) * b[ 2] - + ((sp_int128)a[ 7]) * b[ 1] - + ((sp_int128)a[ 8]) * b[ 0]; - sp_int128 t9 = ((sp_int128)a[ 1]) * b[ 8] - + ((sp_int128)a[ 2]) * b[ 7] - + ((sp_int128)a[ 3]) * b[ 6] - + ((sp_int128)a[ 4]) * b[ 5] - + ((sp_int128)a[ 5]) * b[ 4] - + ((sp_int128)a[ 6]) * b[ 3] - + ((sp_int128)a[ 7]) * b[ 2] - + ((sp_int128)a[ 8]) * b[ 1]; - sp_int128 t10 = ((sp_int128)a[ 2]) * b[ 8] - + ((sp_int128)a[ 3]) * b[ 7] - + ((sp_int128)a[ 4]) * b[ 6] - + ((sp_int128)a[ 5]) * b[ 5] - + ((sp_int128)a[ 6]) * b[ 4] - + ((sp_int128)a[ 7]) * b[ 3] - + ((sp_int128)a[ 8]) * b[ 2]; - sp_int128 t11 = ((sp_int128)a[ 3]) * b[ 8] - + ((sp_int128)a[ 4]) * b[ 7] - + ((sp_int128)a[ 5]) * b[ 6] - + ((sp_int128)a[ 6]) * b[ 5] - + ((sp_int128)a[ 7]) * b[ 4] - + ((sp_int128)a[ 8]) * b[ 3]; - sp_int128 t12 = ((sp_int128)a[ 4]) * b[ 8] - + ((sp_int128)a[ 5]) * b[ 7] - + ((sp_int128)a[ 6]) * b[ 6] - + ((sp_int128)a[ 7]) * b[ 5] - + ((sp_int128)a[ 8]) * b[ 4]; - sp_int128 t13 = ((sp_int128)a[ 5]) * b[ 8] - + ((sp_int128)a[ 6]) * b[ 7] - + ((sp_int128)a[ 7]) * b[ 6] - + ((sp_int128)a[ 8]) * b[ 5]; - sp_int128 t14 = ((sp_int128)a[ 6]) * b[ 8] - + ((sp_int128)a[ 7]) * b[ 7] - + ((sp_int128)a[ 8]) * b[ 6]; - sp_int128 t15 = ((sp_int128)a[ 7]) * b[ 8] - + ((sp_int128)a[ 8]) * b[ 7]; - sp_int128 t16 = ((sp_int128)a[ 8]) * b[ 8]; + sp_int128 t0; + sp_int128 t1; + sp_digit t[9]; - t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; - t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = 
t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; + t0 = ((sp_int128)a[ 0]) * b[ 0]; + t1 = ((sp_int128)a[ 0]) * b[ 1] + + ((sp_int128)a[ 1]) * b[ 0]; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 0]) * b[ 2] + + ((sp_int128)a[ 1]) * b[ 1] + + ((sp_int128)a[ 2]) * b[ 0]; + t[ 1] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 0]) * b[ 3] + + ((sp_int128)a[ 1]) * b[ 2] + + ((sp_int128)a[ 2]) * b[ 1] + + ((sp_int128)a[ 3]) * b[ 0]; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 0]) * b[ 4] + + ((sp_int128)a[ 1]) * b[ 3] + + ((sp_int128)a[ 2]) * b[ 2] + + ((sp_int128)a[ 3]) * b[ 1] + + ((sp_int128)a[ 4]) * b[ 0]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 0]) * b[ 5] + + ((sp_int128)a[ 1]) * b[ 4] + + ((sp_int128)a[ 2]) * b[ 3] + + ((sp_int128)a[ 3]) * b[ 2] + + ((sp_int128)a[ 4]) * b[ 1] + + ((sp_int128)a[ 5]) * b[ 0]; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 0]) * b[ 6] + + ((sp_int128)a[ 1]) * b[ 5] + + ((sp_int128)a[ 2]) * b[ 4] + + ((sp_int128)a[ 3]) * b[ 3] + + ((sp_int128)a[ 4]) * b[ 2] + + ((sp_int128)a[ 5]) * b[ 1] + + ((sp_int128)a[ 6]) * b[ 0]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 0]) * b[ 7] + + ((sp_int128)a[ 1]) * b[ 6] + + ((sp_int128)a[ 2]) * b[ 5] + + ((sp_int128)a[ 3]) * b[ 4] + + ((sp_int128)a[ 4]) * b[ 3] + + ((sp_int128)a[ 5]) * b[ 2] + + ((sp_int128)a[ 6]) * b[ 1] + + ((sp_int128)a[ 7]) * b[ 0]; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 0]) * b[ 8] + + ((sp_int128)a[ 1]) * b[ 7] + + ((sp_int128)a[ 2]) * b[ 6] + + ((sp_int128)a[ 3]) * b[ 5] + + ((sp_int128)a[ 4]) * b[ 4] + + ((sp_int128)a[ 5]) * 
b[ 3] + + ((sp_int128)a[ 6]) * b[ 2] + + ((sp_int128)a[ 7]) * b[ 1] + + ((sp_int128)a[ 8]) * b[ 0]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 1]) * b[ 8] + + ((sp_int128)a[ 2]) * b[ 7] + + ((sp_int128)a[ 3]) * b[ 6] + + ((sp_int128)a[ 4]) * b[ 5] + + ((sp_int128)a[ 5]) * b[ 4] + + ((sp_int128)a[ 6]) * b[ 3] + + ((sp_int128)a[ 7]) * b[ 2] + + ((sp_int128)a[ 8]) * b[ 1]; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 2]) * b[ 8] + + ((sp_int128)a[ 3]) * b[ 7] + + ((sp_int128)a[ 4]) * b[ 6] + + ((sp_int128)a[ 5]) * b[ 5] + + ((sp_int128)a[ 6]) * b[ 4] + + ((sp_int128)a[ 7]) * b[ 3] + + ((sp_int128)a[ 8]) * b[ 2]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 3]) * b[ 8] + + ((sp_int128)a[ 4]) * b[ 7] + + ((sp_int128)a[ 5]) * b[ 6] + + ((sp_int128)a[ 6]) * b[ 5] + + ((sp_int128)a[ 7]) * b[ 4] + + ((sp_int128)a[ 8]) * b[ 3]; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 4]) * b[ 8] + + ((sp_int128)a[ 5]) * b[ 7] + + ((sp_int128)a[ 6]) * b[ 6] + + ((sp_int128)a[ 7]) * b[ 5] + + ((sp_int128)a[ 8]) * b[ 4]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 5]) * b[ 8] + + ((sp_int128)a[ 6]) * b[ 7] + + ((sp_int128)a[ 7]) * b[ 6] + + ((sp_int128)a[ 8]) * b[ 5]; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 6]) * b[ 8] + + ((sp_int128)a[ 7]) * b[ 7] + + ((sp_int128)a[ 8]) * b[ 6]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = ((sp_int128)a[ 7]) * b[ 8] + + ((sp_int128)a[ 8]) * b[ 7]; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 8]) * b[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -35418,70 +43054,74 @@ SP_NOINLINE static void sp_1024_mul_9(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_1024_sqr_9(sp_digit* r, const sp_digit* a) { - sp_int128 t0 = ((sp_int128)a[ 0]) * a[ 0]; - sp_int128 t1 = (((sp_int128)a[ 0]) * a[ 1]) * 2; - sp_int128 t2 = (((sp_int128)a[ 0]) * a[ 2]) * 2 - + ((sp_int128)a[ 1]) * a[ 1]; - sp_int128 t3 = (((sp_int128)a[ 0]) * a[ 3] - + ((sp_int128)a[ 1]) * a[ 2]) * 2; - sp_int128 t4 = (((sp_int128)a[ 0]) * a[ 4] - + ((sp_int128)a[ 1]) * a[ 3]) * 2 - + ((sp_int128)a[ 2]) * a[ 2]; - sp_int128 t5 = (((sp_int128)a[ 0]) * a[ 5] - + ((sp_int128)a[ 1]) * a[ 4] - + ((sp_int128)a[ 2]) * a[ 3]) * 2; - sp_int128 t6 = (((sp_int128)a[ 0]) * a[ 6] - + ((sp_int128)a[ 1]) * a[ 5] - + ((sp_int128)a[ 2]) * a[ 4]) * 2 - + ((sp_int128)a[ 3]) * a[ 3]; - sp_int128 t7 = (((sp_int128)a[ 0]) * a[ 7] - + ((sp_int128)a[ 1]) * a[ 6] - + ((sp_int128)a[ 2]) * a[ 5] - + ((sp_int128)a[ 3]) * a[ 4]) * 2; - sp_int128 t8 = (((sp_int128)a[ 0]) * a[ 8] - + ((sp_int128)a[ 1]) * a[ 7] - + ((sp_int128)a[ 2]) * a[ 6] - + ((sp_int128)a[ 3]) * a[ 5]) * 2 - + ((sp_int128)a[ 4]) * a[ 4]; - sp_int128 t9 = (((sp_int128)a[ 1]) * a[ 8] - + ((sp_int128)a[ 2]) * a[ 7] - + ((sp_int128)a[ 3]) * a[ 6] - + ((sp_int128)a[ 4]) * a[ 5]) * 2; - sp_int128 t10 = (((sp_int128)a[ 2]) * a[ 8] - + ((sp_int128)a[ 3]) * a[ 7] - + ((sp_int128)a[ 4]) * a[ 6]) * 2 - + ((sp_int128)a[ 5]) * a[ 5]; - sp_int128 t11 = (((sp_int128)a[ 3]) * a[ 8] - + ((sp_int128)a[ 4]) * a[ 7] - + ((sp_int128)a[ 5]) * a[ 6]) * 2; - sp_int128 t12 = (((sp_int128)a[ 4]) * a[ 8] - + ((sp_int128)a[ 5]) * a[ 7]) * 2 - + ((sp_int128)a[ 6]) * a[ 6]; - sp_int128 t13 = (((sp_int128)a[ 5]) * a[ 8] - + ((sp_int128)a[ 6]) * a[ 7]) * 2; - sp_int128 t14 = (((sp_int128)a[ 6]) * a[ 8]) * 2 - + ((sp_int128)a[ 7]) * a[ 7]; - sp_int128 t15 = (((sp_int128)a[ 7]) * a[ 8]) * 2; - sp_int128 t16 = ((sp_int128)a[ 8]) * a[ 8]; + sp_int128 t0; + sp_int128 t1; + sp_digit t[9]; - t1 += t0 >> 57; r[ 0] = t0 & 
0x1ffffffffffffffL; - t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL; - t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL; - t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL; - t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL; - t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL; - t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL; - t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL; - t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL; - t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL; - t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL; - t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL; - t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL; - t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL; - t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL; - t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL; - r[17] = (sp_digit)(t16 >> 57); - r[16] = t16 & 0x1ffffffffffffffL; + t0 = ((sp_int128)a[ 0]) * a[ 0]; + t1 = (((sp_int128)a[ 0]) * a[ 1]) * 2; + t[ 0] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 0]) * a[ 2]) * 2 + + ((sp_int128)a[ 1]) * a[ 1]; + t[ 1] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 0]) * a[ 3] + + ((sp_int128)a[ 1]) * a[ 2]) * 2; + t[ 2] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 0]) * a[ 4] + + ((sp_int128)a[ 1]) * a[ 3]) * 2 + + ((sp_int128)a[ 2]) * a[ 2]; + t[ 3] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 0]) * a[ 5] + + ((sp_int128)a[ 1]) * a[ 4] + + ((sp_int128)a[ 2]) * a[ 3]) * 2; + t[ 4] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 0]) * a[ 6] + + ((sp_int128)a[ 1]) * a[ 5] + + ((sp_int128)a[ 2]) * a[ 4]) * 2 + + ((sp_int128)a[ 3]) * a[ 3]; + t[ 5] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 0]) * a[ 7] + + ((sp_int128)a[ 1]) * a[ 6] + + ((sp_int128)a[ 2]) * a[ 5] + + ((sp_int128)a[ 3]) * a[ 4]) * 2; + t[ 6] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 0]) * a[ 8] + + ((sp_int128)a[ 1]) * a[ 7] + + 
((sp_int128)a[ 2]) * a[ 6] + + ((sp_int128)a[ 3]) * a[ 5]) * 2 + + ((sp_int128)a[ 4]) * a[ 4]; + t[ 7] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 1]) * a[ 8] + + ((sp_int128)a[ 2]) * a[ 7] + + ((sp_int128)a[ 3]) * a[ 6] + + ((sp_int128)a[ 4]) * a[ 5]) * 2; + t[ 8] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 2]) * a[ 8] + + ((sp_int128)a[ 3]) * a[ 7] + + ((sp_int128)a[ 4]) * a[ 6]) * 2 + + ((sp_int128)a[ 5]) * a[ 5]; + r[ 9] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 3]) * a[ 8] + + ((sp_int128)a[ 4]) * a[ 7] + + ((sp_int128)a[ 5]) * a[ 6]) * 2; + r[10] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 4]) * a[ 8] + + ((sp_int128)a[ 5]) * a[ 7]) * 2 + + ((sp_int128)a[ 6]) * a[ 6]; + r[11] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 5]) * a[ 8] + + ((sp_int128)a[ 6]) * a[ 7]) * 2; + r[12] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = (((sp_int128)a[ 6]) * a[ 8]) * 2 + + ((sp_int128)a[ 7]) * a[ 7]; + r[13] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + t1 = (((sp_int128)a[ 7]) * a[ 8]) * 2; + r[14] = t0 & 0x1ffffffffffffffL; t1 += t0 >> 57; + t0 = ((sp_int128)a[ 8]) * a[ 8]; + r[15] = t1 & 0x1ffffffffffffffL; t0 += t1 >> 57; + r[16] = t0 & 0x1ffffffffffffffL; + r[17] = (sp_digit)(t0 >> 57); + XMEMCPY(r, t, sizeof(t)); } /* Add b to a into r. (r = a + b) @@ -35899,6 +43539,7 @@ SP_NOINLINE static void sp_1024_mul_d_36(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } +#ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -35910,13 +43551,26 @@ SP_NOINLINE static void sp_1024_mul_d_36(sp_digit* r, const sp_digit* a, static void sp_1024_cond_add_18(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { -#ifdef WOLFSSL_SP_SMALL int i; for (i = 0; i < 18; i++) { r[i] = a[i] + (b[i] & m); } -#else +} +#endif /* WOLFSSL_SP_SMALL */ + +#ifndef WOLFSSL_SP_SMALL +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static void sp_1024_cond_add_18(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ int i; for (i = 0; i < 16; i += 8) { @@ -35931,8 +43585,8 @@ static void sp_1024_cond_add_18(sp_digit* r, const sp_digit* a, } r[16] = a[16] + (b[16] & m); r[17] = a[17] + (b[17] & m); -#endif /* WOLFSSL_SP_SMALL */ } +#endif /* !WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Sub b from a into r. 
(r = a - b) @@ -35999,95 +43653,96 @@ SP_NOINLINE static void sp_1024_rshift_18(sp_digit* r, const sp_digit* a, r[17] = a[17] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit dv = (div >> 1) + 1; + sp_digit t1 = (sp_digit)(d >> 57); + sp_digit t0 = (sp_digit)(d & 0x1ffffffffffffffL); + sp_digit t2; + sp_digit sign; sp_digit r; + int i; + sp_int128 m; + + r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + t1 -= dv & (0 - r); + for (i = 55; i >= 1; i--) { + t1 += t1 + (((sp_uint64)t0 >> 56) & 1); + t0 <<= 1; + t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); + r += r + t2; + t1 -= dv & (0 - t2); + t1 += t2; + } + r += r + 1; + + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 57); + m = d - ((sp_int128)r * div); + r += (sp_digit)(m >> 114) - (sp_digit)(d >> 114); + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + + m = d - ((sp_int128)r * div); + sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; + m *= sign; + t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63); + r += sign * t2; + return r; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif +} +static WC_INLINE sp_digit sp_1024_word_div_word_18(sp_digit d, sp_digit div) +{ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(SP_DIV_WORD_USE_DIV) + return d / div; +#else + return (sp_digit)((sp_uint64)(div - d) >> 63); +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -36104,11 +43759,10 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[4 * 18 + 3]; @@ -36119,7 +43773,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, (void)m; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 18 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (t1 == NULL) @@ -36138,14 +43792,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, t1[18 + 18] += t1[18 + 18 - 1] >> 57; t1[18 + 18 - 1] &= 0x1ffffffffffffffL; for (i=18; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[18 + i]; - d1 <<= 57; - d1 += t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_18(t1[18 + i], t1[18 + i - 1], dv); -#endif sp_1024_mul_d_18(t2, sd, r1); (void)sp_1024_sub_18(&t1[i], &t1[i], t2); @@ -36153,14 +43800,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, t1[18 + i] -= t2[18]; t1[18 + i] += t1[18 + i - 1] >> 57; t1[18 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[18 + i]; - d1 <<= 57; - d1 -= t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_18(-t1[18 + i], -t1[18 + i - 1], dv); -#endif r1 -= t1[18 + i]; sp_1024_mul_d_18(t2, sd, r1); (void)sp_1024_add_18(&t1[i], &t1[i], 
t2); @@ -36169,7 +43809,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, } t1[18 - 1] += t1[18 - 2] >> 57; t1[18 - 2] &= 0x1ffffffffffffffL; - r1 = t1[18 - 1] / dv; + r1 = sp_1024_word_div_word_18(t1[18 - 1], dv); sp_1024_mul_d_18(t2, sd, r1); sp_1024_sub_18(t1, t1, t2); @@ -36178,14 +43818,13 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_1024_cond_add_18(r, r, sd, 0 - ((r[17] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_add_18(r, r, sd, r[17] >> 63); sp_1024_norm_18(r); sp_1024_rshift_18(r, r, 2); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, NULL, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -36233,7 +43872,8 @@ static int sp_1024_point_new_ex_18(void* heap, sp_point_1024* sp, { int ret = MP_OKAY; (void)heap; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) (void)sp; *p = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); #else @@ -36245,7 +43885,8 @@ static int sp_1024_point_new_ex_18(void* heap, sp_point_1024* sp, return ret; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* Allocate memory for point and return error. 
*/ #define sp_1024_point_new_18(heap, sp, p) sp_1024_point_new_ex_18((heap), NULL, &(p)) #else @@ -36262,7 +43903,8 @@ static int sp_1024_point_new_ex_18(void* heap, sp_point_1024* sp, */ static void sp_1024_point_free_18(sp_point_1024* p, int clear, void* heap) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) /* If valid pointer then clear point data if requested and free data. */ if (p != NULL) { if (clear != 0) { @@ -36289,20 +43931,23 @@ static void sp_1024_point_free_18(sp_point_1024* p, int clear, void* heap) static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) { #if DIGIT_BIT == 57 - int j; + int i; + sp_digit j = (sp_digit)0 - (sp_digit)a->used; + int o = 0; - XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); - - for (j = a->used; j < size; j++) { - r[j] = 0; + for (i = 0; i < size; i++) { + sp_digit mask = (sp_digit)0 - (j >> 56); + r[i] = a->dp[o] & mask; + j++; + o += (int)(j >> 56); } #elif DIGIT_BIT > 57 - int i; + unsigned int i; int j = 0; word32 s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i] << s); r[j] &= 0x1ffffffffffffffL; s = 57U - s; @@ -36332,12 +43977,12 @@ static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a) r[j] = 0; } #else - int i; + unsigned int i; int j = 0; int s = 0; r[0] = 0; - for (i = 0; i < a->used && j < size; i++) { + for (i = 0; i < (unsigned int)a->used && j < size; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; if (s + DIGIT_BIT >= 57) { r[j] &= 0x1ffffffffffffffL; @@ -36486,22 +44131,22 @@ static sp_digit sp_1024_cmp_18(const sp_digit* a, const sp_digit* b) int i; for (i=17; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 56); } #else int i; r |= (a[17] - b[17]) & (0 - (sp_digit)1); - r |= (a[16] - b[16]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 56); for (i = 8; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } #endif /* WOLFSSL_SP_SMALL */ @@ -36659,10 +44304,10 @@ static void sp_1024_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_1024_norm_18(a + 18); -#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<17; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffL; @@ -36685,20 +44330,9 @@ static void sp_1024_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; } -#else - for (i=0; i<17; i++) { - mu = (a[i] * mp) & 0x1ffffffffffffffL; - sp_1024_mul_add_18(a+i, m, mu); - a[i+1] += a[i] >> 57; - } - mu = (a[i] * mp) & 0x7fffffffffffffL; - sp_1024_mul_add_18(a+i, m, mu); - a[i+1] += a[i] >> 57; - a[i] &= 0x1ffffffffffffffL; -#endif 
sp_1024_mont_shift_18(a, a); - sp_1024_cond_sub_18(a, a, m, 0 - (((a[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[17] - m[17]; + sp_1024_cond_sub_18(a, a, m, ~((over - 1) >> 63)); sp_1024_norm_18(a); } @@ -36709,9 +44343,9 @@ static void sp_1024_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) * a First number to multiply in Montgomery form. * b Second number to multiply in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_1024_mont_mul_18(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_mul_18(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { sp_1024_mul_18(r, a, b); @@ -36723,9 +44357,9 @@ static void sp_1024_mont_mul_18(sp_digit* r, const sp_digit* a, * r Result of squaring. * a Number to square in Montgomery form. * m Modulus (prime). - * mp Montgomery mulitplier. + * mp Montgomery multiplier. */ -static void sp_1024_mont_sqr_18(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_sqr_18(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { sp_1024_sqr_18(r, a); @@ -36765,11 +44399,14 @@ static const uint8_t p1024_mod_minus_2[] = { static void sp_1024_mont_inv_18(sp_digit* r, const sp_digit* a, sp_digit* td) { - sp_digit* t = td; + sp_digit* t = &td[32 * 2 * 18]; int i; int j; - sp_digit table[32][2 * 18]; + sp_digit* table[32]; + for (i = 0; i < 32; i++) { + table[i] = &td[2 * 18 * i]; + } XMEMCPY(table[0], a, sizeof(sp_digit) * 18); for (i = 1; i < 6; i++) { sp_1024_mont_sqr_18(table[0], table[0], p1024_mod, p1024_mp_mod); @@ -36810,27 +44447,24 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_18(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 18, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 18, 0, sizeof(sp_digit) * 18U); sp_1024_mont_reduce_18(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = 
sp_1024_cmp_18(r->x, p1024_mod); - sp_1024_cond_sub_18(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(r->x, r->x, p1024_mod, ~(n >> 56)); sp_1024_norm_18(r->x); /* y /= z^3 */ sp_1024_mont_mul_18(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 18, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 18, 0, sizeof(sp_digit) * 18U); sp_1024_mont_reduce_18(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_18(r->y, p1024_mod); - sp_1024_cond_sub_18(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(r->y, r->y, p1024_mod, ~(n >> 56)); sp_1024_norm_18(r->y); - XMEMSET(r->z, 0, sizeof(r->z)); + XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -36843,10 +44477,11 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, static void sp_1024_mont_add_18(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, b); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -36858,10 +44493,11 @@ static void sp_1024_mont_add_18(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_dbl_18(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -36873,15 +44509,16 @@ static void sp_1024_mont_dbl_18(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_tpl_18(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); (void)sp_1024_add_18(r, r, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -36943,7 +44580,8 @@ SP_NOINLINE static void sp_1024_rshift1_18(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_1024_div2_18(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_18(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_1024_cond_add_18(r, a, m, 0 - (a[0] & 1)); sp_1024_norm_18(r); @@ -36956,6 +44594,61 @@ static void sp_1024_div2_18(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_18(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*18; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. 
*/ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_18(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_18(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_18(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_18(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_18(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_18(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_18(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_18(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_18(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_mont_div2_18(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_18(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_18(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_18(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_18(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_18(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_18(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_18(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_18_ctx { int state; @@ -36966,7 +44659,14 @@ typedef struct sp_1024_proj_point_dbl_18_ctx { sp_digit* z; } sp_1024_proj_point_dbl_18_ctx; -static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. 
+ */ +static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_18_ctx* ctx = (sp_1024_proj_point_dbl_18_ctx*)sp_ctx->data; @@ -37040,7 +44740,7 @@ static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_18(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_18(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -37090,61 +44790,6 @@ static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_18(sp_point_1024* r, const sp_point_1024* p, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*18; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_18(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_18(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_18(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_18(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_18(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_18(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_18(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_18(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_18(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_18(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_18(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_18(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_18(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_18(x, x, y, p1024_mod); - /* 
Y = Y - X */ - sp_1024_mont_sub_18(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_18(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_18(y, y, t2, p1024_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -37162,6 +44807,20 @@ static int sp_1024_cmp_equal_18(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15]) | (a[16] ^ b[16]) | (a[17] ^ b[17])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_18(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17]) == 0; +} + + /* Add two Montgomery form projective points. * * r Result of addition. @@ -37169,6 +44828,84 @@ static int sp_1024_cmp_equal_18(const sp_digit* a, const sp_digit* b) * q Second point to add. * t Temporary ordinate data. 
*/ +static void sp_1024_proj_point_add_18(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t6 = t; + sp_digit* t1 = t + 2*18; + sp_digit* t2 = t + 4*18; + sp_digit* t3 = t + 6*18; + sp_digit* t4 = t + 8*18; + sp_digit* t5 = t + 10*18; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_18(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(t2, t1) & + sp_1024_cmp_equal_18(t4, t3)) { + sp_1024_proj_point_dbl_18(r, p, t); + } + else { + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + + /* H = U2 - U1 */ + sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(x, x, t5, p1024_mod); + sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_18(t3, y, p1024_mod); + sp_1024_mont_sub_18(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_18(y, y, x, p1024_mod); + sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(y, y, t5, 
p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 18; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_18_ctx { @@ -37181,11 +44918,19 @@ typedef struct sp_1024_proj_point_add_18_ctx { sp_digit* t3; sp_digit* t4; sp_digit* t5; + sp_digit* t6; sp_digit* x; sp_digit* y; sp_digit* z; } sp_1024_proj_point_add_18_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. 
+ */ static int sp_1024_proj_point_add_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -37204,261 +44949,168 @@ static int sp_1024_proj_point_add_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, switch (ctx->state) { case 0: /* INIT */ - ctx->t1 = t; - ctx->t2 = t + 2*18; - ctx->t3 = t + 4*18; - ctx->t4 = t + 6*18; - ctx->t5 = t + 8*18; + ctx->t6 = t; + ctx->t1 = t + 2*18; + ctx->t2 = t + 4*18; + ctx->t3 = t + 6*18; + ctx->t4 = t + 8*18; + ctx->t5 = t + 10*18; + ctx->x = ctx->t6; + ctx->y = ctx->t1; + ctx->z = ctx->t2; ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_18(ctx->t1, p1024_mod, q->y); - sp_1024_norm_18(ctx->t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } - break; - case 2: - err = sp_1024_proj_point_dbl_18_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ - break; - case 3: - { - int i; - ctx->rp[0] = r; - - /*lint allow cast to different type of pointer*/ - ctx->rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(ctx->rp[1], 0, sizeof(sp_point_1024)); - ctx->x = ctx->rp[p->infinity | q->infinity]->x; - ctx->y = ctx->rp[p->infinity | q->infinity]->y; - ctx->z = ctx->rp[p->infinity | q->infinity]->z; - - ctx->ap[0] = p; - ctx->ap[1] = q; - for (i=0; i<18; i++) { - r->x[i] = ctx->ap[p->infinity]->x[i]; - } - for (i=0; i<18; i++) { - r->y[i] = ctx->ap[p->infinity]->y[i]; - } - for (i=0; i<18; i++) { - r->z[i] = ctx->ap[p->infinity]->z[i]; - } - r->infinity = ctx->ap[p->infinity]->infinity; - - ctx->state = 4; - break; - } - case 4: /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_18(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; + break; + case 2: + sp_1024_mont_mul_18(ctx->t3, ctx->t1, q->z, p1024_mod, 
p1024_mp_mod); + ctx->state = 3; + break; + case 3: + sp_1024_mont_mul_18(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + ctx->state = 4; + break; + case 4: + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_18(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_18(ctx->t1, ctx->t1, ctx->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_18(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_18(ctx->t4, ctx->t2, ctx->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_18(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_18(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_18(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_18(ctx->t3, ctx->t3, ctx->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_18(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_18(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_18(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_18(ctx->t5, 
ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_18(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_18(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_18(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_18(ctx->z, ctx->z, ctx->t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_18(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - sp_1024_mont_sqr_18(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_18(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_18(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_dbl_18(ctx->t1, ctx->y, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t1, p1024_mod); + sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->x, 
p1024_mod); + sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); + { + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 18; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (ctx->x[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (ctx->y[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (ctx->z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } ctx->state = 25; break; + } case 25: - sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: err = MP_OKAY; break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_18(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*18; - sp_digit* t3 = t + 4*18; - sp_digit* t4 = t + 6*18; - sp_digit* t5 = t + 8*18; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; - - /* Ensure only the first point is the same as the result. 
*/ - if (q == r) { - const sp_point_1024* a = p; - p = q; - q = a; - } - - /* Check double */ - (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_18(t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_18(r, p, t); - } - else { - rp[0] = r; - - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<18; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<18; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<18; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t1, t1, x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_18(t3, t3, y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(z, z, t2, p1024_mod, p1024_mp_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, t2, 
p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(x, x, t5, p1024_mod); - sp_1024_mont_dbl_18(t1, y, p1024_mod); - sp_1024_mont_sub_18(x, x, t1, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_18(y, y, x, p1024_mod); - sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(y, y, t5, p1024_mod); - } -} - #ifdef WOLFSSL_SP_SMALL /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. @@ -37475,6 +45127,108 @@ static void sp_1024_proj_point_add_18(sp_point_1024* r, * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ +static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) +{ +#ifdef WOLFSSL_SP_SMALL_STACK + sp_point_1024* t = NULL; + sp_digit* tmp = NULL; +#else + sp_point_1024 t[3]; + sp_digit tmp[2 * 18 * 37]; +#endif + sp_digit n; + int i; + int c; + int y; + int err = MP_OKAY; + + /* Implementation is constant time. 
*/ + (void)ct; + (void)heap; + +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, + DYNAMIC_TYPE_ECC); + if (t == NULL) + err = MEMORY_E; + if (err == MP_OKAY) { + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 37, heap, + DYNAMIC_TYPE_ECC); + if (tmp == NULL) + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { + XMEMSET(t, 0, sizeof(sp_point_1024) * 3); + + /* t[0] = {0, 0, 1} * norm */ + t[0].infinity = 1; + /* t[1] = {g->x, g->y, g->z} * norm */ + err = sp_1024_mod_mul_norm_18(t[1].x, g->x, p1024_mod); + } + if (err == MP_OKAY) + err = sp_1024_mod_mul_norm_18(t[1].y, g->y, p1024_mod); + if (err == MP_OKAY) + err = sp_1024_mod_mul_norm_18(t[1].z, g->z, p1024_mod); + + if (err == MP_OKAY) { + i = 17; + c = 55; + n = k[i--] << (57 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) + break; + + n = k[i--]; + c = 57; + } + + y = (n >> 56) & 1; + n <<= 1; + + sp_1024_proj_point_add_18(&t[y^1], &t[0], &t[1], tmp); + + XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), + sizeof(sp_point_1024)); + sp_1024_proj_point_dbl_18(&t[2], &t[2], tmp); + XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + + ((size_t)&t[1] & addr_mask[y])), &t[2], + sizeof(sp_point_1024)); + } + + if (map != 0) { + sp_1024_map_18(r, &t[0], tmp); + } + else { + XMEMCPY(r, &t[0], sizeof(sp_point_1024)); + } + } + +#ifdef WOLFSSL_SP_SMALL_STACK + if (tmp != NULL) +#endif + { + ForceZero(tmp, sizeof(sp_digit) * 2 * 18 * 37); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); + #endif + } +#ifdef WOLFSSL_SP_SMALL_STACK + if (t != NULL) +#endif + { + ForceZero(t, sizeof(sp_point_1024) * 3); + #ifdef WOLFSSL_SP_SMALL_STACK + XFREE(t, heap, DYNAMIC_TYPE_ECC); + #endif + } + + return err; +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_ecc_mulmod_18_ctx { @@ -37484,7 +45238,7 @@ typedef struct sp_1024_ecc_mulmod_18_ctx { sp_1024_proj_point_add_18_ctx add_ctx; }; sp_point_1024 t[3]; - 
sp_digit tmp[2 * 18 * 5]; + sp_digit tmp[2 * 18 * 37]; sp_digit n; int i; int c; @@ -37590,109 +45344,6 @@ static int sp_1024_ecc_mulmod_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, #endif /* WOLFSSL_SP_NONBLOCK */ -static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, - const sp_digit* k, int map, int ct, void* heap) -{ -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - sp_point_1024* t = NULL; - sp_digit* tmp = NULL; -#else - sp_point_1024 t[3]; - sp_digit tmp[2 * 18 * 5]; -#endif - sp_digit n; - int i; - int c; - int y; - int err = MP_OKAY; - - /* Implementation is constant time. */ - (void)ct; - (void)heap; - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap, - DYNAMIC_TYPE_ECC); - if (t == NULL) - err = MEMORY_E; - if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 5, heap, - DYNAMIC_TYPE_ECC); - if (tmp == NULL) - err = MEMORY_E; - } -#endif - - if (err == MP_OKAY) { - XMEMSET(t, 0, sizeof(sp_point_1024) * 3); - - /* t[0] = {0, 0, 1} * norm */ - t[0].infinity = 1; - /* t[1] = {g->x, g->y, g->z} * norm */ - err = sp_1024_mod_mul_norm_18(t[1].x, g->x, p1024_mod); - } - if (err == MP_OKAY) - err = sp_1024_mod_mul_norm_18(t[1].y, g->y, p1024_mod); - if (err == MP_OKAY) - err = sp_1024_mod_mul_norm_18(t[1].z, g->z, p1024_mod); - - if (err == MP_OKAY) { - i = 17; - c = 55; - n = k[i--] << (57 - c); - for (; ; c--) { - if (c == 0) { - if (i == -1) - break; - - n = k[i--]; - c = 57; - } - - y = (n >> 56) & 1; - n <<= 1; - - sp_1024_proj_point_add_18(&t[y^1], &t[0], &t[1], tmp); - - XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), - sizeof(sp_point_1024)); - sp_1024_proj_point_dbl_18(&t[2], &t[2], tmp); - XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) + - ((size_t)&t[1] & addr_mask[y])), &t[2], - sizeof(sp_point_1024)); - } - - if (map != 0) { - sp_1024_map_18(r, &t[0], tmp); - 
} - else { - XMEMCPY(r, &t[0], sizeof(sp_point_1024)); - } - } - -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (tmp != NULL) -#endif - { - ForceZero(tmp, sizeof(sp_digit) * 2 * 18 * 5); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(tmp, heap, DYNAMIC_TYPE_ECC); - #endif - } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - if (t != NULL) -#endif - { - ForceZero(t, sizeof(sp_point_1024) * 3); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - XFREE(t, heap, DYNAMIC_TYPE_ECC); - #endif - } - - return err; -} - #else /* A table entry for pre-computed points. */ typedef struct sp_table_entry_1024 { @@ -37766,7 +45417,7 @@ static void sp_1024_cond_copy_18(sp_digit* r, const sp_digit* a, const sp_digit * n Number of times to double * t Temporary ordinate data. */ -static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, +static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, sp_digit* t) { sp_digit* w = t; @@ -37777,6 +45428,7 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, sp_digit* x; sp_digit* y; sp_digit* z; + volatile int n = i; x = p->x; y = p->y; @@ -37787,7 +45439,6 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, /* W = Z^4 */ sp_1024_mont_sqr_18(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_18(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -37805,9 +45456,12 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_18(t2, b, p1024_mod); sp_1024_mont_sub_18(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_18(t2, b, x, p1024_mod); + sp_1024_mont_dbl_18(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_18(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_18(t1, t1, p1024_mod, p1024_mp_mod); #ifdef WOLFSSL_SP_SMALL if (n != 0) @@ -37817,9 +45471,7 
@@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, sp_1024_mont_mul_18(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_18(y, b, x, p1024_mod); - sp_1024_mont_mul_18(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(y, y, p1024_mod); + sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(y, y, t1, p1024_mod); } #ifndef WOLFSSL_SP_SMALL @@ -37834,18 +45486,19 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int n, sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_18(t2, b, p1024_mod); sp_1024_mont_sub_18(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_18(t2, b, x, p1024_mod); + sp_1024_mont_dbl_18(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_18(z, z, y, p1024_mod, p1024_mp_mod); - /* t2 = Y^4 */ + /* t1 = Y^4 */ sp_1024_mont_sqr_18(t1, t1, p1024_mod, p1024_mp_mod); /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_18(y, b, x, p1024_mod); - sp_1024_mont_mul_18(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(y, y, p1024_mod); + sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_18(y, y, p1024_mod); + sp_1024_mont_div2_18(y, y, p1024_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -37893,30 +45546,30 @@ static void sp_1024_proj_point_dbl_n_store_18(sp_point_1024* r, sp_1024_mont_sub_18(t1, t1, w, p1024_mod); sp_1024_mont_tpl_18(a, t1, p1024_mod); /* B = X*Y^2 */ - sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(b, t2, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(t1, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(b, t1, x, p1024_mod, p1024_mp_mod); x = r[j].x; /* X = A^2 - 2B */ sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(t1, b, p1024_mod); - sp_1024_mont_sub_18(x, x, t1, p1024_mod); + sp_1024_mont_dbl_18(t2, b, p1024_mod); + sp_1024_mont_sub_18(x, x, t2, p1024_mod); + /* B = 2.(B - X) */ + sp_1024_mont_sub_18(t2, b, x, p1024_mod); + sp_1024_mont_dbl_18(b, t2, p1024_mod); /* Z = Z*Y */ sp_1024_mont_mul_18(r[j].z, z, y, p1024_mod, p1024_mp_mod); z = r[j].z; - /* t2 = Y^4 */ - sp_1024_mont_sqr_18(t2, t2, p1024_mod, p1024_mp_mod); + /* t1 = Y^4 */ + sp_1024_mont_sqr_18(t1, t1, p1024_mod, p1024_mp_mod); if (i != n) { /* W = W*Y^4 */ - sp_1024_mont_mul_18(w, w, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(w, w, t1, p1024_mod, p1024_mp_mod); } /* y = 2*A*(B - X) - Y^4 */ - sp_1024_mont_sub_18(y, b, x, p1024_mod); - sp_1024_mont_mul_18(y, y, a, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(y, y, p1024_mod); - sp_1024_mont_sub_18(y, y, t2, p1024_mod); - + sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_18(r[j].y, y, p1024_mod); + sp_1024_mont_div2_18(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -37939,30 +45592,30 @@ static void sp_1024_proj_point_add_sub_18(sp_point_1024* ra, sp_digit* t4 = t + 6*18; sp_digit* t5 = t + 8*18; sp_digit* t6 = t + 10*18; - sp_digit* x = ra->x; - sp_digit* y = ra->y; - sp_digit* z = ra->z; + sp_digit* xa = ra->x; + sp_digit* ya = ra->y; + sp_digit* za = ra->z; sp_digit* xs = rs->x; sp_digit* ys = rs->y; sp_digit* zs = rs->z; - XMEMCPY(x, p->x, sizeof(p->x) / 
2); - XMEMCPY(y, p->y, sizeof(p->y) / 2); - XMEMCPY(z, p->z, sizeof(p->z) / 2); + XMEMCPY(xa, p->x, sizeof(p->x) / 2); + XMEMCPY(ya, p->y, sizeof(p->y) / 2); + XMEMCPY(za, p->z, sizeof(p->z) / 2); ra->infinity = 0; rs->infinity = 0; /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t1, t1, x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t1, t1, xa, p1024_mod, p1024_mp_mod); /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t4, t2, z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(t2, za, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t4, t2, za, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_18(t3, t3, y, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t3, t3, ya, p1024_mod, p1024_mp_mod); /* S2 = Y2*Z1^3 */ sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - U1 */ @@ -37973,30 +45626,30 @@ static void sp_1024_proj_point_add_sub_18(sp_point_1024* ra, sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); /* Z3 = H*Z1*Z2 */ /* ZS = H*Z1*Z2 */ - sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(z, z, t2, p1024_mod, p1024_mp_mod); - XMEMCPY(zs, z, sizeof(p->z)/2); + sp_1024_mont_mul_18(za, za, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(za, za, t2, p1024_mod, p1024_mp_mod); + XMEMCPY(zs, za, sizeof(p->z)/2); /* X3 = R^2 - H^3 - 2*U1*H^2 */ /* XS = RS^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(xa, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_18(xs, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ya, t1, t5, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(x, x, 
t5, p1024_mod); + sp_1024_mont_sub_18(xa, xa, t5, p1024_mod); sp_1024_mont_sub_18(xs, xs, t5, p1024_mod); - sp_1024_mont_dbl_18(t1, y, p1024_mod); - sp_1024_mont_sub_18(x, x, t1, p1024_mod); + sp_1024_mont_dbl_18(t1, ya, p1024_mod); + sp_1024_mont_sub_18(xa, xa, t1, p1024_mod); sp_1024_mont_sub_18(xs, xs, t1, p1024_mod); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */ - sp_1024_mont_sub_18(ys, y, xs, p1024_mod); - sp_1024_mont_sub_18(y, y, x, p1024_mod); - sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(ys, ya, xs, p1024_mod); + sp_1024_mont_sub_18(ya, ya, xa, p1024_mod); + sp_1024_mont_mul_18(ya, ya, t4, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(t6, p1024_mod, t6, p1024_mod); sp_1024_mont_mul_18(ys, ys, t6, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(y, y, t5, p1024_mod); + sp_1024_mont_sub_18(ya, ya, t5, p1024_mod); sp_1024_mont_sub_18(ys, ys, t5, p1024_mod); } @@ -38100,12 +45753,12 @@ static void sp_1024_ecc_recode_7_18(const sp_digit* k, ecc_recode_1024* v) static int sp_1024_ecc_mulmod_win_add_sub_18(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; sp_digit* tmp = NULL; #else sp_point_1024 t[65+2]; - sp_digit tmp[2 * 18 * 6]; + sp_digit tmp[2 * 18 * 37]; #endif sp_point_1024* rt = NULL; sp_point_1024* p = NULL; @@ -38118,13 +45771,13 @@ static int sp_1024_ecc_mulmod_win_add_sub_18(sp_point_1024* r, const sp_point_10 (void)ct; (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * +#ifdef WOLFSSL_SP_SMALL_STACK + t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * (65+2), heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; if (err == MP_OKAY) { - tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 
18 * 6, + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 37, heap, DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; @@ -38218,7 +45871,7 @@ static int sp_1024_ecc_mulmod_win_add_sub_18(sp_point_1024* r, const sp_point_10 } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (tmp != NULL) @@ -38239,76 +45892,75 @@ static int sp_1024_ecc_mulmod_win_add_sub_18(sp_point_1024* r, const sp_point_10 * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { - const sp_point_1024* ap[2]; - sp_point_1024* rp[2]; - sp_digit* t1 = t; - sp_digit* t2 = t + 2*18; - sp_digit* t3 = t + 4*18; - sp_digit* t4 = t + 6*18; - sp_digit* t5 = t + 8*18; - sp_digit* x; - sp_digit* y; - sp_digit* z; - int i; + sp_digit* t2 = t; + sp_digit* t3 = t + 2*18; + sp_digit* t6 = t + 4*18; + sp_digit* t1 = t + 6*18; + sp_digit* t4 = t + 8*18; + sp_digit* t5 = t + 10*18; - /* Check double */ - (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_18(t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. 
*/ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(p->x, t2) & + sp_1024_cmp_equal_18(p->y, t4)) { sp_1024_proj_point_dbl_18(r, p, t); } else { - rp[0] = r; + sp_digit* x = t2; + sp_digit* y = t3; + sp_digit* z = t6; - /*lint allow cast to different type of pointer*/ - rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/ - XMEMSET(rp[1], 0, sizeof(sp_point_1024)); - x = rp[p->infinity | q->infinity]->x; - y = rp[p->infinity | q->infinity]->y; - z = rp[p->infinity | q->infinity]->z; - - ap[0] = p; - ap[1] = q; - for (i=0; i<18; i++) { - r->x[i] = ap[p->infinity]->x[i]; - } - for (i=0; i<18; i++) { - r->y[i] = ap[p->infinity]->y[i]; - } - for (i=0; i<18; i++) { - r->z[i] = ap[p->infinity]->z[i]; - } - r->infinity = ap[p->infinity]->infinity; - - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t4, t2, z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ - sp_1024_mont_sub_18(t2, t2, x, p1024_mod); + sp_1024_mont_sub_18(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ - sp_1024_mont_sub_18(t4, t4, y, p1024_mod); + sp_1024_mont_sub_18(t4, t4, p->y, p1024_mod); /* Z3 = H*Z1 */ - sp_1024_mont_mul_18(z, z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); /* X3 = R^2 - H^3 - 2*X1*H^2 */ - sp_1024_mont_sqr_18(t1, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t3, x, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(x, t1, t5, p1024_mod); - 
sp_1024_mont_dbl_18(t1, t3, p1024_mod); - sp_1024_mont_sub_18(x, x, t1, p1024_mod); + sp_1024_mont_sqr_18(t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t3, p->x, t1, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t1, t1, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(t2, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); + sp_1024_mont_dbl_18(t5, t3, p1024_mod); + sp_1024_mont_sub_18(x, t2, t5, p1024_mod); /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */ sp_1024_mont_sub_18(t3, t3, x, p1024_mod); sp_1024_mont_mul_18(t3, t3, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, y, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(y, t3, t5, p1024_mod); + sp_1024_mont_mul_18(t1, t1, p->y, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(y, t3, t1, p1024_mod); + { + int i; + sp_digit maskp = 0 - (q->infinity & (!p->infinity)); + sp_digit maskq = 0 - (p->infinity & (!q->infinity)); + sp_digit maskt = ~(maskp | maskq); + sp_digit inf = (sp_digit)(p->infinity & q->infinity); + + for (i = 0; i < 18; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= inf; + r->infinity = (word32)inf; + } } } @@ -38349,7 +46001,7 @@ static void sp_1024_proj_to_affine_18(sp_point_1024* a, sp_digit* t) static int sp_1024_gen_stripe_table_18(const sp_point_1024* a, sp_table_entry_1024* table, sp_digit* tmp, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* t = NULL; #else sp_point_1024 t[3]; @@ -38362,7 +46014,7 @@ static int sp_1024_gen_stripe_table_18(const sp_point_1024* a, (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, 
heap, DYNAMIC_TYPE_ECC); if (t == NULL) @@ -38417,7 +46069,7 @@ static int sp_1024_gen_stripe_table_18(const sp_point_1024* a, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); #endif @@ -38446,12 +46098,12 @@ static int sp_1024_ecc_mulmod_stripe_18(sp_point_1024* r, const sp_point_1024* g const sp_table_entry_1024* table, const sp_digit* k, int map, int ct, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* rt = NULL; sp_digit* t = NULL; #else sp_point_1024 rt[2]; - sp_digit t[2 * 18 * 5]; + sp_digit t[2 * 18 * 37]; #endif sp_point_1024* p = NULL; int i; @@ -38466,13 +46118,13 @@ static int sp_1024_ecc_mulmod_stripe_18(sp_point_1024* r, const sp_point_1024* g (void)heap; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK rt = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (rt == NULL) err = MEMORY_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 5, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 37, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -38517,7 +46169,7 @@ static int sp_1024_ecc_mulmod_stripe_18(sp_point_1024* r, const sp_point_1024* g } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (rt != NULL) @@ -38560,7 +46212,7 @@ static THREAD_LS_T int sp_cache_1024_inited = 0; /* Get the cache entry for the point. * - * g [in] Point scalar multipling. + * g [in] Point scalar multiplying. * cache [out] Cache table to use. */ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache) @@ -38631,23 +46283,36 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_18(r, g, k, map, ct, heap); #else - sp_digit tmp[2 * 18 * 5]; +#ifdef WOLFSSL_SP_SMALL_STACK + sp_digit* tmp; +#else + sp_digit tmp[2 * 18 * 38]; +#endif sp_cache_1024_t* cache; int err = MP_OKAY; -#ifndef HAVE_THREAD_LS - if (initCacheMutex_1024 == 0) { - wc_InitMutex(&sp_cache_1024_lock); - initCacheMutex_1024 = 1; +#ifdef WOLFSSL_SP_SMALL_STACK + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 18 * 38, heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + err = MEMORY_E; + } +#endif +#ifndef HAVE_THREAD_LS + if (err == MP_OKAY) { + if (initCacheMutex_1024 == 0) { + wc_InitMutex(&sp_cache_1024_lock); + initCacheMutex_1024 = 1; + } + if (wc_LockMutex(&sp_cache_1024_lock) != 0) { + err = BAD_MUTEX_E; + } } - if (wc_LockMutex(&sp_cache_1024_lock) != 0) - err = BAD_MUTEX_E; #endif /* HAVE_THREAD_LS */ if (err == MP_OKAY) { @@ -38668,6 +46333,9 @@ static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, const } } +#ifdef WOLFSSL_SP_SMALL_STACK + XFREE(tmp, heap, DYNAMIC_TYPE_ECC); +#endif return err; #endif } @@ -38686,7 +46354,7 @@ static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, const int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -38695,7 +46363,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK 
point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -38718,7 +46386,7 @@ int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r, err = sp_1024_point_to_ecc_point_18(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -38745,6 +46413,16 @@ static int sp_1024_ecc_mulmod_base_18(sp_point_1024* r, const sp_digit* k, return sp_1024_ecc_mulmod_18(r, &p1024_base, k, map, ct, heap); } +#ifdef WOLFSSL_SP_NONBLOCK +static int sp_1024_ecc_mulmod_base_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_digit* k, int map, int ct, void* heap) +{ + /* No pre-computed values. */ + return sp_1024_ecc_mulmod_18_nb(sp_ctx, r, &p1024_base, k, map, ct, heap); +} +#endif /* WOLFSSL_SP_NONBLOCK */ + + #else /* Striping precomputation table. * 8 points combined into a table of 256 points. @@ -42108,7 +49786,7 @@ static int sp_1024_ecc_mulmod_base_18(sp_point_1024* r, const sp_digit* k, */ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -42117,7 +49795,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) @@ -42139,7 +49817,7 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) err = sp_1024_point_to_ecc_point_18(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42153,7 +49831,7 @@ int 
sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) * the result. If map is true then convert result to affine coordinates. * * km Scalar to multiply by. - * am Point to add to scalar mulitply result. + * am Point to add to scalar multiply result. * inMont Point to add is in montgomery form. * r Resulting point. * map Indicates whether to convert result to affine. @@ -42163,25 +49841,25 @@ int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap) int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int inMont, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else sp_point_1024 point[2]; - sp_digit k[18 + 18 * 2 * 5]; + sp_digit k[18 + 18 * 2 * 37]; #endif sp_point_1024* addP = NULL; sp_digit* tmp = NULL; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) - point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, +#ifdef WOLFSSL_SP_SMALL_STACK + point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); if (point == NULL) err = MEMORY_E; if (err == MP_OKAY) { k = (sp_digit*)XMALLOC( - sizeof(sp_digit) * (18 + 18 * 2 * 5), + sizeof(sp_digit) * (18 + 18 * 2 * 37), heap, DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; @@ -42217,7 +49895,7 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, err = sp_1024_point_to_ecc_point_18(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point) @@ -42240,12 +49918,12 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK 
sp_point_1024* point = NULL; sp_digit* t = NULL; #else sp_point_1024 point[1]; - sp_digit t[5 * 2 * 18]; + sp_digit t[38 * 2 * 18]; #endif int err = MP_OKAY; @@ -42261,7 +49939,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = BUFFER_E; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); @@ -42269,7 +49947,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, err = MEMORY_E; } if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 18, heap, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 2 * 18, heap, DYNAMIC_TYPE_ECC); if (t == NULL) err = MEMORY_E; @@ -42285,7 +49963,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, *len = sizeof(sp_table_entry_1024) * 256; } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t != NULL) XFREE(t, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42343,7 +50021,7 @@ int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len, int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, ecc_point* r, int map, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* point = NULL; sp_digit* k = NULL; #else @@ -42352,7 +50030,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, #endif int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC); if (point == NULL) { @@ -42381,7 +50059,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, err = sp_1024_point_to_ecc_point_18(point, r); } -#if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (k != NULL) XFREE(k, heap, DYNAMIC_TYPE_ECC); if (point != NULL) @@ -42391,7 +50069,7 @@ int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table, return err; } -/* Multiply p* in projective co-ordinates by q*. +/* Multiply p* in projective coordinates by q*. * * r.x = p.x - (p.y * q.y) * r.y = (p.x * q.y) + p.y @@ -42417,7 +50095,7 @@ static void sp_1024_proj_mul_qx1_18(sp_digit* px, sp_digit* py, sp_1024_mont_add_18(py, t1, py, p1024_mod); } -/* Square p* in projective co-ordinates. +/* Square p* in projective coordinates. * * px' = (p.x + p.y) * (p.x - p.y) = p.x^2 - p.y^2 * py' = 2 * p.x * p.y @@ -42456,8 +50134,8 @@ static void sp_1024_proj_sqr_18(sp_digit* px, sp_digit* py, sp_digit* t) */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; @@ -42465,7 +50143,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) sp_digit* b; sp_digit* e; #else - sp_digit t[4 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_digit tx[2 * 18]; sp_digit ty[2 * 18]; sp_digit b[2 * 18]; @@ -42476,9 +50154,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) int bits; int i; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 18 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 40 * 18 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -42486,13 +50164,13 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { 
-#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 18 * 2; - ty = td + 5 * 18 * 2; - b = td + 6 * 18 * 2; - e = td + 7 * 18 * 2; + tx = td + 36 * 18 * 2; + ty = td + 37 * 18 * 2; + b = td + 38 * 18 * 2; + e = td + 39 * 18 * 2; #endif r = ty; @@ -42530,8 +50208,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -44099,14 +51777,14 @@ static const sp_digit sp_1024_g_table[256][18] = { */ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td; sp_digit* t; sp_digit* tx; sp_digit* ty; #else - sp_digit t[4 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_digit tx[2 * 18]; sp_digit ty[2 * 18]; #endif @@ -44118,9 +51796,9 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) (void)base; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 18 * 2, NULL, +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 38 * 18 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -44128,11 +51806,11 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) #endif if (err == MP_OKAY) { -#if 
(defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - tx = td + 4 * 18 * 2; - ty = td + 5 * 18 * 2; + tx = td + 36 * 18 * 2; + ty = td + 37 * 18 * 2; #endif r = ty; @@ -44172,8 +51850,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -44182,7 +51860,7 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) } #endif /* WOLFSSL_SP_SMALL */ -/* Multiply p* by q* in projective co-ordinates. +/* Multiply p* by q* in projective coordinates. * * p.x' = (p.x * q.x) - (p.y * q.y) * p.y' = (p.x * q.y) + (p.y * q.x) @@ -44297,7 +51975,7 @@ static void sp_1024_accumulate_line_dbl_18(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_18(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_18(t1, ty, p1024_mod); + sp_1024_mont_div2_18(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_18(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -44317,7 +51995,7 @@ static void sp_1024_accumulate_line_dbl_18(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_18(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_18(t1, t1, p1024_mod); + sp_1024_mont_div2_18(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_18(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -44443,15 +52121,15 @@ static void sp_1024_accumulate_line_add_one_18(sp_digit* vx, sp_digit* vy, int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err 
= MP_OKAY; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; sp_digit* vy; sp_digit* qx_px; #else - sp_digit t[6 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_digit vx[2 * 18]; sp_digit vy[2 * 18]; sp_digit qx_px[2 * 18]; @@ -44473,10 +52151,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_18(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 18 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 18 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -44485,12 +52163,12 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 18 * 2; - vy = td + 7 * 18 * 2; - qx_px = td + 8 * 18 * 2; + vx = td + 36 * 18 * 2; + vy = td + 37 * 18 * 2; + qx_px = td + 38 * 18 * 2; #endif r = vy; @@ -44542,8 +52220,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -44735,7 +52413,7 @@ static void sp_1024_accumulate_line_dbl_n_18(sp_digit* vx, sp_digit* vy, 
/* ty = py ^ 2 */ sp_1024_mont_sqr_18(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_18(t1, ty, p1024_mod); + sp_1024_mont_div2_18(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_18(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -44773,7 +52451,7 @@ static void sp_1024_accumulate_line_dbl_n_18(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_18(p->y, p->y, p1024_mod); + sp_1024_mont_div2_18(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -44821,8 +52499,8 @@ static const signed char sp_1024_order_op[] = { int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) { int err; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -44832,7 +52510,7 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) sp_digit (*pre_nvy)[36]; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_digit vx[2 * 18]; sp_digit vy[2 * 18]; sp_digit pre_vx[16][36]; @@ -44858,10 +52536,10 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_point_new_18(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 18 * 2 + 16 * sizeof(sp_point_1024), NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 18 * 2 + 16 * sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -44870,15 +52548,15 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || 
defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 18 * 2; - vy = td + 7 * 18 * 2; - pre_vx = (sp_digit(*)[36])(td + 8 * 18 * 2); - pre_vy = (sp_digit(*)[36])(td + 24 * 18 * 2); - pre_nvy = (sp_digit(*)[36])(td + 40 * 18 * 2); - pre_p = (sp_point_1024*)(td + 56 * 18 * 2); + vx = td + 36 * 18 * 2; + vy = td + 37 * 18 * 2; + pre_vx = (sp_digit(*)[36])(td + 38 * 18 * 2); + pre_vy = (sp_digit(*)[36])(td + 54 * 18 * 2); + pre_nvy = (sp_digit(*)[36])(td + 70 * 18 * 2); + pre_p = (sp_point_1024*)(td + 86 * 18 * 2); #endif r = vy; @@ -44969,8 +52647,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45053,10 +52731,9 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, static void sp_1024_accum_dbl_calc_lc_18(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 18; - sp_digit* t2 = t + 2 * 2 * 18; - sp_digit* l = t + 4 * 2 * 18; - + sp_digit* t1 = t + 33 * 2 * 18; + sp_digit* t2 = t + 34 * 2 * 18; + sp_digit* l = t + 35 * 2 * 18; /* l = 1 / 2 * p.y */ sp_1024_mont_dbl_18(l, py, p1024_mod); @@ -45098,10 +52775,9 @@ static void sp_1024_accum_add_calc_lc_18(sp_digit* lr, sp_digit* cr, const sp_digit* px, const sp_digit* py, const sp_digit* cx, const sp_digit* cy, sp_digit* t) { - sp_digit* t1 = t + 0 * 2 * 18; - sp_digit* c = t + 2 * 2 * 18; - sp_digit* l = t + 4 * 2 * 18; - + sp_digit* t1 = t + 33 * 2 * 18; + sp_digit* c = t + 34 * 2 * 18; + sp_digit* l = t + 35 * 2 * 18; /* l = 1 / (c.x - p.x) */ sp_1024_mont_sub_18(l, cx, 
px, p1024_mod); @@ -45212,13 +52888,13 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_point_1024* pre_p; #else - sp_digit t[6 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_point_1024 pre_p[16]; sp_point_1024 pd; sp_point_1024 cd; @@ -45252,11 +52928,11 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, err = sp_1024_point_new_18(NULL, negd, neg); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 18 * 2 + 16 * sizeof(sp_point_1024), NULL, - DYNAMIC_TYPE_TMP_BUFFER); + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 18 * 2 + 16 * + sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; } @@ -45264,10 +52940,10 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - pre_p = (sp_point_1024*)(td + 6 * 18 * 2); + pre_p = (sp_point_1024*)(td + 36 * 18 * 2); #endif sp_1024_point_from_ecc_point_18(p, pm); @@ -45298,7 +52974,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024)); for (j = 0; j < sp_1024_order_op_pre[1]; j++) { - sp_1024_accum_dbl_calc_lc_18(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_18(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; 
sp_1024_proj_point_dbl_18(c, c, t); sp_1024_mont_map_18(c, t); @@ -45327,7 +53004,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, } for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) { - sp_1024_accum_dbl_calc_lc_18(precomp[k].x, precomp[k].y, c->x, c->y, t); + sp_1024_accum_dbl_calc_lc_18(precomp[k].x, precomp[k].y, c->x, + c->y, t); k++; sp_1024_proj_point_dbl_18(c, c, t); sp_1024_mont_map_18(c, t); @@ -45337,8 +53015,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, *len = sizeof(sp_table_entry_1024) * 1167; } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45372,8 +53050,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res, const byte* table, word32 len) { int err = 0; -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) sp_digit* td = NULL; sp_digit* t; sp_digit* vx; @@ -45382,7 +53060,7 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, sp_digit (*pre_vy)[36]; sp_digit (*pre_nvy)[36]; #else - sp_digit t[6 * 2 * 18]; + sp_digit t[36 * 2 * 18]; sp_digit vx[2 * 18]; sp_digit vy[2 * 18]; sp_digit pre_vx[16][36]; @@ -45415,10 +53093,10 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_point_new_18(NULL, cd, c); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (err == MP_OKAY) { - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 18 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 86 * 18 
* 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) { err = MEMORY_E; @@ -45427,14 +53105,14 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, #endif if (err == MP_OKAY) { -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) t = td; - vx = td + 6 * 18 * 2; - vy = td + 7 * 18 * 2; - pre_vx = (sp_digit(*)[36])(td + 8 * 18 * 2); - pre_vy = (sp_digit(*)[36])(td + 24 * 18 * 2); - pre_nvy = (sp_digit(*)[36])(td + 40 * 18 * 2); + vx = td + 36 * 18 * 2; + vy = td + 37 * 18 * 2; + pre_vx = (sp_digit(*)[36])(td + 38 * 18 * 2); + pre_vy = (sp_digit(*)[36])(td + 54 * 18 * 2); + pre_nvy = (sp_digit(*)[36])(td + 70 * 18 * 2); #endif r = vy; @@ -45532,8 +53210,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, err = sp_1024_to_mp(r, res); } -#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \ - !defined(WOLFSSL_SP_NO_MALLOC) +#if (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_MALLOC)) || \ + defined(WOLFSSL_SP_SMALL_STACK) if (td != NULL) { XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); } @@ -45545,19 +53223,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_18(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. * @@ -45594,7 +53259,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) } } -/* Check that the x and y oridinates are a valid point on the curve. 
+/* Check that the x and y ordinates are a valid point on the curve. * * point EC point. * heap Heap to use if dynamically allocating. @@ -45604,7 +53269,7 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n) static int sp_1024_ecc_is_point_18(const sp_point_1024* point, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* t1 = NULL; #else sp_digit t1[18 * 4]; @@ -45613,7 +53278,7 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, sp_int64 n; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 4, heap, DYNAMIC_TYPE_ECC); if (t1 == NULL) err = MEMORY_E; @@ -45623,29 +53288,30 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 18; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_18(t1, point->y); (void)sp_1024_mod_18(t1, t1, p1024_mod); sp_1024_sqr_18(t2, point->x); (void)sp_1024_mod_18(t2, t2, p1024_mod); sp_1024_mul_18(t2, t2, point->x); (void)sp_1024_mod_18(t2, t2, p1024_mod); - (void)sp_1024_sub_18(t2, p1024_mod, t2); - sp_1024_mont_add_18(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_18(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_18(t1, p1024_mod); - sp_1024_cond_sub_18(t1, t1, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(t1, t1, p1024_mod, ~(n >> 56)); sp_1024_norm_18(t1); if (!sp_1024_iszero_18(t1)) { err = MP_VAL; } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (t1 != NULL) XFREE(t1, heap, DYNAMIC_TYPE_ECC); #endif @@ -45653,7 +53319,7 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, return err; } -/* Check that the x and y oridinates are a valid point on the curve. +/* Check that the x and y ordinates are a valid point on the curve. * * pX X ordinate of EC point. * pY Y ordinate of EC point. @@ -45662,7 +53328,7 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, */ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_point_1024* pub = NULL; #else sp_point_1024 pub[1]; @@ -45670,7 +53336,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) const byte one[1] = { 1 }; int err = MP_OKAY; -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), NULL, DYNAMIC_TYPE_ECC); if (pub == NULL) @@ -45685,7 +53351,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) err = sp_1024_ecc_is_point_18(pub, NULL); } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, NULL, DYNAMIC_TYPE_ECC); #endif @@ -45707,7 +53373,7 @@ int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY) int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap) { -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK sp_digit* priv = NULL; sp_point_1024* pub = NULL; #else @@ -45728,7 +53394,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, err = ECC_OUT_OF_RANGE_E; } -#if 
defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (err == MP_OKAY) { pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap, DYNAMIC_TYPE_ECC); @@ -45794,7 +53460,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } } -#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) +#ifdef WOLFSSL_SP_SMALL_STACK if (pub != NULL) XFREE(pub, heap, DYNAMIC_TYPE_ECC); if (priv != NULL) @@ -45805,6 +53471,7 @@ int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, } #endif #endif /* WOLFSSL_SP_1024 */ +#endif /* WOLFCRYPT_HAVE_SAKKE */ #endif /* WOLFSSL_HAVE_SP_ECC */ #endif /* SP_WORD_SIZE == 64 */ #endif /* !WOLFSSL_SP_ASM */ diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index 9207cc595..15afa380a 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -38,6 +38,27 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #ifndef NO_AES +#ifdef HAVE_AESGCM +typedef struct Gcm { + ALIGN16 byte H[16]; +#ifdef OPENSSL_EXTRA + word32 aadH[4]; /* additional authenticated data GHASH */ + word32 aadLen; /* additional authenticated data len */ +#endif +#ifdef GCM_TABLE + /* key-based fast multiplication table. 
*/ + ALIGN16 byte M0[256][16]; +#elif defined(GCM_TABLE_4BIT) + #if defined(BIG_ENDIAN_ORDER) || defined(WC_16BIT_CPU) + ALIGN16 byte M0[16][16]; + #else + ALIGN16 byte M0[32][16]; + #endif +#endif /* GCM_TABLE */ +} Gcm; + +#endif + #if defined(HAVE_FIPS) && \ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) #include @@ -177,6 +198,7 @@ struct Aes { #endif #ifdef HAVE_AESGCM ALIGN16 byte H[AES_BLOCK_SIZE]; + Gcm gcm; #ifdef OPENSSL_EXTRA word32 aadH[4]; /* additional authenticated data GHASH */ word32 aadLen; /* additional authenticated data len */ @@ -377,8 +399,13 @@ WOLFSSL_API int wc_AesEcbDecrypt(Aes* aes, byte* out, WOLFSSL_API __must_check int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in); WOLFSSL_API __must_check int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in); #else - WOLFSSL_API void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in); - WOLFSSL_API void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in); + #ifndef WOLFSSL_ARMASM + WOLFSSL_API void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in); + WOLFSSL_API void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in); + #else + WOLFSSL_API int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in); + WOLFSSL_API int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in); + #endif #endif WOLFSSL_API int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len, const byte* iv, int dir); @@ -449,8 +476,10 @@ WOLFSSL_API int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, const byte* authIn, word32 authInSz, const byte* authTag, word32 authTagSz); #endif /* WC_NO_RNG */ - WOLFSSL_LOCAL void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, +#ifndef WOLFSSL_ARMASM +WOLFSSL_LOCAL void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, word32 cSz, byte* s, word32 sSz); +#endif #endif /* HAVE_AESGCM */ #ifdef HAVE_AESCCM WOLFSSL_LOCAL int wc_AesCcmCheckTagSize(int sz); diff --git a/wolfssl/wolfcrypt/sha512.h 
b/wolfssl/wolfcrypt/sha512.h index 5fe2c1455..7cd520eed 100644 --- a/wolfssl/wolfcrypt/sha512.h +++ b/wolfssl/wolfcrypt/sha512.h @@ -204,6 +204,22 @@ struct wc_Sha512 { #ifdef WOLFSSL_SHA512 +#ifdef WOLFSSL_ARMASM +#ifdef __aarch64__ +#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512 + void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, + word32 len); + #define Transform_Sha512_Len Transform_Sha512_Len_neon +#else + void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, + word32 len); + #define Transform_Sha512_Len Transform_Sha512_Len_crypto +#endif +#else +extern void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, + word32 len); +#endif +#endif WOLFSSL_API int wc_InitSha512(wc_Sha512*); WOLFSSL_API int wc_InitSha512_ex(wc_Sha512*, void*, int);